update commit
This commit is contained in:
parent
ef99678798
commit
a42dc2027b
|
@ -1,4 +1,4 @@
|
||||||
flash_att_v2_commit_cuda := 54e80a3829c6d2337570d01e78ebd9529c02d342
|
flash_att_v2_commit_cuda := 87a1277653fc55cd615f5341255e00c69d5c00a1
|
||||||
flash_att_v2_commit_rocm := 8736558c287ff2ef28b24878e42828c595ac3e69
|
flash_att_v2_commit_rocm := 8736558c287ff2ef28b24878e42828c595ac3e69
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -127,8 +127,6 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
|
||||||
)
|
)
|
||||||
|
|
||||||
async def Decode(self, request, context):
|
async def Decode(self, request, context):
|
||||||
from torch.profiler import profile, ProfilerActivity
|
|
||||||
|
|
||||||
start = time.time_ns()
|
start = time.time_ns()
|
||||||
if len(request.batches) == 0:
|
if len(request.batches) == 0:
|
||||||
raise ValueError("Must provide at least one batch")
|
raise ValueError("Must provide at least one batch")
|
||||||
|
@ -151,9 +149,7 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
|
||||||
batch = batches[0]
|
batch = batches[0]
|
||||||
concat_ns = None
|
concat_ns = None
|
||||||
|
|
||||||
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prefill_prof:
|
|
||||||
generations, next_batch, timings = self.model.generate_token(batch)
|
generations, next_batch, timings = self.model.generate_token(batch)
|
||||||
prefill_prof.export_chrome_trace("new_decode.json")
|
|
||||||
self.cache.set(next_batch)
|
self.cache.set(next_batch)
|
||||||
|
|
||||||
return generate_pb2.DecodeResponse(
|
return generate_pb2.DecodeResponse(
|
||||||
|
|
Loading…
Reference in New Issue