update commit
This commit is contained in:
parent
ef99678798
commit
a42dc2027b
|
@ -1,4 +1,4 @@
|
|||
flash_att_v2_commit_cuda := 54e80a3829c6d2337570d01e78ebd9529c02d342
|
||||
flash_att_v2_commit_cuda := 87a1277653fc55cd615f5341255e00c69d5c00a1
|
||||
flash_att_v2_commit_rocm := 8736558c287ff2ef28b24878e42828c595ac3e69
|
||||
|
||||
|
||||
|
|
|
@ -127,8 +127,6 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
|
|||
)
|
||||
|
||||
async def Decode(self, request, context):
|
||||
from torch.profiler import profile, ProfilerActivity
|
||||
|
||||
start = time.time_ns()
|
||||
if len(request.batches) == 0:
|
||||
raise ValueError("Must provide at least one batch")
|
||||
|
@ -151,9 +149,7 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
|
|||
batch = batches[0]
|
||||
concat_ns = None
|
||||
|
||||
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prefill_prof:
|
||||
generations, next_batch, timings = self.model.generate_token(batch)
|
||||
prefill_prof.export_chrome_trace("new_decode.json")
|
||||
self.cache.set(next_batch)
|
||||
|
||||
return generate_pb2.DecodeResponse(
|
||||
|
|
Loading…
Reference in New Issue