update commit

This commit is contained in:
OlivierDehaene 2024-02-27 11:24:07 +01:00
parent ef99678798
commit a42dc2027b
2 changed files with 1 addition and 5 deletions

View File

@@ -1,4 +1,4 @@
-flash_att_v2_commit_cuda := 54e80a3829c6d2337570d01e78ebd9529c02d342
+flash_att_v2_commit_cuda := 87a1277653fc55cd615f5341255e00c69d5c00a1
flash_att_v2_commit_rocm := 8736558c287ff2ef28b24878e42828c595ac3e69

View File

@@ -127,8 +127,6 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
)
async def Decode(self, request, context):
from torch.profiler import profile, ProfilerActivity
start = time.time_ns()
if len(request.batches) == 0:
raise ValueError("Must provide at least one batch")
@@ -151,9 +149,7 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
batch = batches[0]
concat_ns = None
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prefill_prof:
generations, next_batch, timings = self.model.generate_token(batch)
prefill_prof.export_chrome_trace("new_decode.json")
self.cache.set(next_batch)
return generate_pb2.DecodeResponse(