From a42dc2027b4b4fe4501c2a0601b1a6328c5e6611 Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Tue, 27 Feb 2024 11:24:07 +0100
Subject: [PATCH] update commit

---
 server/Makefile-flash-att-v2            | 2 +-
 server/text_generation_server/server.py | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/server/Makefile-flash-att-v2 b/server/Makefile-flash-att-v2
index 041564a7..f4d6e24e 100644
--- a/server/Makefile-flash-att-v2
+++ b/server/Makefile-flash-att-v2
@@ -1,4 +1,4 @@
-flash_att_v2_commit_cuda := 54e80a3829c6d2337570d01e78ebd9529c02d342
+flash_att_v2_commit_cuda := 87a1277653fc55cd615f5341255e00c69d5c00a1

 flash_att_v2_commit_rocm := 8736558c287ff2ef28b24878e42828c595ac3e69

diff --git a/server/text_generation_server/server.py b/server/text_generation_server/server.py
index c5d0affa..fbffe526 100644
--- a/server/text_generation_server/server.py
+++ b/server/text_generation_server/server.py
@@ -127,8 +127,6 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
         )

     async def Decode(self, request, context):
-        from torch.profiler import profile, ProfilerActivity
-
         start = time.time_ns()
         if len(request.batches) == 0:
             raise ValueError("Must provide at least one batch")
@@ -151,9 +149,7 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
         batch = batches[0]
         concat_ns = None

-        with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prefill_prof:
         generations, next_batch, timings = self.model.generate_token(batch)
-        prefill_prof.export_chrome_trace("new_decode.json")
         self.cache.set(next_batch)

         return generate_pb2.DecodeResponse(