From 80a69204c1752fb8d754405b1d63cd345fa85e3e Mon Sep 17 00:00:00 2001 From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:01:43 +0100 Subject: [PATCH] fix: slice stopping criteria buffer --- server/text_generation_server/utils/tokens.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/server/text_generation_server/utils/tokens.py b/server/text_generation_server/utils/tokens.py index ff0556df..53722fec 100644 --- a/server/text_generation_server/utils/tokens.py +++ b/server/text_generation_server/utils/tokens.py @@ -112,7 +112,7 @@ class StoppingCriteria: self.stop_sequence_criterias = stop_sequence_criterias self.max_new_tokens = max_new_tokens self.current_tokens = 0 - self.current_output = "" + self.current_output = "test" self.ignore_eos_token = ignore_eos_token def __call__(self, last_token: int, last_output: str) -> Tuple[bool, Optional[str]]: @@ -124,6 +124,10 @@ class StoppingCriteria: return True, FinishReason.FINISH_REASON_EOS_TOKEN self.current_output += last_output + # There is no need to keep an output that is too long + if len(self.current_output) > 300: + # Slice to -200 to avoid doing it all the time + self.current_output = self.current_output[-200:] for stop_sequence_criteria in self.stop_sequence_criterias: if stop_sequence_criteria(self.current_output): return True, FinishReason.FINISH_REASON_STOP_SEQUENCE