fix(server): Use cleanup_tokenization_spaces=False for lossless decoding (#13)
Fixes #12 in the easiest way I could think of.
parent 60472f9d2b
commit b94f30215f
```diff
@@ -354,7 +354,8 @@ class CausalLM(Model):
         if stop:
             # Decode all tokens
             output_text = self.tokenizer.decode(
-                all_input_ids.squeeze(-1), skip_special_tokens=True
+                all_input_ids.squeeze(-1), skip_special_tokens=True,
+                cleanup_tokenization_spaces=False
             )
             # Slice with input_length to remove padding
             token_ids = all_input_ids[-new_input_length:]
```
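For context, a minimal sketch (not part of the commit) of why disabling the clean-up pass matters for lossless decoding. It assumes the `transformers` library and the public `gpt2` checkpoint; note that in `transformers` the decode parameter is spelled `clean_up_tokenization_spaces`. With clean-up enabled (historically the default), normalization rules such as collapsing " ." into "." can make decode(encode(text)) differ from the original text:

```python
# Sketch: round-tripping text through a tokenizer with and without the
# clean-up pass. Assumes the `transformers` library and the public
# `gpt2` checkpoint; both are illustrative, not taken from the diff.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Deliberate space before the period; clean-up rules normally remove it.
text = "hello ."
ids = tokenizer.encode(text)

# With clean-up on, the space is dropped: 'hello.'
print(repr(tokenizer.decode(ids, clean_up_tokenization_spaces=True)))

# With clean-up off, the round trip is lossless: 'hello .'
print(repr(tokenizer.decode(ids, clean_up_tokenization_spaces=False)))
```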