fix(server): Use cleanup_tokenization_spaces=False for lossless decoding (#13)
Fixes #12 in the easiest way I could think of.
parent 60472f9d2b
commit b94f30215f
@@ -354,7 +354,8 @@ class CausalLM(Model):
            if stop:
                # Decode all tokens
                output_text = self.tokenizer.decode(
-                    all_input_ids.squeeze(-1), skip_special_tokens=True
+                    all_input_ids.squeeze(-1), skip_special_tokens=True,
+                    cleanup_tokenization_spaces=False
                )
                # Slice with input_length to remove padding
                token_ids = all_input_ids[-new_input_length:]
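The flag matters because the tokenizer's default cleanup step collapses spaces around punctuation during decoding, so decode(encode(text)) is not guaranteed to return text unchanged. Below is a minimal sketch of that behavior, not part of this commit; it assumes the transformers library with the standard "gpt2" checkpoint, and uses the clean_up_tokenization_spaces keyword as spelled in the Transformers decode signature.

# Sketch: how cleanup makes decoding lossy (assumes `transformers` is installed).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
ids = tokenizer("Hello , world !").input_ids

# With cleanup enabled (the historical default), spaces before
# punctuation are collapsed, so the round trip is lossy:
print(tokenizer.decode(ids, clean_up_tokenization_spaces=True))
# -> "Hello, world!"

# With cleanup disabled, the original spacing is reproduced exactly:
print(tokenizer.decode(ids, clean_up_tokenization_spaces=False))
# -> "Hello , world !"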