fix: only use eos_token_id as pad_token_id if int (#2774)
Llama 3 has a list of values as eos_token_id: "['<|end_of_text|>', '<|eom_id|>', '<|eot_id|>']". This breaks the tokenizer, since it expects a single value for pad_token_id. In such a case, this commit falls back to tokenizer.eos_token_id instead. Fixes: #2440 Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
This commit is contained in:
parent
2c74c55637
commit
535149d872
|
@ -630,7 +630,7 @@ class CausalLM(Model):
|
|||
if tokenizer.pad_token_id is None:
|
||||
if model.config.pad_token_id is not None:
|
||||
tokenizer.pad_token_id = model.config.pad_token_id
|
||||
elif model.config.eos_token_id is not None:
|
||||
elif model.config.eos_token_id is not None and isinstance(model.config.eos_token_id, int):
|
||||
tokenizer.pad_token_id = model.config.eos_token_id
|
||||
elif tokenizer.eos_token_id is not None:
|
||||
tokenizer.pad_token_id = tokenizer.eos_token_id
|
||||
|
|
Loading…
Reference in New Issue