fix: avoid frequency and repetition penalty on padding tokens (#1765)
This PR resolves an issue with the penalty processors during batched generation, where padding tokens incorrectly contribute to the penalty scores. Generation is impacted whenever at least one item in the batch includes a `frequency_penalty`. Reproduction script below:

```python
import requests
from concurrent import futures

headers = {
    "Content-Type": "application/json",
}

json_data = {
    "inputs": "[INST] Whats the capitol of France? [/INST]",
    "parameters": {
        "max_new_tokens": 100,
        "seed": 20,
        "do_sample": False,
    },
}

json_data2 = {
    "inputs": "<s>[INST]Write a mind bending story: I saw a puppy a cat a rat and a raccoon during my bike ride in the park[/INST]",
    "parameters": {
        "max_new_tokens": 100,
        "seed": 2,
        "do_sample": False,
        # OFFENDING LINE
        "frequency_penalty": 1.05,
    },
}

base_url = "http://localhost:3000/generate"


def req():
    response = requests.post(base_url, headers=headers, json=json_data)
    print("[req ]", response.json())


def req2():
    response = requests.post(base_url, headers=headers, json=json_data2)
    print("[req2]", response.json())


n = 1
for i in range(0, 3):
    print(f"- {n} threads -")
    with futures.ThreadPoolExecutor(max_workers=n) as executor:
        executor.submit(req)
        for i in range(3):
            executor.submit(req2)
    n += 1

# output before this PR's changes:
# - 1 threads -
# [req ] {'generated_text': ' The capital of France is Paris.'}
# [req2] {'generated_text': " As you were riding your bicycle through Central Park, enjoying some fresh air on an otherwise gloomy day. You couldn't help but notice that it was eerily quiet for this time of year - usually there would be hordes"}
# [req2] {'generated_text': " As you were riding your bicycle through Central Park, enjoying some fresh air on an otherwise gloomy day. You couldn't help but notice that it was eerily quiet for this time of year - usually there would be hordes"}
# [req2] {'generated_text': " As you were riding your bicycle through Central Park, enjoying some fresh air on an otherwise gloomy day. You couldn't help but notice that it was eerily quiet for this time of year - usually there would be hordes"}
# - 2 threads -
# [req ] {'generated_text': ' The capital city'}
# [req2] {'generated_text': ' As""%\n================'}
# [req2] {'generated_text': ' As""%%$\n================'}
# [req2] {'generated_text': " As you were riding your bicycle through Central Park, enjoying some fresh air on an otherwise gloomy day. You couldn't help but notice that it was eerily quiet for this time of year - usually there would be hordes"}

# output with this PR's changes:
# - 1 threads -
# [req ] {'generated_text': ' The capital of France is Paris.'}
# [req2] {'generated_text': " As you were riding your bicycle through Central Park, enjoying some fresh air on an otherwise gloomy day. You couldn't help but notice that it was eerily quiet for this time of year - usually there would be hordes"}
# [req2] {'generated_text': " As you were riding your bicycle through Central Park, enjoying some fresh air on an otherwise gloomy day. You couldn't help but notice that it was eerily quiet for this time of year - usually there would be hordes"}
# [req2] {'generated_text': " As you were riding your bicycle through Central Park, enjoying some fresh air on an otherwise gloomy day. You couldn't help but notice that it was eerily quiet for this time of year - usually there would be hordes"}
# - 2 threads -
# [req ] {'generated_text': ' The capital city'}
# [req2] {'generated_text': " As you were riding your bicycle through Central Park, enjoying some fresh air on an otherwise gloomy day. You couldn't help but notice that it was eerily quiet for this time of year - usually there would be hordes"}
# [req2] {'generated_text': " As you were riding your bicycle through Central Park, enjoying some fresh air on an otherwise gloomy day. You couldn't help but notice that it was eerily quiet for this time of year - usually there would be hordes"}
# [req2] {'generated_text': " As you were riding your bicycle through Central Park, enjoying some fresh air on an otherwise gloomy day. You couldn't help but notice that it was eerily quiet for this time of year - usually there would be hordes"}
```

**Note:** divergence from the expected generation is easier to reproduce with batched grammar requests, as they are more sensitive to unexpected outputs.

This PR resolves the issue by setting the penalty score to 0 wherever the input ids are padding tokens (token id 0).

---------

Co-authored-by: OlivierDehaene <olivier@huggingface.co>
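To make the failure mode concrete, here is a minimal sketch of the penalty math (not TGI code; it assumes a left-padded batch with pad token id 0 and stand-in logits of 1.0): with `scatter_add_`, every padding position deposits a penalty into column 0, so the logit of token id 0 drifts further the more padding a row has.

```python
import torch

# Minimal sketch of the failure mode; not TGI code. Assumes a left-padded
# batch with pad token id 0 and stand-in logits of 1.0.
vocab_size = 8
input_ids = torch.tensor([
    [0, 0, 0, 5, 6],   # short request: three padding positions
    [1, 2, 3, 4, 5],   # full-length request
])
scores = torch.ones(2, vocab_size)
penalty = 1.05

# Same steps the frequency penalty processor performs:
score = torch.gather(scores, 1, input_ids)
score = -torch.where(score < 0, score * penalty, score / penalty)

# Buggy: every padding position scatter-adds a penalty into column 0.
buggy = scores.clone().scatter_add_(1, input_ids, score)
print(buggy[0, 0])   # tensor(-1.8571): 1 + 3 * (-1 / 1.05)

# Fixed: zero the penalty wherever input_ids holds the padding token.
score = score * input_ids.ne(0)
fixed = scores.clone().scatter_add_(1, input_ids, score)
print(fixed[0, 0])   # tensor(1.): padding no longer contributes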
Parent: bfddfa5955
Commit: 23d82b8fb6
```diff
@@ -143,6 +143,8 @@ class FrequencyPenaltyLogitsProcessor(LogitsProcessor):
         score = torch.gather(scores, 1, input_ids)
         # if score < 0 then penalty has to be multiplied to reduce the previous token probability
         score = -torch.where(score < 0, score * self.penalty, score / self.penalty)
+        # set score to 0 where input_ids is a padding token
+        score *= input_ids.ne(0)
 
         return scores.scatter_add_(1, input_ids, score)
@@ -168,6 +170,8 @@ class HeterogeneousFrequencyPenaltyLogitsProcessor(LogitsProcessor):
         score = -torch.where(
             score < 0, score * self.penalty_tensor, score / self.penalty_tensor
         )
+        # set score to 0 where input_ids is a padding token
+        score *= input_ids.ne(0)
 
         return scores.scatter_add_(1, input_ids, score)
```
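Both hunks apply the same one-line guard before `scatter_add_`. The guard matters because `scatter_add_` accumulates duplicate indices, so the distortion grows with the amount of padding; a tiny standalone illustration with made-up numbers:

```python
import torch

# scatter_add_ sums contributions that target the same column: three
# padding positions (index 0) pile up three penalties on token id 0.
logits = torch.zeros(1, 4)
index = torch.tensor([[0, 0, 0, 2]])      # three pads + one real token
penalty = torch.full((1, 4), -0.25)
print(logits.scatter_add_(1, index, penalty))
# tensor([[-0.7500,  0.0000, -0.2500,  0.0000]])
```

One side effect worth noting: `input_ids.ne(0)` also zeroes the penalty for genuine occurrences of token id 0. Since id 0 is normally a special token (pad or unk, depending on the tokenizer), this appears to be an acceptable trade-off for the fix.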