From f91e9d282d73e09cdb876924412f2ed66212d736 Mon Sep 17 00:00:00 2001
From: zspo
Date: Fri, 4 Aug 2023 04:21:33 +0800
Subject: [PATCH] fix build tokenizer in quantize and remove duplicate import
 (#768)

# What does this PR do?

Fixes #732. The calibration-data helpers in `quantize.py` (`get_wikitext2`, `get_ptb`, `get_c4`, `get_ptb_new`, `get_c4_new`) now forward `trust_remote_code` to `AutoTokenizer.from_pretrained` and fall back to the fast tokenizer when the slow one fails to load. The duplicated in-function `from transformers import AutoTokenizer` statements are removed as well. A minimal sketch of the resulting pattern is shown below.
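The sketch assumes a module-level `AutoTokenizer` import, and `build_tokenizer` is an illustrative name only; the patch inlines this try/except in each `get_*` helper rather than factoring it out:

```python
from transformers import AutoTokenizer


def build_tokenizer(model_id: str, trust_remote_code: bool = False):
    """Load the slow tokenizer for `model_id`, falling back to the fast one."""
    try:
        # Prefer the slow (Python) tokenizer for calibration encoding.
        return AutoTokenizer.from_pretrained(
            model_id, use_fast=False, trust_remote_code=trust_remote_code
        )
    except Exception:  # the patch itself uses a bare `except:`
        # Some model repos only ship a fast tokenizer; fall back to it.
        return AutoTokenizer.from_pretrained(
            model_id, use_fast=True, trust_remote_code=trust_remote_code
        )
```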
## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests), Pull Request section?
- [ ] Was this discussed/approved via a GitHub issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?

## Who can review?

Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR.
---
 .../utils/gptq/quantize.py | 81 ++++++++++++-------
 1 file changed, 51 insertions(+), 30 deletions(-)

diff --git a/server/text_generation_server/utils/gptq/quantize.py b/server/text_generation_server/utils/gptq/quantize.py
index 3f8e897a..9547d534 100644
--- a/server/text_generation_server/utils/gptq/quantize.py
+++ b/server/text_generation_server/utils/gptq/quantize.py
@@ -360,15 +360,21 @@ class GPTQ:
         torch.cuda.empty_cache()


-def get_wikitext2(nsamples, seed, seqlen, model_id):
+def get_wikitext2(nsamples, seed, seqlen, model_id, trust_remote_code):
     from datasets import load_dataset

     traindata = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")
     testdata = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")

-    from transformers import AutoTokenizer
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=False, trust_remote_code=trust_remote_code
+        )
+    except:
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=True, trust_remote_code=trust_remote_code
+        )

-    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
     trainenc = tokenizer("\n\n".join(traindata["text"]), return_tensors="pt")
     testenc = tokenizer("\n\n".join(testdata["text"]), return_tensors="pt")

@@ -386,18 +392,21 @@ def get_wikitext2(nsamples, seed, seqlen, model_id):
     return trainloader, testenc


-def get_ptb(nsamples, seed, seqlen, model_id):
+def get_ptb(nsamples, seed, seqlen, model_id, trust_remote_code):
     from datasets import load_dataset

     traindata = load_dataset("ptb_text_only", "penn_treebank", split="train")
     valdata = load_dataset("ptb_text_only", "penn_treebank", split="validation")

-    from transformers import AutoTokenizer
-
     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=False, trust_remote_code=trust_remote_code
+        )
     except:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=True, trust_remote_code=trust_remote_code
+        )
+
     trainenc = tokenizer("\n\n".join(traindata["sentence"]), return_tensors="pt")
     testenc = tokenizer("\n\n".join(valdata["sentence"]), return_tensors="pt")

@@ -415,7 +424,7 @@ def get_ptb(nsamples, seed, seqlen, model_id):
     return trainloader, testenc


-def get_c4(nsamples, seed, seqlen, model_id):
+def get_c4(nsamples, seed, seqlen, model_id, trust_remote_code):
     from datasets import load_dataset

     traindata = load_dataset(
@@ -433,12 +442,14 @@ def get_c4(nsamples, seed, seqlen, model_id):
         use_auth_token=False,
     )

-    from transformers import AutoTokenizer
-
     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=False, trust_remote_code=trust_remote_code
+        )
     except:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=True, trust_remote_code=trust_remote_code
+        )

     import random

@@ -481,18 +492,21 @@ def get_c4(nsamples, seed, seqlen, model_id):
     return trainloader, valenc


-def get_ptb_new(nsamples, seed, seqlen, model_id):
+def get_ptb_new(nsamples, seed, seqlen, model_id, trust_remote_code):
     from datasets import load_dataset

     traindata = load_dataset("ptb_text_only", "penn_treebank", split="train")
     testdata = load_dataset("ptb_text_only", "penn_treebank", split="test")

-    from transformers import AutoTokenizer
-
     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=False, trust_remote_code=trust_remote_code
+        )
     except:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=True, trust_remote_code=trust_remote_code
+        )
+
     trainenc = tokenizer(" ".join(traindata["sentence"]), return_tensors="pt")
     testenc = tokenizer(" ".join(testdata["sentence"]), return_tensors="pt")

@@ -510,7 +524,7 @@ def get_ptb_new(nsamples, seed, seqlen, model_id):
     return trainloader, testenc


-def get_c4_new(nsamples, seed, seqlen, model_id):
+def get_c4_new(nsamples, seed, seqlen, model_id, trust_remote_code):
     from datasets import load_dataset

     traindata = load_dataset(
@@ -526,12 +540,14 @@ def get_c4_new(nsamples, seed, seqlen, model_id):
         split="validation",
     )

-    from transformers import AutoTokenizer
-
     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=False, trust_remote_code=trust_remote_code
+        )
     except:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=True, trust_remote_code=trust_remote_code
+        )

     import random

@@ -562,17 +578,17 @@ def get_c4_new(nsamples, seed, seqlen, model_id):
     return trainloader, valenc


-def get_loaders(name, nsamples=128, seed=0, seqlen=2048, model_id=""):
+def get_loaders(name, nsamples=128, seed=0, seqlen=2048, model_id="", trust_remote_code=False):
     if "wikitext2" in name:
-        return get_wikitext2(nsamples, seed, seqlen, model_id)
+        return get_wikitext2(nsamples, seed, seqlen, model_id, trust_remote_code)
     if "ptb" in name:
         if "new" in name:
-            return get_ptb_new(nsamples, seed, seqlen, model_id)
-        return get_ptb(nsamples, seed, seqlen, model_id)
+            return get_ptb_new(nsamples, seed, seqlen, model_id, trust_remote_code)
+        return get_ptb(nsamples, seed, seqlen, model_id, trust_remote_code)
     if "c4" in name:
         if "new" in name:
-            return get_c4_new(nsamples, seed, seqlen, model_id)
-        return get_c4(nsamples, seed, seqlen, model_id)
+            return get_c4_new(nsamples, seed, seqlen, model_id, trust_remote_code)
+        return get_c4(nsamples, seed, seqlen, model_id, trust_remote_code)


 def find_layers(module, layers=(nn.Conv2d, nn.Linear), name=""):
@@ -906,7 +922,12 @@ def quantize(
     seed = None

     dataloader, testloader = get_loaders(
-        dataset, nsamples=nsamples, seed=seed, model_id=model_id, seqlen=model.seqlen
+        dataset,
+        nsamples=nsamples,
+        seed=seed,
+        model_id=model_id,
+        seqlen=model.seqlen,
+        trust_remote_code=trust_remote_code
     )

     tick = time.time()