fix build tokenizer in quantize and remove duplicate import (#768)
# What does this PR do?

Fixes #732 and removes a duplicate `AutoTokenizer` import.

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests), Pull Request section?
- [ ] Was this discussed/approved via a GitHub issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?

## Who can review?

Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR.

@OlivierDehaene OR @Narsil
Commit f91e9d282d (parent 6ec5288ab7)
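The substance of the change: each calibration-data loader now builds its tokenizer in a try/except, preferring the slow tokenizer and falling back to the fast one, with `trust_remote_code` threaded through both branches. A minimal sketch of the pattern, assuming the helper name `build_tokenizer` for illustration (the diff below inlines this in each `get_*` loader):

```python
from transformers import AutoTokenizer


def build_tokenizer(model_id: str, trust_remote_code: bool):
    # Prefer the slow (Python) tokenizer, matching the existing loaders.
    try:
        return AutoTokenizer.from_pretrained(
            model_id, use_fast=False, trust_remote_code=trust_remote_code
        )
    except Exception:
        # Some models only ship a fast (Rust) tokenizer; fall back to it.
        return AutoTokenizer.from_pretrained(
            model_id, use_fast=True, trust_remote_code=trust_remote_code
        )
```

Note that the diff itself uses a bare `except:`; the narrower `except Exception:` above is a stylistic choice in this sketch, not part of the PR.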
```diff
@@ -360,15 +360,21 @@ class GPTQ:
         torch.cuda.empty_cache()


-def get_wikitext2(nsamples, seed, seqlen, model_id):
+def get_wikitext2(nsamples, seed, seqlen, model_id, trust_remote_code):
     from datasets import load_dataset

     traindata = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")
     testdata = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")

     from transformers import AutoTokenizer

-    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=False, trust_remote_code=trust_remote_code
+        )
+    except:
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=True, trust_remote_code=trust_remote_code
+        )
     trainenc = tokenizer("\n\n".join(traindata["text"]), return_tensors="pt")
     testenc = tokenizer("\n\n".join(testdata["text"]), return_tensors="pt")
```
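`get_ptb`, `get_c4`, `get_ptb_new`, and `get_c4_new` below already wrapped tokenizer construction in this try/except; the hunk above brings `get_wikitext2` in line with them and drops its now-redundant unconditional `from_pretrained` call.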
```diff
@@ -386,18 +392,21 @@ def get_wikitext2(nsamples, seed, seqlen, model_id):
     return trainloader, testenc


-def get_ptb(nsamples, seed, seqlen, model_id):
+def get_ptb(nsamples, seed, seqlen, model_id, trust_remote_code):
     from datasets import load_dataset

     traindata = load_dataset("ptb_text_only", "penn_treebank", split="train")
     valdata = load_dataset("ptb_text_only", "penn_treebank", split="validation")

     from transformers import AutoTokenizer

     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=False, trust_remote_code=trust_remote_code
+        )
     except:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=True, trust_remote_code=trust_remote_code
+        )

     trainenc = tokenizer("\n\n".join(traindata["sentence"]), return_tensors="pt")
     testenc = tokenizer("\n\n".join(valdata["sentence"]), return_tensors="pt")
@@ -415,7 +424,7 @@ def get_ptb(nsamples, seed, seqlen, model_id):
     return trainloader, testenc


-def get_c4(nsamples, seed, seqlen, model_id):
+def get_c4(nsamples, seed, seqlen, model_id, trust_remote_code):
     from datasets import load_dataset

     traindata = load_dataset(
@@ -433,12 +442,14 @@ def get_c4(nsamples, seed, seqlen, model_id):
         use_auth_token=False,
     )

     from transformers import AutoTokenizer

     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=False, trust_remote_code=trust_remote_code
+        )
     except:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=True, trust_remote_code=trust_remote_code
+        )

     import random
@@ -481,18 +492,21 @@ def get_c4(nsamples, seed, seqlen, model_id):
     return trainloader, valenc


-def get_ptb_new(nsamples, seed, seqlen, model_id):
+def get_ptb_new(nsamples, seed, seqlen, model_id, trust_remote_code):
     from datasets import load_dataset

     traindata = load_dataset("ptb_text_only", "penn_treebank", split="train")
     testdata = load_dataset("ptb_text_only", "penn_treebank", split="test")

     from transformers import AutoTokenizer

     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=False, trust_remote_code=trust_remote_code
+        )
     except:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=True, trust_remote_code=trust_remote_code
+        )

     trainenc = tokenizer(" ".join(traindata["sentence"]), return_tensors="pt")
     testenc = tokenizer(" ".join(testdata["sentence"]), return_tensors="pt")
@@ -510,7 +524,7 @@ def get_ptb_new(nsamples, seed, seqlen, model_id):
     return trainloader, testenc


-def get_c4_new(nsamples, seed, seqlen, model_id):
+def get_c4_new(nsamples, seed, seqlen, model_id, trust_remote_code):
     from datasets import load_dataset

     traindata = load_dataset(
@@ -526,12 +540,14 @@ def get_c4_new(nsamples, seed, seqlen, model_id):
         split="validation",
     )

     from transformers import AutoTokenizer

     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=False, trust_remote_code=trust_remote_code
+        )
     except:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, use_fast=True, trust_remote_code=trust_remote_code
+        )

     import random
@@ -562,17 +578,17 @@ def get_c4_new(nsamples, seed, seqlen, model_id):
     return trainloader, valenc


-def get_loaders(name, nsamples=128, seed=0, seqlen=2048, model_id=""):
+def get_loaders(name, nsamples=128, seed=0, seqlen=2048, model_id="", trust_remote_code=False):
     if "wikitext2" in name:
-        return get_wikitext2(nsamples, seed, seqlen, model_id)
+        return get_wikitext2(nsamples, seed, seqlen, model_id, trust_remote_code)
     if "ptb" in name:
         if "new" in name:
-            return get_ptb_new(nsamples, seed, seqlen, model_id)
-        return get_ptb(nsamples, seed, seqlen, model_id)
+            return get_ptb_new(nsamples, seed, seqlen, model_id, trust_remote_code)
+        return get_ptb(nsamples, seed, seqlen, model_id, trust_remote_code)
     if "c4" in name:
         if "new" in name:
-            return get_c4_new(nsamples, seed, seqlen, model_id)
-        return get_c4(nsamples, seed, seqlen, model_id)
+            return get_c4_new(nsamples, seed, seqlen, model_id, trust_remote_code)
+        return get_c4(nsamples, seed, seqlen, model_id, trust_remote_code)


 def find_layers(module, layers=(nn.Conv2d, nn.Linear), name=""):
```
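With the new keyword in place, callers can opt in per model. A hypothetical call with example values (the model id is purely illustrative):

```python
# Fetch 128 WikiText-2 calibration samples for a model whose tokenizer
# or config relies on custom code hosted on the Hub.
dataloader, testloader = get_loaders(
    "wikitext2",
    nsamples=128,
    seed=0,
    seqlen=2048,
    model_id="tiiuae/falcon-7b",  # illustrative example
    trust_remote_code=True,
)
```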
```diff
@@ -906,7 +922,12 @@ def quantize(
     seed = None

     dataloader, testloader = get_loaders(
-        dataset, nsamples=nsamples, seed=seed, model_id=model_id, seqlen=model.seqlen
+        dataset,
+        nsamples=nsamples,
+        seed=seed,
+        model_id=model_id,
+        seqlen=model.seqlen,
+        trust_remote_code=trust_remote_code
     )

     tick = time.time()
```