From 0038e6020fec4650ce6c35de37440ab7c7bb206e Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Wed, 1 May 2024 21:48:06 +0200
Subject: [PATCH] Adding scripts to prepare load data. (#1841)

# What does this PR do?

Adds a `load_tests/Makefile` together with two helper scripts, `filter.py` and `orca.py`, that prepare load-test data: the Makefile downloads the ShareGPT dataset, and each script trims its dataset (ShareGPT or OpenOrca) down to a `small.json` file of at most 2k single-turn prompts. The obsolete `starcoder_load.js` k6 script is removed.

Fixes # (issue)

## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests), Pull Request section?
- [ ] Was this discussed/approved via a GitHub issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?

## Who can review?

Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR.
---
 .gitignore                   |  1 +
 load_tests/Makefile          |  9 ++++++
 load_tests/filter.py         | 26 +++++++++++++++
 load_tests/orca.py           | 29 ++++++++++++++++
 load_tests/starcoder_load.js | 63 ------------------------------------
 5 files changed, 65 insertions(+), 63 deletions(-)
 create mode 100644 load_tests/Makefile
 create mode 100644 load_tests/filter.py
 create mode 100644 load_tests/orca.py
 delete mode 100644 load_tests/starcoder_load.js

diff --git a/.gitignore b/.gitignore
index 2ac2f6b4..e9ad1808 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,4 @@ server/exllama_kernels/exllama_kernels/hip_buffers.cuh
 server/exllama_kernels/exllama_kernels/exllama_ext_hip.cpp
 
 data/
+load_tests/*.json
diff --git a/load_tests/Makefile b/load_tests/Makefile
new file mode 100644
index 00000000..9199aa3b
--- /dev/null
+++ b/load_tests/Makefile
@@ -0,0 +1,9 @@
+
+ShareGPT_V3_unfiltered_cleaned_split.json:
+	wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
+
+prepare_share: ShareGPT_V3_unfiltered_cleaned_split.json
+	python filter.py
+
+prepare_orca:
+	python orca.py
diff --git a/load_tests/filter.py b/load_tests/filter.py
new file mode 100644
index 00000000..a00226ed
--- /dev/null
+++ b/load_tests/filter.py
@@ -0,0 +1,26 @@
+import json
+
+
+def main():
+    with open("./ShareGPT_V3_unfiltered_cleaned_split.json", "r") as f:
+        data = json.load(f)
+
+    # Select only the first 2k conversations that start with a human.
+    max_conversations = 2000
+    conversations = []
+    for conversation in data:
+        conv = conversation.get("conversations")
+        if conv and conv[0]["from"] == "human":
+            # Keep only the opening human message; drop the rest of the thread.
+            conversation["conversations"] = conversation["conversations"][:1]
+            conversations.append(conversation)
+
+        if len(conversations) >= max_conversations:
+            break
+
+    with open("./small.json", "w") as f:
+        json.dump(conversations, f, indent=4)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/load_tests/orca.py b/load_tests/orca.py
new file mode 100644
index 00000000..e607d27c
--- /dev/null
+++ b/load_tests/orca.py
@@ -0,0 +1,29 @@
+import json
+import datasets
+import tqdm
+
+
+def main():
+    dataset = datasets.load_dataset("Open-Orca/OpenOrca", split="train")
+    # Keep only the first 2k samples.
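+    # Every OpenOrca row is a single question, so each entry written below
+    # starts with a human turn by construction.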
+    max_conversations = min(2000, len(dataset))
+    conversations = []
+    for item in tqdm.tqdm(dataset, total=max_conversations):
+        conversation = {
+            "conversations": [
+                {"from": "human", "value": item["question"]},
+            ],
+            "id": item["id"],
+        }
+        conversations.append(conversation)
+        if len(conversations) >= max_conversations:
+            break
+
+    with open("./small.json", "w") as f:
+        json.dump(conversations, f, indent=4)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/load_tests/starcoder_load.js b/load_tests/starcoder_load.js
deleted file mode 100644
index 2f6cb3d6..00000000
--- a/load_tests/starcoder_load.js
+++ /dev/null
@@ -1,63 +0,0 @@
-import {check} from 'k6';
-import http from 'k6/http';
-import {Trend} from 'k6/metrics';
-
-const host = __ENV.HOST || '127.0.0.1:3000';
-
-const totalTime = new Trend('total_time', true);
-const validationTime = new Trend('validation_time', true);
-const queueTime = new Trend('queue_time', true);
-const inferenceTime = new Trend('inference_time', true);
-const timePerToken = new Trend('time_per_token', true);
-
-const example = {
-    payload: JSON.stringify({
-        inputs: '# This is a fibonacci function written in the Python programming language.' +
-            'def fibonacci',
-        parameters: {
-            details: true,
-            max_new_tokens: 60,
-            temperature: 0.2,
-            top_p: 0.95,
-            seed: 0,
-        },
-    }),
-    generated_tokens: 60
-};
-
-export const options = {
-    thresholds: {
-        http_req_failed: ['rate==0'],
-        time_per_token: ['p(95)<90'],
-        queue_time: ['p(95)<1500'],
-    },
-    scenarios: {
-        load_test: {
-            executor: 'constant-arrival-rate',
-            duration: '60s',
-            preAllocatedVUs: 100,
-            rate: 10,
-            timeUnit: '1s',
-        },
-    },
-};
-
-export default function () {
-    const headers = {'Content-Type': 'application/json'};
-    const res = http.post(`http://${host}/generate`, example.payload, {
-        headers,
-    });
-
-    check(res, {
-        'Post status is 200': (r) => res.status === 200,
-        'Post response generated tokens': (r) => res.status === 200 && res.json().details.generated_tokens === example.generated_tokens,
-    });
-
-    if (res.status === 200) {
-        totalTime.add(res.headers["X-Total-Time"]);
-        validationTime.add(res.headers["X-Validation-Time"]);
-        queueTime.add(res.headers["X-Queue-Time"]);
-        inferenceTime.add(res.headers["X-Inference-Time"]);
-        timePerToken.add(res.headers["X-Time-Per-Token"]);
-    }
-}
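
Both `make prepare_share` and `make prepare_orca` leave behind a `small.json` containing a list of entries shaped like `{"id": ..., "conversations": [{"from": "human", "value": ...}]}`. As a rough sketch of how that file could drive a k6 scenario, the snippet below reuses the `/generate` endpoint and payload conventions of the removed `starcoder_load.js`; it is an illustration rather than part of this patch, and the prompt-sampling logic is an assumption:

```js
import { check } from 'k6';
import http from 'k6/http';
import { SharedArray } from 'k6/data';

const host = __ENV.HOST || '127.0.0.1:3000';

// Illustrative only: small.json comes from `make prepare_share` or `make prepare_orca`.
const conversations = new SharedArray('conversations', function () {
  return JSON.parse(open('./small.json'));
});

export default function () {
  // Each entry holds a single human turn; use its text as the prompt.
  // (Sampling at random is an assumption, not something this PR prescribes.)
  const entry = conversations[Math.floor(Math.random() * conversations.length)];
  const payload = JSON.stringify({
    inputs: entry.conversations[0].value,
    parameters: { max_new_tokens: 60 },
  });
  const res = http.post(`http://${host}/generate`, payload, {
    headers: { 'Content-Type': 'application/json' },
  });
  check(res, { 'Post status is 200': (r) => r.status === 200 });
}
```

Using `SharedArray` keeps a single parsed copy of the dataset shared across all pre-allocated VUs instead of one copy per VU, which matters once a scenario allocates hundreds of them.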