Adding scripts to prepare load data. (#1841)

# What does this PR do?   Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests), Pull Request section? - [ ] Was this discussed/approved via a Github issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case. - [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation). - [ ] Did you write any new necessary tests? ## Who can review? Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR.
2024-05-01 21:48:06 +02:00 · 2024-05-01 21:48:06 +02:00 · 0038e6020f
parent 27b3a2c9fc
commit 0038e6020f
5 changed files with 63 additions and 63 deletions
--- a/.gitignore
+++ b/.gitignore
@ -13,3 +13,4 @@ server/exllama_kernels/exllama_kernels/hip_buffers.cuh
 server/exllama_kernels/exllama_kernels/exllama_ext_hip.cpp

 data/
+load_tests/*.json
--- a/load_tests/Makefile
+++ b/load_tests/Makefile
@ -0,0 +1,9 @@
+
+ShareGPT_V3_unfiltered_cleaned_split.json:
+	wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
+
+prepare_share: ShareGPT_V3_unfiltered_cleaned_split.json
+	python filter.py
+
+prepare_orca:
+	python orca.py
--- a/load_tests/filter.py
+++ b/load_tests/filter.py
@ -0,0 +1,26 @@
+import json
+
+
+def main():
+    with open("./ShareGPT_V3_unfiltered_cleaned_split.json", "r") as f:
+        data = json.load(f)
+
+    # Select only the first 2k conversations that start with a human.
+    max = 2000
+    conversations = []
+    for conversation in data:
+        conv = conversation.get("conversations")
+        if conv and conv[0]["from"] == "human":
+            # Trim the rest of the output
+            conversation["conversations"] = conversation["conversations"][:1]
+            conversations.append(conversation)
+
+            if len(conversation) >= max:
+                break
+
+    with open("./small.json", "w") as f:
+        data = json.dump(conversations, f, indent=4)
+
+
+if __name__ == "__main__":
+    main()
--- a/load_tests/orca.py
+++ b/load_tests/orca.py
@ -0,0 +1,27 @@
+import json
+import datasets
+import tqdm
+
+
+def main():
+    dataset = datasets.load_dataset("Open-Orca/OpenOrca", split="train")
+    # Select only the first 2k conversations that start with a human.
+    max = min(2000, len(dataset))
+    conversations = []
+    for item in tqdm.tqdm(dataset, total=max):
+        conversation = {
+            "conversations": [
+                {"from": "human", "value": item["question"]},
+            ],
+            "id": item["id"],
+        }
+        conversations.append(conversation)
+        if len(conversations) >= max:
+            break
+
+    with open("./small.json", "w") as f:
+        data = json.dump(conversations, f, indent=4)
+
+
+if __name__ == "__main__":
+    main()
--- a/load_tests/starcoder_load.js
+++ b/load_tests/starcoder_load.js
@ -1,63 +0,0 @@
-import {check} from 'k6';
-import http from 'k6/http';
-import {Trend} from 'k6/metrics';
-
-const host = __ENV.HOST || '127.0.0.1:3000';
-
-const totalTime = new Trend('total_time', true);
-const validationTime = new Trend('validation_time', true);
-const queueTime = new Trend('queue_time', true);
-const inferenceTime = new Trend('inference_time', true);
-const timePerToken = new Trend('time_per_token', true);
-
-const example = {
-    payload: JSON.stringify({
-        inputs: '# This is a fibonacci function written in the Python programming language.' +
-            'def fibonacci',
-        parameters: {
-            details: true,
-            max_new_tokens: 60,
-            temperature: 0.2,
-            top_p: 0.95,
-            seed: 0,
-        },
-    }),
-    generated_tokens: 60
-};
-
-export const options = {
-    thresholds: {
-        http_req_failed: ['rate==0'],
-        time_per_token: ['p(95)<90'],
-        queue_time: ['p(95)<1500'],
-    },
-    scenarios: {
-        load_test: {
-            executor: 'constant-arrival-rate',
-            duration: '60s',
-            preAllocatedVUs: 100,
-            rate: 10,
-            timeUnit: '1s',
-        },
-    },
-};
-
-export default function () {
-    const headers = {'Content-Type': 'application/json'};
-    const res = http.post(`http://${host}/generate`, example.payload, {
-        headers,
-    });
-
-    check(res, {
-        'Post status is 200': (r) => res.status === 200,
-        'Post response generated tokens': (r) => res.status === 200 && res.json().details.generated_tokens === example.generated_tokens,
-    });
-
-    if (res.status === 200) {
-        totalTime.add(res.headers["X-Total-Time"]);
-        validationTime.add(res.headers["X-Validation-Time"]);
-        queueTime.add(res.headers["X-Queue-Time"]);
-        inferenceTime.add(res.headers["X-Inference-Time"]);
-        timePerToken.add(res.headers["X-Time-Per-Token"]);
-    }
-}