From d71237fc8b6fd82407b0e27b58cd32c61f2ab6cb Mon Sep 17 00:00:00 2001
From: Omar Sanseviero
Date: Mon, 14 Aug 2023 13:47:32 +0200
Subject: [PATCH] Have snippets in Python/JavaScript in quicktour (#809)

![Screenshot from 2023-08-10 14-20-25](https://github.com/huggingface/text-generation-inference/assets/7246357/e16d0d41-be63-4d06-8093-30540df91419)

---------

Co-authored-by: Merve Noyan
---
 docs/source/quicktour.md | 65 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 61 insertions(+), 4 deletions(-)

diff --git a/docs/source/quicktour.md b/docs/source/quicktour.md
index 5b37c03f..4ba2be40 100644
--- a/docs/source/quicktour.md
+++ b/docs/source/quicktour.md
@@ -17,15 +17,72 @@ To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvi
-Once TGI is running, you can use the `generate` endpoint by doing requests. To learn more about how to query the endpoints, check the [Consuming TGI](./basic_tutorials/consuming_tgi) section.
+Once TGI is running, you can use the `generate` endpoint by doing requests. To learn more about how to query the endpoints, check the [Consuming TGI](./basic_tutorials/consuming_tgi) section, where we show examples with utility libraries and UIs. Below you can see a simple snippet to query the endpoint.
-```bash
-curl 127.0.0.1:8080/generate -X POST -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' -H 'Content-Type: application/json'
+
+<inferencesnippet>
+<python>
+
+```python
+import requests
+
+headers = {
+    "Content-Type": "application/json",
+}
+
+data = {
+    'inputs': 'What is Deep Learning?',
+    'parameters': {
+        'max_new_tokens': 20,
+    },
+}
+
+response = requests.post('http://127.0.0.1:8080/generate', headers=headers, json=data)
+print(response.json())
+# {'generated_text': '\n\nDeep Learning is a subset of Machine Learning that is concerned with the development of algorithms that can'}
 ```
+
+</python>
+<js>
+
+```js
+async function query() {
+    const response = await fetch(
+        'http://127.0.0.1:8080/generate',
+        {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json'},
+            body: JSON.stringify({
+                'inputs': 'What is Deep Learning?',
+                'parameters': {
+                    'max_new_tokens': 20
+                }
+            })
+        }
+    );
+}
+
+query().then((response) => {
+    console.log(JSON.stringify(response));
+});
+/// {"generated_text":"\n\nDeep Learning is a subset of Machine Learning that is concerned with the development of algorithms that can"}
+```
+
+</js>
+<curl>
+
+```curl
+curl 127.0.0.1:8080/generate \
+    -X POST \
+    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
+    -H 'Content-Type: application/json'
+```
+
+</curl>
+</inferencesnippet>
+
-To see all possible flags and options, you can use the `--help` flag. It's possible to configure the number of shards, quantization, generation parameters, and more.
+To see all possible deploy flags and options, you can use the `--help` flag. It's possible to configure the number of shards, quantization, generation parameters, and more.
 
 ```bash
 docker run ghcr.io/huggingface/text-generation-inference:1.0.0 --help
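
A quick way to check that the Python, JavaScript, and curl snippets added by this patch all send the same request is to build the payload separately from the HTTP call. The sketch below is illustrative and not part of the commit; `build_generate_request` is a hypothetical helper name, and `127.0.0.1:8080` is the address assumed by the snippets above.

```python
import json


def build_generate_request(prompt, max_new_tokens=20, base_url="http://127.0.0.1:8080"):
    """Hypothetical helper: assemble the URL, headers, and JSON body that the
    snippets in the patch send to TGI's /generate endpoint."""
    url = f"{base_url}/generate"
    headers = {"Content-Type": "application/json"}
    body = {"inputs": prompt, "parameters": {"max_new_tokens": max_new_tokens}}
    return url, headers, body


url, headers, body = build_generate_request("What is Deep Learning?")
print(url)               # http://127.0.0.1:8080/generate
print(json.dumps(body))  # same JSON document the curl example passes with -d
```

Passing the `body` dict via `requests.post(..., json=body)` serializes it to the same JSON string the curl example sends inline.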