adjust some things

parent a84386c311
commit 57ccedcfb9

@@ -10,7 +10,7 @@ The purpose of this server is to abstract your LLM backend from your frontend AP
 2. `python3 -m venv venv`
 3. `source venv/bin/activate`
 4. `pip install -r requirements.txt`
-5. `wget https://git.evulid.cc/attachments/89c87201-58b1-4e28-b8fd-d0b323c810c4 -O vllm_gptq-0.1.3-py3-none-any.whl && pip install vllm_gptq-0.1.3-py3-none-any.whl`
+5. `wget https://git.evulid.cc/attachments/89c87201-58b1-4e28-b8fd-d0b323c810c4 -O /tmp/vllm_gptq-0.1.3-py3-none-any.whl && pip install /tmp/vllm_gptq-0.1.3-py3-none-any.whl && rm /tmp/vllm_gptq-0.1.3-py3-none-any.whl`
 6. `python3 server.py`

 An example systemctl service file is provided in `other/local-llm.service`.
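
The change to step 5 downloads the prebuilt `vllm_gptq` wheel to `/tmp`, installs it, and deletes the wheel afterwards so it no longer litters the working directory. A quick post-install sanity check (a sketch; it simply exercises the same `import vllm` that `server.py` guards in the third hunk below):

```python
# Run inside the activated venv after step 5. A clean exit means the
# wheel installed correctly; otherwise server.py will print its install
# hint and exit at startup.
import vllm  # provided by the vllm_gptq wheel, not the upstream vllm package

print('vllm-gptq import OK')
```
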
@@ -22,7 +22,7 @@ config_required_vars = ['token_limit', 'concurrent_gens', 'mode', 'llm_middlewar
 mode_ui_names = {
     'oobabooga': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
     'hf-textgen': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
-    'vllm': ('Chat Completion', 'Reverse Proxy', 'N/A'),
+    'vllm': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
 }
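
This hunk makes the `vllm` mode present the same ooba-style labels as the other two backends. The three fields line up with the `mode_name`, `api_input_textbox`, and `streaming_input_textbox` placeholders interpolated by the instructions template in the last hunk; how the server actually unpacks them is not shown in this diff, so the sketch below is an assumption:

```python
# Hypothetical consumer of mode_ui_names: unpack one mode's tuple into
# the labels the instructions page displays.
mode_ui_names = {
    'oobabooga': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
    'hf-textgen': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
    'vllm': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
}

mode_name, api_input_textbox, streaming_input_textbox = mode_ui_names['vllm']
print(mode_name)                # Text Gen WebUI (ooba)
print(api_input_textbox)        # Blocking API url
print(streaming_input_textbox)  # Streaming API url
```
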
@@ -10,7 +10,7 @@ try:
     import vllm
 except ModuleNotFoundError as e:
     print('Could not import vllm-gptq:', e)
-    print('Please see vllm.md for install instructions')
+    print('Please see README.md for install instructions.')
     sys.exit(1)

 import config
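
The failure hint now points at README.md, whose step 5 (first hunk above) installs the `vllm_gptq` wheel this import needs, rather than at a separate vllm.md.
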
@@ -87,17 +87,12 @@
 <strong>Instructions:</strong>
 <ol>
     <li>Set your API type to <kbd>{{ mode_name }}</kbd></li>
     {% if not ws_client_api %}
     <li>Set <kbd>Chat Completion Source</kbd> to <kbd>OpenAI</kbd>.</li>
     {% endif %}
     <li>Enter <kbd>{{ client_api }}</kbd> in the <kbd>{{ api_input_textbox }}</kbd> textbox.</li>
     {% if ws_client_api %}
     <li>Enter <kbd>{{ ws_client_api }}</kbd> in the <kbd>{{ streaming_input_textbox }}</kbd> textbox.
     </li>
-    <li>If using a token, check the <kbd>Mancer AI</kbd> checkbox and enter your token in the <kbd>Mancer
+    <li>If you have a token, check the <kbd>Mancer AI</kbd> checkbox and enter your token in the <kbd>Mancer
         API key</kbd> textbox.
     </li>
     {% endif %}
     <li>Click <kbd>Connect</kbd> to test the connection.</li>
     <li>Open your preset config and set <kbd>Context Size</kbd> to {{ context_size }}.</li>
     <li>Follow this guide to get set up: <a href="https://rentry.org/freellamas" target="_blank">rentry.org/freellamas</a>
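
The template branches on `ws_client_api`: modes with a streaming endpoint show both URL textboxes plus the Mancer token step, while modes without one fall back to the OpenAI-style chat-completion setup. A minimal sketch of that branching with plain Jinja2 (an assumption; the diff does not show how the server renders this page):

```python
# Render the same conditional twice: once without a streaming URL
# (OpenAI-style branch) and once with one (streaming branch).
from jinja2 import Template

snippet = Template(
    "{% if ws_client_api %}"
    "Enter {{ ws_client_api }} in the {{ streaming_input_textbox }} textbox."
    "{% else %}"
    "Set Chat Completion Source to OpenAI."
    "{% endif %}"
)

print(snippet.render(ws_client_api=None))
# Set Chat Completion Source to OpenAI.
print(snippet.render(ws_client_api='ws://example/stream',
                     streaming_input_textbox='Streaming API url'))
# Enter ws://example/stream in the Streaming API url textbox.
```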