From 57ccedcfb953437fa95ead0ee1084dd6fd20d32d Mon Sep 17 00:00:00 2001
From: Cyberes
Date: Tue, 12 Sep 2023 01:10:58 -0600
Subject: [PATCH] adjust some things

---
 README.md                 | 2 +-
 llm_server/config.py      | 2 +-
 server.py                 | 2 +-
 templates/home.html       | 7 +------
 vllm.md => vllm-server.md | 0
 5 files changed, 4 insertions(+), 9 deletions(-)
 rename vllm.md => vllm-server.md (100%)

diff --git a/README.md b/README.md
index 73af42c..3b2828a 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ The purpose of this server is to abstract your LLM backend from your frontend API
 2. `python3 -m venv venv`
 3. `source venv/bin/activate`
 4. `pip install -r requirements.txt`
-5. `wget https://git.evulid.cc/attachments/89c87201-58b1-4e28-b8fd-d0b323c810c4 -O vllm_gptq-0.1.3-py3-none-any.whl && pip install vllm_gptq-0.1.3-py3-none-any.whl`
+5. `wget https://git.evulid.cc/attachments/89c87201-58b1-4e28-b8fd-d0b323c810c4 -O /tmp/vllm_gptq-0.1.3-py3-none-any.whl && pip install /tmp/vllm_gptq-0.1.3-py3-none-any.whl && rm /tmp/vllm_gptq-0.1.3-py3-none-any.whl`
 6. `python3 server.py`
 
 An example systemctl service file is provided in `other/local-llm.service`.

diff --git a/llm_server/config.py b/llm_server/config.py
index edbb221..f379260 100644
--- a/llm_server/config.py
+++ b/llm_server/config.py
@@ -22,7 +22,7 @@ config_required_vars = ['token_limit', 'concurrent_gens', 'mode', 'llm_middlewar
 
 mode_ui_names = {
     'oobabooga': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
     'hf-textgen': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
-    'vllm': ('Chat Completion', 'Reverse Proxy', 'N/A'),
+    'vllm': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
 }
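For context on the `mode_ui_names` change above: each entry is a three-field tuple that the home page template unpacks into its connection instructions. A minimal sketch of how those fields might be consumed, assuming a helper of this shape (the function and variable names are illustrative, not taken from the repository):

```python
# Hypothetical sketch: unpacking a mode_ui_names entry for home.html.
# Only mode_ui_names itself comes from the repository; the rest is assumed.
from llm_server.config import mode_ui_names

def home_template_context(mode: str) -> dict:
    # Each tuple is (UI display name, blocking-API label, streaming-API label).
    mode_name, api_input_textbox, streaming_input_textbox = mode_ui_names[mode]
    return {
        'mode_name': mode_name,
        'api_input_textbox': api_input_textbox,
        'streaming_input_textbox': streaming_input_textbox,
    }
```

After this change, `home_template_context('vllm')` would yield the same labels as the oobabooga and hf-textgen modes, which matches the template edit below that drops the vLLM-specific instructions.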
diff --git a/server.py b/server.py
index d92a21e..9c2ac1f 100644
--- a/server.py
+++ b/server.py
@@ -10,7 +10,7 @@ try:
     import vllm
 except ModuleNotFoundError as e:
     print('Could not import vllm-gptq:', e)
-    print('Please see vllm.md for install instructions')
+    print('Please see README.md for install instructions.')
     sys.exit(1)
 
 import config

diff --git a/templates/home.html b/templates/home.html
index 2fbb823..530e4a4 100644
--- a/templates/home.html
+++ b/templates/home.html
@@ -87,17 +87,12 @@
 Instructions:
 Set your API type to {{ mode_name }}
-{% if not ws_client_api %}
-Set Chat Completion Source to OpenAI.
-{% endif %}
 Enter {{ client_api }} in the {{ api_input_textbox }} textbox.
-{% if ws_client_api %}
 Enter {{ ws_client_api }} in the {{ streaming_input_textbox }} textbox.
-If using a token, check the Mancer AI checkbox and enter your token in the Mancer API key textbox.
-{% endif %}
+If you have a token, check the Mancer AI checkbox and enter your token in the Mancer API key textbox.
 Click Connect to test the connection.
 Open your preset config and set Context Size to {{ context_size }}.
 Follow this guide to get set up: rentry.org/freellamas

diff --git a/vllm.md b/vllm-server.md
similarity index 100%
rename from vllm.md
rename to vllm-server.md
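With the `{% if ws_client_api %}` guards removed, home.html now expects every render to supply both endpoint values. A minimal sketch of a render call satisfying the simplified template, assuming a Flask route (all values shown are illustrative placeholders, not taken from the repository):

```python
# Hypothetical sketch of a render call for the simplified home.html.
# Every value below is an illustrative placeholder.
from flask import Flask, render_template

app = Flask(__name__)

@app.route('/')
def home():
    return render_template(
        'home.html',
        mode_name='Text Gen WebUI (ooba)',
        api_input_textbox='Blocking API url',
        streaming_input_textbox='Streaming API url',
        client_api='https://example.com/api',
        ws_client_api='wss://example.com/api/v1/stream',
        context_size=4096,
    )
```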