adjust some things

parent a84386c311
commit 57ccedcfb9

@@ -10,7 +10,7 @@ The purpose of this server is to abstract your LLM backend from your frontend AP
 2. `python3 -m venv venv`
 3. `source venv/bin/activate`
 4. `pip install -r requirements.txt`
-5. `wget https://git.evulid.cc/attachments/89c87201-58b1-4e28-b8fd-d0b323c810c4 -O vllm_gptq-0.1.3-py3-none-any.whl && pip install vllm_gptq-0.1.3-py3-none-any.whl`
+5. `wget https://git.evulid.cc/attachments/89c87201-58b1-4e28-b8fd-d0b323c810c4 -O /tmp/vllm_gptq-0.1.3-py3-none-any.whl && pip install /tmp/vllm_gptq-0.1.3-py3-none-any.whl && rm /tmp/vllm_gptq-0.1.3-py3-none-any.whl`
 6. `python3 server.py`

 An example systemctl service file is provided in `other/local-llm.service`.
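
The change to step 5 downloads the prebuilt `vllm_gptq` wheel to `/tmp`, installs it, and deletes the wheel afterwards so it no longer litters the working directory. A quick post-install sanity check (a sketch; it simply exercises the same `import vllm` that `server.py` guards in the third hunk below):

```python
# Run inside the activated venv after step 5. A clean exit means the
# wheel installed correctly; otherwise server.py will print its install
# hint and exit at startup.
import vllm  # provided by the vllm_gptq wheel, not the upstream vllm package

print('vllm-gptq import OK')
```
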
@@ -22,7 +22,7 @@ config_required_vars = ['token_limit', 'concurrent_gens', 'mode', 'llm_middlewar
 mode_ui_names = {
     'oobabooga': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
     'hf-textgen': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
-    'vllm': ('Chat Completion', 'Reverse Proxy', 'N/A'),
+    'vllm': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
 }
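
This hunk makes the `vllm` mode present the same ooba-style labels as the other two backends. The three fields line up with the `mode_name`, `api_input_textbox`, and `streaming_input_textbox` placeholders interpolated by the instructions template in the last hunk; how the server actually unpacks them is not shown in this diff, so the sketch below is an assumption:

```python
# Hypothetical consumer of mode_ui_names: unpack one mode's tuple into
# the labels the instructions page displays.
mode_ui_names = {
    'oobabooga': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
    'hf-textgen': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
    'vllm': ('Text Gen WebUI (ooba)', 'Blocking API url', 'Streaming API url'),
}

mode_name, api_input_textbox, streaming_input_textbox = mode_ui_names['vllm']
print(mode_name)                # Text Gen WebUI (ooba)
print(api_input_textbox)        # Blocking API url
print(streaming_input_textbox)  # Streaming API url
```
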
@@ -10,7 +10,7 @@ try:
     import vllm
 except ModuleNotFoundError as e:
     print('Could not import vllm-gptq:', e)
-    print('Please see vllm.md for install instructions')
+    print('Please see README.md for install instructions.')
     sys.exit(1)

 import config
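
The failure hint now points at README.md, whose step 5 (first hunk above) installs the `vllm_gptq` wheel this import needs, rather than at a separate vllm.md.
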
@@ -87,17 +87,12 @@
 <strong>Instructions:</strong>
 <ol>
     <li>Set your API type to <kbd>{{ mode_name }}</kbd></li>
     {% if not ws_client_api %}
     <li>Set <kbd>Chat Completion Source</kbd> to <kbd>OpenAI</kbd>.</li>
     {% endif %}
     <li>Enter <kbd>{{ client_api }}</kbd> in the <kbd>{{ api_input_textbox }}</kbd> textbox.</li>
     {% if ws_client_api %}
     <li>Enter <kbd>{{ ws_client_api }}</kbd> in the <kbd>{{ streaming_input_textbox }}</kbd> textbox.
     </li>
-    <li>If using a token, check the <kbd>Mancer AI</kbd> checkbox and enter your token in the <kbd>Mancer
+    <li>If you have a token, check the <kbd>Mancer AI</kbd> checkbox and enter your token in the <kbd>Mancer
         API key</kbd> textbox.
     </li>
     {% endif %}
     <li>Click <kbd>Connect</kbd> to test the connection.</li>
     <li>Open your preset config and set <kbd>Context Size</kbd> to {{ context_size }}.</li>
     <li>Follow this guide to get set up: <a href="https://rentry.org/freellamas" target="_blank">rentry.org/freellamas</a>
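
The template branches on `ws_client_api`: modes with a streaming endpoint show both URL textboxes plus the Mancer token step, while modes without one fall back to the OpenAI-style chat-completion setup. A minimal sketch of that branching with plain Jinja2 (an assumption; the diff does not show how the server renders this page):

```python
# Render the same conditional twice: once without a streaming URL
# (OpenAI-style branch) and once with one (streaming branch).
from jinja2 import Template

snippet = Template(
    "{% if ws_client_api %}"
    "Enter {{ ws_client_api }} in the {{ streaming_input_textbox }} textbox."
    "{% else %}"
    "Set Chat Completion Source to OpenAI."
    "{% endif %}"
)

print(snippet.render(ws_client_api=None))
# Set Chat Completion Source to OpenAI.
print(snippet.render(ws_client_api='ws://example/stream',
                     streaming_input_textbox='Streaming API url'))
# Enter ws://example/stream in the Streaming API url textbox.
```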