get working with ooba again, give up on dockerfile
This commit is contained in:
parent
4c49aa525a
commit
c14cc51f09
|
@ -7,17 +7,16 @@ def get_running_model():
|
|||
if opts.mode == 'oobabooga':
|
||||
try:
|
||||
backend_response = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
|
||||
r_json = backend_response.json()
|
||||
return r_json['result'], None
|
||||
except Exception as e:
|
||||
return False, e
|
||||
elif opts.mode == 'hf-textgen':
|
||||
try:
|
||||
backend_response = requests.get(f'{opts.backend_url}/info', verify=opts.verify_ssl)
|
||||
r_json = backend_response.json()
|
||||
return r_json['model_id'].replace('/', '_'), None
|
||||
except Exception as e:
|
||||
return False, e
|
||||
else:
|
||||
raise Exception
|
||||
try:
|
||||
r_json = backend_response.json()
|
||||
return r_json['model_id'].replace('/', '_'), None
|
||||
except Exception as e:
|
||||
return False, e
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
import json
|
||||
import sys
|
||||
|
||||
from flask_caching import Cache
|
||||
from redis import Redis
|
||||
from redis.typing import FieldT
|
||||
import redis as redis_pkg
|
||||
|
||||
cache = Cache(config={'CACHE_TYPE': 'RedisCache', 'CACHE_REDIS_URL': 'redis://localhost:6379/0', 'CACHE_KEY_PREFIX': 'local-llm'})
|
||||
|
||||
|
@ -17,6 +19,12 @@ class RedisWrapper:
|
|||
def __init__(self, prefix, **kwargs):
    """Create a key-prefixed Redis client and verify the server is reachable.

    ``prefix`` namespaces every key written through :meth:`set`; any extra
    keyword arguments are forwarded to the ``Redis`` constructor.
    Exits the process if the Redis server cannot be contacted.
    """
    self.prefix = prefix
    self.redis = Redis(**kwargs)
    try:
        # Probe the connection immediately so a missing/unstarted Redis
        # server fails fast at startup instead of on first real use.
        self.set('check_connected', 1)
    except redis_pkg.exceptions.ConnectionError as err:
        print('Failed to connect to the Redis server:', err)
        print('Did you install and start Redis?')
        sys.exit(1)
|
||||
|
||||
def set(self, key, value):
    """Store *value* in Redis under the namespaced key ``{prefix}:{key}``."""
    namespaced_key = f"{self.prefix}:{key}"
    return self.redis.set(namespaced_key, value)
|
||||
|
|
|
@ -44,8 +44,9 @@ RUN cargo build --release
|
|||
|
||||
# Python builder
|
||||
# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
|
||||
FROM ubuntu:20.04 as dev-base
|
||||
FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as dev-base
|
||||
# FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as pytorch-build
|
||||
# ubuntu:20.04
|
||||
|
||||
ARG PYTORCH_VERSION=2.0.1
|
||||
ARG PYTHON_VERSION=3.9
|
||||
|
@ -114,12 +115,16 @@ RUN git submodule update --init --recursive
|
|||
RUN echo $PYTORCH_VERSION > version.txt
|
||||
|
||||
# Install things for building PyTorch
|
||||
RUN /opt/conda/bin/conda install -y mkl mkl-include cudnn && \
|
||||
/opt/conda/bin/conda install libgcc && \
|
||||
RUN /opt/conda/bin/conda install -y mkl mkl-include cudnn libgcc && \
|
||||
/opt/conda/bin/conda install -c conda-forge libstdcxx-ng=12 && \
|
||||
/opt/conda/bin/conda install -c pytorch magma-cuda118 && \
|
||||
/opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8
|
||||
|
||||
RUN find / -name libcudart_static*
|
||||
|
||||
RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake conda-build pyyaml numpy ipython && \
|
||||
/opt/conda/bin/python -mpip install -r requirements.txt
|
||||
|
||||
# https://github.com/cresset-template/cresset/blob/37c7b5df7236d3b9d96c4908efe5af8bc90066e3/reqs/train-conda-build.requirements.txt
|
||||
# TODO: remove what we don't need
|
||||
RUN /opt/conda/bin/conda install -y \
|
||||
|
@ -157,10 +162,6 @@ ENV LD_PRELOAD=/opt/conda/lib/libiomp5.so:${LD_PRELOAD}
|
|||
ENV LD_PRELOAD=/opt/conda/lib/libjemalloc.so:${LD_PRELOAD}
|
||||
ENV MALLOC_CONF="background_thread:true,metadata_thp:auto,dirty_decay_ms:30000,muzzy_decay_ms:30000"
|
||||
|
||||
RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake conda-build pyyaml numpy ipython && \
|
||||
/opt/conda/bin/python -mpip install -r requirements.txt
|
||||
RUN /opt/conda/bin/conda clean -ya
|
||||
|
||||
RUN cat version.txt
|
||||
|
||||
RUN make triton
|
||||
|
@ -191,13 +192,13 @@ RUN --mount=type=cache,target=/opt/ccache \
|
|||
USE_NNPACK=ON \
|
||||
USE_OPENMP=ON \
|
||||
USE_ROCM=OFF \
|
||||
BUILD_TEST=0 \
|
||||
CMAKE_ARGS='-DDISABLE_AVX2:BOOL=TRUE -DCXX_AVX2_FOUND:BOOL=FALSE -DC_AVX2_FOUND:BOOL=FALSE -DDISABLE_AVX512F:BOOL=TRUE' \
|
||||
TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
|
||||
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
|
||||
python setup.py install && \
|
||||
cd .. && \
|
||||
rm -rf pytorch
|
||||
BUILD_TEST=0 \
|
||||
CMAKE_ARGS='-DDISABLE_AVX2:BOOL=TRUE -DCXX_AVX2_FOUND:BOOL=FALSE -DC_AVX2_FOUND:BOOL=FALSE -DDISABLE_AVX512F:BOOL=TRUE' \
|
||||
TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
|
||||
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
|
||||
python setup.py install && \
|
||||
cd .. && \
|
||||
rm -rf pytorch
|
||||
|
||||
|
||||
# Make sure we built everything properly. Build will fail if CUDA isn't available.
|
||||
|
|
Reference in New Issue