From 40313a73648465085d388a3275486b1d7e9fc8f4 Mon Sep 17 00:00:00 2001 From: Matt Curfman Date: Fri, 16 Aug 2024 22:41:15 -0700 Subject: [PATCH] Revert to ipex-llm version of ollama for gpu acceleration --- Dockerfile | 16 +++++++++------- README.md | 7 +++++-- _init.sh | 1 + _run.sh | 2 ++ 4 files changed, 17 insertions(+), 9 deletions(-) create mode 100644 _init.sh create mode 100644 _run.sh diff --git a/Dockerfile b/Dockerfile index 898177e..0cf23a6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,6 +10,8 @@ RUN apt update && \ gnupg \ wget \ curl \ + python3 \ + python3-pip \ ocl-icd-libopencl1 # Intel GPU compute user-space drivers @@ -34,21 +36,21 @@ RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-P tee /etc/apt/sources.list.d/oneAPI.list && \ apt update && \ apt install --no-install-recommends -q -y \ - intel-oneapi-runtime-libs + intel-basekit # Required oneAPI environment variables ENV USE_XETLA=OFF ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 ENV SYCL_CACHE_PERSISTENT=1 -# Ollama -RUN curl -fsSL https://ollama.com/install.sh | sh +COPY _init.sh /usr/share/lib/init_workspace.sh +COPY _run.sh /usr/share/lib/run_workspace.sh + +# Ollama via ipex-llm (quote the extras specifier so /bin/sh cannot glob-expand "[cpp]") +RUN pip3 install --pre --upgrade "ipex-llm[cpp]" ENV OLLAMA_NUM_GPU=999 ENV OLLAMA_HOST=0.0.0.0:11434 -# https://github.com/ollama/ollama/issues/1590 -ENV OLLAMA_INTEL_GPU=1 - -ENTRYPOINT ["/bin/bash", "/usr/share/lib/run_workspace.sh"] diff --git a/README.md b/README.md index 1677412..ab4bd9d 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,9 @@ $ docker-compose -f docker-compose-wsl2.yml up Then launch your web browser to http://localhost:3000 to launch the web ui. 
Create a local OpenWeb UI credential, then click the settings icon in the top right of the screen, then select 'Models', then click 'Show', then download a model like 'llama3.1:8b-instruct-q8_0' for Intel ARC A770 16GB VRAM +# Known issues +* Little effort has been made to prune the packages pulled into the Ollama docker image for Intel GPU + # References -* https://github.com/ollama/ollama/issues/1590 -* https://github.com/ollama/ollama/pull/3278 +* https://dgpu-docs.intel.com/driver/client/overview.html +* https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/ollama_quickstart.html diff --git a/_init.sh b/_init.sh new file mode 100644 index 0000000..7408d56 --- /dev/null +++ b/_init.sh @@ -0,0 +1 @@ +source /opt/intel/oneapi/setvars.sh diff --git a/_run.sh b/_run.sh new file mode 100644 index 0000000..b766a5c --- /dev/null +++ b/_run.sh @@ -0,0 +1,2 @@ +source /usr/share/lib/init_workspace.sh +exec /usr/local/lib/python3.10/dist-packages/bigdl/cpp/libs/ollama serve