From e3c4f50b1f3d3a483139d37d44e5b326aeb2d8c8 Mon Sep 17 00:00:00 2001 From: mattcurf Date: Sun, 5 May 2024 12:48:38 -0700 Subject: [PATCH 1/3] Workaround for Kernel 6.8 regression --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 4a313a3..27cad69 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,6 +40,8 @@ ENV SYCL_CACHE_PERSISTENT=1 ENV ZES_ENABLE_SYSMAN=1 ENV OLLAMA_NUM_GPU=999 ENV OLLAMA_HOST 0.0.0.0:11434 +ENV NEOReadDebugKeys=1 +ENV OverrideGpuAddressSpace=48 COPY _init.sh /usr/share/lib/init_workspace.sh COPY _run.sh /usr/share/lib/run_workspace.sh From e2f4a81fef736cc165ace4d42f10b6d00a4d18e5 Mon Sep 17 00:00:00 2001 From: Matt Curfman Date: Fri, 10 May 2024 19:29:28 -0700 Subject: [PATCH 2/3] Install GPU runtime from git repo --- Dockerfile | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 27cad69..f16c0e6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,13 +8,23 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ apt-utils \ software-properties-common \ gnupg \ - wget + wget \ + ocl-icd-libopencl1 # Install Intel GPU user-space driver apt repo -RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ - gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \ - echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \ - tee /etc/apt/sources.list.d/intel-gpu-jammy.list +#RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ +# gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \ +# echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \ +# tee /etc/apt/sources.list.d/intel-gpu-jammy.list +# Note: Intel's apt repo does not contain the latest GPU runtime supporting kernel 6.8, so install the packages directly from their compute +# runtime repo +RUN cd /tmp && \ + wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-core_1.0.16510.2_amd64.deb && \ + wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-opencl_1.0.16510.2_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-level-zero-gpu_1.3.29138.7_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-opencl-icd_24.13.29138.7_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/libigdgmm12_22.3.18_amd64.deb && \ + dpkg -i *.deb # Install oneAPI apt repo RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \ @@ -30,7 +40,6 @@ RUN wget -qO - https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | \ RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ apt-get install --no-install-recommends -q -y \ - intel-opencl-icd intel-level-zero-gpu level-zero \ intel-basekit=2024.0.1-43 \ conda @@ -40,8 +49,6 @@ ENV SYCL_CACHE_PERSISTENT=1 ENV ZES_ENABLE_SYSMAN=1 ENV OLLAMA_NUM_GPU=999 ENV OLLAMA_HOST 0.0.0.0:11434 -ENV NEOReadDebugKeys=1 -ENV OverrideGpuAddressSpace=48 COPY _init.sh /usr/share/lib/init_workspace.sh COPY _run.sh /usr/share/lib/run_workspace.sh From 164881bc71e5448a72e3fd0d3712855ff7374f42 Mon Sep 17 00:00:00 2001 From: Matt Curfman Date: Thu, 23 May 2024 20:59:10 -0700 Subject: [PATCH 3/3] Cleanups --- Dockerfile | 101 +++++++++++++++++++++++++++++------------------------ README.md | 6 ++-- _init.sh | 1 - _run.sh | 2 +- 4 files changed, 60 insertions(+), 50 deletions(-) diff --git a/Dockerfile b/Dockerfile index f16c0e6..6badd3f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,66 +1,77 @@ -FROM ubuntu:jammy - +FROM ubuntu:22.04 +ENV DEBIAN_FRONTEND noninteractive ENV TZ=america/los_angeles -# Install prerequisite packages -RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ - apt-get install --no-install-recommends -q -y \ - apt-utils \ +# #1 install base packages +#----------------------- +RUN apt update && \ + apt install --no-install-recommends -q -y \ software-properties-common \ + ca-certificates \ gnupg \ wget \ - ocl-icd-libopencl1 + python3 \ + python3-pip -# Install Intel GPU user-space driver apt repo -#RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ -# gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \ -# echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \ -# tee /etc/apt/sources.list.d/intel-gpu-jammy.list -# Note: Intel's apt repo does not contain the latest GPU runtime supporting kernel 6.8, so install the packages directly from their compute +# #2 Install Intel GPU compute user-space drivers +#----------------------- + RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ + gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \ + echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \ + tee /etc/apt/sources.list.d/intel-gpu-jammy.list +# apt update && \ +# apt install --no-install-recommends -q -y \ +# intel-opencl-icd intel-level-zero-gpu level-zero +# *Defect/Workaround*: Intel's apt repo does not contain the latest GPU runtime supporting kernel 6.8, so install the packages directly from their compute # runtime repo -RUN cd /tmp && \ - wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-core_1.0.16510.2_amd64.deb && \ - wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-opencl_1.0.16510.2_amd64.deb && \ - wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-level-zero-gpu_1.3.29138.7_amd64.deb && \ - wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-opencl-icd_24.13.29138.7_amd64.deb && \ - wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/libigdgmm12_22.3.18_amd64.deb && \ - dpkg -i *.deb +# https://github.com/intel/compute-runtime/issues/710 +RUN apt update && \ + apt install --no-install-recommends -q -y \ + ocl-icd-libopencl1 \ + clinfo && \ + cd /tmp && \ + wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-core_1.0.16510.2_amd64.deb && \ + wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-opencl_1.0.16510.2_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-level-zero-gpu_1.3.29138.7_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-opencl-icd_24.13.29138.7_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/libigdgmm12_22.3.18_amd64.deb && \ + dpkg -i *.deb && \ + apt install --no-install-recommends -q -y \ + level-zero -# Install oneAPI apt repo +# Required compute runtime level-zero variables +ENV ZES_ENABLE_SYSMAN=1 + +# #3 Install oneAPI +#----------------------- +# *Defect/Workaround*: Intel's oneAPI MKL changed the linkage model, breaking pytorch wheel. Downgrade to oneAPI 2024.0 instead +# Source: https://github.com/pytorch/pytorch/issues/123097 RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \ - gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg && \ + gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg && \ echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \ - tee /etc/apt/sources.list.d/oneAPI.list - -# Install Conda apt repo -RUN wget -qO - https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | \ - gpg --dearmor --output /usr/share/keyrings/conda-archive-keyring.gpg && \ - echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | \ - tee /etc/apt/sources.list.d/conda.list - -RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ - apt-get install --no-install-recommends -q -y \ - intel-basekit=2024.0.1-43 \ - conda + tee /etc/apt/sources.list.d/oneAPI.list && \ + apt update && \ + apt install --no-install-recommends -q -y \ + intel-oneapi-common-vars=2024.0.0-49406 \ + intel-oneapi-common-oneapi-vars=2024.0.0-49406 \ + intel-oneapi-mkl=2024.0.0-49656 \ + intel-oneapi-tcm-1.0=1.0.0-435 \ + intel-oneapi-dnnl=2024.0.0-49521 +# Required oneAPI environment variables ENV USE_XETLA=OFF ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 ENV SYCL_CACHE_PERSISTENT=1 -ENV ZES_ENABLE_SYSMAN=1 -ENV OLLAMA_NUM_GPU=999 -ENV OLLAMA_HOST 0.0.0.0:11434 COPY _init.sh /usr/share/lib/init_workspace.sh COPY _run.sh /usr/share/lib/run_workspace.sh -RUN /bin/bash -c "source /usr/share/lib/init_workspace.sh && \ - conda create -n llm-cpp python=3.11 && \ - conda activate llm-cpp && \ - pip install --pre --upgrade ipex-llm[cpp] && \ - mkdir /workspace && \ - cd /workspace && \ - init-llama-cpp && \ - init-ollama" +# #3 Ollama specific dependencies +#----------------------- +RUN pip3 install --pre --upgrade ipex-llm[cpp] + +ENV OLLAMA_NUM_GPU=999 +ENV OLLAMA_HOST 0.0.0.0:11434 ENTRYPOINT ["/bin/bash", "/usr/share/lib/run_workspace.sh"] diff --git a/README.md b/README.md index 72ea7b7..7855512 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ Using Ollama for Intel based GPUs is not as straight forward as other natively O ![screenshot](doc/screenshot.png) # Prerequisites -* Ubuntu 23.04 or newer (for Intel ARC GPU kernel driver support. Tested with Ubuntu 23.10), or Windows 11 with WSL2 (graphics driver [101.5445](https://www.intel.com/content/www/us/en/download/785597/intel-arc-iris-xe-graphics-windows.html) or newer) +* Ubuntu 24.04 or newer (for Intel ARC GPU kernel driver support. Tested with Ubuntu 24.04), or Windows 11 with WSL2 (graphics driver [101.5445](https://www.intel.com/content/www/us/en/download/785597/intel-arc-iris-xe-graphics-windows.html) or newer) * Installed Docker and Docker-compose tools (for Linux) or Docker Desktop (for Windows) -* Intel ARC series GPU (tested with Intel ARC A770 16GB) +* Intel ARC series GPU (tested with Intel ARC A770 16GB and Intel(R) Core(TM) Ultra 5 125H integrated GPU) # Usage @@ -31,7 +31,7 @@ $ docker-compose -f docker-compose-wsl2.yml up Then launch your web browser to http://localhost:3000 to launch the web ui. Create a local OpenWeb UI credential, then click the settings icon in the top right of the screen, then select 'Models', then click 'Show', then download a model like 'llama3:8b-instruct-q8_0' for Intel ARC A770 16GB VRAM # Known issues -* No effort has been made to prune the packages pulled into the Ollama docker image for Intel GPU +* Little effort has been made to prune the packages pulled into the Ollama docker image for Intel GPU # References * https://dgpu-docs.intel.com/driver/client/overview.html diff --git a/_init.sh b/_init.sh index fbb9b47..7408d56 100644 --- a/_init.sh +++ b/_init.sh @@ -1,2 +1 @@ source /opt/intel/oneapi/setvars.sh -source /opt/conda/etc/profile.d/conda.sh diff --git a/_run.sh b/_run.sh index 41a5cb0..b766a5c 100644 --- a/_run.sh +++ b/_run.sh @@ -1,2 +1,2 @@ source /usr/share/lib/init_workspace.sh -/workspace/ollama serve +/usr/local/lib/python3.10/dist-packages/bigdl/cpp/libs/ollama serve