From e3c4f50b1f3d3a483139d37d44e5b326aeb2d8c8 Mon Sep 17 00:00:00 2001
From: mattcurf <mattcurf@tecnovelty.com>
Date: Sun, 5 May 2024 12:48:38 -0700
Subject: [PATCH 1/3] Workaround for Kernel 6.8 regression

---
 Dockerfile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 4a313a3..27cad69 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -40,6 +40,8 @@ ENV SYCL_CACHE_PERSISTENT=1
 ENV ZES_ENABLE_SYSMAN=1
 ENV OLLAMA_NUM_GPU=999
 ENV OLLAMA_HOST 0.0.0.0:11434
+ENV NEOReadDebugKeys=1
+ENV OverrideGpuAddressSpace=48
 
 COPY _init.sh /usr/share/lib/init_workspace.sh
 COPY _run.sh /usr/share/lib/run_workspace.sh

From e2f4a81fef736cc165ace4d42f10b6d00a4d18e5 Mon Sep 17 00:00:00 2001
From: Matt Curfman <matt@tecnovelty.com>
Date: Fri, 10 May 2024 19:29:28 -0700
Subject: [PATCH 2/3] Install GPU runtime from git repo

---
 Dockerfile | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 27cad69..f16c0e6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,13 +8,23 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
     apt-utils \
     software-properties-common \
     gnupg \
-    wget
+    wget \
+    ocl-icd-libopencl1
 
 # Install Intel GPU user-space driver apt repo 
-RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
-   gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
-   echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \
-   tee /etc/apt/sources.list.d/intel-gpu-jammy.list
+#RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
+#   gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
+#   echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \
+#   tee /etc/apt/sources.list.d/intel-gpu-jammy.list
+# Note: Intel's apt repo does not contain the latest GPU runtime supporting kernel 6.8, so install the packages directly from their compute
+# runtime repo
+RUN cd /tmp && \
+  wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-core_1.0.16510.2_amd64.deb && \
+  wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-opencl_1.0.16510.2_amd64.deb && \
+  wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-level-zero-gpu_1.3.29138.7_amd64.deb && \
+  wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-opencl-icd_24.13.29138.7_amd64.deb && \
+  wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/libigdgmm12_22.3.18_amd64.deb && \
+  dpkg -i *.deb
 
 # Install oneAPI apt repo
 RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
@@ -30,7 +40,6 @@ RUN wget -qO - https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | \
 
 RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
   apt-get install --no-install-recommends -q -y \
-    intel-opencl-icd intel-level-zero-gpu level-zero \
     intel-basekit=2024.0.1-43 \
     conda
 
@@ -40,8 +49,6 @@ ENV SYCL_CACHE_PERSISTENT=1
 ENV ZES_ENABLE_SYSMAN=1
 ENV OLLAMA_NUM_GPU=999
 ENV OLLAMA_HOST 0.0.0.0:11434
-ENV NEOReadDebugKeys=1
-ENV OverrideGpuAddressSpace=48
 
 COPY _init.sh /usr/share/lib/init_workspace.sh
 COPY _run.sh /usr/share/lib/run_workspace.sh

From 164881bc71e5448a72e3fd0d3712855ff7374f42 Mon Sep 17 00:00:00 2001
From: Matt Curfman <matt@tecnovelty.com>
Date: Thu, 23 May 2024 20:59:10 -0700
Subject: [PATCH 3/3] Cleanups

---
 Dockerfile | 101 +++++++++++++++++++++++++++++------------------------
 README.md  |   6 ++--
 _init.sh   |   1 -
 _run.sh    |   2 +-
 4 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index f16c0e6..6badd3f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,66 +1,77 @@
-FROM ubuntu:jammy
-
+FROM ubuntu:22.04
+ARG DEBIAN_FRONTEND=noninteractive
 ENV TZ=america/los_angeles
 
-# Install prerequisite packages
-RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
-    apt-get install --no-install-recommends -q -y \
-    apt-utils \
+# #1 Install base packages
+#-----------------------
+RUN apt-get update && \
+    apt-get install --no-install-recommends -q -y \
     software-properties-common \
+    ca-certificates \
     gnupg \
     wget \
-    ocl-icd-libopencl1
+    python3 \
+    python3-pip && rm -rf /var/lib/apt/lists/*
 
-# Install Intel GPU user-space driver apt repo 
-#RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
-#   gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
-#   echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \
-#   tee /etc/apt/sources.list.d/intel-gpu-jammy.list
-# Note: Intel's apt repo does not contain the latest GPU runtime supporting kernel 6.8, so install the packages directly from their compute
+# #2 Install Intel GPU compute user-space drivers
+#-----------------------
+RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
+  gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
+  echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \
+  tee /etc/apt/sources.list.d/intel-gpu-jammy.list
+# apt update && \
+#  apt install --no-install-recommends -q -y \
+#    intel-opencl-icd intel-level-zero-gpu level-zero
+# *Defect/Workaround*: Intel's apt repo does not contain the latest GPU runtime supporting kernel 6.8, so install the packages directly from their compute
 # runtime repo
-RUN cd /tmp && \
-  wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-core_1.0.16510.2_amd64.deb && \
-  wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-opencl_1.0.16510.2_amd64.deb && \
-  wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-level-zero-gpu_1.3.29138.7_amd64.deb && \
-  wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-opencl-icd_24.13.29138.7_amd64.deb && \
-  wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/libigdgmm12_22.3.18_amd64.deb && \
-  dpkg -i *.deb
+# https://github.com/intel/compute-runtime/issues/710
+RUN apt-get update && \
+ apt-get install --no-install-recommends -q -y \
+   ocl-icd-libopencl1 \
+   clinfo && \
+ cd /tmp && \
+ wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-core_1.0.16510.2_amd64.deb && \
+ wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-opencl_1.0.16510.2_amd64.deb && \
+ wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-level-zero-gpu_1.3.29138.7_amd64.deb && \
+ wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-opencl-icd_24.13.29138.7_amd64.deb && \
+ wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/libigdgmm12_22.3.18_amd64.deb && \
+ dpkg -i ./*.deb && rm -f ./*.deb && \
+ apt-get install --no-install-recommends -q -y \
+   level-zero && rm -rf /var/lib/apt/lists/*
 
-# Install oneAPI apt repo
+# Required compute runtime level-zero variables
+ENV ZES_ENABLE_SYSMAN=1
+
+# #3 Install oneAPI 
+#-----------------------
+# *Defect/Workaround*: Intel's oneAPI MKL changed the linkage model, breaking pytorch wheel.  Downgrade to oneAPI 2024.0 instead
+# Source: https://github.com/pytorch/pytorch/issues/123097
 RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
-   gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg && \ 
+   gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg && \
    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
-   tee /etc/apt/sources.list.d/oneAPI.list
-
-# Install Conda apt repo
-RUN wget -qO - https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | \
-   gpg --dearmor --output /usr/share/keyrings/conda-archive-keyring.gpg && \
-   echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | \
-   tee /etc/apt/sources.list.d/conda.list 
-
-RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
-  apt-get install --no-install-recommends -q -y \
-    intel-basekit=2024.0.1-43 \
-    conda
+   tee /etc/apt/sources.list.d/oneAPI.list && \
+  apt-get update && \
+  apt-get install --no-install-recommends -q -y \
+  intel-oneapi-common-vars=2024.0.0-49406 \
+  intel-oneapi-common-oneapi-vars=2024.0.0-49406 \
+  intel-oneapi-mkl=2024.0.0-49656 \
+  intel-oneapi-tcm-1.0=1.0.0-435 \
+  intel-oneapi-dnnl=2024.0.0-49521 && rm -rf /var/lib/apt/lists/*
 
+# Required oneAPI environment variables
 ENV USE_XETLA=OFF
 ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 ENV SYCL_CACHE_PERSISTENT=1
-ENV ZES_ENABLE_SYSMAN=1
-ENV OLLAMA_NUM_GPU=999
-ENV OLLAMA_HOST 0.0.0.0:11434
 
 COPY _init.sh /usr/share/lib/init_workspace.sh
 COPY _run.sh /usr/share/lib/run_workspace.sh
 
-RUN /bin/bash -c "source /usr/share/lib/init_workspace.sh && \
-   conda create -n llm-cpp python=3.11 && \
-   conda activate llm-cpp && \ 
-   pip install --pre --upgrade ipex-llm[cpp] && \
-   mkdir /workspace && \
-   cd /workspace && \
-   init-llama-cpp && \
-   init-ollama" 
+# #4 Ollama specific dependencies
+#-----------------------
+RUN pip3 install --no-cache-dir --pre --upgrade "ipex-llm[cpp]"
+
+ENV OLLAMA_NUM_GPU=999
+ENV OLLAMA_HOST=0.0.0.0:11434
 
 ENTRYPOINT ["/bin/bash", "/usr/share/lib/run_workspace.sh"]
 
diff --git a/README.md b/README.md
index 72ea7b7..7855512 100644
--- a/README.md
+++ b/README.md
@@ -6,9 +6,9 @@ Using Ollama for Intel based GPUs is not as straight forward as other natively O
 ![screenshot](doc/screenshot.png)
 
 # Prerequisites
-* Ubuntu 23.04 or newer (for Intel ARC GPU kernel driver support. Tested with Ubuntu 23.10), or Windows 11 with WSL2 (graphics driver [101.5445](https://www.intel.com/content/www/us/en/download/785597/intel-arc-iris-xe-graphics-windows.html) or newer)
+* Ubuntu 24.04 or newer (for Intel ARC GPU kernel driver support. Tested with Ubuntu 24.04), or Windows 11 with WSL2 (graphics driver [101.5445](https://www.intel.com/content/www/us/en/download/785597/intel-arc-iris-xe-graphics-windows.html) or newer)
 * Installed Docker and Docker-compose tools (for Linux) or Docker Desktop (for Windows)
-* Intel ARC series GPU (tested with Intel ARC A770 16GB)
+* Intel ARC series GPU (tested with Intel ARC A770 16GB and Intel(R) Core(TM) Ultra 5 125H integrated GPU)
  
 # Usage
 
@@ -31,7 +31,7 @@ $ docker-compose -f docker-compose-wsl2.yml up
 Then launch your web browser to http://localhost:3000 to launch the web ui.  Create a local OpenWeb UI credential, then click the settings icon in the top right of the screen, then select 'Models', then click 'Show', then download a model like 'llama3:8b-instruct-q8_0' for Intel ARC A770 16GB VRAM
 
 # Known issues
-* No effort has been made to prune the packages pulled into the Ollama docker image for Intel GPU
+* Little effort has been made to prune the packages pulled into the Ollama docker image for Intel GPU
 
 # References
 * https://dgpu-docs.intel.com/driver/client/overview.html 
diff --git a/_init.sh b/_init.sh
index fbb9b47..7408d56 100644
--- a/_init.sh
+++ b/_init.sh
@@ -1,2 +1 @@
 source /opt/intel/oneapi/setvars.sh
-source /opt/conda/etc/profile.d/conda.sh
diff --git a/_run.sh b/_run.sh
index 41a5cb0..b766a5c 100644
--- a/_run.sh
+++ b/_run.sh
@@ -1,2 +1,2 @@
 source /usr/share/lib/init_workspace.sh
-/workspace/ollama serve
+exec /usr/local/lib/python3.10/dist-packages/bigdl/cpp/libs/ollama serve