Initial version

mattcurf 2024-04-29 17:19:07 -07:00
parent aa3be9a0ab
commit 2daa02e8f4
5 changed files with 125 additions and 1 deletion

58
Dockerfile Normal file

@@ -0,0 +1,58 @@
FROM ubuntu:jammy
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=America/Los_Angeles
# Install prerequisite packages
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
apt-get install --no-install-recommends -q -y \
apt-utils \
software-properties-common \
gnupg \
wget
# Install Intel GPU user-space driver apt repo
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \
tee /etc/apt/sources.list.d/intel-gpu-jammy.list
# Install oneAPI apt repo
RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg && \
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
tee /etc/apt/sources.list.d/oneAPI.list
# Install Conda apt repo
RUN wget -qO - https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | \
gpg --dearmor --output /usr/share/keyrings/conda-archive-keyring.gpg && \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | \
tee /etc/apt/sources.list.d/conda.list
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
apt-get install --no-install-recommends -q -y \
intel-opencl-icd intel-level-zero-gpu level-zero \
intel-basekit=2024.0.1-43 \
conda
# SYCL/Level Zero runtime tuning for Intel GPUs (see the ipex-llm Ollama quickstart referenced in the README)
ENV USE_XETLA=OFF
ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
ENV SYCL_CACHE_PERSISTENT=1
ENV ZES_ENABLE_SYSMAN=1
# Offload all model layers to the GPU
ENV OLLAMA_NUM_GPU=999
ENV OLLAMA_HOST=0.0.0.0:11434
COPY _init.sh /usr/share/lib/init_workspace.sh
COPY _run.sh /usr/share/lib/run_workspace.sh
# Create the llm-cpp conda env with ipex-llm, then stage the llama.cpp and Ollama binaries in /workspace
RUN /bin/bash -c "source /usr/share/lib/init_workspace.sh && \
conda create -n llm-cpp python=3.11 && \
conda activate llm-cpp && \
pip install --pre --upgrade ipex-llm[cpp] && \
mkdir /workspace && \
cd /workspace && \
init-llama-cpp && \
init-ollama"
ENTRYPOINT ["/bin/bash", "/usr/share/lib/run_workspace.sh"]
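The compose file below builds this image automatically, but it can also be built and run standalone. A minimal sketch, reusing the tag from docker-compose.yml; the host port mapping here is an assumption, not part of this commit:

```bash
# Build the image with the same tag docker-compose.yml expects
$ docker build -t ollama-intel-gpu:latest .
# Run it with the Intel GPU render nodes passed through and the API published on the host
$ docker run --rm --device /dev/dri -p 11434:11434 ollama-intel-gpu:latest
```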

28
README.md

@@ -1 +1,28 @@
# ollama-intel-gpu
Using Ollama with Intel GPUs is not as straightforward as on the platforms Ollama supports natively. As a workaround, this repo provides a quick sample showing how to run Ollama built with Intel ARC GPU support, based on the information provided by the references below.
# Prerequisites
* Ubuntu 23.04 or newer (for Intel ARC GPU kernel driver support)
* Docker and Docker Compose installed
* Intel ARC series GPU (tested with Intel ARC A770 16GB); a quick driver sanity check is sketched below
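To confirm the kernel driver is active before composing the containers, a quick host-side check (not part of this repo) is to look for the DRM render node that docker-compose.yml passes through:

```bash
# An Intel ARC GPU with a working kernel driver exposes a render node here;
# /dev/dri is the device path mapped into the ollama-intel-gpu container.
$ ls -l /dev/dri/renderD*
```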
# Usage
The following will build the Ollama Docker image with Intel ARC GPU support and compose it with the public Open WebUI image from https://github.com/open-webui/open-webui:
```bash
$ git clone https://github.com/mattcurf/ollama-intel-gpu
$ cd ollama-intel-gpu
$ docker-compose up
```
Then point your web browser at http://localhost:3000 to open the web UI. Create a local Open WebUI credential, click the settings icon in the top right of the screen, select 'Models', click 'Show', and download a model such as 'llama3:8b-instruct-q8_0' (a good fit for the Intel ARC A770's 16GB of VRAM).
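Models can also be pulled without the web UI by calling the Ollama REST API. A sketch, assuming you exec into the running container, since the API port is not published to the host in docker-compose.yml (wget is used because the image installs it):

```bash
# Ask the Ollama server inside the container to pull a model via its /api/pull endpoint
$ docker-compose exec ollama-intel-gpu \
    wget -qO- --post-data='{"name": "llama3:8b-instruct-q8_0"}' \
    http://localhost:11434/api/pull
```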
# Known issues
* It should be straightforward to adapt/refactor this for Windows WSL2, but that was not the target of this repo
* No effort has been made to prune the packages pulled into the Ollama Docker image for Intel GPU
# References
* https://dgpu-docs.intel.com/driver/client/overview.html
* https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/ollama_quickstart.html

2
_init.sh Normal file

@@ -0,0 +1,2 @@
# Load the oneAPI (SYCL/Level Zero) and conda environments into the current shell
source /opt/intel/oneapi/setvars.sh
source /opt/conda/etc/profile.d/conda.sh

4
_run.sh Normal file

@@ -0,0 +1,4 @@
# Environment setup is shared with the image build via init_workspace.sh
source /usr/share/lib/init_workspace.sh
/workspace/ollama serve
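_run.sh is the container entrypoint; the same sourcing pattern can be reused for one-off Ollama CLI commands against the live server. A hedged example, assuming the container is up:

```bash
# List models known to the server, reusing the env setup from init_workspace.sh
$ docker-compose exec ollama-intel-gpu /bin/bash -c \
    "source /usr/share/lib/init_workspace.sh && /workspace/ollama list"
```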

33
docker-compose.yml Normal file

@@ -0,0 +1,33 @@
version: "3.9"
services:
  ollama-intel-gpu:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: ollama-intel-gpu
    image: ollama-intel-gpu:latest
    restart: always
    devices:
      - /dev/dri:/dev/dri
    volumes:
      - /tmp/.X11-unix:/tmp/.X11-unix
      - ollama-intel-gpu:/root/.ollama
    environment:
      - DISPLAY=${DISPLAY}
  ollama-webui:
    image: ghcr.io/open-webui/open-webui
    container_name: ollama-webui
    volumes:
      - ollama-webui:/app/backend/data
    depends_on:
      - ollama-intel-gpu
    ports:
      - ${OLLAMA_WEBUI_PORT-3000}:8080
    environment:
      - OLLAMA_BASE_URL=http://ollama-intel-gpu:11434
    extra_hosts:
      - host.docker.internal:host-gateway
    restart: unless-stopped
volumes:
  ollama-webui: {}
  ollama-intel-gpu: {}
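Open WebUI reaches the Ollama API through the compose network via OLLAMA_BASE_URL; port 11434 is not published to the host. If host access to the API is wanted, one option (an assumption, not part of this commit) is a compose override file:

```yaml
# docker-compose.override.yml (hypothetical): publish the Ollama API on the host
services:
  ollama-intel-gpu:
    ports:
      - "11434:11434"
```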