diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..fcad2e1
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,61 @@
+FROM ubuntu:jammy
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=America/Los_Angeles
+
+# Install prerequisite packages
+RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
+    apt-get install --no-install-recommends -q -y \
+    apt-utils \
+    software-properties-common \
+    gnupg \
+    wget
+
+# Install Intel GPU user-space driver apt repo
+RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
+    gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
+    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \
+    tee /etc/apt/sources.list.d/intel-gpu-jammy.list
+
+# Install oneAPI apt repo
+RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
+    gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg && \
+    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
+    tee /etc/apt/sources.list.d/oneAPI.list
+
+# Install Conda apt repo
+RUN wget -qO - https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | \
+    gpg --dearmor --output /usr/share/keyrings/conda-archive-keyring.gpg && \
+    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | \
+    tee /etc/apt/sources.list.d/conda.list
+
+# Install the Intel GPU compute runtime, oneAPI Base Toolkit, and Conda
+RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
+    apt-get install --no-install-recommends -q -y \
+    intel-opencl-icd intel-level-zero-gpu level-zero \
+    intel-basekit=2024.0.1-43 \
+    conda
+
+# Runtime settings for SYCL/Level Zero and Ollama on Intel GPUs
+ENV USE_XETLA=OFF
+ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+ENV SYCL_CACHE_PERSISTENT=1
+ENV ZES_ENABLE_SYSMAN=1
+ENV OLLAMA_NUM_GPU=999
+ENV OLLAMA_HOST=0.0.0.0:11434
+
+COPY _init.sh /usr/share/lib/init_workspace.sh
+COPY _run.sh /usr/share/lib/run_workspace.sh
+
+# Create the conda env, install ipex-llm[cpp], and initialize the llama.cpp/ollama binaries in /workspace
+RUN /bin/bash -c "source /usr/share/lib/init_workspace.sh && \
+    conda create -n llm-cpp python=3.11 && \
+    conda activate llm-cpp && \
+    pip install --pre --upgrade ipex-llm[cpp] && \
+    mkdir /workspace && \
+    cd /workspace && \
+    init-llama-cpp && \
+    init-ollama"
+
+ENTRYPOINT ["/bin/bash", "/usr/share/lib/run_workspace.sh"]
+
diff --git a/README.md b/README.md
index 970de84..60506af 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,28 @@
-# ollama-intel-gpu
\ No newline at end of file
+# ollama-intel-gpu
+
+Using Ollama with Intel GPUs is not as straightforward as it is on the platforms Ollama supports natively. As a workaround, this repo provides a quick sample that runs Ollama built with Intel ARC GPU support, based on the information in the references below.
+
+# Prerequisites
+* Ubuntu 23.04 or newer (for Intel ARC GPU kernel driver support)
+* Docker and Docker Compose installed
+* Intel ARC series GPU (tested with an Intel ARC A770 16GB)
+
+# Usage
+
+The following commands build the Ollama image with Intel ARC GPU support and compose it with the public Open WebUI docker image from https://github.com/open-webui/open-webui:
+
+```bash
+$ git clone https://github.com/mattcurf/ollama-intel-gpu
+$ cd ollama-intel-gpu
+$ docker-compose up
+```
+
+Then open your web browser at http://localhost:3000 to load the web UI. Create a local Open WebUI credential, click the settings icon in the top right of the screen, select 'Models', click 'Show', and download a model such as 'llama3:8b-instruct-q8_0', which fits within the 16GB of VRAM on the Intel ARC A770.
+
+# Known issues
+* It should be easy to adapt/refactor this for Windows WSL2, but that was not the target of this repo
+* No effort has been made to prune the packages pulled into the Intel GPU Ollama docker image
+
+# References
+* https://dgpu-docs.intel.com/driver/client/overview.html
+* https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/ollama_quickstart.html
diff --git a/_init.sh b/_init.sh
new file mode 100644
index 0000000..fbb9b47
--- /dev/null
+++ b/_init.sh
@@ -0,0 +1,2 @@
+source /opt/intel/oneapi/setvars.sh
+source /opt/conda/etc/profile.d/conda.sh
diff --git a/_run.sh b/_run.sh
new file mode 100644
index 0000000..231b228
--- /dev/null
+++ b/_run.sh
@@ -0,0 +1,4 @@
+#source /opt/intel/oneapi/setvars.sh
+#source /opt/conda/etc/profile.d/conda.sh
+source /usr/share/lib/init_workspace.sh
+/workspace/ollama serve
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..f06a16d
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,33 @@
+version: "3.9"
+services:
+  ollama-intel-gpu:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: ollama-intel-gpu
+    image: ollama-intel-gpu:latest
+    restart: always
+    devices:
+      - /dev/dri:/dev/dri
+    volumes:
+      - /tmp/.X11-unix:/tmp/.X11-unix
+      - ollama-intel-gpu:/root/.ollama
+    environment:
+      - DISPLAY=${DISPLAY}
+  ollama-webui:
+    image: ghcr.io/open-webui/open-webui
+    container_name: ollama-webui
+    volumes:
+      - ollama-webui:/app/backend/data
+    depends_on:
+      - ollama-intel-gpu
+    ports:
+      - ${OLLAMA_WEBUI_PORT-3000}:8080
+    environment:
+      - OLLAMA_BASE_URL=http://ollama-intel-gpu:11434
+    extra_hosts:
+      - host.docker.internal:host-gateway
+    restart: unless-stopped
+volumes:
+  ollama-webui: {}
+  ollama-intel-gpu: {}
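
Once `docker-compose up` is running, it can be worth confirming that the container actually sees the ARC GPU before downloading models. A minimal sketch, assuming the `sycl-ls` tool that ships with the intel-basekit package installed in the Dockerfile and the `ollama-intel-gpu` container name from docker-compose.yml:

```bash
# Hedged sanity check: list the SYCL devices visible inside the running
# Ollama container. The ARC GPU should appear as a Level Zero GPU device.
docker exec -it ollama-intel-gpu bash -c \
  "source /opt/intel/oneapi/setvars.sh && sycl-ls"
```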
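
Models can also be fetched without going through the web UI. A hedged sketch using the standard Ollama CLI verbs (`pull`, `list`) against the `/workspace/ollama` binary that `_run.sh` serves; it assumes the compose stack is up and the server is reachable from inside its own container:

```bash
# Pull the README's example model directly inside the server container,
# then confirm it is available to the running Ollama instance.
docker exec -it ollama-intel-gpu /workspace/ollama pull llama3:8b-instruct-q8_0
docker exec -it ollama-intel-gpu /workspace/ollama list
```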
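
Finally, note that the web UI port is parameterized: `${OLLAMA_WEBUI_PORT-3000}` in docker-compose.yml falls back to 3000 when the variable is unset, so the published port can be changed at launch without editing the file:

```bash
# Publish Open WebUI on host port 3001 instead of the default 3000.
OLLAMA_WEBUI_PORT=3001 docker-compose up
```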