Merge pull request #11 from mattcurf/ipex_intel_image

Update Dockerfile to use Intel public ipex container
2024-11-27 17:33:38 -08:00
parent eb2389665f c74f6f2216
commit 91d2045f7c
4 changed files with 7 additions and 69 deletions
--- a/70
+++ b/70
@@ -1,68 +1,12 @@
-FROM ubuntu:22.04
-ENV DEBIAN_FRONTEND=noninteractive
-ENV TZ=america/los_angeles
+FROM intelanalytics/ipex-llm-inference-cpp-xpu:latest

-# Base packages
-RUN apt update && \
-    apt install --no-install-recommends -q -y \
-    software-properties-common \
-    ca-certificates \
-    gnupg \
-    wget \
-    curl \
-    python3 \
-    python3-pip \
-    ocl-icd-libopencl1
-
-# Intel GPU compute user-space drivers
-RUN mkdir -p /tmp/gpu && \
- cd /tmp/gpu && \
- wget https://github.com/oneapi-src/level-zero/releases/download/v1.18.3/level-zero_1.18.3+u22.04_amd64.deb && \
- wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-core_1.0.17791.9_amd64.deb && \
- wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-opencl_1.0.17791.9_amd64.deb && \
- wget https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-level-zero-gpu_1.6.31294.12_amd64.deb && \
- wget https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-opencl-icd_24.39.31294.12_amd64.deb && \
- wget https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/libigdgmm12_22.5.2_amd64.deb && \
- dpkg -i *.deb && \
- rm *.deb
-
-# Required compute runtime level-zero variables
 ENV ZES_ENABLE_SYSMAN=1
-
-# oneAPI 
-RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
-   gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg && \
-   echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
-   tee /etc/apt/sources.list.d/oneAPI.list && \
-  apt update && \
-  apt install --no-install-recommends -q -y \
-    intel-oneapi-common-vars=2024.0.0-49406 \
-    intel-oneapi-common-oneapi-vars=2024.0.0-49406 \
-    intel-oneapi-compiler-dpcpp-cpp=2024.0.2-49895 \
-    intel-oneapi-dpcpp-ct=2024.0.0-49381 \
-    intel-oneapi-mkl=2024.0.0-49656 \
-    intel-oneapi-mpi=2021.11.0-49493 \
-    intel-oneapi-dal=2024.0.1-25 \
-    intel-oneapi-ippcp=2021.9.1-5 \
-    intel-oneapi-ipp=2021.10.1-13 \
-    intel-oneapi-tlt=2024.0.0-352 \
-    intel-oneapi-ccl=2021.11.2-5 \
-    intel-oneapi-dnnl=2024.0.0-49521 \
-    intel-oneapi-tcm-1.0=1.0.0-435
-
-# Required oneAPI environment variables
-ENV USE_XETLA=OFF
-ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
-ENV SYCL_CACHE_PERSISTENT=1
-
-COPY _init.sh /usr/share/lib/init_workspace.sh
-COPY _run.sh /usr/share/lib/run_workspace.sh
-
-# Ollama via ipex-llm 
-RUN pip3 install --pre --upgrade ipex-llm[cpp] 
-
-ENV OLLAMA_NUM_GPU=999
 ENV OLLAMA_HOST=0.0.0.0:11434

-ENTRYPOINT ["/bin/bash", "/usr/share/lib/run_workspace.sh"]
+RUN mkdir -p /llm/ollama && \
+    cd /llm/ollama && \
+    init-ollama

+WORKDIR /llm/ollama
+
+ENTRYPOINT ["./ollama", "serve"]
--- a/README.md
+++ b/README.md
@@ -30,9 +30,6 @@ $ docker-compose -f docker-compose-wsl2.yml up

 Then launch your web browser to http://localhost:3000 to launch the web ui.  Create a local OpenWeb UI credential, then click the settings icon in the top right of the screen, then select 'Models', then click 'Show', then download a model like 'llama3.1:8b-instruct-q8_0' for Intel ARC A770 16GB VRAM

-# Known issues
-* Little effort has been made to prune the packages pulled into the Ollama docker image for Intel GPU
-
 # References
 * https://dgpu-docs.intel.com/driver/client/overview.html
 * https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/ollama_quickstart.html
--- a/_init.sh
+++ b/_init.sh
@@ -1 +0,0 @@
-source /opt/intel/oneapi/setvars.sh
--- a/_run.sh
+++ b/_run.sh
@@ -1,2 +0,0 @@
-source /usr/share/lib/init_workspace.sh
-/usr/local/lib/python3.10/dist-packages/bigdl/cpp/libs/ollama serve