Merge pull request #2 from mattcurf/kernel-6-8-wa

Kernel 6.8 workaround
This commit is contained in:
Matt Curfman 2024-05-23 21:14:15 -07:00 committed by GitHub
commit 65184dc2a3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 62 additions and 43 deletions

View File

@ -1,57 +1,77 @@
FROM ubuntu:jammy FROM ubuntu:22.04
ENV DEBIAN_FRONTEND noninteractive
ENV TZ=america/los_angeles ENV TZ=america/los_angeles
# Install prerequisite packages # #1 install base packages
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ #-----------------------
apt-get install --no-install-recommends -q -y \ RUN apt update && \
apt-utils \ apt install --no-install-recommends -q -y \
software-properties-common \ software-properties-common \
ca-certificates \
gnupg \ gnupg \
wget wget \
python3 \
python3-pip
# Install Intel GPU user-space driver apt repo # #2 Install Intel GPU compute user-space drivers
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ #-----------------------
gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \ RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \ gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
tee /etc/apt/sources.list.d/intel-gpu-jammy.list echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \
tee /etc/apt/sources.list.d/intel-gpu-jammy.list
# apt update && \
# apt install --no-install-recommends -q -y \
# intel-opencl-icd intel-level-zero-gpu level-zero
# *Defect/Workaround*: Intel's apt repo does not contain the latest GPU runtime supporting kernel 6.8, so install the packages directly from their compute
# runtime repo
# https://github.com/intel/compute-runtime/issues/710
RUN apt update && \
apt install --no-install-recommends -q -y \
ocl-icd-libopencl1 \
clinfo && \
cd /tmp && \
wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-core_1.0.16510.2_amd64.deb && \
wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.16510.2/intel-igc-opencl_1.0.16510.2_amd64.deb && \
wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-level-zero-gpu_1.3.29138.7_amd64.deb && \
wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/intel-opencl-icd_24.13.29138.7_amd64.deb && \
wget https://github.com/intel/compute-runtime/releases/download/24.13.29138.7/libigdgmm12_22.3.18_amd64.deb && \
dpkg -i *.deb && \
apt install --no-install-recommends -q -y \
level-zero
# Install oneAPI apt repo # Required compute runtime level-zero variables
ENV ZES_ENABLE_SYSMAN=1
# #3 Install oneAPI
#-----------------------
# *Defect/Workaround*: Intel's oneAPI MKL changed the linkage model, breaking pytorch wheel. Downgrade to oneAPI 2024.0 instead
# Source: https://github.com/pytorch/pytorch/issues/123097
RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \ RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg && \ gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg && \
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \ echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
tee /etc/apt/sources.list.d/oneAPI.list tee /etc/apt/sources.list.d/oneAPI.list && \
apt update && \
# Install Conda apt repo apt install --no-install-recommends -q -y \
RUN wget -qO - https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | \ intel-oneapi-common-vars=2024.0.0-49406 \
gpg --dearmor --output /usr/share/keyrings/conda-archive-keyring.gpg && \ intel-oneapi-common-oneapi-vars=2024.0.0-49406 \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | \ intel-oneapi-mkl=2024.0.0-49656 \
tee /etc/apt/sources.list.d/conda.list intel-oneapi-tcm-1.0=1.0.0-435 \
intel-oneapi-dnnl=2024.0.0-49521
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
apt-get install --no-install-recommends -q -y \
intel-opencl-icd intel-level-zero-gpu level-zero \
intel-basekit=2024.0.1-43 \
conda
# Required oneAPI environment variables
ENV USE_XETLA=OFF ENV USE_XETLA=OFF
ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
ENV SYCL_CACHE_PERSISTENT=1 ENV SYCL_CACHE_PERSISTENT=1
ENV ZES_ENABLE_SYSMAN=1
ENV OLLAMA_NUM_GPU=999
ENV OLLAMA_HOST 0.0.0.0:11434
COPY _init.sh /usr/share/lib/init_workspace.sh COPY _init.sh /usr/share/lib/init_workspace.sh
COPY _run.sh /usr/share/lib/run_workspace.sh COPY _run.sh /usr/share/lib/run_workspace.sh
RUN /bin/bash -c "source /usr/share/lib/init_workspace.sh && \ # #4 Ollama specific dependencies
conda create -n llm-cpp python=3.11 && \ #-----------------------
conda activate llm-cpp && \ RUN pip3 install --pre --upgrade ipex-llm[cpp]
pip install --pre --upgrade ipex-llm[cpp] && \
mkdir /workspace && \ ENV OLLAMA_NUM_GPU=999
cd /workspace && \ ENV OLLAMA_HOST 0.0.0.0:11434
init-llama-cpp && \
init-ollama"
ENTRYPOINT ["/bin/bash", "/usr/share/lib/run_workspace.sh"] ENTRYPOINT ["/bin/bash", "/usr/share/lib/run_workspace.sh"]

View File

@ -6,9 +6,9 @@ Using Ollama for Intel based GPUs is not as straight forward as other natively O
![screenshot](doc/screenshot.png) ![screenshot](doc/screenshot.png)
# Prerequisites # Prerequisites
* Ubuntu 23.04 or newer (for Intel ARC GPU kernel driver support. Tested with Ubuntu 23.10), or Windows 11 with WSL2 (graphics driver [101.5445](https://www.intel.com/content/www/us/en/download/785597/intel-arc-iris-xe-graphics-windows.html) or newer) * Ubuntu 24.04 or newer (for Intel ARC GPU kernel driver support. Tested with Ubuntu 24.04), or Windows 11 with WSL2 (graphics driver [101.5445](https://www.intel.com/content/www/us/en/download/785597/intel-arc-iris-xe-graphics-windows.html) or newer)
* Installed Docker and Docker-compose tools (for Linux) or Docker Desktop (for Windows) * Installed Docker and Docker-compose tools (for Linux) or Docker Desktop (for Windows)
* Intel ARC series GPU (tested with Intel ARC A770 16GB) * Intel ARC series GPU (tested with Intel ARC A770 16GB and Intel(R) Core(TM) Ultra 5 125H integrated GPU)
# Usage # Usage
@ -31,7 +31,7 @@ $ docker-compose -f docker-compose-wsl2.yml up
Then launch your web browser to http://localhost:3000 to launch the web ui. Create a local OpenWeb UI credential, then click the settings icon in the top right of the screen, then select 'Models', then click 'Show', then download a model like 'llama3:8b-instruct-q8_0' for Intel ARC A770 16GB VRAM Then launch your web browser to http://localhost:3000 to launch the web ui. Create a local OpenWeb UI credential, then click the settings icon in the top right of the screen, then select 'Models', then click 'Show', then download a model like 'llama3:8b-instruct-q8_0' for Intel ARC A770 16GB VRAM
# Known issues # Known issues
* No effort has been made to prune the packages pulled into the Ollama docker image for Intel GPU * Little effort has been made to prune the packages pulled into the Ollama docker image for Intel GPU
# References # References
* https://dgpu-docs.intel.com/driver/client/overview.html * https://dgpu-docs.intel.com/driver/client/overview.html

View File

@ -1,2 +1 @@
source /opt/intel/oneapi/setvars.sh source /opt/intel/oneapi/setvars.sh
source /opt/conda/etc/profile.d/conda.sh

View File

@ -1,2 +1,2 @@
source /usr/share/lib/init_workspace.sh source /usr/share/lib/init_workspace.sh
/workspace/ollama serve /usr/local/lib/python3.10/dist-packages/bigdl/cpp/libs/ollama serve