Initial version

Dockerfile  (new file, +58)
@@ -0,0 +1,58 @@
FROM ubuntu:jammy

ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=America/Los_Angeles

# Install prerequisite packages
RUN apt-get update && \
    apt-get install --no-install-recommends -q -y \
    apt-utils \
    software-properties-common \
    gnupg \
    wget

# Add the Intel GPU user-space driver apt repo
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
   gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
   echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \
   tee /etc/apt/sources.list.d/intel-gpu-jammy.list

# Add the oneAPI apt repo
RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
   gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg && \
   echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
   tee /etc/apt/sources.list.d/oneAPI.list

# Add the Conda apt repo
RUN wget -qO - https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | \
   gpg --dearmor --output /usr/share/keyrings/conda-archive-keyring.gpg && \
   echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | \
   tee /etc/apt/sources.list.d/conda.list

# Install the Intel GPU compute runtime, the oneAPI base toolkit, and conda
RUN apt-get update && \
  apt-get install --no-install-recommends -q -y \
    intel-opencl-icd intel-level-zero-gpu level-zero \
    intel-basekit=2024.0.1-43 \
    conda

# Runtime settings recommended by IPEX-LLM for Intel GPUs
ENV USE_XETLA=OFF
ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
ENV SYCL_CACHE_PERSISTENT=1
ENV ZES_ENABLE_SYSMAN=1
# Offload all model layers to the GPU, and listen on all interfaces
ENV OLLAMA_NUM_GPU=999
ENV OLLAMA_HOST=0.0.0.0:11434

COPY _init.sh /usr/share/lib/init_workspace.sh
COPY _run.sh /usr/share/lib/run_workspace.sh

# Create a conda env with ipex-llm and place the llama.cpp/Ollama binaries in /workspace
RUN /bin/bash -c "source /usr/share/lib/init_workspace.sh && \
   conda create -n llm-cpp python=3.11 && \
   conda activate llm-cpp && \
   pip install --pre --upgrade ipex-llm[cpp] && \
   mkdir /workspace && \
   cd /workspace && \
   init-llama-cpp && \
   init-ollama"

ENTRYPOINT ["/bin/bash", "/usr/share/lib/run_workspace.sh"]
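To iterate on this image outside of compose, a standalone build-and-run sketch; the /dev/dri mapping and port mirror what docker-compose.yml below sets up:

```bash
$ docker build -t ollama-intel-gpu:latest .
$ docker run --rm --device /dev/dri -p 11434:11434 ollama-intel-gpu:latest
```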

README.md  (+27)
@@ -1 +1,28 @@
# ollama-intel-gpu

Using Ollama on Intel GPUs is not as straightforward as on the platforms Ollama supports natively. As a workaround, this repo provides a quick sample showing how to build Ollama with Intel Arc GPU support, based on the information in the references below.

# Prerequisites
* Ubuntu 23.04 or newer (for Intel Arc GPU kernel driver support; a quick host-side check is sketched below)
* Docker and Docker Compose installed
* Intel Arc series GPU (tested with an Intel Arc A770 16GB)

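Before building, it is worth confirming that the host kernel actually exposes the GPU device nodes that the containers map in. A minimal sketch (device numbering varies by machine):

```bash
# The Arc GPU should appear as a card/renderD pair under /dev/dri
$ ls /dev/dri
```
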
# Usage

The following builds Ollama with Intel Arc GPU support and composes it with the public Open WebUI docker image from https://github.com/open-webui/open-webui:

```bash
$ git clone https://github.com/mattcurf/ollama-intel-gpu
$ cd ollama-intel-gpu
$ docker-compose up
```

Then open http://localhost:3000 in your web browser to reach the web UI. Create a local Open WebUI credential, click the settings icon in the top right of the screen, select 'Models', click 'Show', and download a model such as 'llama3:8b-instruct-q8_0', which fits in the Intel Arc A770's 16GB of VRAM.

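Models can also be pulled without the web UI, through the Ollama binary bundled in the container. A minimal sketch, assuming the container name and `/workspace/ollama` path used in this repo:

```bash
# Pull a model inside the running Ollama container
$ docker exec -it ollama-intel-gpu /workspace/ollama pull llama3:8b-instruct-q8_0
# Confirm it downloaded
$ docker exec -it ollama-intel-gpu /workspace/ollama list
```
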
# Known issues
* It should be easy to adapt/refactor this repo for Windows WSL2 support, but that was not its target
* No effort has been made to prune the packages pulled into the Ollama docker image for Intel GPU

# References
* https://dgpu-docs.intel.com/driver/client/overview.html
* https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/ollama_quickstart.html

_init.sh  (new file, +2)
@@ -0,0 +1,2 @@
source /opt/intel/oneapi/setvars.sh
source /opt/conda/etc/profile.d/conda.sh

_run.sh  (new file, +4)
@@ -0,0 +1,4 @@
#source /opt/intel/oneapi/setvars.sh
#source /opt/conda/etc/profile.d/conda.sh
source /usr/share/lib/init_workspace.sh
/workspace/ollama serve
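One way to confirm the container actually sees the GPU before serving models is to list SYCL devices with sycl-ls, which ships with the intel-basekit installed by the Dockerfile. A sketch, not part of this commit:

```bash
$ docker exec -it ollama-intel-gpu bash -c \
    'source /usr/share/lib/init_workspace.sh && sycl-ls'
# Expect a level_zero entry naming the Intel Arc GPU
```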

docker-compose.yml  (new file, +33)
@@ -0,0 +1,33 @@
version: "3.9"
services:
  ollama-intel-gpu:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: ollama-intel-gpu
    image: ollama-intel-gpu:latest
    restart: always
    devices:
      - /dev/dri:/dev/dri
    volumes:
      - /tmp/.X11-unix:/tmp/.X11-unix
      - ollama-intel-gpu:/root/.ollama
    environment:
      - DISPLAY=${DISPLAY}
  ollama-webui:
    image: ghcr.io/open-webui/open-webui
    container_name: ollama-webui
    volumes:
      - ollama-webui:/app/backend/data
    depends_on:
      - ollama-intel-gpu
    ports:
      - ${OLLAMA_WEBUI_PORT-3000}:8080
    environment:
      - OLLAMA_BASE_URL=http://ollama-intel-gpu:11434
    extra_hosts:
      - host.docker.internal:host-gateway
    restart: unless-stopped
volumes:
  ollama-webui: {}
  ollama-intel-gpu: {}
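The web UI port is read from the OLLAMA_WEBUI_PORT variable with a default of 3000, so it can be overridden at launch without editing the file. A sketch:

```bash
$ OLLAMA_WEBUI_PORT=8081 docker-compose up -d
```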