Merge branch 'dev'

Daniele Viti 2023-12-31 11:24:40 +01:00
commit 9edc181e49
9 changed files with 97 additions and 27 deletions

View file

@@ -85,17 +85,29 @@ If you don't have Ollama installed yet, you can use the provided bash script for
 For cpu-only container
 ```bash
-chmod +x run-compose.sh && ./run-compose.sh
+./run-compose.sh
 ```
-For gpu-enabled container (to enable this you must have your gpu driver for docker, it mostly works with nvidia so this is the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html))
+For GPU enabled container (to enable this you must have your gpu driver for docker, it mostly works with nvidia so this is the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html))
+Warning! A GPU-enabled installation has only been tested using linux and nvidia GPU, full functionalities are not guaranteed under Windows or Macos or using a different GPU
 ```bash
-chmod +x run-compose.sh && ./run-compose.sh --enable-gpu[count=1]
+./run-compose.sh --enable-gpu
 ```
 Note that both the above commands will use the latest production docker image in repository, to be able to build the latest local version you'll need to append the `--build` parameter, for example:
 ```bash
-./run-compose.sh --build --enable-gpu[count=1]
+./run-compose.sh --enable-gpu --build
 ```
+### Installing Both Ollama and Ollama Web UI Using Docker Compose
+To install using docker compose script as CPU-only installation simply run this command
+```bash
+docker compose up -d
+```
+for a GPU-enabled installation (provided you installed the necessary gpu drivers and you are using nvidia)
+```bash
+docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up -d
+```
 ### Installing Both Ollama and Ollama Web UI Using Kustomize
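For reference, a hedged example of combining the new script flags; the option syntax is taken from the run-compose.sh usage text further down in this diff, and the port and GPU count values are placeholders:

```bash
# build the images locally, enable one GPU and expose the Ollama API on a custom port
./run-compose.sh --enable-gpu[count=1] --enable-api[port=11435] --build
```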

View file

@@ -4,6 +4,7 @@ metadata:
   name: ollama-service
   namespace: {{ .Values.namespace }}
 spec:
+  type: {{ .Values.ollama.service.type }}
   selector:
     app: ollama
   ports:
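The service type is now driven by values.yaml (ClusterIP by default, per the values change later in this diff). A minimal sketch of overriding it at install time; the chart path and release name here are assumptions, not taken from this commit:

```bash
# expose the Ollama service as a NodePort instead of the ClusterIP default
helm upgrade --install ollama-webui ./kubernetes/helm \
  --set ollama.service.type=NodePort
```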

View file

@@ -19,15 +19,32 @@ spec:
         image: {{ .Values.ollama.image }}
         ports:
         - containerPort: {{ .Values.ollama.servicePort }}
-        resources:
-          limits:
-            cpu: {{ .Values.ollama.resources.limits.cpu }}
-            memory: {{ .Values.ollama.resources.limits.memory }}
-            nvidia.com/gpu: {{ .Values.ollama.resources.limits.gpu }}
+        env:
+        {{- if .Values.ollama.gpu.enabled }}
+        - name: PATH
+          value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+        - name: LD_LIBRARY_PATH
+          value: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
+        - name: NVIDIA_DRIVER_CAPABILITIES
+          value: compute,utility
+        {{- end}}
+        {{- if .Values.ollama.resources }}
+        resources: {{- toYaml .Values.ollama.resources | nindent 10 }}
+        {{- end }}
         volumeMounts:
         - name: ollama-volume
           mountPath: /root/.ollama
         tty: true
+      {{- with .Values.ollama.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      tolerations:
+      {{- if .Values.ollama.gpu.enabled }}
+      - key: nvidia.com/gpu
+        operator: Exists
+        effect: NoSchedule
+      {{- end }}
   volumeClaimTemplates:
   - metadata:
       name: ollama-volume
@@ -35,4 +52,4 @@ spec:
       accessModes: [ "ReadWriteOnce" ]
       resources:
         requests:
-          storage: 1Gi
+          storage: {{ .Values.ollama.volumeSize }}
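A quick way to sanity-check the new conditionals is to render the chart locally; the chart path and release name below are assumptions:

```bash
# render the StatefulSet with GPU support enabled and inspect the injected
# NVIDIA env vars and the nvidia.com/gpu toleration
helm template ollama-webui ./kubernetes/helm --set ollama.gpu.enabled=true
```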

View file

@@ -15,14 +15,24 @@ spec:
     spec:
       containers:
       - name: ollama-webui
-        image: ghcr.io/ollama-webui/ollama-webui:main
+        image: {{ .Values.webui.image }}
         ports:
         - containerPort: 8080
-        resources:
-          limits:
-            cpu: "500m"
-            memory: "500Mi"
+        {{- if .Values.webui.resources }}
+        resources: {{- toYaml .Values.webui.resources | nindent 10 }}
+        {{- end }}
+        volumeMounts:
+        - name: webui-volume
+          mountPath: /app/backend/data
         env:
         - name: OLLAMA_API_BASE_URL
           value: "http://ollama-service.{{ .Values.namespace }}.svc.cluster.local:{{ .Values.ollama.servicePort }}/api"
         tty: true
+      {{- with .Values.webui.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      volumes:
+      - name: webui-volume
+        persistentVolumeClaim:
+          claimName: ollama-webui-pvc
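With the new volume mount, the web UI data directory survives pod restarts via the PVC added in this commit. A hedged way to verify the mount after install, locating the pod through its app=ollama-webui label; "ollama-namespace" is only a placeholder for whatever .Values.namespace resolves to:

```bash
# list the persisted web UI data inside the running pod
POD=$(kubectl -n ollama-namespace get pod -l app=ollama-webui -o name | head -n 1)
kubectl -n ollama-namespace exec "$POD" -- ls /app/backend/data
```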

View file

@@ -1,11 +1,13 @@
+{{- if .Values.webui.ingress.enabled }}
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
   name: ollama-webui-ingress
   namespace: {{ .Values.namespace }}
-  #annotations:
-    # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
-    # nginx.ingress.kubernetes.io/rewrite-target: /
+{{- if .Values.webui.ingress.annotations }}
+  annotations:
+{{ toYaml .Values.webui.ingress.annotations | trimSuffix "\n" | indent 4 }}
+{{- end }}
 spec:
   rules:
   - host: {{ .Values.webui.ingress.host }}
@@ -18,3 +20,4 @@ spec:
               name: ollama-webui-service
             port:
               number: {{ .Values.webui.servicePort }}
+{{- end }}
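Both new switches can be driven from the command line; a minimal sketch, with the chart path and release name assumed and the annotation taken from the chart's own NGINX example comment:

```bash
# ship the rewrite-target annotation through the new webui.ingress.annotations map
# (or disable the Ingress entirely with --set webui.ingress.enabled=false)
helm upgrade --install ollama-webui ./kubernetes/helm \
  --set-string 'webui.ingress.annotations.nginx\.ingress\.kubernetes\.io/rewrite-target=/'
```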

View file

@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  labels:
+    app: ollama-webui
+  name: ollama-webui-pvc
+  namespace: {{ .Values.namespace }}
+spec:
+  accessModes: [ "ReadWriteOnce" ]
+  resources:
+    requests:
+      storage: {{ .Values.webui.volumeSize }}
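Once deployed, the claim backing /app/backend/data should report Bound; a quick check, where "ollama-namespace" again stands in for whatever .Values.namespace is set to:

```bash
kubectl get pvc ollama-webui-pvc -n ollama-namespace
```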

View file

@@ -4,7 +4,7 @@ metadata:
   name: ollama-webui-service
   namespace: {{ .Values.namespace }}
 spec:
-  type: NodePort # Use LoadBalancer if you're on a cloud that supports it
+  type: {{ .Values.webui.service.type }} # Default: NodePort # Use LoadBalancer if you're on a cloud that supports it
   selector:
     app: ollama-webui
   ports:
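With the default NodePort type now templated, the assigned port can be looked up after install; the namespace name below is a placeholder:

```bash
# print the NodePort assigned to the web UI service
kubectl get svc ollama-webui-service -n ollama-namespace \
  -o jsonpath='{.spec.ports[0].nodePort}'
```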

View file

@@ -10,6 +10,12 @@ ollama:
       memory: "2Gi"
       nvidia.com/gpu: "0"
   volumeSize: 1Gi
+  nodeSelector: {}
+  tolerations: []
+  service:
+    type: ClusterIP
+  gpu:
+    enabled: false

 webui:
   replicaCount: 1
@@ -20,4 +26,13 @@ webui:
       cpu: "500m"
       memory: "500Mi"
   ingress:
+    enabled: true
+    annotations:
+      # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
+      # nginx.ingress.kubernetes.io/rewrite-target: /
     host: ollama.minikube.local
+  volumeSize: 1Gi
+  nodeSelector: {}
+  tolerations: []
+  service:
+    type: NodePort
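A minimal sketch of overriding several of the new keys through a custom values file; the file name, chart path, release name, and the specific override values are illustrative only:

```bash
# write an override file using keys introduced in this commit, then install the chart
cat > my-values.yaml <<'EOF'
ollama:
  gpu:
    enabled: true
  volumeSize: 30Gi
webui:
  ingress:
    enabled: false
  service:
    type: LoadBalancer
EOF
helm upgrade --install ollama-webui ./kubernetes/helm -f my-values.yaml
```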

View file

@@ -80,12 +80,12 @@ usage() {
     echo "  -h, --help                 Show this help message."
     echo ""
     echo "Examples:"
-    echo "  ./$0 --drop"
-    echo "  ./$0 --enable-gpu[count=1]"
-    echo "  ./$0 --enable-api[port=11435]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
+    echo "  $0 --drop"
+    echo "  $0 --enable-gpu[count=1]"
+    echo "  $0 --enable-api[port=11435]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
     echo ""
     echo "This script configures and runs a docker-compose setup with optional GPU support, API exposure, and web UI configuration."
     echo "About the gpu to use, the script automatically detects it using the "lspci" command."
@@ -234,4 +234,4 @@ else
     echo "Aborted."
 fi
 echo
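The visible change here only drops the "./" prefix from the usage examples: $0 already expands to the path the script was invoked with, so "./$0" printed a doubled "././run-compose.sh". Assuming the -h/--help flag listed in the usage text is wired up, the corrected output can be checked with:

```bash
./run-compose.sh --help
```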