forked from open-webui/open-webui

Merge branch 'dev'

commit 9edc181e49
9 changed files with 97 additions and 27 deletions

README.md (20 lines changed)

@@ -85,17 +85,29 @@ If you don't have Ollama installed yet, you can use the provided bash script for
 For cpu-only container
 ```bash
-chmod +x run-compose.sh && ./run-compose.sh
+./run-compose.sh
 ```

-For gpu-enabled container (to enable this you must have your gpu driver for docker, it mostly works with nvidia so this is the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html))
+For GPU enabled container (to enable this you must have your gpu driver for docker, it mostly works with nvidia so this is the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html))
+
+Warning! A GPU-enabled installation has only been tested using linux and nvidia GPU, full functionalities are not guaranteed under Windows or Macos or using a different GPU
 ```bash
-chmod +x run-compose.sh && ./run-compose.sh --enable-gpu[count=1]
+./run-compose.sh --enable-gpu
 ```

 Note that both the above commands will use the latest production docker image in repository, to be able to build the latest local version you'll need to append the `--build` parameter, for example:
 ```bash
-./run-compose.sh --build --enable-gpu[count=1]
+./run-compose.sh --enable-gpu --build
 ```

+### Installing Both Ollama and Ollama Web UI Using Docker Compose
+To install using docker compose script as CPU-only installation simply run this command
+```bash
+docker compose up -d
+```
+
+for a GPU-enabled installation (provided you installed the necessary gpu drivers and you are using nvidia)
+```bash
+docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up -d
+```
+
 ### Installing Both Ollama and Ollama Web UI Using Kustomize
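
For reference, the run-compose.sh flags shown in this README hunk can be combined; the script's usage text later in this diff lists the full set. A sketch of a combined invocation (the port and folder values are the ones printed in that usage text, not required defaults):

```bash
# GPU container, Ollama API exposed on port 11435, web UI on port 3000,
# model data kept in a local folder, image built from the local checkout
./run-compose.sh --enable-gpu[count=1] --enable-api[port=11435] --webui[port=3000] --data[folder=./ollama-data] --build
```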

@@ -4,6 +4,7 @@ metadata:
   name: ollama-service
   namespace: {{ .Values.namespace }}
 spec:
+  type: {{ .Values.ollama.service.type }}
   selector:
     app: ollama
   ports:

@@ -19,15 +19,32 @@ spec:
         image: {{ .Values.ollama.image }}
         ports:
         - containerPort: {{ .Values.ollama.servicePort }}
-        resources:
-          limits:
-            cpu: {{ .Values.ollama.resources.limits.cpu }}
-            memory: {{ .Values.ollama.resources.limits.memory }}
-            nvidia.com/gpu: {{ .Values.ollama.resources.limits.gpu }}
+        env:
+        {{- if .Values.ollama.gpu.enabled }}
+        - name: PATH
+          value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+        - name: LD_LIBRARY_PATH
+          value: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
+        - name: NVIDIA_DRIVER_CAPABILITIES
+          value: compute,utility
+        {{- end}}
+        {{- if .Values.ollama.resources }}
+        resources: {{- toYaml .Values.ollama.resources | nindent 10 }}
+        {{- end }}
         volumeMounts:
         - name: ollama-volume
           mountPath: /root/.ollama
         tty: true
+      {{- with .Values.ollama.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      tolerations:
+      {{- if .Values.ollama.gpu.enabled }}
+      - key: nvidia.com/gpu
+        operator: Exists
+        effect: NoSchedule
+      {{- end }}
   volumeClaimTemplates:
   - metadata:
       name: ollama-volume

@@ -35,4 +52,4 @@ spec:
       accessModes: [ "ReadWriteOnce" ]
       resources:
         requests:
-          storage: 1Gi
+          storage: {{ .Values.ollama.volumeSize }}
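
The StatefulSet hunk above gates the NVIDIA environment variables and the nvidia.com/gpu toleration behind `ollama.gpu.enabled`, and now takes the whole resources block verbatim from values. A minimal sketch of turning the GPU path on at install time, assuming the chart is installed with Helm from the kubernetes/helm directory (the release name `ollama-webui` is illustrative):

```bash
# enable the GPU env/toleration blocks and request one GPU via the resources limit
helm upgrade --install ollama-webui ./kubernetes/helm \
  --set ollama.gpu.enabled=true \
  --set ollama.resources.limits."nvidia\.com/gpu"=1
```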

@@ -15,14 +15,24 @@ spec:
     spec:
       containers:
       - name: ollama-webui
-        image: ghcr.io/ollama-webui/ollama-webui:main
+        image: {{ .Values.webui.image }}
        ports:
         - containerPort: 8080
-        resources:
-          limits:
-            cpu: "500m"
-            memory: "500Mi"
+        {{- if .Values.webui.resources }}
+        resources: {{- toYaml .Values.webui.resources | nindent 10 }}
+        {{- end }}
+        volumeMounts:
+        - name: webui-volume
+          mountPath: /app/backend/data
         env:
         - name: OLLAMA_API_BASE_URL
           value: "http://ollama-service.{{ .Values.namespace }}.svc.cluster.local:{{ .Values.ollama.servicePort }}/api"
         tty: true
+      {{- with .Values.webui.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      volumes:
+      - name: webui-volume
+        persistentVolumeClaim:
+          claimName: ollama-webui-pvc
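
The web UI Deployment now reads its image from values and persists /app/backend/data on the `ollama-webui-pvc` claim added later in this diff. A quick post-install check, assuming Helm was used and `ollama-namespace` stands in for whatever `.Values.namespace` is set to:

```bash
# confirm the new claim is bound and the web UI pod is running
kubectl get pvc ollama-webui-pvc -n ollama-namespace
kubectl get pods -n ollama-namespace -l app=ollama-webui
```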

@@ -1,11 +1,13 @@
+{{- if .Values.webui.ingress.enabled }}
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
   name: ollama-webui-ingress
   namespace: {{ .Values.namespace }}
-  #annotations:
-  #  Use appropriate annotations for your Ingress controller, e.g., for NGINX:
-  #  nginx.ingress.kubernetes.io/rewrite-target: /
+  {{- if .Values.webui.ingress.annotations }}
+  annotations:
+{{ toYaml .Values.webui.ingress.annotations | trimSuffix "\n" | indent 4 }}
+  {{- end }}
 spec:
   rules:
   - host: {{ .Values.webui.ingress.host }}

@@ -18,3 +20,4 @@ spec:
             name: ollama-webui-service
             port:
               number: {{ .Values.webui.servicePort }}
+{{- end }}
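
The Ingress is now wrapped in `webui.ingress.enabled` and takes its annotations from values instead of the old commented-out block. A sketch of supplying the NGINX rewrite annotation mentioned in those comments via a values file (file name and release name are illustrative):

```bash
# render the ingress with an NGINX rewrite annotation taken from a values override
cat > ingress-values.yaml <<'EOF'
webui:
  ingress:
    enabled: true
    host: ollama.minikube.local
    annotations:
      nginx.ingress.kubernetes.io/rewrite-target: /
EOF
helm upgrade --install ollama-webui ./kubernetes/helm -f ingress-values.yaml
```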

kubernetes/helm/templates/webui-pvc.yaml (new file, 12 lines)

@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  labels:
+    app: ollama-webui
+  name: ollama-webui-pvc
+  namespace: {{ .Values.namespace }}
+spec:
+  accessModes: [ "ReadWriteOnce" ]
+  resources:
+    requests:
+      storage: {{ .Values.webui.volumeSize }}

@@ -4,7 +4,7 @@ metadata:
   name: ollama-webui-service
   namespace: {{ .Values.namespace }}
 spec:
-  type: NodePort # Use LoadBalancer if you're on a cloud that supports it
+  type: {{ .Values.webui.service.type }} # Default: NodePort # Use LoadBalancer if you're on a cloud that supports it
   selector:
     app: ollama-webui
   ports:
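
Both Service templates now take their type from values (the defaults in the values.yaml hunk below are ClusterIP for Ollama and NodePort for the web UI). As the inline comment suggests, a cloud LoadBalancer can be substituted; a one-line sketch (release name and chart path assumed as before):

```bash
helm upgrade --install ollama-webui ./kubernetes/helm --set webui.service.type=LoadBalancer
```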

@@ -10,6 +10,12 @@ ollama:
       memory: "2Gi"
       nvidia.com/gpu: "0"
   volumeSize: 1Gi
+  nodeSelector: {}
+  tolerations: []
+  service:
+    type: ClusterIP
+  gpu:
+    enabled: false

 webui:
   replicaCount: 1

@@ -20,4 +26,13 @@ webui:
       cpu: "500m"
       memory: "500Mi"
   ingress:
+    enabled: true
+    annotations:
+      # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
+      # nginx.ingress.kubernetes.io/rewrite-target: /
     host: ollama.minikube.local
+  volumeSize: 1Gi
+  nodeSelector: {}
+  tolerations: []
+  service:
+    type: NodePort
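
These new defaults (GPU disabled, empty nodeSelector/tolerations, 1Gi volumes, ClusterIP/NodePort services) can be checked without touching a cluster by rendering the chart locally; a sketch, again assuming the chart directory kubernetes/helm and an illustrative release name:

```bash
# render the manifests with the new defaults
helm template ollama-webui ./kubernetes/helm
# preview the GPU-only env and toleration blocks
helm template ollama-webui ./kubernetes/helm --set ollama.gpu.enabled=true
```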

@@ -80,12 +80,12 @@ usage() {
   echo "  -h, --help                 Show this help message."
   echo ""
   echo "Examples:"
-  echo "  ./$0 --drop"
-  echo "  ./$0 --enable-gpu[count=1]"
-  echo "  ./$0 --enable-api[port=11435]"
-  echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
-  echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
-  echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
+  echo "  $0 --drop"
+  echo "  $0 --enable-gpu[count=1]"
+  echo "  $0 --enable-api[port=11435]"
+  echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
+  echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
+  echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
   echo ""
   echo "This script configures and runs a docker-compose setup with optional GPU support, API exposure, and web UI configuration."
   echo "About the gpu to use, the script automatically detects it using the "lspci" command."