llama-server helm release

No introduction found. Create it?

Name	Repo	Stars	Version	Timestamp
llama-server	joryirving/home-ops	216	5.0.1	7 hours ago
llama-server	joryirving/home-ops	216	5.0.1	7 hours ago

Name

Repo

Stars

Version

Timestamp

llama-server

joryirving/home-ops

216

5.0.1

7 hours ago

llama-server

joryirving/home-ops

216

5.0.1

7 hours ago

Key	Types
service.app.ports.http.port (3) `8080`	number
service.app.ports.http.targetPort (1) `8080`	number
service.app.annotations."lbipam.cilium.io/ips" (2) `10.69.10.20`	string
service.app.externalTrafficPolicy (2) `Cluster`	string
service.app.type (2) `LoadBalancer`	string
service.app.controller (1) `app`	string
controllers.llama-server.annotations."reloader.stakater.com/auto" (2) `true`	string
controllers.llama-server.containers.app.args[] (2) - --host - "0.0.0.0" - --port - "8080" - --alias - self-hosted - --model - /models/gemma4-26b/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf - --mmproj - /models/gemma4-26b/mmproj-F16.gguf - --ctx-size - "262144" - --n-gpu-layers - "99" - --flash-attn - on - --parallel - "3" - --cont-batching - -sps - "0.90" - --no-context-shift - --cache-prompt - --kv-unified - --ctx-checkpoints - "4" - --checkpoint-every-n-tokens - "16384" - --image-min-tokens - "1120" - --image-max-tokens - "1120" - --metrics - --temp - "0.4" - --top-p - "0.92" - --top-k - "40" - --min-p - "0" - --batch-size - "4096" - --ubatch-size - "2048" - --threads - "12" - --threads-batch - "16" - --no-mmap	string
controllers.llama-server.containers.app.command[] (2) `- llama-server`	string
controllers.llama-server.containers.app.image.pullPolicy (2) `Always`	string
controllers.llama-server.containers.app.image.repository (2) `docker.io/kyuz0/amd-strix-halo-toolboxes`	string
controllers.llama-server.containers.app.image.tag (2) `rocm-7.2.3@sha256:a07e7576a800a8f77cd6ae1a4522d6c7e005f49cf047470a5183a2e1a4031135`	string
controllers.llama-server.containers.app.probes.liveness.custom (2) `true`	boolean
controllers.llama-server.containers.app.probes.liveness.enabled (2) `true`	boolean
controllers.llama-server.containers.app.probes.liveness.spec.failureThreshold (2) `6`	number
controllers.llama-server.containers.app.probes.liveness.spec.httpGet.path (2) `/health`	string
controllers.llama-server.containers.app.probes.liveness.spec.httpGet.port (2) `8080`	number
controllers.llama-server.containers.app.probes.liveness.spec.initialDelaySeconds (2) `30`	number
controllers.llama-server.containers.app.probes.liveness.spec.periodSeconds (2) `30`	number
controllers.llama-server.containers.app.probes.readiness.custom (2) `true`	boolean
controllers.llama-server.containers.app.probes.readiness.enabled (2) `true`	boolean
controllers.llama-server.containers.app.probes.readiness.spec.failureThreshold (2) `6`	number
controllers.llama-server.containers.app.probes.readiness.spec.httpGet.path (2) `/health`	string
controllers.llama-server.containers.app.probes.readiness.spec.httpGet.port (2) `8080`	number
controllers.llama-server.containers.app.probes.readiness.spec.initialDelaySeconds (2) `20`	number
controllers.llama-server.containers.app.probes.readiness.spec.periodSeconds (2) `10`	number
controllers.llama-server.containers.app.probes.startup.custom (2) `true`	boolean
controllers.llama-server.containers.app.probes.startup.enabled (2) `true`	boolean
controllers.llama-server.containers.app.probes.startup.spec.failureThreshold (2) `360`	number
controllers.llama-server.containers.app.probes.startup.spec.httpGet.path (2) `/health`	string
controllers.llama-server.containers.app.probes.startup.spec.httpGet.port (2) `8080`	number
controllers.llama-server.containers.app.probes.startup.spec.periodSeconds (2) `10`	number
controllers.llama-server.containers.app.resources.limits.memory (2) `80Gi`	string
controllers.llama-server.containers.app.resources.requests.cpu (2) `1`	number, string
controllers.llama-server.containers.app.resources.requests.memory (2) `20Gi`	string
controllers.llama-server.containers.app.securityContext.privileged (2) `true`	boolean
controllers.llama-server.initContainers.model-download.command[] (2) - /bin/sh - -ec - set -euo pipefail if [ ! -s /models/gemma4-26b/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf ] \|\| [ ! -s /models/gemma4-26b/mmproj-F16.gguf ]; then pip install --no-cache-dir "huggingface_hub[hf_transfer]" mkdir -p /models/gemma4-26b rm -rf /models/gemma4-26b/.cache/huggingface/download/gemma-4-26B-A4B-UD-Q5_K_XL.gguf.lock hf download \ unsloth/gemma-4-26B-A4B-it-GGUF \ gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf \ mmproj-F16.gguf \ --local-dir /models/gemma4-26b echo "Gemma-4-26B-A4B-Q5_K_XL download complete" else echo "Gemma-4-26B-A4B-Q5_K_XL already downloaded, skipping" fi	string
controllers.llama-server.initContainers.model-download.env.HF_HUB_ENABLE_HF_TRANSFER (2) `1`	string
controllers.llama-server.initContainers.model-download.envFrom[].secretRef.name (2) `huggingface`	string
controllers.llama-server.initContainers.model-download.image.repository (2) `python`	string
controllers.llama-server.initContainers.model-download.image.tag (2) `3.14-slim`	string
controllers.llama-server.initContainers.model-download.resources.limits.memory (2) `10Gi`	string
controllers.llama-server.initContainers.model-download.resources.requests.cpu (2) `250m`	string
controllers.llama-server.initContainers.model-download.resources.requests.memory (2) `512Mi`	string
controllers.app.annotations."reloader.stakater.com/auto" (1) `true`	string
controllers.app.containers.app.args[] (1) `- --host - "0.0.0.0" - --port - "8080" - --models-preset - /app/models.ini - --models-max - "1" - --parallel - "1" - --metrics - --slots`	string
controllers.app.containers.app.env.LLAMA_CACHE (1) `/cache`	string
controllers.app.containers.app.env.TZ (1) `${TIMEZONE}`	string
controllers.app.containers.app.image.repository (1) `ghcr.io/ggml-org/llama.cpp`	string
controllers.app.containers.app.image.tag (1) `server-cuda13@sha256:1b3694b9a8f68b95e4667c8b598c3e46b69dc2702ab227ebe2b2bd2282bb34d6`	string
controllers.app.containers.app.probes.liveness.custom (1) `true`	boolean
controllers.app.containers.app.probes.liveness.enabled (1) `true`	boolean
controllers.app.containers.app.probes.liveness.spec.failureThreshold (1) `3`	number
controllers.app.containers.app.probes.liveness.spec.httpGet.path (1) `/health`	string
controllers.app.containers.app.probes.liveness.spec.httpGet.port (1) `8080`	number
controllers.app.containers.app.probes.liveness.spec.initialDelaySeconds (1) `30`	number
controllers.app.containers.app.probes.liveness.spec.periodSeconds (1) `30`	number
controllers.app.containers.app.probes.liveness.spec.timeoutSeconds (1) `5`	number
controllers.app.containers.app.probes.readiness.custom (1) `true`	boolean
controllers.app.containers.app.probes.readiness.enabled (1) `true`	boolean
controllers.app.containers.app.probes.readiness.spec.failureThreshold (1) `3`	number
controllers.app.containers.app.probes.readiness.spec.httpGet.path (1) `/health`	string
controllers.app.containers.app.probes.readiness.spec.httpGet.port (1) `8080`	number
controllers.app.containers.app.probes.readiness.spec.initialDelaySeconds (1) `5`	number
controllers.app.containers.app.probes.readiness.spec.periodSeconds (1) `10`	number
controllers.app.containers.app.probes.readiness.spec.timeoutSeconds (1) `3`	number
controllers.app.containers.app.probes.startup.custom (1) `true`	boolean
controllers.app.containers.app.probes.startup.enabled (1) `true`	boolean
controllers.app.containers.app.probes.startup.spec.failureThreshold (1) `12`	number
controllers.app.containers.app.probes.startup.spec.httpGet.path (1) `/health`	string
controllers.app.containers.app.probes.startup.spec.httpGet.port (1) `8080`	number
controllers.app.containers.app.probes.startup.spec.initialDelaySeconds (1) `5`	number
controllers.app.containers.app.probes.startup.spec.periodSeconds (1) `5`	number
controllers.app.containers.app.probes.startup.spec.timeoutSeconds (1) `3`	number
controllers.app.containers.app.resources.limits.cpu (1) `8`	number
controllers.app.containers.app.resources.limits.memory (1) `26Gi`	string
controllers.app.containers.app.resources.limits."nvidia.com/gpu" (1) `1`	number
controllers.app.containers.app.resources.requests.cpu (1) `1`	number
controllers.app.containers.app.resources.requests.memory (1) `6Gi`	string
controllers.app.containers.app.resources.requests."nvidia.com/gpu" (1) `1`	number
defaultPodOptions.hostIPC (2) `true`	boolean
defaultPodOptions.nodeSelector."topology.kubernetes.io/gpus" (2) `amd`	string
defaultPodOptions.tolerations[].effect (2) `NoSchedule`	string
defaultPodOptions.tolerations[].key (2) `llm-workload`	string
defaultPodOptions.tolerations[].operator (2) `Exists`	string
defaultPodOptions.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[].matchExpressions[].key (1) `nvidia.com/gpu`	string
defaultPodOptions.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[].matchExpressions[].operator (1) `In`	string
defaultPodOptions.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[].matchExpressions[].values[] (1) `- true`	string
defaultPodOptions.runtimeClassName (1) `nvidia`	string
persistence.dev-dri.globalMounts[].path (2) `/dev/dri`	string
persistence.dev-dri.hostPath (2) `/dev/dri`	string
persistence.dev-dri.type (2) `hostPath`	string
persistence.dev-kfd.globalMounts[].path (2) `/dev/kfd`	string
persistence.dev-kfd.hostPath (2) `/dev/kfd`	string
persistence.dev-kfd.type (2) `hostPath`	string
persistence.models.existingClaim (2) `llm-models`	string
persistence.models.globalMounts[].path (2) `/models`	string
persistence.cache.enabled (1) `true`	boolean
persistence.cache.existingClaim (1) `llama-server`	string
persistence.cache.globalMounts[].path (1) `/cache`	string
persistence.config.globalMounts[].path (1) `/app/models.ini`	string
persistence.config.globalMounts[].readOnly (1) `true`	boolean
persistence.config.globalMounts[].subPath (1) `models.ini`	string
persistence.config.name (1) `llama-server-config`	string
persistence.config.type (1) `configMap`	string
route.app.hostnames[] (1) `- {{ .Release.Name }}.${SECRET_DOMAIN}`	string
route.app.parentRefs[].name (1) `envoy-internal`	string
route.app.parentRefs[].namespace (1) `network`	string
route.app.rules[].backendRefs[].identifier (1) `app`	string
route.app.rules[].backendRefs[].port (1) `80`	number

Key

Types

service.app.ports.http.port (3)

number

service.app.ports.http.targetPort (1)

number

service.app.annotations."lbipam.cilium.io/ips" (2)

10.69.10.20

string

service.app.externalTrafficPolicy (2)

Cluster

string

service.app.type (2)

LoadBalancer

string

service.app.controller (1)

app

string

controllers.llama-server.annotations."reloader.stakater.com/auto" (2)

true

string

controllers.llama-server.containers.app.args[] (2)

- --host
- "0.0.0.0"
- --port
- "8080"
- --alias
- self-hosted
- --model
- /models/gemma4-26b/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf
- --mmproj
- /models/gemma4-26b/mmproj-F16.gguf
- --ctx-size
- "262144"
- --n-gpu-layers
- "99"
- --flash-attn
- on
- --parallel
- "3"
- --cont-batching
- -sps
- "0.90"
- --no-context-shift
- --cache-prompt
- --kv-unified
- --ctx-checkpoints
- "4"
- --checkpoint-every-n-tokens
- "16384"
- --image-min-tokens
- "1120"
- --image-max-tokens
- "1120"
- --metrics
- --temp
- "0.4"
- --top-p
- "0.92"
- --top-k
- "40"
- --min-p
- "0"
- --batch-size
- "4096"
- --ubatch-size
- "2048"
- --threads
- "12"
- --threads-batch
- "16"
- --no-mmap

string

controllers.llama-server.containers.app.command[] (2)

- llama-server

string

controllers.llama-server.containers.app.image.pullPolicy (2)

Always

string

controllers.llama-server.containers.app.image.repository (2)

docker.io/kyuz0/amd-strix-halo-toolboxes

string

controllers.llama-server.containers.app.image.tag (2)

rocm-7.2.3@sha256:a07e7576a800a8f77cd6ae1a4522d6c7e005f49cf047470a5183a2e1a4031135

string

controllers.llama-server.containers.app.probes.liveness.custom (2)

true

boolean

controllers.llama-server.containers.app.probes.liveness.enabled (2)

true

boolean

controllers.llama-server.containers.app.probes.liveness.spec.failureThreshold (2)

number

controllers.llama-server.containers.app.probes.liveness.spec.httpGet.path (2)

/health

string

controllers.llama-server.containers.app.probes.liveness.spec.httpGet.port (2)

number

controllers.llama-server.containers.app.probes.liveness.spec.initialDelaySeconds (2)

number

controllers.llama-server.containers.app.probes.liveness.spec.periodSeconds (2)

number

controllers.llama-server.containers.app.probes.readiness.custom (2)

true

boolean

controllers.llama-server.containers.app.probes.readiness.enabled (2)

true

boolean

controllers.llama-server.containers.app.probes.readiness.spec.failureThreshold (2)

number

controllers.llama-server.containers.app.probes.readiness.spec.httpGet.path (2)

/health

string

controllers.llama-server.containers.app.probes.readiness.spec.httpGet.port (2)

number

controllers.llama-server.containers.app.probes.readiness.spec.initialDelaySeconds (2)

number

controllers.llama-server.containers.app.probes.readiness.spec.periodSeconds (2)

number

controllers.llama-server.containers.app.probes.startup.custom (2)

true

boolean

controllers.llama-server.containers.app.probes.startup.enabled (2)

true

boolean

controllers.llama-server.containers.app.probes.startup.spec.failureThreshold (2)

number

controllers.llama-server.containers.app.probes.startup.spec.httpGet.path (2)

/health

string

controllers.llama-server.containers.app.probes.startup.spec.httpGet.port (2)

number

controllers.llama-server.containers.app.probes.startup.spec.periodSeconds (2)

number

controllers.llama-server.containers.app.resources.limits.memory (2)

80Gi

string

controllers.llama-server.containers.app.resources.requests.cpu (2)

number, string

controllers.llama-server.containers.app.resources.requests.memory (2)

20Gi

string

controllers.llama-server.containers.app.securityContext.privileged (2)

true

boolean

controllers.llama-server.initContainers.model-download.command[] (2)

- /bin/sh
- -ec
- set -euo pipefail

if [ ! -s /models/gemma4-26b/gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf ] || [ ! -s /models/gemma4-26b/mmproj-F16.gguf ]; then
  pip install --no-cache-dir "huggingface_hub[hf_transfer]"
  mkdir -p /models/gemma4-26b
  rm -rf /models/gemma4-26b/.cache/huggingface/download/gemma-4-26B-A4B-UD-Q5_K_XL.gguf.lock
  hf download \
    unsloth/gemma-4-26B-A4B-it-GGUF \
    gemma-4-26B-A4B-it-UD-Q5_K_XL.gguf \
    mmproj-F16.gguf \
    --local-dir /models/gemma4-26b
  echo "Gemma-4-26B-A4B-Q5_K_XL download complete"
else
  echo "Gemma-4-26B-A4B-Q5_K_XL already downloaded, skipping"
fi

string

controllers.llama-server.initContainers.model-download.env.HF_HUB_ENABLE_HF_TRANSFER (2)

string

controllers.llama-server.initContainers.model-download.envFrom[].secretRef.name (2)

huggingface

string

controllers.llama-server.initContainers.model-download.image.repository (2)

python

string

controllers.llama-server.initContainers.model-download.image.tag (2)

3.14-slim

string

controllers.llama-server.initContainers.model-download.resources.limits.memory (2)

10Gi

string

controllers.llama-server.initContainers.model-download.resources.requests.cpu (2)

250m

string

controllers.llama-server.initContainers.model-download.resources.requests.memory (2)

512Mi

string

controllers.app.annotations."reloader.stakater.com/auto" (1)

true

string

controllers.app.containers.app.args[] (1)

- --host
- "0.0.0.0"
- --port
- "8080"
- --models-preset
- /app/models.ini
- --models-max
- "1"
- --parallel
- "1"
- --metrics
- --slots

string

controllers.app.containers.app.env.LLAMA_CACHE (1)

/cache

string

controllers.app.containers.app.env.TZ (1)

${TIMEZONE}

string

controllers.app.containers.app.image.repository (1)

ghcr.io/ggml-org/llama.cpp

string

controllers.app.containers.app.image.tag (1)

server-cuda13@sha256:1b3694b9a8f68b95e4667c8b598c3e46b69dc2702ab227ebe2b2bd2282bb34d6

string

controllers.app.containers.app.probes.liveness.custom (1)

true

boolean

controllers.app.containers.app.probes.liveness.enabled (1)

true

boolean

controllers.app.containers.app.probes.liveness.spec.failureThreshold (1)

number

controllers.app.containers.app.probes.liveness.spec.httpGet.path (1)

/health

string

controllers.app.containers.app.probes.liveness.spec.httpGet.port (1)

number

controllers.app.containers.app.probes.liveness.spec.initialDelaySeconds (1)

number

controllers.app.containers.app.probes.liveness.spec.periodSeconds (1)

number

controllers.app.containers.app.probes.liveness.spec.timeoutSeconds (1)

number

controllers.app.containers.app.probes.readiness.custom (1)

true

boolean

controllers.app.containers.app.probes.readiness.enabled (1)

true

boolean

controllers.app.containers.app.probes.readiness.spec.failureThreshold (1)

number

controllers.app.containers.app.probes.readiness.spec.httpGet.path (1)

/health

string

controllers.app.containers.app.probes.readiness.spec.httpGet.port (1)

number

controllers.app.containers.app.probes.readiness.spec.initialDelaySeconds (1)

number

controllers.app.containers.app.probes.readiness.spec.periodSeconds (1)

number

controllers.app.containers.app.probes.readiness.spec.timeoutSeconds (1)

number

controllers.app.containers.app.probes.startup.custom (1)

true

boolean

controllers.app.containers.app.probes.startup.enabled (1)

true

boolean

controllers.app.containers.app.probes.startup.spec.failureThreshold (1)

number

controllers.app.containers.app.probes.startup.spec.httpGet.path (1)

/health

string

controllers.app.containers.app.probes.startup.spec.httpGet.port (1)

number

controllers.app.containers.app.probes.startup.spec.initialDelaySeconds (1)

number

controllers.app.containers.app.probes.startup.spec.periodSeconds (1)

number

controllers.app.containers.app.probes.startup.spec.timeoutSeconds (1)

number

controllers.app.containers.app.resources.limits.cpu (1)

number

controllers.app.containers.app.resources.limits.memory (1)

26Gi

string

controllers.app.containers.app.resources.limits."nvidia.com/gpu" (1)

number

controllers.app.containers.app.resources.requests.cpu (1)

number

controllers.app.containers.app.resources.requests.memory (1)

6Gi

string

controllers.app.containers.app.resources.requests."nvidia.com/gpu" (1)

number

defaultPodOptions.hostIPC (2)

true

boolean

defaultPodOptions.nodeSelector."topology.kubernetes.io/gpus" (2)

amd

string

defaultPodOptions.tolerations[].effect (2)

NoSchedule

string

defaultPodOptions.tolerations[].key (2)

llm-workload

string

defaultPodOptions.tolerations[].operator (2)

Exists

string

defaultPodOptions.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[].matchExpressions[].key (1)

nvidia.com/gpu

string

defaultPodOptions.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[].matchExpressions[].operator (1)

In

string

defaultPodOptions.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[].matchExpressions[].values[] (1)

- true

string

defaultPodOptions.runtimeClassName (1)

nvidia

string

persistence.dev-dri.globalMounts[].path (2)

/dev/dri

string

persistence.dev-dri.hostPath (2)

/dev/dri

string

persistence.dev-dri.type (2)

hostPath

string

persistence.dev-kfd.globalMounts[].path (2)

/dev/kfd

string

persistence.dev-kfd.hostPath (2)

/dev/kfd

string

persistence.dev-kfd.type (2)

hostPath

string

persistence.models.existingClaim (2)

llm-models

string

persistence.models.globalMounts[].path (2)

/models

string

persistence.cache.enabled (1)

true

boolean

persistence.cache.existingClaim (1)

llama-server

string

persistence.cache.globalMounts[].path (1)

/cache

string

persistence.config.globalMounts[].path (1)

/app/models.ini

string

persistence.config.globalMounts[].readOnly (1)

true

boolean

persistence.config.globalMounts[].subPath (1)

models.ini

string

persistence.config.name (1)

llama-server-config

string

persistence.config.type (1)

configMap

string

route.app.hostnames[] (1)

- {{ .Release.Name }}.${SECRET_DOMAIN}

string

route.app.parentRefs[].name (1)

envoy-internal

string

route.app.parentRefs[].namespace (1)

network

string

route.app.rules[].backendRefs[].identifier (1)

app

string

route.app.rules[].backendRefs[].port (1)

number

llama-server helm

Install

Examples

Top Repositories (2 out of 3)

Values