# Source: aiben/helm/h2ogpt-chart/values.yaml
# Uploaded by abugaber using huggingface_hub (revision 3943768, verified).
# -- Chart-wide naming overrides; all three default to the empty string.
# NOTE(review): presumably consumed by the chart's name/fullname/namespace
# helpers — confirm against the templates.
nameOverride: ""
fullnameOverride: ""
namespaceOverride: ""
# -- h2oGPT application settings.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped — confirm it against the chart templates.
h2ogpt:
  enabled: true
  stack:
    # -- Run h2oGPT and vLLM on same pod.
    enabled: false
  replicaCount: 1
  imagePullSecrets:
  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    tag:
    pullPolicy: IfNotPresent
  initImage:
    repository:
    tag:
    pullPolicy:

  # -- Extra volumes; for more certs, mount under /etc/ssl/more-certs.
  extraVolumes: []
  extraVolumeMounts: []

  # -- Set hostname and zone to true for pod affinity rules based on hostname and zone.
  podAffinity:
    # hostname:
    # zone:

  storage:
    size: 128Gi
    class:
    useEphemeral: true

  externalLLM:
    enabled: false
    secret:
    modelLock:
    # NOTE(review): the provider toggles below are assumed to be children of
    # externalLLM — confirm against the chart templates.
    openAIAzure:
      enabled: false
    openAI:
      enabled: false
    replicate:
      enabled: false

  visionModels:
    enabled: false
    # -- Visible vision models; the vision model itself needs to be set via modelLock or base_model.
    # -- Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5']
    visibleModels: []
    rotateAlignResizeImage: false

  # -- Example configs to use when not using Model Lock and External LLM
  # overrideConfig:
  #   base_model: h2oai/h2ogpt-4096-llama2-7b-chat
  #   use_safetensors: True
  #   prompt_type: llama2
  #   save_dir: /workspace/save/
  #   use_gpu_id: False
  #   score_model: None
  #   max_max_new_tokens: 2048
  #   max_new_tokens: 1024
  overrideConfig:
    visible_login_tab: false
    visible_system_tab: false
    visible_models_tab: false
    visible_hosts_tab: false
    # change below to valid vision model or remove this entry
    # visible_vision_models: "['OpenGVLab/InternVL-Chat-V1-5']"
    rotate_align_resize_image: false
    concurrency_count: 100
    top_k_docs_max_show: 100
    num_async: 10
    # change below to valid directory or remove this entry
    # save_dir: "/docker_logs"
    score_model: "None"
    enable_tts: false
    enable_stt: false
    enable_transcriptions: false
    embedding_gpu_id: "cpu"
    hf_embedding_model: "fake"
    openai_server: true
    share: false
    enforce_h2ogpt_api_key: true
    enforce_h2ogpt_ui_key: false
    # change to something secure for ui access to backend
    # h2ogpt_api_keys: "['api_key_change_me']"
    metadata_in_context: ""
    # change or remove if using model hub
    # use_auth_token: "hf_xxxxx"
    # change below to first visible model or remove this entry
    # visible_models: "['mistralai/Mistral-7B-Instruct-v0.3']"
    # change so ui or api cannot access without this password
    # admin_pass: "admin_password_change_me"

  service:
    type: NodePort
    webPort: 80
    openaiPort: 5000
    functionPort: 5002
    agentsPort: 5004
    gptPort: 8888
    webServiceAnnotations: {}

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
    runAsNonRoot: true
    runAsUser:
    runAsGroup:
    fsGroup:

  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    seccompProfile:
      type: RuntimeDefault

  resources:
  nodeSelector:
  tolerations:

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}
# -- Settings for the text-generation-inference (TGI) component; disabled by default.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped — confirm it against the chart templates.
tgi:
  enabled: false
  replicaCount: 1
  image:
    repository: ghcr.io/huggingface/text-generation-inference
    # Quoted so the version-like tag is always read as a string.
    tag: "0.9.3"
    pullPolicy: IfNotPresent

  # -- Set hostname and zone to true for pod affinity rules based on hostname and zone.
  podAffinity:
    # hostname:
    # zone:

  storage:
    size: 512Gi
    class:
    useEphemeral: true

  overrideConfig:
  hfSecret:
  containerArgs:

  service:
    type: ClusterIP
    port: 8080

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
  securityContext:

  resources:
  nodeSelector:
  tolerations:

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}
# -- Settings for the vLLM OpenAI-compatible server component; disabled by default.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped — confirm it against the chart templates.
vllm:
  enabled: false
  replicaCount: 1
  image:
    repository: vllm/vllm-openai
    # NOTE(review): "latest" is a floating tag; consider pinning a version.
    tag: latest
    pullPolicy: IfNotPresent

  # -- Set hostname and zone to true for pod affinity rules based on hostname and zone.
  podAffinity:
    # hostname:
    # zone:

  imagePullSecrets:

  storage:
    size: 512Gi
    class:
    useEphemeral: true

  overrideConfig:

  # Flag/value pairs for the container. Numeric values are quoted because
  # Kubernetes container args must be strings.
  containerArgs:
    - "--model"
    - h2oai/h2ogpt-4096-llama2-7b-chat
    - "--tokenizer"
    - hf-internal-testing/llama-tokenizer
    - "--tensor-parallel-size"
    - "2"
    - "--seed"
    - "1234"
    - "--trust-remote-code"

  service:
    type: ClusterIP
    port: 5000

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
    runAsNonRoot: true
    runAsUser:
    runAsGroup:
    fsGroup:

  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    # NOTE(review): left empty here, whereas h2ogpt.securityContext sets
    # seccompProfile.type to RuntimeDefault — confirm this is intentional.
    seccompProfile:

  # Values are quoted so they stay strings.
  env:
    VLLM_NO_USAGE_STATS: "1"
    DO_NOT_TRACK: "1"

  resources:
  nodeSelector:
  tolerations:

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}
# -- Settings for the LMDeploy component; disabled by default.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped — confirm it against the chart templates.
lmdeploy:
  enabled: false
  replicaCount: 1
  image:
    repository: gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy
    tag:
    pullPolicy: IfNotPresent

  # -- Set hostname and zone to true for pod affinity rules based on hostname and zone.
  podAffinity:
    # hostname:
    # zone:

  storage:
    size: 512Gi
    class:
    useEphemeral: true

  overrideConfig:
  hfSecret:
  containerArgs:
    - "OpenGVLab/InternVL-Chat-V1-5"

  service:
    type: ClusterIP
    port: 23333

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
  securityContext:

  resources:
  nodeSelector:
  tolerations:

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}
# -- CA certs
# Defaults to the empty string.
# NOTE(review): the expected format (e.g. a PEM bundle) is not shown here —
# confirm against the chart templates.
caCertificates: ""