# Default configuration values. The key layout (name overrides plus
# per-component image/service/securityContext sections) suggests these are
# Helm chart values for h2oGPT and its optional inference backends
# (tgi, vllm, lmdeploy).
#
# NOTE(review): the nesting below was reconstructed from a copy whose line
# breaks and indentation had been collapsed. Confirm against the consuming
# templates, in particular:
#   - `hfSecret` is placed as a sibling of `overrideConfig` (tgi, lmdeploy);
#   - `secret`, `modelLock`, `openAIAzure`, `openAI` and `replicate` are
#     placed under `h2ogpt.externalLLM`.

nameOverride: ""
fullnameOverride: ""
namespaceOverride: ""

h2ogpt:
  enabled: true
  stack:
    # -- Run h2oGPT and vLLM on same pod.
    enabled: false
  replicaCount: 1
  imagePullSecrets:
  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    tag:
    pullPolicy: IfNotPresent
  initImage:
    repository:
    tag:
    pullPolicy:

  # extra volumes, for more certs, mount under /etc/ssl/more-certs
  extraVolumes: []
  extraVolumeMounts: []

  podAffinity:
    # -- Set hostname and zone to true for pod affinity rules based on hostname and zone.
    # hostname:
    # zone:

  storage:
    size: 128Gi
    class:
    useEphemeral: true

  externalLLM:
    enabled: false
    secret:

    modelLock:

    openAIAzure:
      enabled: false

    openAI:
      enabled: false

    replicate:
      enabled: false

  visionModels:
    enabled: false
    # -- Visible vision models, the vision model itself needs to be set via modelLock or base_model
    # -- Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5']
    visibleModels: []
    rotateAlignResizeImage: false

  # -- Example configs to use when not using Model Lock and External LLM
  # overrideConfig:
  #   base_model: h2oai/h2ogpt-4096-llama2-7b-chat
  #   use_safetensors: True
  #   prompt_type: llama2
  #   save_dir: /workspace/save/
  #   use_gpu_id: False
  #   score_model: None
  #   max_max_new_tokens: 2048
  #   max_new_tokens: 1024

  overrideConfig:
    visible_login_tab: false
    visible_system_tab: false
    visible_models_tab: false
    visible_hosts_tab: false
    # change below to valid vision model or remove this entry
    # visible_vision_models: "['OpenGVLab/InternVL-Chat-V1-5']"
    rotate_align_resize_image: false
    concurrency_count: 100
    top_k_docs_max_show: 100
    num_async: 10
    # change below to valid directory or remove this entry
    # save_dir: "/docker_logs"
    # Quoted on purpose: the value is the literal string "None", not a YAML null.
    score_model: "None"
    enable_tts: false
    enable_stt: false
    enable_transcriptions: false
    embedding_gpu_id: "cpu"
    hf_embedding_model: "fake"
    openai_server: true
    share: false
    enforce_h2ogpt_api_key: true
    enforce_h2ogpt_ui_key: false
    # change to something secure for ui access to backend
    # h2ogpt_api_keys: "['api_key_change_me']"
    metadata_in_context: ""
    # change or remove if using model hub
    # use_auth_token: "hf_xxxxx"
    # change below to first visible model or remove this entry
    # visible_models: "['mistralai/Mistral-7B-Instruct-v0.3']"
    # change so ui or api cannot access without this password
    # admin_pass: "admin_password_change_me"

  service:
    type: NodePort
    webPort: 80
    openaiPort: 5000
    functionPort: 5002
    agentsPort: 5004
    gptPort: 8888
    webServiceAnnotations: {}

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
    runAsNonRoot: true
    runAsUser:
    runAsGroup:
    fsGroup:

  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    seccompProfile:
      type: RuntimeDefault

  resources:
  nodeSelector:
  tolerations:

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

tgi:
  enabled: false
  replicaCount: 1

  image:
    repository: ghcr.io/huggingface/text-generation-inference
    # Quoted so the version can never be re-typed as a number.
    tag: "0.9.3"
    pullPolicy: IfNotPresent

  podAffinity:
    # -- Set hostname and zone to true for pod affinity rules based on hostname and zone.
    # hostname:
    # zone:

  storage:
    size: 512Gi
    class:
    useEphemeral: true

  overrideConfig:
  hfSecret:
  containerArgs:

  service:
    type: ClusterIP
    port: 8080

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
  securityContext:

  resources:
  nodeSelector:
  tolerations:

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

vllm:
  enabled: false
  replicaCount: 1

  image:
    repository: vllm/vllm-openai
    tag: latest
    pullPolicy: IfNotPresent

  podAffinity:
    # -- Set hostname and zone to true for pod affinity rules based on hostname and zone.
    # hostname:
    # zone:

  imagePullSecrets:

  storage:
    size: 512Gi
    class:
    useEphemeral: true

  overrideConfig:

  # Every entry is a quoted string: container args are a list of strings,
  # so numeric-looking values must not be parsed as YAML integers.
  containerArgs:
    - "--model"
    - "h2oai/h2ogpt-4096-llama2-7b-chat"
    - "--tokenizer"
    - "hf-internal-testing/llama-tokenizer"
    - "--tensor-parallel-size"
    - "2"
    - "--seed"
    - "1234"
    - "--trust-remote-code"

  service:
    type: ClusterIP
    port: 5000

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
    runAsNonRoot: true
    runAsUser:
    runAsGroup:
    fsGroup:

  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    # NOTE(review): left empty here, while h2ogpt.securityContext sets
    # `type: RuntimeDefault` — confirm whether that is intentional.
    seccompProfile:

  env:
    VLLM_NO_USAGE_STATS: "1"
    DO_NOT_TRACK: "1"

  resources:
  nodeSelector:
  tolerations:

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

lmdeploy:
  enabled: false
  replicaCount: 1

  image:
    repository: gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy
    tag:
    pullPolicy: IfNotPresent

  podAffinity:
    # -- Set hostname and zone to true for pod affinity rules based on hostname and zone.
    # hostname:
    # zone:

  storage:
    size: 512Gi
    class:
    useEphemeral: true

  overrideConfig:
  hfSecret:

  containerArgs:
    - "OpenGVLab/InternVL-Chat-V1-5"

  service:
    type: ClusterIP
    port: 23333

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
  securityContext:

  resources:
  nodeSelector:
  tolerations:

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

# -- CA certs
caCertificates: ""