diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000000000000000000000000000000000..472338f843d024e95386b6c4998a74a217d6f46f
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,17 @@
+.git
+.npm
+.dockerignore
+.pytest_cache
+.cache
+.local
+.github
+.nv
+.benchmarks
+.bash_history
+.gitignore
+h2ogpt.egg-info
+venv
+build
+dist
+prebuilt_deps
+Dockerfile
\ No newline at end of file
diff --git a/.env b/.env
new file mode 100644
index 0000000000000000000000000000000000000000..816579700495d96cd927d7be24f2ded059037ae4
--- /dev/null
+++ b/.env
@@ -0,0 +1,16 @@
+
+# H2OGPT
+
+H2OGPT_PORT=7860
+H2OGPT_BASE_MODEL=h2oai/h2ogpt-4096-llama2-7b-chat
+H2OGPT_ARGS="/workspace/generate.py --base_model=${H2OGPT_BASE_MODEL} --use_safetensors=True --prompt_type=llama2 --save_dir=/workspace/save/ --use_gpu_id=False --score_model=None --max_max_new_tokens=2048 --max_new_tokens=1024"
+
+# VLLM
+
+VLLM_TOKENIZER=hf-internal-testing/llama-tokenizer
+H2OGPT_VLLM_ARGS="--model=${H2OGPT_BASE_MODEL} --tokenizer=${VLLM_TOKENIZER} --tensor-parallel-size=2 --seed=1234 --trust-remote-code --download-dir=/workspace/.cache/huggingface/hub"
+
+# CPU models
+
+MODEL_PATH_LLAMA=https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf
+H2OGPT_CPU_ARGS="/workspace/generate.py --base_model=llama --model_path_llama=${MODEL_PATH_LLAMA} --max_seq_len=4096"
diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..5348a6019e62d392f63de57f81844cfff494a53e 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+data/demo.png filter=lfs diff=lfs merge=lfs -text
+docs/aws_sagemaker_endpoint_setup.pdf filter=lfs diff=lfs merge=lfs -text
+tests/CityofTshwaneWater.pdf filter=lfs diff=lfs merge=lfs -text
+tests/ocr2.png filter=lfs diff=lfs merge=lfs -text
diff --git a/.github/workflows/python-package-publish.yml b/.github/workflows/python-package-publish.yml
new file mode 100644
index 0000000000000000000000000000000000000000..885da0c9c3e5ad6c2f4937615f0fc878b6f6e12f
--- /dev/null
+++ b/.github/workflows/python-package-publish.yml
@@ -0,0 +1,57 @@
+name: Build & Publish h2oGPT Python wheel to PYPI
+
+on:
+ workflow_dispatch:
+ inputs:
+ pypi-index:
+ type: choice
+        description: PyPI index to publish to
+ required: true
+ default: Test-PyPI
+ options:
+ - PyPI
+ - Test-PyPI
+ version:
+ description: |
+          Override the current version of the Python package for dev purposes when uploading to Test-PyPI
+ type: string
+
+jobs:
+ build_and_upload:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3.5.3
+
+ - uses: actions/setup-python@v4
+ with:
+ python-version: '3.10'
+
+ - name: Install Dependencies
+ run: |
+ python3.10 -m pip install --upgrade pip
+ python3.10 -m pip install setuptools wheel twine --upgrade
+
+ - name: Modify Version
+ if: ${{ inputs.version != ''}}
+ run: |
+ echo ${{ inputs.version}} > version.txt
+ echo "h2ogpt-wheel-version = $(cat version.txt)"
+
+ - name: Build Wheel
+ run: make clean dist
+
+ - name: Publish to Test-PyPI
+ if: ${{ inputs.pypi-index == 'Test-PyPI' }}
+ run: |
+ twine upload -r testpypi dist/*
+ env:
+ TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+ TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+
+ - name: Publish to PyPI
+ if: ${{ inputs.pypi-index == 'PyPI' }}
+ run: |
+ twine upload dist/*
+ env:
+ TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+ TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
diff --git a/.github/workflows/snyk-scan.yml b/.github/workflows/snyk-scan.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3ac2eb7fda343301f1bcdb6f514fabb02ed93058
--- /dev/null
+++ b/.github/workflows/snyk-scan.yml
@@ -0,0 +1,76 @@
+name: Snyk Security Vulnerability Scan
+
+on:
+ workflow_dispatch:
+ pull_request:
+ push:
+ tags:
+ - 'v[0-9]+.[0-9]+.[0-9]+'
+ branches:
+ - main
+
+jobs:
+ snyk_scan_test:
+ if: ${{ github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch' }}
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@master
+ - uses: snyk/actions/setup@master
+
+ - uses: actions/setup-python@v4
+ with:
+ python-version: '3.10'
+
+ - name: Check changed Deps files
+ uses: tj-actions/changed-files@v35
+ id: changed-files
+ with:
+          files: | # This will match all files with the patterns below
+ requirements.txt
+
+ - name: Scan python dependencies
+ if: contains(steps.changed-files.outputs.all_changed_and_modified_files, 'requirements.txt')
+ env:
+ SNYK_TOKEN: '${{ secrets.SNYK_TOKEN }}'
+ run: |
+ head -n 41 requirements.txt > temp-requirements.txt #remove test deps
+ python3.10 -m pip install -r temp-requirements.txt
+ snyk test \
+ -d \
+ --file=temp-requirements.txt \
+ --package-manager=pip \
+ --command=python3.10 \
+ --skip-unresolved \
+ --severity-threshold=high
+
+ snyk_scan_monitor:
+ if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch'}}
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@master
+ - uses: snyk/actions/setup@master
+
+ - uses: actions/setup-python@v4
+ with:
+ python-version: '3.10'
+
+ - name: Extract github branch/tag name
+ shell: bash
+ run: echo "ref=$(echo ${GITHUB_REF##*/})" >> $GITHUB_OUTPUT
+ id: extract_ref
+
+ - name: Monitor python dependencies
+ env:
+ SNYK_TOKEN: '${{ secrets.SNYK_TOKEN }}'
+ run: |
+ head -n 41 requirements.txt > temp-requirements.txt #remove test deps
+ python3.10 -m pip install -r temp-requirements.txt
+ snyk monitor \
+ -d \
+ --file=temp-requirements.txt \
+ --command=python3.10 \
+ --package-manager=pip \
+ --skip-unresolved \
+ --remote-repo-url=h2ogpt/${{ steps.extract_ref.outputs.ref }} \
+ --org=h2o-gpt \
+ --project-name=H2O-GPT/h2ogpt/${{ steps.extract_ref.outputs.ref }}/requirements.txt
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..8a6fd7650a08329e01a29e3515a2e12730c066cd
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,44 @@
+out/
+7B/
+13B/
+__pycache__/
+checkpoint**
+minimal-llama**
+upload.py
+lora-**
+*ckpt
+wandb
+evaluate.py
+test_data.json
+todo.txt
+.neptune/
+*.bin
+db_dir_UserData
+temp_path_do_doc1
+offline_folder
+flagged_data_points
+.pytest_cache
+user_path
+user_path_test
+build
+h2ogpt.egg-info
+dist
+.idea
+.cache
+.local
+.bash_history
+.benchmarks
+Dockerfile-runner.dockerfile
+build_info.txt
+prebuilt_deps
+Dockerfile_deps
+
+# IDEs
+.idea/
+
+# virtual envs
+venv
+
+# Mac one click installer
+Tesseract-OCR/
+poppler/
diff --git a/.ipynb_checkpoints/generate-checkpoint.py b/.ipynb_checkpoints/generate-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..c2c333e6a29beb449674746cd2f333f55c3fe34b
--- /dev/null
+++ b/.ipynb_checkpoints/generate-checkpoint.py
@@ -0,0 +1,16 @@
+import os
+import sys
+
+if os.path.dirname(os.path.abspath(__file__)) not in sys.path:
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+from src.gen import main
+from src.utils import H2O_Fire
+
+
+def entrypoint_main():
+ H2O_Fire(main)
+
+
+if __name__ == "__main__":
+ entrypoint_main()
diff --git a/.ipynb_checkpoints/requirements-checkpoint.txt b/.ipynb_checkpoints/requirements-checkpoint.txt
new file mode 100644
index 0000000000000000000000000000000000000000..446c219d2e74588453e534bf25a532eda2252b04
--- /dev/null
+++ b/.ipynb_checkpoints/requirements-checkpoint.txt
@@ -0,0 +1,77 @@
+# for generate (gradio server) and finetune
+datasets==2.13.0
+sentencepiece==0.1.99
+gradio==3.50.2
+sse_starlette==1.8.2
+huggingface_hub==0.19.4
+appdirs==1.4.4
+fire==0.5.0
+docutils==0.20.1
+torch==2.1.2; sys_platform != "darwin" and platform_machine != "arm64"
+torch==2.1.2; sys_platform == "darwin" and platform_machine == "arm64"
+evaluate==0.4.0
+rouge_score==0.1.2
+sacrebleu==2.3.1
+scikit-learn==1.2.2
+# optional (need to uncomment code in gradio_runner.py for import of better_profanity)
+# alt-profanity-check==1.2.2
+# better-profanity==0.7.0
+numpy==1.24.3
+pandas==2.0.2
+matplotlib==3.7.1
+loralib==0.1.1
+bitsandbytes==0.41.3
+accelerate==0.25.0
+peft==0.7.1
+transformers==4.36.2
+tokenizers==0.15.0
+APScheduler==3.10.1
+
+# optional for generate
+pynvml==11.5.0
+psutil==5.9.5
+boto3==1.26.101
+botocore==1.29.101
+
+# optional for finetune
+tensorboard==2.13.0
+neptune==1.2.0
+
+# for gradio client
+gradio_client==0.6.1
+beautifulsoup4==4.12.2
+markdown==3.4.3
+
+# data and testing
+pytest==7.2.2
+pytest-xdist==3.2.1
+nltk==3.8.1
+textstat==0.7.3
+# pandoc==2.3
+pypandoc==1.11; sys_platform == "darwin" and platform_machine == "arm64"
+pypandoc_binary==1.11; platform_machine == "x86_64"
+pypandoc_binary==1.11; platform_system == "Windows"
+python-magic-bin==0.4.14; platform_system == "Windows"
+openpyxl==3.1.2
+lm_dataformat==0.0.20
+bioc==2.0
+
+# falcon
+einops==0.6.1
+instructorembedding==1.0.1
+
+# for gpt4all .env file, but avoid worrying about imports
+python-dotenv==1.0.0
+
+text-generation==0.6.1
+# for tokenization when don't have HF tokenizer
+tiktoken==0.5.2
+
+requests>=2.31.0
+httpx==0.24.1
+urllib3>=1.26.16
+filelock>=3.12.2
+joblib>=1.3.1
+tqdm>=4.65.0
+tabulate>=0.9.0
+packaging>=23.1
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..fbd7ba5e7f96962063b9ca2cdeb032a4b5b6b94b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,35 @@
+# devel image needed because bitsandbytes requires libcudart.so; otherwise the runtime image is sufficient
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+ENV PATH="/h2ogpt_conda/bin:${PATH}"
+ARG PATH="/h2ogpt_conda/bin:${PATH}"
+
+ENV HOME=/workspace
+ENV CUDA_HOME=/usr/local/cuda-11.8
+ENV VLLM_CACHE=/workspace/.vllm_cache
+ENV TIKTOKEN_CACHE_DIR=/workspace/tiktoken_cache
+
+WORKDIR /workspace
+
+COPY . /workspace/
+
+RUN cd /workspace && ./docker_build_script_ubuntu.sh
+
+RUN chmod -R a+rwx /workspace
+
+ARG user=h2ogpt
+ARG group=h2ogpt
+ARG uid=1000
+ARG gid=1000
+
+RUN groupadd -g ${gid} ${group} && useradd -u ${uid} -g ${group} -s /bin/bash ${user}
+
+EXPOSE 8888
+EXPOSE 7860
+EXPOSE 5000
+
+USER h2ogpt
+
+ENTRYPOINT ["python3.10"]
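+
+# Example run (a sketch; the image tag and cache mount are assumptions, see docs/README_DOCKER.md for the documented workflow):
+#   docker run --runtime=nvidia -p 7860:7860 -v $HOME/.cache:/workspace/.cache \
+#     <image> /workspace/generate.py --base_model=h2oai/h2ogpt-4096-llama2-7b-chat --prompt_type=llama2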
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..25ae4110625608b553d170b6bb5c439215503afe
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..b0f04ecc910974fbd7aaffdfa07c0852279b7587
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,116 @@
+all: clean dist
+
+PACKAGE_VERSION := `cat version.txt | tr -d '\n'`
+BUILD_TAG := $(shell git describe --always --dirty)
+DOCKER_TEST_IMAGE := harbor.h2o.ai/h2ogpt/test-image:$(BUILD_TAG)
+PYTHON_BINARY ?= `which python`
+DEFAULT_MARKERS ?= "not need_tokens and not need_gpu"
+
+.PHONY: venv dist test publish docker_build build_info.txt
+
+clean:
+ rm -rf dist build h2ogpt.egg-info
+
+venv:
+ $(PYTHON_BINARY) -m virtualenv -p $(PYTHON_BINARY) venv
+
+install:
+ $(PYTHON_BINARY) -m pip install dist/h2ogpt-$(PACKAGE_VERSION)-py3-none-any.whl
+
+install-%:
+ $(PYTHON_BINARY) -m pip install dist/h2ogpt-$(PACKAGE_VERSION)-py3-none-any.whl[$*]
+
+dist:
+ $(PYTHON_BINARY) setup.py bdist_wheel
+
+test:
+ $(PYTHON_BINARY) -m pip install requirements-parser
+ $(PYTHON_BINARY) -m pytest tests --disable-warnings --junit-xml=test_report.xml -m "$(DEFAULT_MARKERS)"
+
+test_imports:
+ $(PYTHON_BINARY) -m pytest tests/test_imports.py --disable-warnings --junit-xml=test_report.xml -m "$(DEFAULT_MARKERS)"
+
+publish:
+ echo "Publishing not implemented yet."
+
+build_info.txt:
+ @rm -rf build_info.txt
+ @echo "commit=\"$(shell git rev-parse HEAD)\"" >> $@
+ @echo "branch=\"`git rev-parse HEAD | git branch -a --contains | grep -v detached | sed -e 's~remotes/origin/~~g' -e 's~^ *~~' | sort | uniq | tr '*\n' ' '`\"" >> $@
+ @echo "describe=\"`git describe --always --dirty`\"" >> $@
+ @echo "build_os=\"`uname -a`\"" >> $@
+ @echo "build_machine=\"`hostname`\"" >> $@
+ @echo "build_date=\"$(shell date "+%Y%m%d")\"" >> $@
+ @echo "build_user=\"`id -u -n`\"" >> $@
+ @echo "base_version=\"$(PACKAGE_VERSION)\"" >> $@
+
+git_hash.txt:
+ @echo "$(shell git rev-parse HEAD)" >> $@
+
+# Deprecated for now: 0.4.1 is not on PyPI; use the release binary wheel, which no longer has CUDA errors
+docker_build_deps:
+ @cp docker_build_script_ubuntu.sh docker_build_script_ubuntu.sh.back
+ @sed -i '/# Install prebuilt dependencies/,$$d' docker_build_script_ubuntu.sh
+ @docker build -t h2ogpt-deps-builder -f Dockerfile .
+ @mv docker_build_script_ubuntu.sh.back docker_build_script_ubuntu.sh
+ @mkdir -p prebuilt_deps
+ @docker run \
+ --rm \
+ -it \
+ --entrypoint bash \
+ --runtime nvidia \
+ -v `pwd`:/dot \
+ -v /etc/passwd:/etc/passwd:ro \
+ -v /etc/group:/etc/group:ro \
+ -u `id -u`:`id -g` \
+ h2ogpt-deps-builder -c " \
+ mkdir -p /dot/prebuilt_deps && cd /dot/prebuilt_deps && \
+ GITHUB_ACTIONS=true python3.10 -m pip install auto-gptq==0.4.2 --no-cache-dir --use-deprecated=legacy-resolver && \
+ python3.10 -m pip wheel auto-gptq==0.4.2 \
+ "
+ @docker run \
+ --rm \
+ -it \
+ --entrypoint bash \
+ -v `pwd`:/dot \
+ quay.io/pypa/manylinux2014_x86_64 -c " \
+ ln -s /usr/local/bin/python3.10 /usr/local/bin/python3 && cd /tmp && \
+ git clone https://github.com/h2oai/duckdb.git && \
+ cd duckdb && \
+ git checkout dcd8c1ffc53dd020623630efb99ba6a3a4cbc5ad && \
+ BUILD_PYTHON=1 make release && \
+ cd tools/pythonpkg && \
+ python3.10 setup.py bdist_wheel && \
+ cp dist/duckdb-0.*.whl /dot/prebuilt_deps \
+ "
+ s3cmd put prebuilt_deps/auto_gptq-0.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl s3://artifacts.h2o.ai/deps/h2ogpt/ && \
+ s3cmd setacl s3://artifacts.h2o.ai/deps/h2ogpt/auto_gptq-0.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl --acl-public
+ s3cmd put prebuilt_deps/duckdb-0.8.2.dev4026+gdcd8c1ffc5-cp310-cp310-linux_x86_64.whl s3://artifacts.h2o.ai/deps/h2ogpt/ && \
+ s3cmd setacl s3://artifacts.h2o.ai/deps/h2ogpt/duckdb-0.8.2.dev4026+gdcd8c1ffc5-cp310-cp310-linux_x86_64.whl --acl-public
+
+docker_build: build_info.txt
+ifeq ($(shell curl --connect-timeout 4 --write-out %{http_code} -sS --output /dev/null -X GET http://harbor.h2o.ai/api/v2.0/projects/h2ogpt/repositories/test-image/artifacts/$(BUILD_TAG)/tags),200)
+ @echo "Image already pushed to Harbor: $(DOCKER_TEST_IMAGE)"
+else
+ DOCKER_BUILDKIT=1 docker build -t $(DOCKER_TEST_IMAGE) -f Dockerfile .
+ docker push $(DOCKER_TEST_IMAGE)
+endif
+
+just_docker_build: build_info.txt
+ DOCKER_BUILDKIT=1 docker build -t $(DOCKER_TEST_IMAGE) -f Dockerfile .
+
+docker_build_runner: docker_build
+ -docker pull $(DOCKER_TEST_IMAGE)
+ docker tag $(DOCKER_TEST_IMAGE) gcr.io/vorvan/h2oai/h2ogpt-runtime:$(BUILD_TAG)
+ docker tag $(DOCKER_TEST_IMAGE) gcr.io/vorvan/h2oai/h2ogpt-runtime:$(PACKAGE_VERSION)
+ docker tag $(DOCKER_TEST_IMAGE) gcr.io/vorvan/h2oai/h2ogpt-runtime:latest
+ docker push gcr.io/vorvan/h2oai/h2ogpt-runtime:$(BUILD_TAG)
+ docker push gcr.io/vorvan/h2oai/h2ogpt-runtime:$(PACKAGE_VERSION)
+ docker push gcr.io/vorvan/h2oai/h2ogpt-runtime:latest
+ifdef BUILD_ID
+ docker tag $(DOCKER_TEST_IMAGE) gcr.io/vorvan/h2oai/h2ogpt-runtime:$(PACKAGE_VERSION)-$(BUILD_ID)
+ docker push gcr.io/vorvan/h2oai/h2ogpt-runtime:$(PACKAGE_VERSION)-$(BUILD_ID)
+endif
+
+print-%:
+ @echo $($*)
diff --git a/README.md b/README.md
index 7c52ecc5c44c9b501e22875cfac517c2c14b5403..c2b432e913a2d08e77a1c1d986ebbec22fe80933 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,317 @@
---
-title: Test
-emoji: ⚡
-colorFrom: yellow
-colorTo: yellow
+title: test
+app_file: generate.py
sdk: gradio
-sdk_version: 4.15.0
-app_file: app.py
-pinned: false
+sdk_version: 3.50.2
---
+# h2oGPT
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Turn ★ into ⭐ (top-right corner) if you like the project!
+
+Query and summarize your documents or just chat with local private GPT LLMs using h2oGPT, an Apache V2 open-source project.
+
+- **Private** offline database of any documents [(PDFs, Excel, Word, Images, Video Frames, Youtube, Audio, Code, Text, MarkDown, etc.)](docs/README_LangChain.md#supported-datatypes)
+ - **Persistent** database (Chroma, Weaviate, or in-memory FAISS) using accurate embeddings (instructor-large, all-MiniLM-L6-v2, etc.)
+ - **Efficient** use of context using instruct-tuned LLMs (no need for LangChain's few-shot approach)
+ - **Parallel** summarization and extraction, reaching an output of 80 tokens per second with the 13B LLaMa2 model
+ - **HYDE** (Hypothetical Document Embeddings) for enhanced retrieval based upon LLM responses
+- **Variety** of models supported (LLaMa2, Mistral, Falcon, Vicuna, WizardLM, etc.; with AutoGPTQ, 4-bit/8-bit, LoRA)
+ - **GPU** support from HF and LLaMa.cpp GGML models, and **CPU** support using HF, LLaMa.cpp, and GPT4ALL models
+ - **Attention Sinks** for [arbitrarily long](https://github.com/tomaarsen/attention_sinks) generation (LLaMa-2, Mistral, MPT, Pythia, Falcon, etc.)
+- **UI** or CLI with streaming of all models
+ - **Upload** and **View** documents through the UI (control multiple collaborative or personal collections)
+ - **Vision LLaVa** Model and **Stable Diffusion** Image Generation
+ - **Voice STT** using Whisper with streaming audio conversion
+  - **Voice TTS** using MIT-Licensed Microsoft Speech T5 with multiple voices and streaming audio conversion
+  - **Voice TTS** using MPL2-Licensed TTS including Voice Cloning and streaming audio conversion
+ - **AI Assistant Voice Control Mode** for hands-free control of h2oGPT chat
+ - **Bake-off** UI mode against many models at the same time
+ - **Easy Download** of model artifacts and control over models like LLaMa.cpp through the UI
+ - **Authentication** in the UI by user/password
+ - **State Preservation** in the UI by user/password
+- **Linux, Docker, macOS, and Windows** support
+ - [**Easy Windows Installer**](#windows-1011-64-bit-with-full-document-qa-capability) for Windows 10 64-bit (CPU/CUDA)
+ - [**Easy macOS Installer**](#macos-cpum1m2-with-full-document-qa-capability) for macOS (CPU/M1/M2)
+- **Inference Servers** support (HF TGI server, vLLM, Gradio, ExLLaMa, Replicate, OpenAI, Azure OpenAI, Anthropic)
+- **OpenAI-compliant**
+  - Server Proxy API (h2oGPT acts as a drop-in replacement for the OpenAI server)
+ - Python client API (to talk to Gradio server)
+- **Web-Search** integration with Chat and Document Q/A
+- **Agents** for Search, Document Q/A, Python Code, CSV frames (Experimental, best with OpenAI currently)
+- **Evaluate** performance using reward models
+- **Quality** maintained with over 1000 unit and integration tests taking over 4 GPU-hours
+
+### Get Started
+
+[![GitHub license](https://img.shields.io/github/license/NVIDIA/nvidia-docker?style=flat-square)](https://raw.githubusercontent.com/h2oai/h2ogpt/main/LICENSE)
+[![Linux](https://img.shields.io/badge/Linux-FCC624?style=for-the-badge&logo=linux&logoColor=black)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_LINUX.md)
+[![macOS](https://img.shields.io/badge/mac%20os-000000?style=for-the-badge&logo=macos&logoColor=F0F0F0)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_MACOS.md)
+[![Windows](https://img.shields.io/badge/Windows-0078D6?style=for-the-badge&logo=windows&logoColor=white)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_WINDOWS.md)
+[![Docker](https://img.shields.io/badge/docker-%230db7ed.svg?style=for-the-badge&logo=docker&logoColor=white)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_DOCKER.md)
+
+
+To quickly try out h2oGPT with limited document Q/A capability, create a fresh Python 3.10 environment and run:
+* CPU or MAC (M1/M2):
+ ```bash
+ # for windows/mac use "set" or relevant environment setting mechanism
+ export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
+ ```
+* Linux/Windows CUDA:
+ ```bash
+ # for windows/mac use "set" or relevant environment setting mechanism
+ export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu118"
+ ```
+Then run the following commands on any system:
+ ```bash
+ git clone https://github.com/h2oai/h2ogpt.git
+ cd h2ogpt
+ pip install -r requirements.txt
+ pip install -r reqs_optional/requirements_optional_langchain.txt
+ pip install -r reqs_optional/requirements_optional_gpt4all.txt
+ pip install -r reqs_optional/requirements_optional_langchain.urls.txt
+ # GPL, only run next line if that is ok:
+ # pip install -r reqs_optional/requirements_optional_langchain.gpllike.txt
+
+ python generate.py --base_model=TheBloke/zephyr-7B-beta-GGUF --prompt_type=zephyr --max_seq_len=4096
+ ```
+Next, go to your browser by visiting [http://127.0.0.1:7860](http://127.0.0.1:7860) or [http://localhost:7860](http://localhost:7860). Choose 13B for a better model than 7B.
+If you encounter issues with `llama-cpp-python` or other packages that try to compile and fail, try binary wheels for your platform as linked in the detailed instructions below. For AVX1 or AMD ROC systems, edit `reqs_optional/requirements_optional_gpt4all.txt` to choose valid packages.
+
+We recommend quantized models for most small-GPU systems, e.g. [LLaMa-2-7B-Chat-GGUF](https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf) for 9GB+ GPU memory, or larger models like [LLaMa-2-13B-Chat-GGUF](https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q6_K.gguf) if you have 16GB+ GPU memory.
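+
+For example, a GGUF model can be passed directly to `generate.py`. This is a sketch based on the same flags used in this repo's `.env` (`--base_model=llama`, `--model_path_llama`, `--max_seq_len`); swap in the 13B URL above if you have the memory:
+
+```bash
+# the model path may be a local file or a URL, as in this repo's .env
+python generate.py --base_model=llama \
+  --model_path_llama=https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf \
+  --prompt_type=llama2 --max_seq_len=4096
+```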
+
+---
+
+Note that for all platforms, some packages such as DocTR, Unstructured, BLIP, Stable Diffusion, etc. download models at runtime, which can appear to delay operations in the UI; the progress appears in the console logs.
+
+#### Windows 10/11 64-bit with full document Q/A capability
+ * One-Click Installer
+ * CPU or GPU: Download [h2oGPT Windows Installer](https://h2o-release.s3.amazonaws.com/h2ogpt/Jan2024/h2oGPT_0.0.1.exe) (1.3GB file)
+    * Once installed, feel free to change the start directory for the icon from `%HOMEDRIVE%\%HOMEPATH%` to (e.g.) `%HOMEDRIVE%\%HOMEPATH%\h2ogpt_data` so that all created files (like the database) go there. All saved paths are relative to this path.
+ * CPU: Click the h2oGPT icon in the Start menu. Give it about 15 seconds to open in a browser if many optional packages are included. By default, the browser will launch with the actual local IP address, not localhost.
+ * GPU: Before starting, run the following commands (replace `pseud` with your user):
+ ```
+ C:\Users\pseud\AppData\Local\Programs\h2oGPT\Python\python.exe -m pip uninstall -y torch
+ C:\Users\pseud\AppData\Local\Programs\h2oGPT\Python\python.exe -m pip install https://h2o-release.s3.amazonaws.com/h2ogpt/torch-2.1.2%2Bcu118-cp310-cp310-win_amd64.whl
+ ```
+ Now click the h2oGPT icon in the Start menu. Give it about 20 seconds to open in a browser if many optional packages are included. By default, the browser will launch with the actual local IP address, not localhost.
+ * To debug any issues, run the following (replace `pseud` with your user):
+ ```
+ C:\Users\pseud\AppData\Local\Programs\h2oGPT\Python\python.exe "C:\Users\pseud\AppData\Local\Programs\h2oGPT\h2oGPT.launch.pyw"
+ ```
+ Any start-up exceptions are appended to log, e.g. `C:\Users\pseud\h2ogpt_exception.log`.
+ * To control startup, tweak the python startup file, e.g. for user `pseud`: `C:\Users\pseud\AppData\Local\Programs\h2oGPT\pkgs\win_run_app.py`
+ * In this Python code, set ENVs anywhere before main_h2ogpt() is called
+        * E.g. `os.environ['name'] = 'value'`, e.g. `os.environ['n_jobs'] = '10'` (the value must always be a string); see the sketch at the end of this section.
+ * Environment variables can be changed, e.g.:
+ * `n_jobs`: number of cores for various tasks
+        * `OMP_NUM_THREADS`: thread count for LLaMa
+        * `CUDA_VISIBLE_DEVICES`: which GPUs are used. If you have multiple GPUs, we recommend setting this to a single fast GPU, e.g. `CUDA_VISIBLE_DEVICES=0`. Note that the UI cannot control which GPUs (or CPU mode) are used for LLaMa models.
+        * Any CLI argument from `python generate.py --help` can be set via an environment variable named `h2ogpt_x`, e.g. `h2ogpt_h2ocolors` set to `False`.
+        * Set the env `h2ogpt_server_name` to the actual IP address so the app is reachable on the LAN, e.g. set `h2ogpt_server_name` to `192.168.1.172`, and allow access through the firewall if Windows Defender is activated.
+    * One can tweak the installed h2oGPT code at, e.g., `C:\Users\pseud\AppData\Local\Programs\h2oGPT`.
+    * To terminate the app, go to the System tab, click Admin, and then click Shutdown h2oGPT.
+    * If startup fails, run from a console and check for errors, and kill any old Python processes.
+
+ * [Full Windows 10/11 Manual Installation Script](docs/README_WINDOWS.md)
+    * Single `.bat` file for installation (if you do not skip any optional packages, it takes about 9GB on disk).
+    * We recommend the base Conda env, which allows for DocTR; DocTR requires pygobject, which otherwise has no Windows support (except `msys2`, which cannot be used by h2oGPT).
+ * Also allows for the TTS package by Coqui, which is otherwise not currently enabled in the one-click installer.
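+
+For example, the environment controls described above might look like the following near the top of `win_run_app.py`. This is only a sketch (the installer's actual file layout may differ), using the variables listed in this section:
+
+```python
+# Set ENVs before main_h2ogpt() is called; values must always be strings.
+import os
+
+os.environ['n_jobs'] = '10'                          # cores for various tasks
+os.environ['OMP_NUM_THREADS'] = '8'                  # thread count for LLaMa (example value)
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'             # pin to a single fast GPU
+os.environ['h2ogpt_h2ocolors'] = 'False'             # any CLI arg from generate.py --help, prefixed with h2ogpt_
+os.environ['h2ogpt_server_name'] = '192.168.1.172'   # expose on the LAN instead of localhost
+
+# ... the installer's existing code then calls main_h2ogpt()
+```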
+
+---
+
+#### Linux (CPU/CUDA) with full document Q/A capability
+ * [Docker Build and Run Docs](docs/README_DOCKER.md)
+ * [Linux Manual Install and Run Docs](docs/README_LINUX.md)
+
+---
+
+#### macOS (CPU/M1/M2) with full document Q/A capability
+* One-click Installers (Experimental and subject to changes)
+
+ Nov 08, 2023
+ - [h2ogpt-osx-m1-cpu](https://h2o-release.s3.amazonaws.com/h2ogpt/Nov2023/h2ogpt-osx-m1-cpu)
+ - [h2ogpt-osx-m1-gpu](https://h2o-release.s3.amazonaws.com/h2ogpt/Nov2023/h2ogpt-osx-m1-gpu)
+
+ Download the runnable file and open it from the Finder. It will take a few minutes to unpack and run the application.
+ These one-click installers are experimental. Report any issues with steps to reproduce at https://github.com/h2oai/h2ogpt/issues.
+
+ **Note:** The app bundle is unsigned. If you experience any issues with running the app, run the following commands:
+ ```bash
+ $ xattr -dr com.apple.quarantine {file-path}/h2ogpt-osx-m1-gpu
+ $ chmod +x {file-path}/h2ogpt-osx-m1-gpu
+ ```
+* [macOS Manual Install and Run Docs](docs/README_MACOS.md)
+
+---
+
+#### Example Models
+* [Highest accuracy and speed](https://huggingface.co/h2oai/h2ogpt-4096-llama2-70b-chat) on 16-bit with TGI/vLLM using ~48GB/GPU when in use (4xA100 high concurrency, 2xA100 for low concurrency)
+* [Middle-range accuracy](https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2) on 16-bit with TGI/vLLM using ~45GB/GPU when in use (2xA100)
+* [Small memory profile with ok accuracy](https://huggingface.co/TheBloke/Llama-2-13B-Chat-GGUF) needs a 16GB GPU if fully offloaded to GPU
+* [Balanced accuracy and size](https://huggingface.co/h2oai/h2ogpt-4096-llama2-13b-chat) on 16-bit with TGI/vLLM using ~45GB/GPU when in use (1xA100)
+* [Smallest or CPU friendly](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF) needs 32GB of system RAM, or a 9GB GPU if fully offloaded to GPU
+* [Best for 4*A10G using g5.12xlarge](https://huggingface.co/TheBloke/Llama-2-70B-chat-AWQ): AWQ LLaMa 70B on 4*A10G with vLLM
+
+**GPU** mode requires CUDA support via torch and transformers. A 7B/13B model in 16-bit uses 14GB/26GB of GPU memory to store the weights (2 bytes per weight). Compression such as 4-bit precision (bitsandbytes, AWQ, GPTQ, etc.) can further reduce memory requirements down to less than 6GB when asking a question about your documents. (For more information, see [low-memory mode](docs/FAQ.md#low-memory-mode).)
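+
+As a quick sanity check of those numbers (a sketch, not part of any install step), weight memory is roughly the parameter count times bytes per weight:
+
+```python
+# Rough weight-only memory estimate; ignores activations, KV cache, and framework overhead.
+def weight_memory_gb(n_params_billion: float, bits_per_weight: int) -> float:
+    return n_params_billion * 1e9 * (bits_per_weight / 8) / 1e9
+
+print(weight_memory_gb(7, 16))    # 14.0 GB for 7B in 16-bit
+print(weight_memory_gb(13, 16))   # 26.0 GB for 13B in 16-bit
+print(weight_memory_gb(7, 4))     # 3.5 GB for 7B in 4-bit, consistent with <6GB total while in use
+```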
+
+**CPU** mode uses GPT4ALL and LLaMa.cpp, e.g. gpt4all-j, requiring about 14GB of system RAM in typical use.
+
+---
+
+### Live Demos
+- [![img-small.png](docs/img-small.png) Live h2oGPT Document Q/A Demo](https://gpt.h2o.ai/)
+- [🤗 Live h2oGPT Chat Demo 1](https://huggingface.co/spaces/h2oai/h2ogpt-chatbot)
+- [🤗 Live h2oGPT Chat Demo 2](https://huggingface.co/spaces/h2oai/h2ogpt-chatbot2)
+- [![](https://colab.research.google.com/assets/colab-badge.svg) h2oGPT CPU](https://colab.research.google.com/drive/13RiBdAFZ6xqDwDKfW6BG_-tXfXiqPNQe?usp=sharing)
+- [![](https://colab.research.google.com/assets/colab-badge.svg) h2oGPT GPU](https://colab.research.google.com/drive/143-KFHs2iCqXTQLI2pFCDiR69z0dR8iE?usp=sharing)
+
+### Inference Benchmarks for Summarization & Generation
+
+* [Benchmark results for Llama2](https://github.com/h2oai/h2ogpt/blob/main/benchmarks/perf.md)
+* [pytest to create benchmark results](https://github.com/h2oai/h2ogpt/blob/main/tests/test_perf_benchmarks.py)
+* [Raw benchmark results (JSON)](https://github.com/h2oai/h2ogpt/blob/main/benchmarks/perf.json)
+
+### Resources
+- [Discord](https://discord.gg/WKhYMWcVbq)
+- [Models (LLaMa-2, Falcon 40, etc.) at 🤗](https://huggingface.co/h2oai/)
+- [YouTube: 100% Offline ChatGPT Alternative?](https://www.youtube.com/watch?v=Coj72EzmX20)
+- [YouTube: Ultimate Open-Source LLM Showdown (6 Models Tested) - Surprising Results!](https://www.youtube.com/watch?v=FTm5C_vV_EY)
+- [YouTube: Blazing Fast Falcon 40b 🚀 Uncensored, Open-Source, Fully Hosted, Chat With Your Docs](https://www.youtube.com/watch?v=H8Dx-iUY49s)
+- [Technical Paper: https://arxiv.org/pdf/2306.08161.pdf](https://arxiv.org/pdf/2306.08161.pdf)
+
+### Partners
+
+- [Live Leaderboard](https://evalgpt.ai/) for GPT-4 Elo Evaluation of Instruct/Chat models with [h2o-LLM-eval](https://github.com/h2oai/h2o-LLM-eval).
+- Advanced fine-tuning with [H2O LLM Studio](https://github.com/h2oai/h2o-llmstudio)
+
+### Video Demo
+
+https://github.com/h2oai/h2ogpt/assets/2249614/2f805035-2c85-42fb-807f-fd0bca79abc6
+
+YouTube 4K version: https://www.youtube.com/watch?v=_iktbj4obAI
+
+### Docs Guide
+
+* [Get Started](#get-started)
+ * [Linux (CPU or CUDA)](docs/README_LINUX.md)
+ * [macOS (CPU or M1/M2)](docs/README_MACOS.md)
+ * [Windows 10/11 (CPU or CUDA)](docs/README_WINDOWS.md)
+ * [GPU (CUDA, AutoGPTQ, exllama) Running Details](docs/README_GPU.md)
+ * [CPU Running Details](docs/README_CPU.md)
+ * [CLI chat](docs/README_CLI.md)
+ * [Gradio UI](docs/README_ui.md)
+ * [Client API (Gradio, OpenAI-Compliant)](docs/README_CLIENT.md)
+ * [Inference Servers (HF TGI server, vLLM, Gradio, ExLLaMa, Replicate, OpenAI, Azure OpenAI)](docs/README_InferenceServers.md)
+ * [Python Wheel](docs/README_WHEEL.md)
+ * [Offline Installation](docs/README_offline.md)
+ * [Low Memory](docs/FAQ.md#low-memory-mode)
+ * [Docker](docs/README_DOCKER.md)
+* [LangChain Document Support](docs/README_LangChain.md)
+* [Compare to PrivateGPT et al.](docs/README_LangChain.md#what-is-h2ogpts-langchain-integration-like)
+* [Roadmap](#roadmap)
+* [Development](#development)
+* [Help](#help)
+ * [LangChain file types supported](docs/README_LangChain.md#supported-datatypes)
+ * [CLI Database control](docs/README_LangChain.md#database-creation)
+ * [FAQ](docs/FAQ.md)
+ * [Model Usage Notes](docs/FAQ.md#model-usage-notes)
+ * [Adding LLM Models (including using GGUF and Attention Sinks)](docs/FAQ.md#adding-models)
+ * [Adding Embedding Models](docs/FAQ.md#add-new-embedding-model)
+ * [Adding Prompts](docs/FAQ.md#adding-prompt-templates)
+ * [In-Context Learning](docs/FAQ.md#in-context-learning-via-prompt-engineering)
+ * [Multiple GPUs](docs/FAQ.md#multiple-gpus)
+ * [Low-Memory Usage](docs/FAQ.md#low-memory-mode)
+ * [Environment Variables](docs/FAQ.md#what-envs-can-i-pass-to-control-h2ogpt)
+ * [HTTPS access for server and client](docs/FAQ.md#https-access-for-server-and-client)
+ * [Useful Links](docs/LINKS.md)
+ * [Fine-Tuning](docs/FINETUNE.md)
+ * [Triton](docs/TRITON.md)
+ * [Commercial viability](docs/FAQ.md#commercial-viability)
+* [Acknowledgements](#acknowledgements)
+* [Why H2O.ai?](#why-h2oai)
+* [Disclaimer](#disclaimer)
+
+### Experimental features
+
+These are not part of normal installation instructions and are experimental.
+
+* [Agents](docs/README_Agents.md) -- in alpha testing. Works best with OpenAI, but even that fails sometimes.
+
+### Roadmap
+
+- Integration of code and resulting LLMs with downstream applications and low/no-code platforms
+- Complement h2oGPT chatbot with other APIs like [ToolBench](https://github.com/OpenBMB/ToolBench)
+- Enhance the model's code completion, reasoning, and mathematical capabilities, ensure factual correctness, minimize hallucinations, and avoid repetitive output
+- Add better agents for SQL and CSV question/answer
+
+### Development
+
+- To create a development environment for training and generation, follow the [installation instructions](docs/INSTALL.md).
+- To fine-tune any LLM models on your data, follow the [fine-tuning instructions](docs/FINETUNE.md).
+- To run h2oGPT tests:
+ ```bash
+ pip install requirements-parser pytest-instafail pytest-random-order
+ pip install playsound==1.3.0
+ pytest --instafail -s -v tests
+ # for client tests
+ make -C client setup
+ make -C client build
+ pytest --instafail -s -v client/tests
+ # for openai server test on already-running local server
+ pytest -s -v -n 4 openai_server/test_openai_server.py::test_openai_client
+ ```
+ or tweak/run `tests/test4gpus.sh` to run tests in parallel.
+
+### Help
+
+- [FAQs](docs/FAQ.md)
+
+- [README for LangChain](docs/README_LangChain.md)
+
+- Useful [links](docs/LINKS.md) for additional context and information on competitors, models, and datasets
+
+### Acknowledgements
+
+* Some training code was based upon the March 24 version of [Alpaca-LoRA](https://github.com/tloen/alpaca-lora/).
+* Used high-quality data created by [OpenAssistant](https://open-assistant.io/).
+* Used base models by [EleutherAI](https://www.eleuther.ai/).
+* Used OIG data created by [LAION](https://laion.ai/blog/oig-dataset/).
+
+### Why H2O.ai?
+
+Our [Makers](https://h2o.ai/company/team/) at [H2O.ai](https://h2o.ai) have built several world-class Machine Learning, Deep Learning and AI platforms:
+- #1 open-source machine learning platform for the enterprise [H2O-3](https://github.com/h2oai/h2o-3)
+- The world's best AutoML (Automatic Machine Learning) with [H2O Driverless AI](https://h2o.ai/platform/ai-cloud/make/h2o-driverless-ai/)
+- No-Code Deep Learning with [H2O Hydrogen Torch](https://h2o.ai/platform/ai-cloud/make/hydrogen-torch/)
+- Document Processing with Deep Learning in [Document AI](https://h2o.ai/platform/ai-cloud/make/document-ai/)
+
+We also built platforms for deployment and monitoring, and for data wrangling and governance:
+- [H2O MLOps](https://h2o.ai/platform/ai-cloud/operate/h2o-mlops/) to deploy and monitor models at scale
+- [H2O Feature Store](https://h2o.ai/platform/ai-cloud/make/feature-store/) in collaboration with AT&T
+- Open-source Low-Code AI App Development Frameworks [Wave](https://wave.h2o.ai/) and [Nitro](https://nitro.h2o.ai/)
+- Open-source Python [datatable](https://github.com/h2oai/datatable/) (the engine for H2O Driverless AI feature engineering)
+
+Many of our customers are creating models and deploying them enterprise-wide and at scale in the [H2O AI Cloud](https://h2o.ai/platform/ai-cloud/):
+- Multi-Cloud or on Premises
+- [Managed Cloud (SaaS)](https://h2o.ai/platform/ai-cloud/managed)
+- [Hybrid Cloud](https://h2o.ai/platform/ai-cloud/hybrid)
+- [AI Appstore](https://docs.h2o.ai/h2o-ai-cloud/)
+
+We are proud to have over 25 (of the world's 280) [Kaggle Grandmasters](https://h2o.ai/company/team/kaggle-grandmasters/) call H2O home, including three Kaggle Grandmasters who have made it to world #1.
+
+### Disclaimer
+
+Please read this disclaimer carefully before using the large language model provided in this repository. Your use of the model signifies your agreement to the following terms and conditions.
+
+- Biases and Offensiveness: The large language model is trained on a diverse range of internet text data, which may contain biased, racist, offensive, or otherwise inappropriate content. By using this model, you acknowledge and accept that the generated content may sometimes exhibit biases or produce content that is offensive or inappropriate. The developers of this repository do not endorse, support, or promote any such content or viewpoints.
+- Limitations: The large language model is an AI-based tool and not a human. It may produce incorrect, nonsensical, or irrelevant responses. It is the user's responsibility to critically evaluate the generated content and use it at their discretion.
+- Use at Your Own Risk: Users of this large language model must assume full responsibility for any consequences that may arise from their use of the tool. The developers and contributors of this repository shall not be held liable for any damages, losses, or harm resulting from the use or misuse of the provided model.
+- Ethical Considerations: Users are encouraged to use the large language model responsibly and ethically. By using this model, you agree not to use it for purposes that promote hate speech, discrimination, harassment, or any form of illegal or harmful activities.
+- Reporting Issues: If you encounter any biased, offensive, or otherwise inappropriate content generated by the large language model, please report it to the repository maintainers through the provided channels. Your feedback will help improve the model and mitigate potential issues.
+- Changes to this Disclaimer: The developers of this repository reserve the right to modify or update this disclaimer at any time without prior notice. It is the user's responsibility to periodically review the disclaimer to stay informed about any changes.
+
+By using the large language model provided in this repository, you agree to accept and comply with the terms and conditions outlined in this disclaimer. If you do not agree with any part of this disclaimer, you should refrain from using the model and any content generated by it.
+
+## Star History
+
+[![Star History Chart](https://api.star-history.com/svg?repos=h2oai/h2ogpt&type=Timeline)](https://star-history.com/#h2oai/h2ogpt&Timeline)
diff --git a/auth.json.lock b/auth.json.lock
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/benchmarks/llm_gpu_benchmark.py b/benchmarks/llm_gpu_benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..107dd0b30542621ce9886213f8b3b5f70d88256b
--- /dev/null
+++ b/benchmarks/llm_gpu_benchmark.py
@@ -0,0 +1,123 @@
+
+
+# %%
+import json
+
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import plotly.io as pio
+from plotly.subplots import make_subplots
+
+# %%
+# Read the json file
+# This script processes the llm_gpu_benchmarks.json file (read from the working directory below)
+# The file is generated with the command
+# curl -sSL https://raw.githubusercontent.com/h2oai/h2ogpt/main/benchmarks/perf.json | jq -s '.' > llm_gpu_benchmarks.json
+with open('llm_gpu_benchmarks.json') as f:
+ data = json.load(f)
+del f
+
+# %%
+# Read the json file into a dataframe
+df = pd.json_normalize(data)
+del data
+
+# %%
+# Process the dataframe
+# Drop columns that are not needed
+df.drop(columns=['task', 'ngpus', 'reps', 'date', 'git_sha', 'transformers', 'bitsandbytes', 'cuda', 'hostname',
+ 'summarize_input_len_bytes'], inplace=True)
+# Rename columns
+df.rename(columns={'n_gpus': 'gpu_count'}, inplace=True)
+# Split the gpu column into gpu and gpu_memory
+df["gpu_name"] = df.gpus.str.extract(r'[1-9] x ([\w\- ]+) .+')
+df["gpu_memory_gb"] = round(
+ pd.to_numeric(df.gpus.str.extract(r'[\w ]+ \(([\d]+) .+', expand=False), errors='coerce') / 1024)
+df["gpu_memory_gb"] = df["gpu_memory_gb"].astype('Int64')
+df.drop(columns=['gpus'], inplace=True)
+# Manage gpu_names
+df.gpu_name = df.gpu_name.str.replace('NVIDIA ', '')
+df.gpu_name = df.gpu_name.str.replace('GeForce ', '')
+df.gpu_name = df.gpu_name.str.replace('A100-SXM4-80GB', 'A100 SXM4')
+df.gpu_name = df.gpu_memory_gb.astype(str) + "-" + df.gpu_name
+# Remove CPUs
+df.drop(df[df.gpu_name.isnull()].index, inplace=True)
+
+# %%
+# Remove duplicate rows
+df.drop_duplicates(['backend', 'base_model', 'bits', 'gpu_count', 'gpu_name'], inplace=True)
+
+# %% Add baseline comparison columns
+# Looking at the CPU data for 4, 8, and 16 bit quantization values for the benchmark we are simplifying it to a single
+# value
+cpu_summary_out_throughput = 1353 / 1216 # bytes/second (calculated from summarize_output_len_bytes / summarize_time)
+cpu_generate_out_throughput = 849 / 180 # bytes/second (calculated from generate_output_len_bytes / generate_time)
+
+# add GPU throughput columns
+df["summary_out_throughput"] = df.summarize_output_len_bytes / df.summarize_time
+df["generate_out_throughput"] = df.generate_output_len_bytes / df.generate_time
+# add GPU throughput boost columns
+df["summary_out_throughput_normalize"] = df.summary_out_throughput / cpu_summary_out_throughput
+df["generate_out_throughput_normalize"] = df.generate_out_throughput / cpu_generate_out_throughput
+
+# %%
+# df.to_excel('tmp/scratchpad/output/llm_gpu_benchmarks.xlsx', index=False)
+
+# %%
+pio.renderers.default = "browser"
+
+# %%
+bits_bar_colors = {'4': px.colors.qualitative.D3[0],
+ '8': px.colors.qualitative.D3[1],
+ '16': px.colors.qualitative.D3[2]}
+
+backends = list(df.backend.unique())
+base_models = list(df.base_model.unique())
+n_gpus = list(df.gpu_count.unique())
+
+# %%
+for backend in backends:
+ # for backend in ['transformers']:
+ fig_bar = make_subplots(rows=len(n_gpus),
+ cols=len(base_models) * 2,
+ shared_xaxes='all',
+ shared_yaxes='columns',
+ start_cell="top-left",
+ vertical_spacing=0.1,
+ print_grid=False,
+ row_titles=[f'{gpu_count} GPUs' for gpu_count in n_gpus],
+ column_titles=['llama2-7b-chat Summarization', 'llama2-7b-chat Generation',
+ 'llama2-13b-chat Summarization', 'llama2-13b-chat Generation',
+ 'llama2-70b-chat Summarization', 'llama2-70b-chat Generation'],)
+
+ # for base_model in ['h2oai/h2ogpt-4096-llama2-7b-chat']:
+ for base_model in base_models:
+ for gpu_count in n_gpus:
+ for bits in sorted(df.bits.unique()):
+ sub_df = df[(df.backend == backend) &
+ (df.base_model == base_model) &
+ (df.gpu_count == gpu_count) &
+ (df.bits == bits)].sort_values(by='gpu_name')
+ fig_bar.add_trace(go.Bar(x=sub_df.summary_out_throughput_normalize,
+ y=sub_df.gpu_name,
+ name=f'sum-{bits} bits',
+ legendgroup=f'sum-{bits} bits',
+ marker=dict(color=bits_bar_colors[f'{bits}']),
+ orientation='h'),
+ row=n_gpus.index(gpu_count) + 1,
+ col=base_models.index(base_model) * 2 + 1)
+ fig_bar.add_trace(go.Bar(x=sub_df.generate_out_throughput_normalize,
+ y=sub_df.gpu_name,
+ name=f'gen-{bits} bits',
+ legendgroup=f'gen-{bits} bits',
+ marker=dict(color=bits_bar_colors[f'{bits}']),
+ orientation='h'),
+ row=list(n_gpus).index(gpu_count) + 1,
+ col=list(base_models).index(base_model) * 2 + 2)
+
+ fig_bar.update_layout(plot_bgcolor='rgb(250,250,250)',
+ showlegend=True,
+ barmode="group")
+ # fig_bar.show()
+ fig_bar.write_html(f'llm_gpu_benchmark_{backend}.html', include_plotlyjs='cdn')
\ No newline at end of file
diff --git a/benchmarks/llm_gpu_benchmark_text-generation-inference.html b/benchmarks/llm_gpu_benchmark_text-generation-inference.html
new file mode 100644
index 0000000000000000000000000000000000000000..1843e3808994e88ccad66c2a57211e100b636846
--- /dev/null
+++ b/benchmarks/llm_gpu_benchmark_text-generation-inference.html
@@ -0,0 +1,7 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/benchmarks/llm_gpu_benchmark_transformers.html b/benchmarks/llm_gpu_benchmark_transformers.html
new file mode 100644
index 0000000000000000000000000000000000000000..14ba4bac9d0d9ed21551760fc8a5021bd4214ce4
--- /dev/null
+++ b/benchmarks/llm_gpu_benchmark_transformers.html
@@ -0,0 +1,7 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/benchmarks/llm_gpu_benchmarks.json b/benchmarks/llm_gpu_benchmarks.json
new file mode 100644
index 0000000000000000000000000000000000000000..157082aae7ac8ea6fde8371b96e9e6304a02feda
--- /dev/null
+++ b/benchmarks/llm_gpu_benchmarks.json
@@ -0,0 +1,2790 @@
+[
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 10:46:19",
+ "git_sha": "55d3b55b",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1417,
+ "summarize_time": 32.29472152392069,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 14.563165505727133
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 10:48:55",
+ "git_sha": "55d3b55b",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1417,
+ "summarize_time": 67.97515447934468,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 33.00641902287801
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 10:48:58",
+ "git_sha": "55d3b55b",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1440,
+ "summarize_time": 114.62220064798991,
+ "generate_output_len_bytes": 2619,
+ "generate_time": 71.0722058614095
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 10:58:34",
+ "git_sha": "55d3b55b",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 866,
+ "summarize_time": 39.54404203097025,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 22.466302394866943
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 11:01:59",
+ "git_sha": "55d3b55b",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1417,
+ "summarize_time": 32.1394579410553,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 14.757195552190145
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 10:54:29",
+ "git_sha": "55d3b55b",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 910,
+ "summarize_time": 185.14580019315085,
+ "generate_output_len_bytes": 2042,
+ "generate_time": 117.13909141222636
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 11:04:37",
+ "git_sha": "55d3b55b",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1002,
+ "summarize_time": 94.98129558563232,
+ "generate_output_len_bytes": 2512,
+ "generate_time": 69.4871145884196
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 11:13:08",
+ "git_sha": "55d3b55b",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1276,
+ "summarize_time": 43.23498781522115,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 22.826789538065594
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 11:10:08",
+ "git_sha": "55d3b55b",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 991,
+ "summarize_time": 90.51939169565837,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 48.96095744768778
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 11:16:48",
+ "git_sha": "55d3b55b",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1417,
+ "summarize_time": 31.86189842224121,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 14.209659894307455
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 11:17:39",
+ "git_sha": "55d3b55b",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1417,
+ "summarize_time": 71.48081835110982,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 33.5740262667338
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 11:19:24",
+ "git_sha": "55d3b55b",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1002,
+ "summarize_time": 94.17744310696919,
+ "generate_output_len_bytes": 2512,
+ "generate_time": 70.12592967351277
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 11:27:57",
+ "git_sha": "55d3b55b",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1276,
+ "summarize_time": 42.8066500822703,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 22.626200040181477
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 11:23:22",
+ "git_sha": "55d3b55b",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 910,
+ "summarize_time": 186.88371555010477,
+ "generate_output_len_bytes": 2042,
+ "generate_time": 117.3530724843343
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 11:39:03",
+ "git_sha": "55d3b55b",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 991,
+ "summarize_time": 94.50985678037007,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 50.06416177749634
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 21:08:31",
+ "git_sha": "fc4826f2",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 38.80374129613241,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 19.23690136273702
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 21:11:49",
+ "git_sha": "fc4826f2",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1179,
+ "summarize_time": 178.79640992482504,
+ "generate_output_len_bytes": 2772,
+ "generate_time": 93.99476226170857
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 21:25:53",
+ "git_sha": "fc4826f2",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1002,
+ "summarize_time": 53.44271365801493,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 30.641155401865642
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 21:30:30",
+ "git_sha": "fc4826f2",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 40.80062770843506,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 19.825008392333984
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 21:35:29",
+ "git_sha": "fc4826f2",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1179,
+ "summarize_time": 177.35046529769897,
+ "generate_output_len_bytes": 2772,
+ "generate_time": 91.73111907641093
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 21:49:20",
+ "git_sha": "fc4826f2",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1002,
+ "summarize_time": 56.894784371058144,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 32.15500020980835
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/18/2023 21:54:11",
+ "git_sha": "fc4826f2",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 41.46419604619344,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 20.049855709075928
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/18/2023 21:57:39",
+ "git_sha": "fc4826f2",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1179,
+ "summarize_time": 183.73364853858948,
+ "generate_output_len_bytes": 2772,
+ "generate_time": 94.9052836894989
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/18/2023 22:11:59",
+ "git_sha": "fc4826f2",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1002,
+ "summarize_time": 59.204413731892906,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 33.25332593917847
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 8,
+ "reps": 3,
+ "date": "08/18/2023 22:17:00",
+ "git_sha": "fc4826f2",
+ "n_gpus": 8,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 42.09002653757731,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 20.106103817621868
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 8,
+ "reps": 3,
+ "date": "08/18/2023 22:20:31",
+ "git_sha": "fc4826f2",
+ "n_gpus": 8,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1179,
+ "summarize_time": 185.28164370854697,
+ "generate_output_len_bytes": 2772,
+ "generate_time": 95.13023789723714
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 8,
+ "reps": 3,
+ "date": "08/18/2023 22:34:58",
+ "git_sha": "fc4826f2",
+ "n_gpus": 8,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1002,
+ "summarize_time": 60.9919019540151,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 34.328625202178955
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 13:31:34",
+ "git_sha": "fc4826f2",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1046,
+ "summarize_time": 52.49842747052511,
+ "generate_output_len_bytes": 2172,
+ "generate_time": 20.686774571736652
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 13:31:55",
+ "git_sha": "fc4826f2",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 13:35:38",
+ "git_sha": "fc4826f2",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1007,
+ "summarize_time": 168.9666860898336,
+ "generate_output_len_bytes": 2249,
+ "generate_time": 73.25518870353699
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 13:48:09",
+ "git_sha": "fc4826f2",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 856,
+ "summarize_time": 45.30513469378153,
+ "generate_output_len_bytes": 1802,
+ "generate_time": 22.000216643015545
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 13:51:56",
+ "git_sha": "fc4826f2",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1046,
+ "summarize_time": 51.64275654157003,
+ "generate_output_len_bytes": 2172,
+ "generate_time": 20.737667481104534
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 13:35:47",
+ "git_sha": "fc4826f2",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 980,
+ "summarize_time": 280.4669913450877,
+ "generate_output_len_bytes": 2132,
+ "generate_time": 141.7793349424998
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 13:57:35",
+ "git_sha": "fc4826f2",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 869,
+ "summarize_time": 96.61887431144714,
+ "generate_output_len_bytes": 3244,
+ "generate_time": 82.98751719792683
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 13:55:51",
+ "git_sha": "fc4826f2",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1007,
+ "summarize_time": 167.52292919158936,
+ "generate_output_len_bytes": 2249,
+ "generate_time": 71.82611886660258
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 14:08:08",
+ "git_sha": "fc4826f2",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 856,
+ "summarize_time": 47.14254776636759,
+ "generate_output_len_bytes": 1802,
+ "generate_time": 22.54850967725118
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 14:15:15",
+ "git_sha": "d13230ee",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 14:07:15",
+ "git_sha": "fc4826f2",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 915,
+ "summarize_time": 89.59958203633626,
+ "generate_output_len_bytes": 2172,
+ "generate_time": 42.32424934705099
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 14:15:30",
+ "git_sha": "d13230ee",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1024,
+ "summarize_time": 185.44230167071024,
+ "generate_output_len_bytes": 2122,
+ "generate_time": 88.11553311347961
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 14:29:36",
+ "git_sha": "d13230ee",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 922,
+ "summarize_time": 68.06459252039592,
+ "generate_output_len_bytes": 1802,
+ "generate_time": 27.939613421758015
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 14:26:29",
+ "git_sha": "d13230ee",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 980,
+ "summarize_time": 280.8310640652974,
+ "generate_output_len_bytes": 2132,
+ "generate_time": 143.21916349728903
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 14:48:17",
+ "git_sha": "d13230ee",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 869,
+ "summarize_time": 98.47045453389485,
+ "generate_output_len_bytes": 3244,
+ "generate_time": 83.71360301971436
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 15:35:13",
+ "git_sha": "0dec0f52",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 15:49:33",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 16:26:53",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 16:27:32",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 16:29:03",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 17:26:02",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 18:59:16",
+ "git_sha": "5691db4a",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1075,
+ "summarize_time": 39.01545596122742,
+ "generate_output_len_bytes": 2242,
+ "generate_time": 10.151424566904703
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 19:03:13",
+ "git_sha": "5691db4a",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 940,
+ "summarize_time": 21.78233750661214,
+ "generate_output_len_bytes": 2130,
+ "generate_time": 15.794983307520548
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 19:38:40",
+ "git_sha": "6f05e8f1",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1114,
+ "summarize_time": 7.636120955149333,
+ "generate_output_len_bytes": 2275,
+ "generate_time": 7.922623078028361
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 19:41:02",
+ "git_sha": "6f05e8f1",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1024,
+ "summarize_time": 10.824170271555582,
+ "generate_output_len_bytes": 2130,
+ "generate_time": 9.209020694096884
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 19:55:17",
+ "git_sha": "2c548f21",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1088,
+ "summarize_time": 24.39883820215861,
+ "generate_output_len_bytes": 2275,
+ "generate_time": 12.755743900934855
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 00:57:21",
+ "git_sha": "a227be4f",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 37.113919814427696,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 18.36507821083069
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 01:00:31",
+ "git_sha": "a227be4f",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1046,
+ "summarize_time": 49.79721482594808,
+ "generate_output_len_bytes": 2172,
+ "generate_time": 21.780913591384888
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 01:04:36",
+ "git_sha": "a227be4f",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 01:05:26",
+ "git_sha": "a227be4f",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1179,
+ "summarize_time": 181.2461258570353,
+ "generate_output_len_bytes": 2772,
+ "generate_time": 92.64811905225118
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 01:19:33",
+ "git_sha": "a227be4f",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 800,
+ "summarize_time": 174.4576851526896,
+ "generate_output_len_bytes": 2713,
+ "generate_time": 119.14412077267964
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 01:36:14",
+ "git_sha": "a227be4f",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1002,
+ "summarize_time": 53.39731526374817,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 31.369641542434692
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 01:40:53",
+ "git_sha": "a227be4f",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1000,
+ "summarize_time": 74.27096923192342,
+ "generate_output_len_bytes": 1802,
+ "generate_time": 29.860486666361492
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 01:48:09",
+ "git_sha": "a227be4f",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 39.926851193110146,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 18.481745958328247
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 01:51:27",
+ "git_sha": "a227be4f",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1046,
+ "summarize_time": 51.299002488454185,
+ "generate_output_len_bytes": 2172,
+ "generate_time": 21.828503131866455
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 01:56:20",
+ "git_sha": "a227be4f",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1179,
+ "summarize_time": 178.19972308476767,
+ "generate_output_len_bytes": 2772,
+ "generate_time": 91.73426882425944
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 02:10:13",
+ "git_sha": "a227be4f",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 800,
+ "summarize_time": 180.7814578215281,
+ "generate_output_len_bytes": 2713,
+ "generate_time": 124.72717420260112
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 02:26:43",
+ "git_sha": "a227be4f",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1002,
+ "summarize_time": 57.08081785837809,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 32.26534946759542
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 02:31:36",
+ "git_sha": "a227be4f",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1000,
+ "summarize_time": 79.9461121559143,
+ "generate_output_len_bytes": 1802,
+ "generate_time": 31.403561115264893
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 02:38:23",
+ "git_sha": "a227be4f",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 42.33977222442627,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 19.723278522491455
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 02:41:52",
+ "git_sha": "a227be4f",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1046,
+ "summarize_time": 55.377869288126625,
+ "generate_output_len_bytes": 2172,
+ "generate_time": 25.01458676656087
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 02:47:05",
+ "git_sha": "a227be4f",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1179,
+ "summarize_time": 180.53432401021323,
+ "generate_output_len_bytes": 2772,
+ "generate_time": 91.93375285466512
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 03:01:07",
+ "git_sha": "a227be4f",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 800,
+ "summarize_time": 179.50477250417075,
+ "generate_output_len_bytes": 2713,
+ "generate_time": 124.40728378295898
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 03:17:36",
+ "git_sha": "a227be4f",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1002,
+ "summarize_time": 58.62867816289266,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 33.394495725631714
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 03:22:37",
+ "git_sha": "a227be4f",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1000,
+ "summarize_time": 78.90612125396729,
+ "generate_output_len_bytes": 1802,
+ "generate_time": 30.697617371877033
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 8,
+ "reps": 3,
+ "date": "08/19/2023 03:29:20",
+ "git_sha": "a227be4f",
+ "n_gpus": 8,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 40.498607873916626,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 19.509677171707153
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 8,
+ "reps": 3,
+ "date": "08/19/2023 03:32:44",
+ "git_sha": "a227be4f",
+ "n_gpus": 8,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1046,
+ "summarize_time": 55.3964786529541,
+ "generate_output_len_bytes": 2172,
+ "generate_time": 24.347585439682007
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 8,
+ "reps": 3,
+ "date": "08/19/2023 03:37:55",
+ "git_sha": "a227be4f",
+ "n_gpus": 8,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1179,
+ "summarize_time": 186.71331850687662,
+ "generate_output_len_bytes": 2772,
+ "generate_time": 95.784650405248
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 8,
+ "reps": 3,
+ "date": "08/19/2023 03:52:28",
+ "git_sha": "a227be4f",
+ "n_gpus": 8,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 800,
+ "summarize_time": 185.3280005455017,
+ "generate_output_len_bytes": 2713,
+ "generate_time": 125.91738017400105
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 8,
+ "reps": 3,
+ "date": "08/19/2023 04:09:18",
+ "git_sha": "a227be4f",
+ "n_gpus": 8,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1002,
+ "summarize_time": 60.18280680974325,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 33.386961142222084
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 8,
+ "reps": 3,
+ "date": "08/19/2023 04:14:25",
+ "git_sha": "a227be4f",
+ "n_gpus": 8,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1000,
+ "summarize_time": 83.04790727297465,
+ "generate_output_len_bytes": 1802,
+ "generate_time": 32.24992283185323
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 23:26:19",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1417,
+ "summarize_time": 47.03754989306132,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 19.964784463246662
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 23:33:09",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 915,
+ "summarize_time": 71.91136892636617,
+ "generate_output_len_bytes": 2480,
+ "generate_time": 33.6295014222463
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 23:44:08",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 00:45:42",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1007,
+ "summarize_time": 148.61560583114624,
+ "generate_output_len_bytes": 2357,
+ "generate_time": 89.01266026496887
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 00:58:00",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 763,
+ "summarize_time": 193.99270629882812,
+ "generate_output_len_bytes": 2129,
+ "generate_time": 95.66660761833191
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 01:13:01",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 01:13:55",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 991,
+ "summarize_time": 61.52411222457886,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 32.030215660730995
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 01:19:00",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 1,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1000,
+ "summarize_time": 81.13888708750407,
+ "generate_output_len_bytes": 3486,
+ "generate_time": 55.5331826210022
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 01:27:49",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1417,
+ "summarize_time": 47.41046245892843,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 20.660600344340008
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 01:34:28",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 915,
+ "summarize_time": 72.85646979014079,
+ "generate_output_len_bytes": 2480,
+ "generate_time": 34.05861854553223
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 02:39:22",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1007,
+ "summarize_time": 152.54357608159384,
+ "generate_output_len_bytes": 2357,
+ "generate_time": 91.51808977127075
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 02:52:58",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 763,
+ "summarize_time": 195.92926557858786,
+ "generate_output_len_bytes": 2129,
+ "generate_time": 96.55542047818501
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 03:15:01",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 991,
+ "summarize_time": 64.64422671000163,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 33.30378039677938
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 03:20:19",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 2,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1000,
+ "summarize_time": 84.57761120796204,
+ "generate_output_len_bytes": 3486,
+ "generate_time": 57.59072462717692
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 03:28:44",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1417,
+ "summarize_time": 49.08898218472799,
+ "generate_output_len_bytes": 2384,
+ "generate_time": 21.489527861277264
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 03:32:39",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 915,
+ "summarize_time": 74.43774898846944,
+ "generate_output_len_bytes": 2480,
+ "generate_time": 34.72673638661703
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 03:39:21",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1007,
+ "summarize_time": 153.41076453526816,
+ "generate_output_len_bytes": 2357,
+ "generate_time": 91.14894040425618
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 03:52:00",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 763,
+ "summarize_time": 199.79869039853415,
+ "generate_output_len_bytes": 2129,
+ "generate_time": 98.61504419644673
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 04:08:12",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 991,
+ "summarize_time": 66.49260465304057,
+ "generate_output_len_bytes": 2927,
+ "generate_time": 34.17951035499573
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 04:13:39",
+ "git_sha": "0cdb75ef",
+ "n_gpus": 4,
+ "transformers": "4.30.2",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1000,
+ "summarize_time": 87.65787092844646,
+ "generate_output_len_bytes": 3486,
+ "generate_time": 59.3750696182251
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 22:22:24",
+ "git_sha": "b63768c6",
+ "n_gpus": 1,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 948,
+ "summarize_time": 122.13213857014973,
+ "generate_output_len_bytes": 2826,
+ "generate_time": 66.34098903338115
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/18/2023 22:33:33",
+ "git_sha": "c1348fb3",
+ "n_gpus": 2,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 948,
+ "summarize_time": 120.53812781969707,
+ "generate_output_len_bytes": 2826,
+ "generate_time": 67.28052496910095
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 22:56:52",
+ "git_sha": "fb84de76",
+ "n_gpus": 1,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1036,
+ "summarize_time": 29.128981749216717,
+ "generate_output_len_bytes": 2242,
+ "generate_time": 12.197122732798258
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/18/2023 23:00:33",
+ "git_sha": "fb84de76",
+ "n_gpus": 1,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 05:47:43",
+ "git_sha": "22352acd",
+ "n_gpus": 1,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 05:48:58",
+ "git_sha": "22352acd",
+ "n_gpus": 1,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/19/2023 05:50:40",
+ "git_sha": "22352acd",
+ "n_gpus": 1,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 948,
+ "summarize_time": 165.05752809842429,
+ "generate_output_len_bytes": 2605,
+ "generate_time": 93.80659619967143
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 06:05:51",
+ "git_sha": "22352acd",
+ "n_gpus": 2,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 06:10:05",
+ "git_sha": "22352acd",
+ "n_gpus": 2,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 906,
+ "summarize_time": 410.0691332022349,
+ "generate_output_len_bytes": 521,
+ "generate_time": 57.71272214253744
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 06:36:58",
+ "git_sha": "22352acd",
+ "n_gpus": 2,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 948,
+ "summarize_time": 171.74388321240744,
+ "generate_output_len_bytes": 2605,
+ "generate_time": 97.00725762049358
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 06:51:13",
+ "git_sha": "22352acd",
+ "n_gpus": 4,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 792,
+ "summarize_time": 267.0555826822917,
+ "generate_output_len_bytes": 2783,
+ "generate_time": 163.99818523724875
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 07:13:35",
+ "git_sha": "22352acd",
+ "n_gpus": 4,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 906,
+ "summarize_time": 413.9569679101308,
+ "generate_output_len_bytes": 521,
+ "generate_time": 58.52583885192871
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/19/2023 07:38:02",
+ "git_sha": "22352acd",
+ "n_gpus": 4,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 948,
+ "summarize_time": 175.4907926718394,
+ "generate_output_len_bytes": 2605,
+ "generate_time": 98.97720170021057
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/19/2023 12:35:08",
+ "git_sha": "29a002e5",
+ "n_gpus": 2,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "timemachine",
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 983,
+ "summarize_time": 42.21107586224874,
+ "generate_output_len_bytes": 2130,
+ "generate_time": 16.94527777036031
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/21/2023 20:03:36",
+ "git_sha": "51318f44",
+ "n_gpus": 2,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 41.0461368560791,
+ "generate_output_len_bytes": 2383,
+ "generate_time": 19.614749511082966
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/21/2023 20:07:35",
+ "git_sha": "51318f44",
+ "n_gpus": 4,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 42.8376894791921,
+ "generate_output_len_bytes": 2383,
+ "generate_time": 20.2719091574351
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/21/2023 20:42:46",
+ "git_sha": "2f4bb620",
+ "n_gpus": 1,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/21/2023 20:50:19",
+ "git_sha": "2f4bb620",
+ "n_gpus": 4,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 915,
+ "summarize_time": 66.52468911806743,
+ "generate_output_len_bytes": 2479,
+ "generate_time": 29.828714847564697
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/21/2023 20:56:04",
+ "git_sha": "2f4bb620",
+ "n_gpus": 4,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
+ "exception": "OOM"
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/21/2023 19:55:35",
+ "git_sha": "51318f44",
+ "n_gpus": 1,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 38.753786404927574,
+ "generate_output_len_bytes": 2383,
+ "generate_time": 19.529522736867268
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/21/2023 20:36:13",
+ "git_sha": "51318f44",
+ "n_gpus": 2,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 41.024452924728394,
+ "generate_output_len_bytes": 2383,
+ "generate_time": 20.29120985666911
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/21/2023 20:40:08",
+ "git_sha": "51318f44",
+ "n_gpus": 2,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1046,
+ "summarize_time": 54.554532527923584,
+ "generate_output_len_bytes": 2171,
+ "generate_time": 24.604793945948284
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/21/2023 20:50:05",
+ "git_sha": "51318f44",
+ "n_gpus": 4,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 41.09950613975525,
+ "generate_output_len_bytes": 2383,
+ "generate_time": 20.947362899780273
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/21/2023 20:54:08",
+ "git_sha": "51318f44",
+ "n_gpus": 4,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1046,
+ "summarize_time": 58.3172922929128,
+ "generate_output_len_bytes": 2171,
+ "generate_time": 25.735217014948528
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 8,
+ "reps": 3,
+ "date": "08/21/2023 21:01:04",
+ "git_sha": "51318f44",
+ "n_gpus": 8,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 42.85940829912821,
+ "generate_output_len_bytes": 2383,
+ "generate_time": 21.380353291829426
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 8,
+ "reps": 3,
+ "date": "08/21/2023 21:05:24",
+ "git_sha": "51318f44",
+ "n_gpus": 8,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1046,
+ "summarize_time": 54.235164642333984,
+ "generate_output_len_bytes": 2171,
+ "generate_time": 25.70338026682536
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 8,
+ "reps": 3,
+ "date": "08/21/2023 21:10:37",
+ "git_sha": "51318f44",
+ "n_gpus": 8,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 927,
+ "summarize_time": 133.53030570348105,
+ "generate_output_len_bytes": 2782,
+ "generate_time": 72.97924383481343
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/21/2023 22:18:17",
+ "git_sha": "51318f44",
+ "n_gpus": 4,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 927,
+ "summarize_time": 131.45291074117026,
+ "generate_output_len_bytes": 2782,
+ "generate_time": 72.30849742889404
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/21/2023 22:51:09",
+ "git_sha": "383b6bbc",
+ "n_gpus": 1,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 39.269713958104454,
+ "generate_output_len_bytes": 2383,
+ "generate_time": 19.65731406211853
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/21/2023 22:54:54",
+ "git_sha": "383b6bbc",
+ "n_gpus": 1,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1046,
+ "summarize_time": 51.84283971786499,
+ "generate_output_len_bytes": 2171,
+ "generate_time": 28.441521485646565
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/21/2023 23:13:10",
+ "git_sha": "383b6bbc",
+ "n_gpus": 2,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1046,
+ "summarize_time": 53.383726040522255,
+ "generate_output_len_bytes": 2171,
+ "generate_time": 24.422890504201252
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 4,
+ "reps": 3,
+ "date": "08/21/2023 23:18:04",
+ "git_sha": "383b6bbc",
+ "n_gpus": 4,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1046,
+ "summarize_time": 52.791220347086586,
+ "generate_output_len_bytes": 2171,
+ "generate_time": 25.378511508305866
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 8,
+ "reps": 3,
+ "date": "08/21/2023 23:23:11",
+ "git_sha": "383b6bbc",
+ "n_gpus": 8,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.8",
+ "hostname": "cloudvm",
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1046,
+ "summarize_time": 56.3846542040507,
+ "generate_output_len_bytes": 2171,
+ "generate_time": 26.636192480723064
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 1,
+ "reps": 3,
+ "date": "08/21/2023 23:52:44",
+ "git_sha": "da69b822",
+ "n_gpus": 1,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1267,
+ "summarize_time": 40.36223220825195,
+ "generate_output_len_bytes": 2383,
+ "generate_time": 19.87660264968872
+ },
+ {
+ "backend": "text-generation-inference",
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 2,
+ "reps": 3,
+ "date": "08/22/2023 00:15:05",
+ "git_sha": "e843e8c3",
+ "n_gpus": 2,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "recypabaszmhhmuae",
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 915,
+ "summarize_time": 64.78201874097188,
+ "generate_output_len_bytes": 2479,
+ "generate_time": 29.02147897084554
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 16,
+ "ngpus": 0,
+ "reps": 3,
+ "date": "08/22/2023 19:01:15",
+ "git_sha": "855b7d15",
+ "n_gpus": 0,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "CPU",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1351,
+ "summarize_time": 1215.5185990333557,
+ "generate_output_len_bytes": 849,
+ "generate_time": 180.56836318969727
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 8,
+ "ngpus": 0,
+ "reps": 3,
+ "date": "08/22/2023 20:11:16",
+ "git_sha": "855b7d15",
+ "n_gpus": 0,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "CPU",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1353,
+ "summarize_time": 1216.9783231417339,
+ "generate_output_len_bytes": 849,
+ "generate_time": 180.42225472132364
+ },
+ {
+ "backend": "transformers",
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
+ "task": "summary_and_generate",
+ "bits": 4,
+ "ngpus": 0,
+ "reps": 3,
+ "date": "08/22/2023 21:21:20",
+ "git_sha": "855b7d15",
+ "n_gpus": 0,
+ "transformers": "4.31.0",
+ "bitsandbytes": "0.41.1",
+ "cuda": "11.7",
+ "hostname": "rippa",
+ "gpus": "CPU",
+ "summarize_input_len_bytes": 857252,
+ "summarize_output_len_bytes": 1354,
+ "summarize_time": 1217.1687794526417,
+ "generate_output_len_bytes": 843,
+ "generate_time": 180.78463260332742
+ }
+]
diff --git a/benchmarks/perf.json b/benchmarks/perf.json
new file mode 100644
index 0000000000000000000000000000000000000000..ea7c898206a8b1149c80e3b458770dea8ec0b9cf
--- /dev/null
+++ b/benchmarks/perf.json
@@ -0,0 +1,136 @@
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:46:19", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 32.29472152392069, "generate_output_len_bytes": 2384, "generate_time": 14.563165505727133}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:48:55", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 67.97515447934468, "generate_output_len_bytes": 2384, "generate_time": 33.00641902287801}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:48:58", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1440, "summarize_time": 114.62220064798991, "generate_output_len_bytes": 2619, "generate_time": 71.0722058614095}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:58:34", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 866, "summarize_time": 39.54404203097025, "generate_output_len_bytes": 2927, "generate_time": 22.466302394866943}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:01:59", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 32.1394579410553, "generate_output_len_bytes": 2384, "generate_time": 14.757195552190145}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:54:29", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 910, "summarize_time": 185.14580019315085, "generate_output_len_bytes": 2042, "generate_time": 117.13909141222636}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:04:37", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 94.98129558563232, "generate_output_len_bytes": 2512, "generate_time": 69.4871145884196}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:13:08", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1276, "summarize_time": 43.23498781522115, "generate_output_len_bytes": 2927, "generate_time": 22.826789538065594}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:10:08", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 90.51939169565837, "generate_output_len_bytes": 2927, "generate_time": 48.96095744768778}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:16:48", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 31.86189842224121, "generate_output_len_bytes": 2384, "generate_time": 14.209659894307455}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:17:39", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 71.48081835110982, "generate_output_len_bytes": 2384, "generate_time": 33.5740262667338}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:19:24", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 94.17744310696919, "generate_output_len_bytes": 2512, "generate_time": 70.12592967351277}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:27:57", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1276, "summarize_time": 42.8066500822703, "generate_output_len_bytes": 2927, "generate_time": 22.626200040181477}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:23:22", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 910, "summarize_time": 186.88371555010477, "generate_output_len_bytes": 2042, "generate_time": 117.3530724843343}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:39:03", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 94.50985678037007, "generate_output_len_bytes": 2927, "generate_time": 50.06416177749634}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:08:31", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 38.80374129613241, "generate_output_len_bytes": 2384, "generate_time": 19.23690136273702}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:11:49", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 178.79640992482504, "generate_output_len_bytes": 2772, "generate_time": 93.99476226170857}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:25:53", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 53.44271365801493, "generate_output_len_bytes": 2927, "generate_time": 30.641155401865642}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:30:30", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 40.80062770843506, "generate_output_len_bytes": 2384, "generate_time": 19.825008392333984}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:35:29", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 177.35046529769897, "generate_output_len_bytes": 2772, "generate_time": 91.73111907641093}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:49:20", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 56.894784371058144, "generate_output_len_bytes": 2927, "generate_time": 32.15500020980835}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/18/2023 21:54:11", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.46419604619344, "generate_output_len_bytes": 2384, "generate_time": 20.049855709075928}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/18/2023 21:57:39", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 183.73364853858948, "generate_output_len_bytes": 2772, "generate_time": 94.9052836894989}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/18/2023 22:11:59", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 59.204413731892906, "generate_output_len_bytes": 2927, "generate_time": 33.25332593917847}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:17:00", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.09002653757731, "generate_output_len_bytes": 2384, "generate_time": 20.106103817621868}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:20:31", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 185.28164370854697, "generate_output_len_bytes": 2772, "generate_time": 95.13023789723714}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:34:58", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 60.9919019540151, "generate_output_len_bytes": 2927, "generate_time": 34.328625202178955}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:31:34", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 52.49842747052511, "generate_output_len_bytes": 2172, "generate_time": 20.686774571736652}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:31:55", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:35:38", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 168.9666860898336, "generate_output_len_bytes": 2249, "generate_time": 73.25518870353699}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:48:09", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 856, "summarize_time": 45.30513469378153, "generate_output_len_bytes": 1802, "generate_time": 22.000216643015545}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 13:51:56", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 51.64275654157003, "generate_output_len_bytes": 2172, "generate_time": 20.737667481104534}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:35:47", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 980, "summarize_time": 280.4669913450877, "generate_output_len_bytes": 2132, "generate_time": 141.7793349424998}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:57:35", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 869, "summarize_time": 96.61887431144714, "generate_output_len_bytes": 3244, "generate_time": 82.98751719792683}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 13:55:51", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 167.52292919158936, "generate_output_len_bytes": 2249, "generate_time": 71.82611886660258}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:08:08", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 856, "summarize_time": 47.14254776636759, "generate_output_len_bytes": 1802, "generate_time": 22.54850967725118}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:15:15", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:07:15", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 89.59958203633626, "generate_output_len_bytes": 2172, "generate_time": 42.32424934705099}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:15:30", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1024, "summarize_time": 185.44230167071024, "generate_output_len_bytes": 2122, "generate_time": 88.11553311347961}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:29:36", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 922, "summarize_time": 68.06459252039592, "generate_output_len_bytes": 1802, "generate_time": 27.939613421758015}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:26:29", "git_sha": "d13230ee", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 980, "summarize_time": 280.8310640652974, "generate_output_len_bytes": 2132, "generate_time": 143.21916349728903}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:48:17", "git_sha": "d13230ee", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 869, "summarize_time": 98.47045453389485, "generate_output_len_bytes": 3244, "generate_time": 83.71360301971436}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 15:35:13", "git_sha": "0dec0f52", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 15:49:33", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 16:26:53", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 16:27:32", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 16:29:03", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 17:26:02", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 18:59:16", "git_sha": "5691db4a", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1075, "summarize_time": 39.01545596122742, "generate_output_len_bytes": 2242, "generate_time": 10.151424566904703}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 19:03:13", "git_sha": "5691db4a", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 940, "summarize_time": 21.78233750661214, "generate_output_len_bytes": 2130, "generate_time": 15.794983307520548}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 19:38:40", "git_sha": "6f05e8f1", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1114, "summarize_time": 7.636120955149333, "generate_output_len_bytes": 2275, "generate_time": 7.922623078028361}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 19:41:02", "git_sha": "6f05e8f1", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1024, "summarize_time": 10.824170271555582, "generate_output_len_bytes": 2130, "generate_time": 9.209020694096884}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 19:55:17", "git_sha": "2c548f21", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1088, "summarize_time": 24.39883820215861, "generate_output_len_bytes": 2275, "generate_time": 12.755743900934855}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 00:57:21", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 37.113919814427696, "generate_output_len_bytes": 2384, "generate_time": 18.36507821083069}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:00:31", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 49.79721482594808, "generate_output_len_bytes": 2172, "generate_time": 21.780913591384888}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:04:36", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:05:26", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 181.2461258570353, "generate_output_len_bytes": 2772, "generate_time": 92.64811905225118}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:19:33", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 174.4576851526896, "generate_output_len_bytes": 2713, "generate_time": 119.14412077267964}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:36:14", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 53.39731526374817, "generate_output_len_bytes": 2927, "generate_time": 31.369641542434692}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:40:53", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 74.27096923192342, "generate_output_len_bytes": 1802, "generate_time": 29.860486666361492}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:48:09", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 39.926851193110146, "generate_output_len_bytes": 2384, "generate_time": 18.481745958328247}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:51:27", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 51.299002488454185, "generate_output_len_bytes": 2172, "generate_time": 21.828503131866455}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:56:20", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 178.19972308476767, "generate_output_len_bytes": 2772, "generate_time": 91.73426882425944}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:10:13", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 180.7814578215281, "generate_output_len_bytes": 2713, "generate_time": 124.72717420260112}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:26:43", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 57.08081785837809, "generate_output_len_bytes": 2927, "generate_time": 32.26534946759542}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:31:36", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 79.9461121559143, "generate_output_len_bytes": 1802, "generate_time": 31.403561115264893}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 02:38:23", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.33977222442627, "generate_output_len_bytes": 2384, "generate_time": 19.723278522491455}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 02:41:52", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 55.377869288126625, "generate_output_len_bytes": 2172, "generate_time": 25.01458676656087}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 02:47:05", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 180.53432401021323, "generate_output_len_bytes": 2772, "generate_time": 91.93375285466512}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:01:07", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 179.50477250417075, "generate_output_len_bytes": 2713, "generate_time": 124.40728378295898}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:17:36", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 58.62867816289266, "generate_output_len_bytes": 2927, "generate_time": 33.394495725631714}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:22:37", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 78.90612125396729, "generate_output_len_bytes": 1802, "generate_time": 30.697617371877033}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:29:20", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 40.498607873916626, "generate_output_len_bytes": 2384, "generate_time": 19.509677171707153}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:32:44", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 55.3964786529541, "generate_output_len_bytes": 2172, "generate_time": 24.347585439682007}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:37:55", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 186.71331850687662, "generate_output_len_bytes": 2772, "generate_time": 95.784650405248}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:52:28", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 185.3280005455017, "generate_output_len_bytes": 2713, "generate_time": 125.91738017400105}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 8, "reps": 3, "date": "08/19/2023 04:09:18", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 60.18280680974325, "generate_output_len_bytes": 2927, "generate_time": 33.386961142222084}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 8, "reps": 3, "date": "08/19/2023 04:14:25", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 83.04790727297465, "generate_output_len_bytes": 1802, "generate_time": 32.24992283185323}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:26:19", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 47.03754989306132, "generate_output_len_bytes": 2384, "generate_time": 19.964784463246662}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:33:09", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 71.91136892636617, "generate_output_len_bytes": 2480, "generate_time": 33.6295014222463}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:44:08", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 00:45:42", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 148.61560583114624, "generate_output_len_bytes": 2357, "generate_time": 89.01266026496887}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 00:58:00", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 763, "summarize_time": 193.99270629882812, "generate_output_len_bytes": 2129, "generate_time": 95.66660761833191}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:13:01", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:13:55", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 61.52411222457886, "generate_output_len_bytes": 2927, "generate_time": 32.030215660730995}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:19:00", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 81.13888708750407, "generate_output_len_bytes": 3486, "generate_time": 55.5331826210022}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:27:49", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 47.41046245892843, "generate_output_len_bytes": 2384, "generate_time": 20.660600344340008}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:34:28", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 72.85646979014079, "generate_output_len_bytes": 2480, "generate_time": 34.05861854553223}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:39:22", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 152.54357608159384, "generate_output_len_bytes": 2357, "generate_time": 91.51808977127075}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:52:58", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 763, "summarize_time": 195.92926557858786, "generate_output_len_bytes": 2129, "generate_time": 96.55542047818501}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 03:15:01", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 64.64422671000163, "generate_output_len_bytes": 2927, "generate_time": 33.30378039677938}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 03:20:19", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 84.57761120796204, "generate_output_len_bytes": 3486, "generate_time": 57.59072462717692}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:28:44", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 49.08898218472799, "generate_output_len_bytes": 2384, "generate_time": 21.489527861277264}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:32:39", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 74.43774898846944, "generate_output_len_bytes": 2480, "generate_time": 34.72673638661703}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:39:21", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 153.41076453526816, "generate_output_len_bytes": 2357, "generate_time": 91.14894040425618}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:52:00", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 763, "summarize_time": 199.79869039853415, "generate_output_len_bytes": 2129, "generate_time": 98.61504419644673}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 04:08:12", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 66.49260465304057, "generate_output_len_bytes": 2927, "generate_time": 34.17951035499573}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 04:13:39", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 87.65787092844646, "generate_output_len_bytes": 3486, "generate_time": 59.3750696182251}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 22:22:24", "git_sha": "b63768c6", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 122.13213857014973, "generate_output_len_bytes": 2826, "generate_time": 66.34098903338115}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 22:33:33", "git_sha": "c1348fb3", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 120.53812781969707, "generate_output_len_bytes": 2826, "generate_time": 67.28052496910095}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 22:56:52", "git_sha": "fb84de76", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1036, "summarize_time": 29.128981749216717, "generate_output_len_bytes": 2242, "generate_time": 12.197122732798258}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:00:33", "git_sha": "fb84de76", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 05:47:43", "git_sha": "22352acd", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 05:48:58", "git_sha": "22352acd", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 05:50:40", "git_sha": "22352acd", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 165.05752809842429, "generate_output_len_bytes": 2605, "generate_time": 93.80659619967143}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 06:05:51", "git_sha": "22352acd", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 06:10:05", "git_sha": "22352acd", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 906, "summarize_time": 410.0691332022349, "generate_output_len_bytes": 521, "generate_time": 57.71272214253744}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 06:36:58", "git_sha": "22352acd", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 171.74388321240744, "generate_output_len_bytes": 2605, "generate_time": 97.00725762049358}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 06:51:13", "git_sha": "22352acd", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 792, "summarize_time": 267.0555826822917, "generate_output_len_bytes": 2783, "generate_time": 163.99818523724875}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 07:13:35", "git_sha": "22352acd", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 906, "summarize_time": 413.9569679101308, "generate_output_len_bytes": 521, "generate_time": 58.52583885192871}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 07:38:02", "git_sha": "22352acd", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 175.4907926718394, "generate_output_len_bytes": 2605, "generate_time": 98.97720170021057}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 12:35:08", "git_sha": "29a002e5", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 983, "summarize_time": 42.21107586224874, "generate_output_len_bytes": 2130, "generate_time": 16.94527777036031}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 20:03:36", "git_sha": "51318f44", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.0461368560791, "generate_output_len_bytes": 2383, "generate_time": 19.614749511082966}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:07:35", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.8376894791921, "generate_output_len_bytes": 2383, "generate_time": 20.2719091574351}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 20:42:46", "git_sha": "2f4bb620", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:50:19", "git_sha": "2f4bb620", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 66.52468911806743, "generate_output_len_bytes": 2479, "generate_time": 29.828714847564697}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:56:04", "git_sha": "2f4bb620", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 19:55:35", "git_sha": "51318f44", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 38.753786404927574, "generate_output_len_bytes": 2383, "generate_time": 19.529522736867268}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 20:36:13", "git_sha": "51318f44", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.024452924728394, "generate_output_len_bytes": 2383, "generate_time": 20.29120985666911}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 20:40:08", "git_sha": "51318f44", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 54.554532527923584, "generate_output_len_bytes": 2171, "generate_time": 24.604793945948284}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:50:05", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.09950613975525, "generate_output_len_bytes": 2383, "generate_time": 20.947362899780273}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:54:08", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 58.3172922929128, "generate_output_len_bytes": 2171, "generate_time": 25.735217014948528}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 21:01:04", "git_sha": "51318f44", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.85940829912821, "generate_output_len_bytes": 2383, "generate_time": 21.380353291829426}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 21:05:24", "git_sha": "51318f44", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 54.235164642333984, "generate_output_len_bytes": 2171, "generate_time": 25.70338026682536}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 21:10:37", "git_sha": "51318f44", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 927, "summarize_time": 133.53030570348105, "generate_output_len_bytes": 2782, "generate_time": 72.97924383481343}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 22:18:17", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 927, "summarize_time": 131.45291074117026, "generate_output_len_bytes": 2782, "generate_time": 72.30849742889404}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 22:51:09", "git_sha": "383b6bbc", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 39.269713958104454, "generate_output_len_bytes": 2383, "generate_time": 19.65731406211853}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 22:54:54", "git_sha": "383b6bbc", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 51.84283971786499, "generate_output_len_bytes": 2171, "generate_time": 28.441521485646565}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 23:13:10", "git_sha": "383b6bbc", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 53.383726040522255, "generate_output_len_bytes": 2171, "generate_time": 24.422890504201252}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 23:18:04", "git_sha": "383b6bbc", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 52.791220347086586, "generate_output_len_bytes": 2171, "generate_time": 25.378511508305866}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 23:23:11", "git_sha": "383b6bbc", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 56.3846542040507, "generate_output_len_bytes": 2171, "generate_time": 26.636192480723064}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 23:52:44", "git_sha": "da69b822", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 40.36223220825195, "generate_output_len_bytes": 2383, "generate_time": 19.87660264968872}
+{"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/22/2023 00:15:05", "git_sha": "e843e8c3", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 64.78201874097188, "generate_output_len_bytes": 2479, "generate_time": 29.02147897084554}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 0, "reps": 3, "date": "08/22/2023 19:01:15", "git_sha": "855b7d15", "n_gpus": 0, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "CPU", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1351, "summarize_time": 1215.5185990333557, "generate_output_len_bytes": 849, "generate_time": 180.56836318969727}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 0, "reps": 3, "date": "08/22/2023 20:11:16", "git_sha": "855b7d15", "n_gpus": 0, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "CPU", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1353, "summarize_time": 1216.9783231417339, "generate_output_len_bytes": 849, "generate_time": 180.42225472132364}
+{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 0, "reps": 3, "date": "08/22/2023 21:21:20", "git_sha": "855b7d15", "n_gpus": 0, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "CPU", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1354, "summarize_time": 1217.1687794526417, "generate_output_len_bytes": 843, "generate_time": 180.78463260332742}
diff --git a/benchmarks/perf.md b/benchmarks/perf.md
new file mode 100644
index 0000000000000000000000000000000000000000..a6f22582d20dd5d066eae14f27d08753cb5bc9ec
--- /dev/null
+++ b/benchmarks/perf.md
@@ -0,0 +1,200 @@
+# Backend: transformers
+
+For an [interactive visualization of the results](https://raw.githubusercontent.com/h2oai/h2ogpt/blob/main/benchmarks/llm_gpu_benchmark_transformers.html), save the linked file as HTML on your machine and open it in a browser.
+
+
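+The tables in this report correspond to the raw JSONL benchmark records added earlier in this diff (one JSON object per run). As a minimal sketch, not the project's actual reporting code, the "generation speed" column can be reproduced by assuming roughly 4 bytes of output text per token, which matches the reported numbers (e.g. 2384 bytes / 4 / 19.51 s is about 30.5 tokens/sec); the input path below is an illustrative assumption:
+
+```python
+import json
+import math
+
+BYTES_PER_TOKEN = 4  # assumption: roughly 4 bytes of UTF-8 output per llama2 token
+
+
+def generation_speed(record: dict) -> float:
+    """Tokens/sec for one benchmark record; NaN for failed runs (e.g. OOM)."""
+    if "exception" in record:
+        return math.nan
+    return record["generate_output_len_bytes"] / BYTES_PER_TOKEN / record["generate_time"]
+
+
+# Illustrative path; point it at the JSONL benchmark file in this repo.
+with open("benchmarks/llm_gpu_benchmarks.json") as f:
+    records = [json.loads(line) for line in f if line.strip()]
+
+for r in records:
+    print(f'{r["base_model"]:40s} {r["bits"]:>2d} bit  {r["gpus"]:40s} '
+          f'{generation_speed(r):8.3f} tokens/sec')
+```
+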
+## Model: h2oai/h2ogpt-4096-llama2-7b-chat (transformers)
+### Number of GPUs: 0
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:-------|---------------------------:|--------------------------------:|:------------|
+| 16 | CPU | 1215.52 | 1.17546 | |
+| 8 | CPU | 1216.98 | 1.17641 | |
+| 4 | CPU | 1217.17 | 1.16575 | |
+### Number of GPUs: 1
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 31.8619 | 41.9433 | |
+| 16 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | 32.2947 | 40.9252 | |
+| 16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 37.1139 | 32.4529 | |
+| 16 | 1 x NVIDIA RTX A6000 (46068 MiB) | 47.0375 | 29.8526 | |
+| 16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | 67.9752 | 18.0571 | |
+| 8 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | 114.622 | 9.21246 | |
+| 8 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 94.1774 | 8.95532 | |
+| 8 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 181.246 | 7.47991 | |
+| 8 | 1 x NVIDIA RTX A6000 (46068 MiB) | 148.616 | 6.61984 | |
+| 8 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | 185.146 | 4.35807 | |
+| 4 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | 39.544 | 32.571 | |
+| 4 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 42.8067 | 32.3408 | |
+| 4 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 53.3973 | 23.3267 | |
+| 4 | 1 x NVIDIA RTX A6000 (46068 MiB) | 61.5241 | 22.8456 | |
+| 4 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | 90.5194 | 14.9456 | |
+### Number of GPUs: 2
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 32.1395 | 40.3871 | |
+| 16 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 39.9269 | 32.248 | |
+| 16 | 2 x NVIDIA RTX A6000 (46068 MiB) | 47.4105 | 28.8472 | |
+| 16 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 71.4808 | 17.7518 | |
+| 8 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 94.9813 | 9.03765 | |
+| 8 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 178.2 | 7.55443 | |
+| 8 | 2 x NVIDIA RTX A6000 (46068 MiB) | 152.544 | 6.43862 | |
+| 8 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 186.884 | 4.35012 | |
+| 4 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 43.235 | 32.0566 | |
+| 4 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 57.0808 | 22.6791 | |
+| 4 | 2 x NVIDIA RTX A6000 (46068 MiB) | 64.6442 | 21.972 | |
+| 4 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 94.5099 | 14.6162 | |
+### Number of GPUs: 4
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 42.3398 | 30.2181 | |
+| 16 | 4 x NVIDIA RTX A6000 (46068 MiB) | 49.089 | 27.7344 | |
+| 8 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 180.534 | 7.53804 | |
+| 8 | 4 x NVIDIA RTX A6000 (46068 MiB) | 153.411 | 6.46469 | |
+| 4 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 58.6287 | 21.9123 | |
+| 4 | 4 x NVIDIA RTX A6000 (46068 MiB) | 66.4926 | 21.409 | |
+### Number of GPUs: 8
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 40.4986 | 30.5489 | |
+| 8 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 186.713 | 7.23498 | |
+| 4 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 60.1828 | 21.9172 | |
+## Model: h2oai/h2ogpt-4096-llama2-13b-chat (transformers)
+### Number of GPUs: 1
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 52.4984 | 26.2487 | |
+| 16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 49.7972 | 24.9301 | |
+| 16 | 1 x NVIDIA RTX A6000 (46068 MiB) | 71.9114 | 18.4362 | |
+| 16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | nan | nan | OOM |
+| 16 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | nan | nan | OOM |
+| 8 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 168.967 | 7.67522 | |
+| 8 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | 185.442 | 6.0205 | |
+| 8 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 174.458 | 5.69269 | |
+| 8 | 1 x NVIDIA RTX A6000 (46068 MiB) | 193.993 | 5.56359 | |
+| 8 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | 280.467 | 3.75936 | |
+| 4 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 45.3051 | 20.4771 | |
+| 4 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | 68.0646 | 16.1241 | |
+| 4 | 1 x NVIDIA RTX A6000 (46068 MiB) | 81.1389 | 15.6933 | |
+| 4 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 74.271 | 15.0868 | |
+| 4 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | 96.6189 | 9.77255 | |
+### Number of GPUs: 2
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 51.6428 | 26.1842 | |
+| 16 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 51.299 | 24.8757 | |
+| 16 | 2 x NVIDIA RTX A6000 (46068 MiB) | 72.8565 | 18.2039 | |
+| 16 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 89.5996 | 12.8295 | |
+| 8 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 167.523 | 7.82793 | |
+| 8 | 2 x NVIDIA RTX A6000 (46068 MiB) | 195.929 | 5.51238 | |
+| 8 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 180.781 | 5.43787 | |
+| 8 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 280.831 | 3.72157 | |
+| 4 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 47.1425 | 19.9791 | |
+| 4 | 2 x NVIDIA RTX A6000 (46068 MiB) | 84.5776 | 15.1326 | |
+| 4 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 79.9461 | 14.3455 | |
+| 4 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 98.4705 | 9.68779 | |
+### Number of GPUs: 4
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 55.3779 | 21.7073 | |
+| 16 | 4 x NVIDIA RTX A6000 (46068 MiB) | 74.4377 | 17.8537 | |
+| 8 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 179.505 | 5.45185 | |
+| 8 | 4 x NVIDIA RTX A6000 (46068 MiB) | 199.799 | 5.39725 | |
+| 4 | 4 x NVIDIA RTX A6000 (46068 MiB) | 87.6579 | 14.6779 | |
+| 4 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 78.9061 | 14.6754 | |
+### Number of GPUs: 8
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 55.3965 | 22.302 | |
+| 8 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 185.328 | 5.38647 | |
+| 4 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 83.0479 | 13.969 | |
+## Model: h2oai/h2ogpt-4096-llama2-70b-chat (transformers)
+### Number of GPUs: 1
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | nan | nan | OOM |
+| 16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | nan | nan | OOM |
+| 16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | nan | nan | OOM |
+| 16 | 1 x NVIDIA RTX A6000 (46068 MiB) | nan | nan | OOM |
+| 8 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | nan | nan | OOM |
+| 8 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | nan | nan | OOM |
+| 8 | 1 x NVIDIA RTX A6000 (46068 MiB) | nan | nan | OOM |
+| 4 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 122.132 | 10.6495 | |
+| 4 | 1 x NVIDIA RTX A6000 (46068 MiB) | 165.058 | 6.94248 | |
+| 4 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | nan | nan | OOM |
+### Number of GPUs: 2
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 2 x NVIDIA RTX A6000 (46068 MiB) | nan | nan | OOM |
+| 8 | 2 x NVIDIA RTX A6000 (46068 MiB) | 410.069 | 2.25687 | |
+| 4 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 120.538 | 10.5008 | |
+| 4 | 2 x NVIDIA RTX A6000 (46068 MiB) | 171.744 | 6.71342 | |
+| 4 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | nan | nan | OOM |
+### Number of GPUs: 4
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:---------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 4 x NVIDIA RTX A6000 (46068 MiB) | 267.056 | 4.24242 | |
+| 8 | 4 x NVIDIA RTX A6000 (46068 MiB) | 413.957 | 2.22551 | |
+| 4 | 4 x NVIDIA RTX A6000 (46068 MiB) | 175.491 | 6.5798 | |
+# Backend: text-generation-inference
+
+For an [interactive visualization of the results](https://raw.githubusercontent.com/h2oai/h2ogpt/blob/main/benchmarks/llm_gpu_benchmark_text-generation-inference.html), save the linked file as HTML on your machine and open it in a browser.
+
+
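+For these runs, the model is served by a separately running text-generation-inference server rather than being loaded in-process. A minimal sketch of querying such a server with the `text_generation` client package (the host, port, prompt, and token budget are illustrative assumptions, not the benchmark configuration):
+
+```python
+from text_generation import Client
+
+# Assumes a text-generation-inference server is already serving one of the
+# models in this report on localhost:8080.
+client = Client("http://127.0.0.1:8080", timeout=60)
+
+response = client.generate("Explain what h2oGPT is in one sentence.",
+                           max_new_tokens=128)
+print(response.generated_text)
+```
+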
+## Model: h2oai/h2ogpt-4096-llama2-7b-chat (text-generation-inference)
+### Number of GPUs: 1
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 39.0155 | 55.2139 | |
+| 16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | 29.129 | 45.9535 | |
+| 16 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | 24.3988 | 44.5878 | |
+| 16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 39.2697 | 30.3068 | |
+| 16 | 1 x NVIDIA RTX A6000 (46068 MiB) | 40.3622 | 29.9724 | |
+### Number of GPUs: 2
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 7.63612 | 71.7881 | |
+| 16 | 2 x NVIDIA RTX A6000 (46068 MiB) | 41.0461 | 30.3726 | |
+| 16 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 41.0245 | 29.36 | |
+### Number of GPUs: 4
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 4 x NVIDIA RTX A6000 (46068 MiB) | 42.8377 | 29.388 | |
+| 16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 41.0995 | 28.4403 | |
+### Number of GPUs: 8
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 42.8594 | 27.8644 | |
+## Model: h2oai/h2ogpt-4096-llama2-13b-chat (text-generation-inference)
+### Number of GPUs: 1
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 21.7823 | 33.7132 | |
+| 16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 51.8428 | 19.083 | |
+| 16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | nan | nan | OOM |
+| 16 | 1 x NVIDIA RTX A6000 (46068 MiB) | nan | nan | OOM |
+### Number of GPUs: 2
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 10.8242 | 57.8237 | |
+| 16 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 42.2111 | 31.4247 | |
+| 16 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 53.3837 | 22.223 | |
+| 16 | 2 x NVIDIA RTX A6000 (46068 MiB) | 64.782 | 21.3549 | |
+### Number of GPUs: 4
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 52.7912 | 21.3862 | |
+| 16 | 4 x NVIDIA RTX A6000 (46068 MiB) | 66.5247 | 20.777 | |
+### Number of GPUs: 8
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 56.3847 | 20.3764 | |
+## Model: h2oai/h2ogpt-4096-llama2-70b-chat (text-generation-inference)
+### Number of GPUs: 4
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 131.453 | 9.61851 | |
+| 16 | 4 x NVIDIA RTX A6000 (46068 MiB) | nan | nan | OOM |
+### Number of GPUs: 8
+| bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+|-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
+| 16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 133.53 | 9.53011 | |
diff --git a/blog/README.md b/blog/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..887de5d8bc51f4ec2594cd2444d2164c4a8990a0
--- /dev/null
+++ b/blog/README.md
@@ -0,0 +1,81 @@
+# Building the World's Best Open-Source Large Language Model: H2O.ai's Journey
+
+by Arno Candel, PhD, CTO H2O.ai, April 19 2023
+
+At H2O.ai, we pride ourselves on developing world-class Machine Learning, Deep Learning, and AI platforms. We released H2O, the most widely used open-source distributed and scalable machine learning platform, before XGBoost, TensorFlow and PyTorch existed. H2O.ai is home to over 25 Kaggle grandmasters, including the current #1. In 2017, we used GPUs to create the world's best AutoML in H2O Driverless AI. We have witnessed first-hand how Large Language Models (LLMs) have taken the world by storm.
+
+We are proud to announce that we are building h2oGPT, an LLM that not only excels in performance but is also fully open-source and commercially usable, providing a valuable resource for developers, researchers, and organizations worldwide.
+
+In this blog, we'll explore our journey of building h2oGPT as part of our effort to further democratize AI.
+
+## Why Open-Source LLMs?
+
+While LLMs like OpenAI's ChatGPT/GPT-4, Anthropic's Claude, Microsoft's Bing AI Chat, Google's Bard, and Cohere are powerful and effective, they have certain limitations compared to open-source LLMs:
+
+1. **Data Privacy and Security**: Using hosted LLMs requires sending data to external servers. This can raise concerns about data privacy, security, and compliance, especially for sensitive information or industries with strict regulations.
+2. **Dependency and Customization**: Hosted LLMs often limit the extent of customization and control, as users rely on the service provider's infrastructure and predefined models. Open-source LLMs allow users to tailor the models to their specific needs, deploy on their own infrastructure, and even modify the underlying code.
+3. **Cost and Scalability**: Hosted LLMs usually come with usage fees, which can increase significantly with large-scale applications. Open-source LLMs can be more cost-effective, as users can scale the models on their own infrastructure without incurring additional costs from the service provider.
+4. **Access and Availability**: Hosted LLMs may be subject to downtime or limited availability, affecting users' access to the models. Open-source LLMs can be deployed on-premises or on private clouds, ensuring uninterrupted access and reducing reliance on external providers.
+
+Overall, open-source LLMs offer greater flexibility, control, and cost-effectiveness, while addressing data privacy and security concerns. They foster a competitive landscape in the AI industry and empower users to innovate and customize models to suit their specific needs.
+
+## The H2O.ai LLM Ecosystem
+
+Our open-source LLM ecosystem currently includes the following components:
+
+1. **Code, data, and models**: Fully permissive, commercially usable [code](https://github.com/h2oai/h2ogpt), curated fine-tuning [data](https://huggingface.co/h2oai), and fine-tuned [models](https://huggingface.co/h2oai) ranging from 7 to 20 billion parameters.
+2. **State-of-the-art fine-tuning**: We provide code for highly efficient fine-tuning, including targeted data preparation, prompt engineering, and computational optimizations to fine-tune LLMs with up to 20 billion parameters (even larger models expected soon) in hours on commodity hardware or enterprise servers. Techniques like low-rank adaptation (LoRA) and data compression allow computational savings of several orders of magnitude.
+3. **Chatbot**: We provide code to run a multi-tenant chatbot on GPU servers, with an easily shareable end-point and a Python client API, allowing you to evaluate and compare the performance of fine-tuned LLMs.
+4. **H2O LLM Studio**: Our no-code LLM fine-tuning framework created by the world's top Kaggle grandmasters makes it even easier to fine-tune and evaluate LLMs.
+
+Everything we release is based on fully permissive data and models, with all code open-sourced, so businesses and commercial products can adopt it without legal concerns, expanding access to cutting-edge AI while adhering to licensing requirements.
+
+## Roadmap and Future Plans
+
+We have an ambitious roadmap for our LLM ecosystem, including:
+
+1. Integration with downstream applications and low/no-code platforms (H2O Document AI, H2O LLM Studio, etc.)
+2. Improved validation and benchmarking frameworks for LLMs
+3. Complementing our chatbot with search and other APIs (LangChain, etc.)
+4. Contributing to large-scale data cleaning efforts (Open Assistant, Stability AI, RedPajama, etc.)
+5. High-performance distributed training of larger models on trillion tokens
+6. High-performance scalable on-premises hosting for high-throughput endpoints
+7. Improvements in code completion, reasoning, mathematics, and factual correctness, along with fewer hallucinations and repetitions
+
+## Getting Started with H2O.ai's LLMs
+
+You can [Chat with h2oGPT](https://gpt.h2o.ai/) right now!
+
+https://user-images.githubusercontent.com/6147661/232924684-6c0e2dfb-2f24-4098-848a-c3e4396f29f6.mov
+
+![](https://user-images.githubusercontent.com/6147661/233239878-de3b0fce-5425-4189-8095-5313c7817d58.png)
+![](https://user-images.githubusercontent.com/6147661/233239861-e99f238c-dd5d-4dd7-ac17-6367f91f86ac.png)
+
+To start using our LLM as a developer, follow the steps below:
+
+1. Clone the repository: `git clone https://github.com/h2oai/h2ogpt.git`
+2. Change to the repository directory: `cd h2ogpt`
+3. Install the requirements: `pip install -r requirements.txt`
+4. Run the chatbot: `python generate.py --base_model=h2oai/h2ogpt-oig-oasst1-256-6_9b`
+5. Open your browser at `http://0.0.0.0:7860` or the public live URL printed by the server.
+
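+Once the server is up, it can also be queried programmatically instead of through the browser. A minimal sketch using the generic `gradio_client` package (the endpoint name and payload are illustrative assumptions, not the documented h2oGPT client API):
+
+```python
+from gradio_client import Client
+
+# Connect to the server started in step 4 (adjust host/port as needed).
+client = Client("http://0.0.0.0:7860")
+
+# Inspect the named endpoints the server actually exposes.
+client.view_api()
+
+# Example call; "/submit_nochat" is an assumed endpoint name -- replace it
+# with one listed by view_api() above.
+answer = client.predict("Why is the sky blue?", api_name="/submit_nochat")
+print(answer)
+```
+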
+For more information, visit [h2oGPT GitHub page](https://github.com/h2oai/h2ogpt), [H2O.ai's Hugging Face page](https://huggingface.co/h2oai) and [H2O LLM Studio GitHub page](https://github.com/h2oai/h2o-llmstudio).
+
+Join us on this exciting journey as we continue to improve and expand the capabilities of our open-source LLM ecosystem!
+
+## Acknowledgements
+
+We appreciate the work by many open-source contributors, especially:
+
+* [H2O.ai makers](https://h2o.ai/company/team/)
+* [Alpaca-LoRA](https://github.com/tloen/alpaca-lora/)
+* [LoRA](https://github.com/microsoft/LoRA/)
+* [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca/)
+* [Hugging Face](https://huggingface.co/)
+* [OpenAssistant](https://open-assistant.io/)
+* [EleutherAI](https://www.eleuther.ai/)
+* [LAION](https://laion.ai/blog/oig-dataset/)
+* [BigScience](https://github.com/bigscience-workshop/bigscience/)
+* [LLaMa](https://github.com/facebookresearch/llama/)
+* [StableLM](https://github.com/Stability-AI/StableLM/)
+* [Vicuna](https://github.com/lm-sys/FastChat/)
diff --git a/ci/jenkinsfile b/ci/jenkinsfile
new file mode 100644
index 0000000000000000000000000000000000000000..1868cc06423d5381b83ee1549c817191c168708e
--- /dev/null
+++ b/ci/jenkinsfile
@@ -0,0 +1,158 @@
+#!/usr/bin/groovy
+
+@Library('test-shared-library@dai_pipeline') _
+
+import ai.h2o.ci.buildsummary.StagesSummary
+import groovy.json.JsonOutput
+
+buildSummary('https://github.com/h2oai/h2ogpt', true)
+buildSummary.get().addStagesSummary(this, new StagesSummary())
+
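+// Test matrix: each entry names a test, the node label it runs on, the make
+// install/test targets it exercises, the pytest markers to select, a timeout
+// in minutes, whether it runs inside the project's Docker image, and any
+// extra environment variables.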
+def ALL_TESTS = [
+ "test_osx": [
+ install_deps: "TRAINING",
+ test_target: "test_imports",
+ node: "osx",
+ test_markers: "not need_tokens and not need_gpu",
+ timeout: 90,
+ use_docker: false,
+ env: ['PYTHON_BINARY=/Users/jenkins/anaconda/envs/h2ogpt-py3.10/bin/python']
+ ],
+ "test_all": [
+ install_deps: "TRAINING,WIKI_EXTRA",
+ test_target: "test",
+ test_markers: "not need_tokens and not need_gpu",
+ node: "DAIDEV-GPU || DAIDEV-2GPU",
+ timeout: 90,
+ use_docker: true,
+ env: []
+ ],
+]
+
+pipeline {
+ agent none
+ parameters {
+ booleanParam(name: 'skipTesting', defaultValue: false, description: 'Skip testing')
+ text(name: "testTargets", defaultValue: "${ALL_TESTS.keySet().join('\n')}", description: "A select set of tests to run")
+ booleanParam(name: 'publish', defaultValue: false, description: 'Upload to HF')
+ }
+ options {
+ ansiColor('xterm')
+ timestamps()
+ }
+ stages {
+ stage('Build') {
+ agent {
+ label "linux && docker"
+ }
+ steps {
+ script {
+ def shortHash = sh(returnStdout: true, script: 'git rev-parse --short HEAD').trim()
+ def commitMsg = sh(returnStdout: true, script: 'git log -1 --pretty=format:"[%an] %s"').trim()
+ currentBuild.displayName = "${env.BUILD_ID} - [${shortHash}]"
+ currentBuild.description = "${commitMsg}"
+
+ sh "make docker_build"
+ docker.image("harbor.h2o.ai/library/python:3.10").inside("--entrypoint='' --security-opt seccomp=unconfined -e USE_WHEEL=1 -e HOME=${WORKSPACE}") {
+ sh "make clean dist"
+ }
+
+ archiveArtifacts allowEmptyArchive: true, artifacts: "dist/h2ogpt-*.whl"
+ stash includes: "dist/h2ogpt-*.whl", name: "wheel_file"
+ }
+ }
+ }
+
+ stage('Tests') {
+ when {
+ anyOf {
+ expression { return !params.skipTesting }
+ }
+ beforeAgent true
+ }
+ agent {
+ label "linux && docker"
+ }
+ steps {
+ script {
+ def testTargets = [:]
+ params.testTargets.split('\n').findAll{ it.contains("test_") }.each { testName ->
+ testTargets[testName] = {
+ node("${ALL_TESTS[testName].node}") {
+ buildSummary.stageWithSummary("${testName}", "${testName}") {
+ buildSummary.setStageUrl("${testName}")
+ timeout(time: ALL_TESTS[testName].timeout, unit: 'MINUTES') {
+ script {
+ try {
+ dir("${testName}") {
+ withEnv(ALL_TESTS[testName].env + ["PYTEST_TEST_NAME=_${testName}", "IS_PR_BUILD=${isPrBranch()}", "USE_WHEEL=1"]) {
+
+ // cleanup and force the use of the installed wheel
+ deleteDir()
+ checkout scm
+ unstash "wheel_file"
+ sh "rm -rf *.py spaces models"
+
+ // pull runtime details
+ def dockerImage = sh(returnStdout: true, script: "make print-DOCKER_TEST_IMAGE").trim()
+ def nvidiaSmiExitCode = sh(returnStdout: false, returnStatus: true, script: "nvidia-smi")
+ // def dockerRuntime = "${nvidiaSmiExitCode}" == "0" ? "--runtime nvidia" : ""
+ def dockerRuntime = "" // TODO: keep until lab machines are upgraded
+
+ if (ALL_TESTS[testName].use_docker) {
+ docker.image("${dockerImage}").inside("--entrypoint='' --security-opt seccomp=unconfined --ulimit core=-1 --init --pid=host -e USE_WHEEL=1 -e HOME=${WORKSPACE}/${testName} ${dockerRuntime}") {
+ sh "nvidia-smi || true"
+ sh "SKIP_MANUAL_TESTS=1 PYTHON_BINARY=/usr/bin/python3.10 make install"
+ sh "SKIP_MANUAL_TESTS=1 PYTHON_BINARY=/usr/bin/python3.10 make install-${ALL_TESTS[testName].install_deps}"
+ sh """DEFAULT_MARKERS="${ALL_TESTS[testName].test_markers}" SKIP_MANUAL_TESTS=1 PYTHON_BINARY=/usr/bin/python3.10 make ${ALL_TESTS[testName].test_target}"""
+ }
+ } else {
+ sh "make venv"
+ sh "SKIP_MANUAL_TESTS=1 PYTHON_BINARY=${WORKSPACE}/${testName}/venv/bin/python make install"
+ sh "SKIP_MANUAL_TESTS=1 PYTHON_BINARY=${WORKSPACE}/${testName}/venv/bin/python make install-${ALL_TESTS[testName].install_deps}"
+ sh """DEFAULT_MARKERS="${ALL_TESTS[testName].test_markers}" SKIP_MANUAL_TESTS=1 PYTHON_BINARY=${WORKSPACE}/${testName}/venv/bin/python make ${ALL_TESTS[testName].test_target}"""
+ }
+ }
+ }
+ } catch (e) {
+ throw e
+ } finally {
+ sh "mv ${testName}/test_report.xml ${testName}/${testName}_report.xml"
+ archiveArtifacts allowEmptyArchive: true, artifacts: "${testName}/${testName}_report.xml"
+ junit testResults: "${testName}/${testName}_report.xml", keepLongStdio: true, allowEmptyResults: true
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ parallel(testTargets)
+ }
+ }
+ }
+
+ stage('Publish') {
+ when {
+ anyOf {
+ expression { return params.publish }
+ }
+ beforeAgent true
+ }
+ agent {
+ label "linux && docker"
+ }
+ steps {
+ script {
+ sh "make IS_PR_BUILD=${isPrBranch()} BUILD_NUMBER=${env.BUILD_ID} BUILD_BASE_NAME=${env.JOB_BASE_NAME} publish"
+ }
+ }
+ }
+ }
+}
+
+def isPrBranch() {
+ return (env.CHANGE_BRANCH != null && env.CHANGE_BRANCH != '') ||
+ (env.BRANCH_NAME != null && env.BRANCH_NAME.startsWith("PR-"))
+}
diff --git a/client/.gitignore b/client/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..c6a4bb29f8188faf8a899f7eb17252e9c993a842
--- /dev/null
+++ b/client/.gitignore
@@ -0,0 +1,168 @@
+### Copied files ###
+h2ogpt_client/_h2ogpt_*.py
+
+### Poetry ###
+.poetry
+poetry
+
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
diff --git a/client/Makefile b/client/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..a4a7c7ec295cda3ff39351f140a64c1f351e54e4
--- /dev/null
+++ b/client/Makefile
@@ -0,0 +1,58 @@
+POETRY_INSTALL_DIR := $(abspath ./.poetry)
+POETRY_BIN := $(POETRY_INSTALL_DIR)/bin/poetry
+
+PACKAGE_NAME = $(firstword $(shell $(POETRY_BIN) version))
+PACKAGE_DIR = $(subst -,_,$(PACKAGE_NAME))
+PACKAGE_VERSION = $(shell $(POETRY_BIN) version --short)
+
+# Space-separated list of file paths that need to be copied from h2oGPT.
+FILES_FROM_H2OGPT := enums.py
+NAME_PREFIX_FOR_FILES_COPIED_FROM_H2OGPT = _h2ogpt_
+
+$(POETRY_BIN):
+ @echo "Installing Poetry into '$(POETRY_INSTALL_DIR)' ..."
+ curl -sSL https://install.python-poetry.org | POETRY_HOME="$(POETRY_INSTALL_DIR)" python3 - --force --version 1.5.1
+
+.PHONY: copy_files_from_h2ogpt
+copy_files_from_h2ogpt:
+ for file in $(FILES_FROM_H2OGPT); do \
+ dst="$(PACKAGE_DIR)/$(NAME_PREFIX_FOR_FILES_COPIED_FROM_H2OGPT)$(notdir $$file)"; \
+ echo "Copying '$$file' to '$$dst' ..."; \
+ cp -f "./../src/$$file" "$$dst"; \
+ done
+
+.PHONY: clean
+clean:
+ rm -rf dist
+ find "$(PACKAGE_DIR)" -name "$(NAME_PREFIX_FOR_FILES_COPIED_FROM_H2OGPT)*" -delete
+
+.PHONY: clean_deep
+clean_deep: clean
+ rm -rf "$(POETRY_INSTALL_DIR)"
+ rm -rf ".venv"
+
+.PHONY: setup
+setup: $(POETRY_BIN)
+ $(POETRY_BIN) install
+
+.PHONY: setup_test
+setup_test:
+ $(POETRY_BIN) install --only=test
+
+.PHONY: lint
+lint: copy_files_from_h2ogpt
+ $(POETRY_BIN) run black .
+ $(POETRY_BIN) run isort .
+ $(POETRY_BIN) run flake8 "$(PACKAGE_DIR)" "tests" || true
+ $(POETRY_BIN) run mypy --show-error-codes --pretty .
+
+.PHONY: test
+test: copy_files_from_h2ogpt
+ $(POETRY_BIN) run pytest -r=A
+
+.PHONY: build
+build: copy_files_from_h2ogpt
+ $(POETRY_BIN) build
+
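+# Debug helper: print the value of any Makefile variable, e.g. `make print-PACKAGE_VERSION`.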
+print-%:
+ @echo $($*)
diff --git a/client/README.md b/client/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..95a378cc078f0b2f2558d430dba51a7b32eb88e9
--- /dev/null
+++ b/client/README.md
@@ -0,0 +1,107 @@
+# h2oGPT Client
+A thin Python client for h2oGPT.
+
+## Prerequisites
+- Python 3.8+
+
+If you don't have Python 3.8 on your system, you can use [Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html) to create an environment with it.
+```bash
+conda create -n h2ogpt_client_build -y
+conda activate h2ogpt_client_build
+conda install python=3.8 -y
+```
+
+## Download Client Wheel
+
+Install the latest nightly wheel from S3.
+
+```bash
+pip install https://s3.amazonaws.com/artifacts.h2o.ai/snapshots/ai/h2o/h2ogpt_client/latest-nightly/h2ogpt_client-0.1.0-py3-none-any.whl
+```
+
+Nightly releases can also be found [here](https://github.com/h2oai/h2ogpt/releases).
+
+## Build Client Wheel
+
+If you want to build a fresh wheel from the main branch instead of using the nightly build, follow these instructions.
+
+### Setup
+:information_source: [Poetry](https://python-poetry.org) is used as the build tool.
+```shell
+rm -rf client/.poetry/
+make -C client setup
+```
+
+### Build
+```shell
+make -C client build
+```
+The distribution wheel file can be found in the `client/dist` directory. The wheel can be installed into the primary h2oGPT environment or any other environment, e.g.
+```bash
+pip uninstall -y h2ogpt_client
+pip install client/dist/h2ogpt_client-*-py3-none-any.whl
+```
+
+## Usage
+
+The following example is based on the [test code](tests/test_client.py), specifically the `test_readme_example` test:
+```python
+def test_readme_example(local_server):
+ import os
+ import asyncio
+ from h2ogpt_client import Client
+
+ if local_server:
+ client = Client("http://0.0.0.0:7860")
+ else:
+ h2ogpt_key = os.getenv("H2OGPT_KEY") or os.getenv("H2OGPT_H2OGPT_KEY")
+ if h2ogpt_key is None:
+ return
+ # if you have API key for public instance:
+ client = Client("https://gpt.h2o.ai", h2ogpt_key=h2ogpt_key)
+
+ # Text completion
+ text_completion = client.text_completion.create()
+ response = asyncio.run(text_completion.complete("Hello world"))
+ print("asyncio text completion response: %s" % response)
+ # Text completion: synchronous
+ response = text_completion.complete_sync("Hello world")
+ print("sync text completion response: %s" % response)
+
+ # Chat completion
+ chat_completion = client.chat_completion.create()
+ reply = asyncio.run(chat_completion.chat("Hey!"))
+ print("asyncio text completion user: %s gpt: %s" % (reply["user"], reply["gpt"]))
+ chat_history = chat_completion.chat_history()
+ print("chat_history: %s" % chat_history)
+ # Chat completion: synchronous
+ reply = chat_completion.chat_sync("Hey!")
+ print("sync chat completion gpt: %s" % reply["gpt"])
+
+test_readme_example(local_server=True)
+```
+:warning: **Note**: Client APIs are still evolving. Hence, APIs may change without prior warning.
+
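+The client also exposes streaming responses and basic server/model introspection. The following is a minimal sketch, assuming a locally running h2oGPT server at `http://0.0.0.0:7860`; all calls (`complete_sync(..., enable_streaming=True)`, `client.server`, `client.models`) come from the client API shown above:
+
+```python
+from h2ogpt_client import Client
+
+client = Client("http://0.0.0.0:7860")
+
+# Streaming text completion (synchronous): chunks are yielded as they are generated
+text_completion = client.text_completion.create()
+for chunk in text_completion.complete_sync("Hello world", enable_streaming=True):
+    print(chunk, end="", flush=True)
+print()
+
+# Server and model introspection
+print("server address: %s" % client.server.address)
+print("models: %s" % [model.name for model in client.models.list()])
+```
+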
+## Development Guide
+
+### Test
+
+In an h2oGPT environment with the client installed, you can run tests that exercise both the client and the server.
+
+#### Test with h2oGPT env
+1. Install test dependencies of the Client into the h2oGPT Python environment.
+```shell
+make -C client setup_test
+```
+2. Run the tests with h2oGPT.
+```shell
+pytest client/tests/
+```
+
+#### Test with an existing h2oGPT server
+If you already have a running h2oGPT server, then set the `H2OGPT_SERVER` environment variable to use it for testing.
+```shell
+make H2OGPT_SERVER="http://0.0.0.0:7860" -C client test
+```
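+
+Within a test or other script, the same environment variable can be read directly. A minimal, illustrative sketch, assuming the server pointed to by `H2OGPT_SERVER` (or the local default) is reachable:
+```python
+import os
+from h2ogpt_client import Client
+
+server = os.getenv("H2OGPT_SERVER", "http://0.0.0.0:7860")
+client = Client(server)
+print(client.server.hash)  # simple connectivity check against the running server
+```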
diff --git a/client/h2ogpt_client/__init__.py b/client/h2ogpt_client/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9cebc384817dce2c6a50de674c9c6d6f9ecadf64
--- /dev/null
+++ b/client/h2ogpt_client/__init__.py
@@ -0,0 +1,4 @@
+from h2ogpt_client._core import Client
+from h2ogpt_client._h2ogpt_enums import LangChainMode, PromptType
+
+__all__ = ["Client", "PromptType", "LangChainMode"]
diff --git a/client/h2ogpt_client/_completion.py b/client/h2ogpt_client/_completion.py
new file mode 100644
index 0000000000000000000000000000000000000000..325b08c226d0f2914974e843ef3183d773dad11a
--- /dev/null
+++ b/client/h2ogpt_client/_completion.py
@@ -0,0 +1,507 @@
+import abc
+import ast
+import collections
+from typing import (
+ Any,
+ AsyncGenerator,
+ Dict,
+ Generator,
+ List,
+ Optional,
+ OrderedDict,
+ Union,
+)
+
+from h2ogpt_client._gradio_client import GradioClientWrapper
+from h2ogpt_client._h2ogpt_enums import (
+ DocumentSubset,
+ LangChainAction,
+ LangChainMode,
+ PromptType,
+)
+from h2ogpt_client._models import Model
+
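+# Maps h2oGPT server parameter names (keys) to the client-facing parameter names (values),
+# in the argument order expected by the h2oGPT API.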
+_H2OGPT_PARAMETERS_TO_CLIENT = collections.OrderedDict(
+ instruction="instruction",
+ iinput="input",
+ context="system_pre_context",
+ stream_output="stream_output",
+ prompt_type="prompt_type",
+ prompt_dict="prompt_dict",
+ temperature="temperature",
+ top_p="top_p",
+ top_k="top_k",
+ penalty_alpha="penalty_alpha",
+ num_beams="beams",
+ max_new_tokens="max_output_length",
+ min_new_tokens="min_output_length",
+ early_stopping="early_stopping",
+ max_time="max_time",
+ repetition_penalty="repetition_penalty",
+ num_return_sequences="number_returns",
+ do_sample="enable_sampler",
+ chat="chat",
+ instruction_nochat="instruction_nochat",
+ iinput_nochat="input_context_for_instruction",
+ langchain_mode="langchain_mode",
+ add_chat_history_to_context="add_chat_history_to_context",
+ langchain_action="langchain_action",
+ langchain_agents="langchain_agents",
+ top_k_docs="langchain_top_k_docs",
+ chunk="langchain_enable_chunk",
+ chunk_size="langchain_chunk_size",
+ document_subset="langchain_document_subset",
+ document_choice="langchain_document_choice",
+ document_source_substrings="langchain_document_source_substrings",
+ document_source_substrings_op="langchain_document_source_substrings_op",
+ document_content_substrings="langchain_document_content_substrings",
+ document_content_substrings_op="langchain_document_content_substrings_op",
+ pre_prompt_query="pre_prompt_query",
+ prompt_query="prompt_query",
+ pre_prompt_summary="pre_prompt_summary",
+ prompt_summary="prompt_summary",
+ hyde_llm_prompt="hyde_llm_prompt",
+ system_prompt="system_prompt",
+ image_audio_loaders="image_audio_loaders",
+ pdf_loaders="pdf_loaders",
+ url_loaders="url_loaders",
+ jq_schema="jq_schema",
+ visible_models="model",
+ h2ogpt_key="h2ogpt_key",
+ add_search_to_context="add_search_to_context",
+ chat_conversation="chat_conversation",
+ text_context_list="text_context_list",
+ docs_ordering_type="docs_ordering_type",
+ min_max_new_tokens="min_max_new_tokens",
+ max_input_tokens="max_input_tokens",
+ max_total_input_tokens="max_total_input_tokens",
+ docs_token_handling="docs_token_handling",
+ docs_joiner="docs_joiner",
+ hyde_level="hyde_level",
+ hyde_template="hyde_template",
+ hyde_show_only_final="hyde_show_only_final",
+ doc_json_mode="doc_json_mode",
+ chatbot_role="chatbot_role",
+ speaker="speaker",
+ tts_language="tts_language",
+ tts_speed="tts_speed",
+)
+
+
+def _to_h2ogpt_params(client_params: Dict[str, Any]) -> OrderedDict[str, Any]:
+ """Convert given params to the order of params in h2oGPT."""
+
+ h2ogpt_params: OrderedDict[str, Any] = collections.OrderedDict()
+ for h2ogpt_param_name, client_param_name in _H2OGPT_PARAMETERS_TO_CLIENT.items():
+ if client_param_name in client_params:
+ h2ogpt_params[h2ogpt_param_name] = client_params[client_param_name]
+ return h2ogpt_params
+
+
+_DEFAULT_PARAMETERS: Dict[str, Any] = dict(
+ instruction="",
+ input="",
+ system_pre_context="",
+ stream_output=False,
+ prompt_type=PromptType.plain.value,
+ prompt_dict="", # empty as prompt_type cannot be 'custom'
+ temperature=0.1,
+ top_p=1.0,
+ top_k=40,
+ penalty_alpha=0.0,
+ beams=1.0,
+ max_output_length=1024,
+ min_output_length=0,
+ early_stopping=False,
+ max_time=360,
+ repetition_penalty=1.07,
+ number_returns=1,
+ enable_sampler=False,
+ chat=False,
+ instruction_nochat="",
+ input_context_for_instruction="",
+ langchain_mode=LangChainMode.DISABLED.value,
+ add_chat_history_to_context=False, # relevant only for the UI
+ langchain_action=LangChainAction.QUERY.value,
+ langchain_agents=[],
+ langchain_top_k_docs=4, # langchain: number of document chunks
+ langchain_enable_chunk=True, # langchain: whether to chunk documents
+ langchain_chunk_size=512, # langchain: chunk size for document chunking
+ langchain_document_subset=DocumentSubset.Relevant.name,
+ langchain_document_choice=[],
+ langchain_document_source_substrings=[],
+ langchain_document_source_substrings_op='and',
+ langchain_document_content_substrings=[],
+ langchain_document_content_substrings_op='and',
+ pre_prompt_query=[],
+ prompt_query="",
+ pre_prompt_summary="",
+ prompt_summary="",
+ hyde_llm_prompt="",
+ system_prompt="",
+ image_audio_loaders=[],
+ pdf_loaders=[],
+ url_loaders=[],
+ jq_schema=".[]",
+ model=None,
+ h2ogpt_key=None,
+ add_search_to_context=False,
+ chat_conversation=None,
+ text_context_list=[],
+ docs_ordering_type="reverse_ucurve_sort",
+ min_max_new_tokens=256,
+ max_input_tokens=-1,
+ max_total_input_tokens=-1,
+ docs_token_handling="split_or_merge",
+ docs_joiner="\n\n",
+ hyde_level=0,
+ hyde_template=None,
+ hyde_show_only_final=None,
+ doc_json_mode=False,
+ chatbot_role="None",
+ speaker="None",
+ tts_language="autodetect",
+ tts_speed=1.0,
+)
+
+
+class _Completion(abc.ABC):
+ _API_NAME = "/submit_nochat_api"
+
+ def __init__(self, client: GradioClientWrapper, parameters: OrderedDict[str, Any]):
+ self._client = client
+ self._parameters = dict(parameters)
+
+ def _get_parameters(self, prompt: str) -> Dict[str, Any]:
+ self._parameters["instruction_nochat"] = prompt
+ return self._parameters
+
+ @staticmethod
+ def _get_reply(response: str) -> str:
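+        # The server returns a stringified Python dict; parse it and extract the "response" field.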
+ return ast.literal_eval(response)["response"]
+
+ def _predict(self, prompt: str) -> str:
+ response = self._client.predict(
+ str(self._get_parameters(prompt)), api_name=self._API_NAME
+ )
+ return self._get_reply(response)
+
+ def _predict_and_stream(self, prompt: str) -> Generator[str, None, None]:
+ generator = self._client.predict_and_stream(
+ str(self._get_parameters(prompt)), api_name=self._API_NAME
+ )
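+        # Each streamed response contains the full reply generated so far; yield only the new suffix.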
+ reply_size_so_far = 0
+ for response in generator:
+ current_reply = self._get_reply(response)
+ new_reply_chunk = current_reply[reply_size_so_far:]
+ if not new_reply_chunk:
+ continue
+ reply_size_so_far += len(new_reply_chunk)
+ yield new_reply_chunk
+
+ async def _submit(self, prompt: str) -> str:
+ response = await self._client.submit(
+ str(self._get_parameters(prompt)), api_name=self._API_NAME
+ )
+ return self._get_reply(response)
+
+ async def _submit_and_stream(self, prompt: str) -> AsyncGenerator[str, None]:
+ generator = self._client.submit_and_stream(
+ str(self._get_parameters(prompt)), api_name=self._API_NAME
+ )
+ reply_size_so_far = 0
+ async for response in generator:
+ current_reply = self._get_reply(response)
+ new_reply_chunk = current_reply[reply_size_so_far:]
+ if not new_reply_chunk:
+ continue
+ reply_size_so_far += len(new_reply_chunk)
+ yield new_reply_chunk
+
+
+class TextCompletionCreator:
+ """Builder that can create text completions."""
+
+ def __init__(self, client: GradioClientWrapper):
+ self._client = client
+
+ def create(
+ self,
+ model: Union[None, Model, str] = None,
+ prompt_type: PromptType = PromptType.plain,
+ input_context_for_instruction: str = "",
+ enable_sampler=False,
+ temperature: float = 0.1,
+ top_p: float = 1.0,
+ top_k: int = 40,
+ penalty_alpha: float = 0.0,
+ beams: float = 1.0,
+ early_stopping: bool = False,
+ min_output_length: int = 0,
+ max_output_length: int = 1024,
+ max_time: int = 360,
+ repetition_penalty: float = 1.07,
+ number_returns: int = 1,
+ system_pre_context: str = "",
+ langchain_mode: LangChainMode = LangChainMode.DISABLED,
+ system_prompt: str = "",
+ add_search_to_context: bool = False,
+ text_context_list: List[str] = [],
+ docs_ordering_type: str = "reverse_ucurve_sort",
+ min_max_new_tokens: int = 256,
+ max_input_tokens: int = -1,
+ max_total_input_tokens: int = -1,
+ docs_token_handling: str = "split_or_merge",
+ docs_joiner: str = "\n\n",
+ hyde_level: int = 0,
+ hyde_template: Optional[str] = None,
+ hyde_show_only_final: bool = False,
+ doc_json_mode: bool = False,
+ chatbot_role="None",
+ speaker="None",
+ tts_language="autodetect",
+ tts_speed=1.0,
+ ) -> "TextCompletion":
+ """
+ Creates a new text completion.
+
+        :param model: model to be used, `None` means use the default model.
+ :param prompt_type: type of the prompt
+ :param input_context_for_instruction: input context for instruction
+ :param enable_sampler: enable or disable the sampler, required for use of
+ temperature, top_p, top_k
+ :param temperature: What sampling temperature to use, between 0 and 3.
+ Lower values will make it more focused and deterministic, but may lead
+            to repetition. Higher values will make the output more creative, but may
+ lead to hallucinations.
+ :param top_p: cumulative probability of tokens to sample from
+ :param top_k: number of tokens to sample from
+ :param penalty_alpha: >0 and top_k>1 enable contrastive search (not all models support)
+ :param beams: Number of searches for optimal overall probability.
+            Higher values use more GPU memory and compute.
+ :param early_stopping: whether to stop early or not in beam search
+ :param min_output_length: minimum output length
+ :param max_output_length: maximum output length
+ :param max_time: maximum time to search optimal output
+ :param repetition_penalty: penalty for repetition
+        :param number_returns: number of sequences to return (maps to num_return_sequences)
+        :param system_pre_context: text directly prepended to the context, without prompt processing
+ :param langchain_mode: LangChain mode
+ :param system_prompt: Universal system prompt to override prompt_type's system
+ prompt
+            If 'None', 'auto', or None is passed, then the automatic per-model value is used
+ :param add_search_to_context: Whether to add web search of query to context
+ :param text_context_list: list of strings to use as context (up to allowed max_seq_len of model)
+ :param docs_ordering_type: By default uses 'reverse_ucurve_sort' for optimal retrieval
+ :param min_max_new_tokens: minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
+ :param max_input_tokens: Max input tokens to place into model context for each LLM call
+ -1 means auto, fully fill context for query, and fill by original document chunk for summarization
+ >=0 means use that to limit context filling to that many tokens
+ :param max_total_input_tokens: like max_input_tokens but instead of per LLM call, applies across all LLM calls for single summarization/extraction action
+ :param docs_token_handling: 'chunk' means fill context with top_k_docs (limited by max_input_tokens or model_max_len) chunks for query
+ or top_k_docs original document chunks summarization
+ None or 'split_or_merge' means same as 'chunk' for query, while for summarization merges documents to fill up to max_input_tokens or model_max_len tokens
+ :param docs_joiner: string to join lists of text when doing split_or_merge. None means '\n\n'
+ :param hyde_level: HYDE level for HYDE approach (https://arxiv.org/abs/2212.10496)
+ 0: No HYDE
+ 1: Use non-document-based LLM response and original query for embedding query
+ 2: Use document-based LLM response and original query for embedding query
+ 3+: Continue iterations of embedding prior answer and getting new response
+ :param hyde_template:
+ None, 'None', 'auto' uses internal value and enable
+ '{query}' is minimal template one can pass
+ :param hyde_show_only_final: See h2oGPT server docs
+ :param doc_json_mode: whether to give JSON to LLM and get JSON response back
+ :param chatbot_role: See h2oGPT server docs
+ :param speaker: See h2oGPT server docs
+ :param tts_language: See h2oGPT server docs
+ :param tts_speed: See h2oGPT server docs
+ """
+ args = locals().copy()
+ args["prompt_type"] = prompt_type.value # convert to serializable type
+ args["langchain_mode"] = langchain_mode.value # convert to serializable type
+ params = _to_h2ogpt_params({**_DEFAULT_PARAMETERS, **args})
+ params["instruction_nochat"] = None # future prompt
+ params["h2ogpt_key"] = self._client.h2ogpt_key
+ return TextCompletion(self._client, params)
+
+
+class TextCompletion(_Completion):
+ """Text completion."""
+
+ async def complete(
+ self, prompt: str, enable_streaming: bool = False
+ ) -> Union[str, AsyncGenerator[str, None]]:
+ """
+ Complete this text completion.
+
+ :param prompt: text prompt to generate completion for
+ :param enable_streaming: whether to enable or disable streaming the response
+ :return: response from the model
+ """
+ if enable_streaming:
+ params = self._get_parameters(prompt)
+ params["stream_output"] = True
+ return self._submit_and_stream(prompt)
+ else:
+ return await self._submit(prompt)
+
+ def complete_sync(
+ self, prompt: str, enable_streaming: bool = False
+ ) -> Union[str, Generator[str, None, None]]:
+ """
+ Complete this text completion synchronously.
+
+ :param prompt: text prompt to generate completion for
+ :param enable_streaming: whether to enable or disable streaming the response
+ :return: response from the model
+ """
+ if enable_streaming:
+ params = self._get_parameters(prompt)
+ params["stream_output"] = True
+ return self._predict_and_stream(prompt)
+ else:
+ return self._predict(prompt)
+
+
+class ChatCompletionCreator:
+ """Chat completion."""
+
+ def __init__(self, client: GradioClientWrapper):
+ self._client = client
+
+ def create(
+ self,
+ model: Union[None, Model, str] = None,
+ prompt_type: PromptType = PromptType.plain,
+ input_context_for_instruction: str = "",
+ enable_sampler=False,
+ temperature: float = 0.1,
+ top_p: float = 1.0,
+ top_k: int = 40,
+ penalty_alpha: float = 0.0,
+ beams: float = 1.0,
+ early_stopping: bool = False,
+ min_output_length: int = 0,
+ max_output_length: int = 1024,
+ max_time: int = 360,
+ repetition_penalty: float = 1.07,
+ number_returns: int = 1,
+ system_pre_context: str = "",
+ langchain_mode: LangChainMode = LangChainMode.DISABLED,
+ system_prompt: str = "",
+ add_search_to_context: bool = False,
+ text_context_list: List[str] = [],
+ docs_ordering_type: str = "reverse_ucurve_sort",
+ min_max_new_tokens: int = 256,
+ max_input_tokens: int = -1,
+ max_total_input_tokens: int = -1,
+ docs_token_handling: str = "split_or_merge",
+ docs_joiner: str = "\n\n",
+ hyde_level: int = 0,
+ hyde_template: Optional[str] = None,
+ hyde_show_only_final: bool = False,
+ doc_json_mode: bool = False,
+ chatbot_role="None",
+ speaker="None",
+ tts_language="autodetect",
+ tts_speed=1.0,
+ ) -> "ChatCompletion":
+ """
+ Creates a new chat completion.
+
+        :param model: model to be used, `None` means use the default model.
+ :param prompt_type: type of the prompt
+ :param input_context_for_instruction: input context for instruction
+ :param enable_sampler: enable or disable the sampler, required for use of
+ temperature, top_p, top_k
+ :param temperature: What sampling temperature to use, between 0 and 3.
+ Lower values will make it more focused and deterministic, but may lead
+            to repetition. Higher values will make the output more creative, but may
+ lead to hallucinations.
+ :param top_p: cumulative probability of tokens to sample from
+ :param top_k: number of tokens to sample from
+ :param penalty_alpha: >0 and top_k>1 enable contrastive search (not all models support)
+ :param beams: Number of searches for optimal overall probability.
+            Higher values use more GPU memory and compute.
+ :param early_stopping: whether to stop early or not in beam search
+ :param min_output_length: minimum output length
+ :param max_output_length: maximum output length
+ :param max_time: maximum time to search optimal output
+ :param repetition_penalty: penalty for repetition
+        :param number_returns: number of sequences to return (maps to num_return_sequences)
+        :param system_pre_context: text directly prepended to the context, without prompt processing
+ :param langchain_mode: LangChain mode
+ :param system_prompt: Universal system prompt to override prompt_type's system
+ prompt
+ :param add_search_to_context: Whether to add web search of query to context
+ :param text_context_list: list of strings to use as context (up to allowed max_seq_len of model)
+ :param docs_ordering_type: By default uses 'reverse_ucurve_sort' for optimal retrieval
+ :param min_max_new_tokens: minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
+ :param max_input_tokens: Max input tokens to place into model context for each LLM call
+ -1 means auto, fully fill context for query, and fill by original document chunk for summarization
+ >=0 means use that to limit context filling to that many tokens
+ :param max_total_input_tokens: like max_input_tokens but instead of per LLM call, applies across all LLM calls for single summarization/extraction action
+ :param docs_token_handling: 'chunk' means fill context with top_k_docs (limited by max_input_tokens or model_max_len) chunks for query
+ or top_k_docs original document chunks summarization
+ None or 'split_or_merge' means same as 'chunk' for query, while for summarization merges documents to fill up to max_input_tokens or model_max_len tokens
+ :param docs_joiner: string to join lists of text when doing split_or_merge. None means '\n\n'
+ :param hyde_level: HYDE level for HYDE approach (https://arxiv.org/abs/2212.10496)
+ 0: No HYDE
+ 1: Use non-document-based LLM response and original query for embedding query
+ 2: Use document-based LLM response and original query for embedding query
+ 3+: Continue iterations of embedding prior answer and getting new response
+ :param hyde_template:
+            None, 'None', or 'auto' uses the internal value and enables it
+ '{query}' is minimal template one can pass
+ :param hyde_show_only_final: See h2oGPT server docs
+ :param doc_json_mode: whether to give JSON to LLM and get JSON response back
+ :param chatbot_role: See h2oGPT server docs
+ :param speaker: See h2oGPT server docs
+ :param tts_language: See h2oGPT server docs
+ :param tts_speed: See h2oGPT server docs
+ """
+ args = locals().copy()
+ args["prompt_type"] = prompt_type.value # convert to serializable type
+ args["langchain_mode"] = langchain_mode.value # convert to serializable type
+ params = _to_h2ogpt_params({**_DEFAULT_PARAMETERS, **args})
+ params["instruction_nochat"] = None # future prompts
+ params["add_chat_history_to_context"] = True
+ params["h2ogpt_key"] = self._client.h2ogpt_key
+ params["chat_conversation"] = [] # chat history (FIXME: Only works if 1 model?)
+ return ChatCompletion(self._client, params)
+
+
+class ChatCompletion(_Completion):
+ """Chat completion."""
+
+ def _update_history(self, prompt: str, reply: str) -> None:
+ self._parameters["chat_conversation"].append((prompt, reply))
+
+ async def chat(self, prompt: str) -> Dict[str, str]:
+ """
+ Complete this chat completion.
+
+ :param prompt: text prompt to generate completions for
+        :returns: the chat reply
+ """
+ reply = await self._submit(prompt)
+ self._update_history(prompt, reply)
+ return {"user": prompt, "gpt": reply}
+
+ def chat_sync(self, prompt: str) -> Dict[str, str]:
+ """
+        Complete this chat completion synchronously.
+
+        :param prompt: text prompt to generate completions for
+        :returns: the chat reply
+ """
+ reply = self._predict(prompt)
+ self._update_history(prompt, reply)
+ return {"user": prompt, "gpt": reply}
+
+ def chat_history(self) -> List[Dict[str, str]]:
+ """Returns the full chat history."""
+ return [
+ {"user": i[0], "gpt": i[1]} for i in self._parameters["chat_conversation"]
+ ]
diff --git a/client/h2ogpt_client/_core.py b/client/h2ogpt_client/_core.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1b5ef4cd5e573b3e7f43d4e2e5bb40d9d6b2af1
--- /dev/null
+++ b/client/h2ogpt_client/_core.py
@@ -0,0 +1,50 @@
+from typing import Optional
+
+from h2ogpt_client._completion import ChatCompletionCreator, TextCompletionCreator
+from h2ogpt_client._gradio_client import GradioClientWrapper
+from h2ogpt_client._models import Models
+from h2ogpt_client._server import Server
+
+
+class Client:
+ """h2oGPT Client."""
+
+ def __init__(
+ self,
+ src: str,
+ h2ogpt_key: Optional[str] = None,
+ huggingface_token: Optional[str] = None,
+ ):
+ """
+        Creates an h2oGPT client.
+ :param src: either the full URL to the hosted h2oGPT
+ (e.g. "http://0.0.0.0:7860", "https://fc752f297207f01c32.gradio.live")
+            or the name of the Hugging Face Space to load (e.g. "h2oai/h2ogpt-chatbot")
+        :param h2ogpt_key: access key to connect to an h2oGPT server
+ :param huggingface_token: Hugging Face token to use to access private Spaces
+ """
+ self._client = GradioClientWrapper(src, h2ogpt_key, huggingface_token)
+ self._text_completion = TextCompletionCreator(self._client)
+ self._chat_completion = ChatCompletionCreator(self._client)
+ self._models = Models(self._client)
+ self._server = Server(self._client)
+
+ @property
+ def text_completion(self) -> TextCompletionCreator:
+ """Text completion."""
+ return self._text_completion
+
+ @property
+ def chat_completion(self) -> ChatCompletionCreator:
+ """Chat completion."""
+ return self._chat_completion
+
+ @property
+ def models(self) -> Models:
+ """LL models."""
+ return self._models
+
+ @property
+ def server(self) -> Server:
+ """h2oGPT server."""
+ return self._server
diff --git a/client/h2ogpt_client/_gradio_client.py b/client/h2ogpt_client/_gradio_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0194a5f526565f17ef8e56a3a55435a03162922
--- /dev/null
+++ b/client/h2ogpt_client/_gradio_client.py
@@ -0,0 +1,54 @@
+import asyncio
+import time
+from typing import Any, AsyncGenerator, Generator, List, Optional
+
+import gradio_client # type: ignore
+
+
+class GradioClientWrapper:
+ def __init__(
+ self,
+ src: str,
+ h2ogpt_key: Optional[str] = None,
+ huggingface_token: Optional[str] = None,
+ ):
+ self._client = gradio_client.Client(
+ src=src, hf_token=huggingface_token, serialize=False, verbose=False
+ )
+ self.h2ogpt_key = h2ogpt_key
+
+ def predict(self, *args, api_name: str) -> Any:
+ return self._client.predict(*args, api_name=api_name)
+
+ def predict_and_stream(self, *args, api_name: str) -> Generator[str, None, None]:
+ job = self._client.submit(*args, api_name=api_name)
+ while not job.done():
+ outputs: List[str] = job.outputs()
+ if not len(outputs):
+ time.sleep(0.1)
+ continue
+ newest_response = outputs[-1]
+ yield newest_response
+
+ e = job.exception()
+ if e and isinstance(e, BaseException):
+ raise RuntimeError from e
+
+ async def submit(self, *args, api_name: str) -> Any:
+ return await asyncio.wrap_future(self._client.submit(*args, api_name=api_name))
+
+ async def submit_and_stream(
+ self, *args, api_name: str
+ ) -> AsyncGenerator[Any, None]:
+ job = self._client.submit(*args, api_name=api_name)
+ while not job.done():
+ outputs: List[str] = job.outputs()
+ if not len(outputs):
+ await asyncio.sleep(0.1)
+ continue
+ newest_response = outputs[-1]
+ yield newest_response
+
+ e = job.exception()
+ if e and isinstance(e, BaseException):
+ raise RuntimeError from e
diff --git a/client/h2ogpt_client/_models.py b/client/h2ogpt_client/_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e65d942bb1ba448a7ed7c739c35f054c0623fc3
--- /dev/null
+++ b/client/h2ogpt_client/_models.py
@@ -0,0 +1,35 @@
+import ast
+from typing import Any, Dict, List
+
+from h2ogpt_client._gradio_client import GradioClientWrapper
+
+
+class Model:
+ """Large language model in the h2oGPT server."""
+
+ def __init__(self, raw_info: Dict[str, Any]):
+ self._name = raw_info["base_model"]
+ self._raw_info = raw_info
+
+ @property
+ def name(self) -> str:
+ """Name of the model."""
+ return self._name
+
+ def __repr__(self) -> str:
+ return self.name.__repr__()
+
+ def __str__(self) -> str:
+ return self.name.__str__()
+
+
+class Models:
+ """Interact with LL Models in h2oGPT."""
+
+ def __init__(self, client: GradioClientWrapper):
+ self._client = client
+
+ def list(self) -> List[Model]:
+ """List all models available in the h2oGPT server."""
+ models = ast.literal_eval(self._client.predict(api_name="/model_names"))
+ return [Model(m) for m in models]
diff --git a/client/h2ogpt_client/_server.py b/client/h2ogpt_client/_server.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e1961260a4448af4bdccf8586bbd2d996ad56c7
--- /dev/null
+++ b/client/h2ogpt_client/_server.py
@@ -0,0 +1,18 @@
+from h2ogpt_client._gradio_client import GradioClientWrapper
+
+
+class Server:
+ """h2oGPT server."""
+
+ def __init__(self, client: GradioClientWrapper):
+ self._client = client
+
+ @property
+ def address(self) -> str:
+ """h2oGPT server address."""
+ return self._client._client.src
+
+ @property
+ def hash(self) -> str:
+ """h2oGPT server system hash."""
+ return str(self._client.predict(api_name="/system_hash"))
diff --git a/client/poetry.lock b/client/poetry.lock
new file mode 100644
index 0000000000000000000000000000000000000000..c5d316f8637392dc746c7178063445f5745e9d48
--- /dev/null
+++ b/client/poetry.lock
@@ -0,0 +1,856 @@
+# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+
+[[package]]
+name = "anyio"
+version = "3.6.2"
+description = "High level compatibility layer for multiple asynchronous event loop implementations"
+optional = false
+python-versions = ">=3.6.2"
+files = [
+ {file = "anyio-3.6.2-py3-none-any.whl", hash = "sha256:fbbe32bd270d2a2ef3ed1c5d45041250284e31fc0a4df4a5a6071842051a51e3"},
+ {file = "anyio-3.6.2.tar.gz", hash = "sha256:25ea0d673ae30af41a0c442f81cf3b38c7e79fdc7b60335a4c14e05eb0947421"},
+]
+
+[package.dependencies]
+idna = ">=2.8"
+sniffio = ">=1.1"
+
+[package.extras]
+doc = ["packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
+test = ["contextlib2", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (<0.15)", "uvloop (>=0.15)"]
+trio = ["trio (>=0.16,<0.22)"]
+
+[[package]]
+name = "attrs"
+version = "23.1.0"
+description = "Classes Without Boilerplate"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"},
+ {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"},
+]
+
+[package.extras]
+cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
+dev = ["attrs[docs,tests]", "pre-commit"]
+docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
+tests = ["attrs[tests-no-zope]", "zope-interface"]
+tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+
+[[package]]
+name = "black"
+version = "23.3.0"
+description = "The uncompromising code formatter."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "black-23.3.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915"},
+ {file = "black-23.3.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9"},
+ {file = "black-23.3.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2"},
+ {file = "black-23.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c"},
+ {file = "black-23.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c"},
+ {file = "black-23.3.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6"},
+ {file = "black-23.3.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b"},
+ {file = "black-23.3.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d"},
+ {file = "black-23.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70"},
+ {file = "black-23.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326"},
+ {file = "black-23.3.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b"},
+ {file = "black-23.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2"},
+ {file = "black-23.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925"},
+ {file = "black-23.3.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27"},
+ {file = "black-23.3.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331"},
+ {file = "black-23.3.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5"},
+ {file = "black-23.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961"},
+ {file = "black-23.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8"},
+ {file = "black-23.3.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30"},
+ {file = "black-23.3.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"},
+ {file = "black-23.3.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266"},
+ {file = "black-23.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab"},
+ {file = "black-23.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb"},
+ {file = "black-23.3.0-py3-none-any.whl", hash = "sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4"},
+ {file = "black-23.3.0.tar.gz", hash = "sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940"},
+]
+
+[package.dependencies]
+click = ">=8.0.0"
+mypy-extensions = ">=0.4.3"
+packaging = ">=22.0"
+pathspec = ">=0.9.0"
+platformdirs = ">=2"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
+
+[package.extras]
+colorama = ["colorama (>=0.4.3)"]
+d = ["aiohttp (>=3.7.4)"]
+jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
+uvloop = ["uvloop (>=0.15.2)"]
+
+[[package]]
+name = "certifi"
+version = "2023.5.7"
+description = "Python package for providing Mozilla's CA Bundle."
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "certifi-2023.5.7-py3-none-any.whl", hash = "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716"},
+ {file = "certifi-2023.5.7.tar.gz", hash = "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7"},
+]
+
+[[package]]
+name = "charset-normalizer"
+version = "3.1.0"
+description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
+optional = false
+python-versions = ">=3.7.0"
+files = [
+ {file = "charset-normalizer-3.1.0.tar.gz", hash = "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-win32.whl", hash = "sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448"},
+ {file = "charset_normalizer-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-win32.whl", hash = "sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909"},
+ {file = "charset_normalizer-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755"},
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373"},
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab"},
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9"},
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f"},
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28"},
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d"},
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d"},
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d"},
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6"},
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84"},
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c"},
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-win32.whl", hash = "sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974"},
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-win32.whl", hash = "sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0"},
+ {file = "charset_normalizer-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-win32.whl", hash = "sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1"},
+ {file = "charset_normalizer-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b"},
+ {file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"},
+]
+
+[[package]]
+name = "click"
+version = "8.1.3"
+description = "Composable command line interface toolkit"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
+ {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+description = "Cross-platform colored terminal text."
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+files = [
+ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
+ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
+]
+
+[[package]]
+name = "exceptiongroup"
+version = "1.1.1"
+description = "Backport of PEP 654 (exception groups)"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"},
+ {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"},
+]
+
+[package.extras]
+test = ["pytest (>=6)"]
+
+[[package]]
+name = "filelock"
+version = "3.12.0"
+description = "A platform independent file lock."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "filelock-3.12.0-py3-none-any.whl", hash = "sha256:ad98852315c2ab702aeb628412cbf7e95b7ce8c3bf9565670b4eaecf1db370a9"},
+ {file = "filelock-3.12.0.tar.gz", hash = "sha256:fc03ae43288c013d2ea83c8597001b1129db351aad9c57fe2409327916b8e718"},
+]
+
+[package.extras]
+docs = ["furo (>=2023.3.27)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"]
+testing = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "diff-cover (>=7.5)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"]
+
+[[package]]
+name = "flake8"
+version = "5.0.4"
+description = "the modular source code checker: pep8 pyflakes and co"
+optional = false
+python-versions = ">=3.6.1"
+files = [
+ {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"},
+ {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"},
+]
+
+[package.dependencies]
+mccabe = ">=0.7.0,<0.8.0"
+pycodestyle = ">=2.9.0,<2.10.0"
+pyflakes = ">=2.5.0,<2.6.0"
+
+[[package]]
+name = "flake8-pyproject"
+version = "1.2.3"
+description = "Flake8 plug-in loading the configuration from pyproject.toml"
+optional = false
+python-versions = ">= 3.6"
+files = [
+ {file = "flake8_pyproject-1.2.3-py3-none-any.whl", hash = "sha256:6249fe53545205af5e76837644dc80b4c10037e73a0e5db87ff562d75fb5bd4a"},
+]
+
+[package.dependencies]
+Flake8 = ">=5"
+TOMLi = {version = "*", markers = "python_version < \"3.11\""}
+
+[package.extras]
+dev = ["pyTest", "pyTest-cov"]
+
+[[package]]
+name = "fsspec"
+version = "2023.5.0"
+description = "File-system specification"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "fsspec-2023.5.0-py3-none-any.whl", hash = "sha256:51a4ad01a5bb66fcc58036e288c0d53d3975a0df2a5dc59a93b59bade0391f2a"},
+ {file = "fsspec-2023.5.0.tar.gz", hash = "sha256:b3b56e00fb93ea321bc9e5d9cf6f8522a0198b20eb24e02774d329e9c6fb84ce"},
+]
+
+[package.extras]
+abfs = ["adlfs"]
+adl = ["adlfs"]
+arrow = ["pyarrow (>=1)"]
+dask = ["dask", "distributed"]
+devel = ["pytest", "pytest-cov"]
+dropbox = ["dropbox", "dropboxdrivefs", "requests"]
+full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
+fuse = ["fusepy"]
+gcs = ["gcsfs"]
+git = ["pygit2"]
+github = ["requests"]
+gs = ["gcsfs"]
+gui = ["panel"]
+hdfs = ["pyarrow (>=1)"]
+http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"]
+libarchive = ["libarchive-c"]
+oci = ["ocifs"]
+s3 = ["s3fs"]
+sftp = ["paramiko"]
+smb = ["smbprotocol"]
+ssh = ["paramiko"]
+tqdm = ["tqdm"]
+
+[[package]]
+name = "gradio-client"
+version = "0.6.1"
+description = "Python library for easily interacting with trained machine learning models"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "gradio_client-0.6.1-py3-none-any.whl", hash = "sha256:2f36a8467622f748539c84d142f1a71df6b83666d017ee4fb6ef8a2696f70f66"},
+ {file = "gradio_client-0.6.1.tar.gz", hash = "sha256:2f36a8467622f748539c84d142f1a71df6b83666d017ee4fb6ef8a2696f70f66"},
+]
+
+[package.dependencies]
+fsspec = "*"
+httpx = "*"
+huggingface-hub = ">=0.13.0"
+packaging = "*"
+requests = ">=2.0,<3.0"
+typing-extensions = ">=4.0,<5.0"
+websockets = ">=10.0,<12.0"
+
+[[package]]
+name = "h11"
+version = "0.14.0"
+description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
+ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
+]
+
+[[package]]
+name = "httpcore"
+version = "0.17.0"
+description = "A minimal low-level HTTP client."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "httpcore-0.17.0-py3-none-any.whl", hash = "sha256:0fdfea45e94f0c9fd96eab9286077f9ff788dd186635ae61b312693e4d943599"},
+ {file = "httpcore-0.17.0.tar.gz", hash = "sha256:cc045a3241afbf60ce056202301b4d8b6af08845e3294055eb26b09913ef903c"},
+]
+
+[package.dependencies]
+anyio = ">=3.0,<5.0"
+certifi = "*"
+h11 = ">=0.13,<0.15"
+sniffio = "==1.*"
+
+[package.extras]
+http2 = ["h2 (>=3,<5)"]
+socks = ["socksio (==1.*)"]
+
+[[package]]
+name = "httpx"
+version = "0.24.0"
+description = "The next generation HTTP client."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "httpx-0.24.0-py3-none-any.whl", hash = "sha256:447556b50c1921c351ea54b4fe79d91b724ed2b027462ab9a329465d147d5a4e"},
+ {file = "httpx-0.24.0.tar.gz", hash = "sha256:507d676fc3e26110d41df7d35ebd8b3b8585052450f4097401c9be59d928c63e"},
+]
+
+[package.dependencies]
+certifi = "*"
+httpcore = ">=0.15.0,<0.18.0"
+idna = "*"
+sniffio = "*"
+
+[package.extras]
+brotli = ["brotli", "brotlicffi"]
+cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
+http2 = ["h2 (>=3,<5)"]
+socks = ["socksio (==1.*)"]
+
+[[package]]
+name = "huggingface-hub"
+version = "0.16.4"
+description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
+optional = false
+python-versions = ">=3.7.0"
+files = [
+ {file = "huggingface_hub-0.16.4-py3-none-any.whl", hash = "sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14"},
+ {file = "huggingface_hub-0.16.4.tar.gz", hash = "sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14"},
+]
+
+[package.dependencies]
+filelock = "*"
+fsspec = "*"
+packaging = ">=20.9"
+pyyaml = ">=5.1"
+requests = "*"
+tqdm = ">=4.42.1"
+typing-extensions = ">=3.7.4.3"
+
+[package.extras]
+all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
+cli = ["InquirerPy (==0.3.4)"]
+dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
+fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
+quality = ["black (>=23.1,<24.0)", "mypy (==0.982)", "ruff (>=0.0.241)"]
+tensorflow = ["graphviz", "pydot", "tensorflow"]
+testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "gradio", "jedi", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "soundfile"]
+torch = ["torch"]
+typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
+
+[[package]]
+name = "idna"
+version = "3.4"
+description = "Internationalized Domain Names in Applications (IDNA)"
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
+ {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
+]
+
+[[package]]
+name = "iniconfig"
+version = "2.0.0"
+description = "brain-dead simple config-ini parsing"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
+ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
+]
+
+[[package]]
+name = "isort"
+version = "5.12.0"
+description = "A Python utility / library to sort Python imports."
+optional = false
+python-versions = ">=3.8.0"
+files = [
+ {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"},
+ {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"},
+]
+
+[package.extras]
+colors = ["colorama (>=0.4.3)"]
+pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"]
+plugins = ["setuptools"]
+requirements-deprecated-finder = ["pip-api", "pipreqs"]
+
+[[package]]
+name = "mccabe"
+version = "0.7.0"
+description = "McCabe checker, plugin for flake8"
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
+ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
+]
+
+[[package]]
+name = "mypy"
+version = "1.3.0"
+description = "Optional static typing for Python"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "mypy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eb485cea53f4f5284e5baf92902cd0088b24984f4209e25981cc359d64448d"},
+ {file = "mypy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c99c3ecf223cf2952638da9cd82793d8f3c0c5fa8b6ae2b2d9ed1e1ff51ba85"},
+ {file = "mypy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:550a8b3a19bb6589679a7c3c31f64312e7ff482a816c96e0cecec9ad3a7564dd"},
+ {file = "mypy-1.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cbc07246253b9e3d7d74c9ff948cd0fd7a71afcc2b77c7f0a59c26e9395cb152"},
+ {file = "mypy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:a22435632710a4fcf8acf86cbd0d69f68ac389a3892cb23fbad176d1cddaf228"},
+ {file = "mypy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6e33bb8b2613614a33dff70565f4c803f889ebd2f859466e42b46e1df76018dd"},
+ {file = "mypy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7d23370d2a6b7a71dc65d1266f9a34e4cde9e8e21511322415db4b26f46f6b8c"},
+ {file = "mypy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:658fe7b674769a0770d4b26cb4d6f005e88a442fe82446f020be8e5f5efb2fae"},
+ {file = "mypy-1.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d29e324cdda61daaec2336c42512e59c7c375340bd202efa1fe0f7b8f8ca"},
+ {file = "mypy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:d0b6c62206e04061e27009481cb0ec966f7d6172b5b936f3ead3d74f29fe3dcf"},
+ {file = "mypy-1.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:76ec771e2342f1b558c36d49900dfe81d140361dd0d2df6cd71b3db1be155409"},
+ {file = "mypy-1.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc95f8386314272bbc817026f8ce8f4f0d2ef7ae44f947c4664efac9adec929"},
+ {file = "mypy-1.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:faff86aa10c1aa4a10e1a301de160f3d8fc8703b88c7e98de46b531ff1276a9a"},
+ {file = "mypy-1.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8c5979d0deb27e0f4479bee18ea0f83732a893e81b78e62e2dda3e7e518c92ee"},
+ {file = "mypy-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c5d2cc54175bab47011b09688b418db71403aefad07cbcd62d44010543fc143f"},
+ {file = "mypy-1.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:87df44954c31d86df96c8bd6e80dfcd773473e877ac6176a8e29898bfb3501cb"},
+ {file = "mypy-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:473117e310febe632ddf10e745a355714e771ffe534f06db40702775056614c4"},
+ {file = "mypy-1.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:74bc9b6e0e79808bf8678d7678b2ae3736ea72d56eede3820bd3849823e7f305"},
+ {file = "mypy-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:44797d031a41516fcf5cbfa652265bb994e53e51994c1bd649ffcd0c3a7eccbf"},
+ {file = "mypy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ddae0f39ca146972ff6bb4399f3b2943884a774b8771ea0a8f50e971f5ea5ba8"},
+ {file = "mypy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1c4c42c60a8103ead4c1c060ac3cdd3ff01e18fddce6f1016e08939647a0e703"},
+ {file = "mypy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e86c2c6852f62f8f2b24cb7a613ebe8e0c7dc1402c61d36a609174f63e0ff017"},
+ {file = "mypy-1.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f9dca1e257d4cc129517779226753dbefb4f2266c4eaad610fc15c6a7e14283e"},
+ {file = "mypy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:95d8d31a7713510685b05fbb18d6ac287a56c8f6554d88c19e73f724a445448a"},
+ {file = "mypy-1.3.0-py3-none-any.whl", hash = "sha256:a8763e72d5d9574d45ce5881962bc8e9046bf7b375b0abf031f3e6811732a897"},
+ {file = "mypy-1.3.0.tar.gz", hash = "sha256:e1f4d16e296f5135624b34e8fb741eb0eadedca90862405b1f1fde2040b9bd11"},
+]
+
+[package.dependencies]
+mypy-extensions = ">=1.0.0"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = ">=3.10"
+
+[package.extras]
+dmypy = ["psutil (>=4.0)"]
+install-types = ["pip"]
+python2 = ["typed-ast (>=1.4.0,<2)"]
+reports = ["lxml"]
+
+[[package]]
+name = "mypy-extensions"
+version = "1.0.0"
+description = "Type system extensions for programs checked with the mypy type checker."
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
+ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
+]
+
+[[package]]
+name = "packaging"
+version = "23.1"
+description = "Core utilities for Python packages"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"},
+ {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
+]
+
+[[package]]
+name = "pathspec"
+version = "0.11.1"
+description = "Utility library for gitignore style pattern matching of file paths."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "pathspec-0.11.1-py3-none-any.whl", hash = "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293"},
+ {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"},
+]
+
+[[package]]
+name = "platformdirs"
+version = "3.5.0"
+description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "platformdirs-3.5.0-py3-none-any.whl", hash = "sha256:47692bc24c1958e8b0f13dd727307cff1db103fca36399f457da8e05f222fdc4"},
+ {file = "platformdirs-3.5.0.tar.gz", hash = "sha256:7954a68d0ba23558d753f73437c55f89027cf8f5108c19844d4b82e5af396335"},
+]
+
+[package.extras]
+docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"]
+test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"]
+
+[[package]]
+name = "pluggy"
+version = "1.0.0"
+description = "plugin and hook calling mechanisms for python"
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
+ {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
+]
+
+[package.extras]
+dev = ["pre-commit", "tox"]
+testing = ["pytest", "pytest-benchmark"]
+
+[[package]]
+name = "pycodestyle"
+version = "2.9.1"
+description = "Python style guide checker"
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"},
+ {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"},
+]
+
+[[package]]
+name = "pyflakes"
+version = "2.5.0"
+description = "passive checker of Python programs"
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"},
+ {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"},
+]
+
+[[package]]
+name = "pytest"
+version = "7.2.2"
+description = "pytest: simple powerful testing with Python"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "pytest-7.2.2-py3-none-any.whl", hash = "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e"},
+ {file = "pytest-7.2.2.tar.gz", hash = "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4"},
+]
+
+[package.dependencies]
+attrs = ">=19.2.0"
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
+exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
+iniconfig = "*"
+packaging = "*"
+pluggy = ">=0.12,<2.0"
+tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
+
+[package.extras]
+testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
+
+[[package]]
+name = "pytest-asyncio"
+version = "0.21.0"
+description = "Pytest support for asyncio"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "pytest-asyncio-0.21.0.tar.gz", hash = "sha256:2b38a496aef56f56b0e87557ec313e11e1ab9276fc3863f6a7be0f1d0e415e1b"},
+ {file = "pytest_asyncio-0.21.0-py3-none-any.whl", hash = "sha256:f2b3366b7cd501a4056858bd39349d5af19742aed2d81660b7998b6341c7eb9c"},
+]
+
+[package.dependencies]
+pytest = ">=7.0.0"
+
+[package.extras]
+docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"]
+testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"]
+
+[[package]]
+name = "pyyaml"
+version = "6.0"
+description = "YAML parser and emitter for Python"
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
+ {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"},
+ {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"},
+ {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"},
+ {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
+ {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
+ {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
+ {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"},
+ {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"},
+ {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"},
+ {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"},
+ {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"},
+ {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"},
+ {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"},
+ {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
+ {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
+ {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
+ {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"},
+ {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"},
+ {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"},
+ {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"},
+ {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"},
+ {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"},
+ {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"},
+ {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"},
+ {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"},
+ {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"},
+ {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"},
+ {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"},
+ {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"},
+ {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"},
+ {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"},
+ {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"},
+ {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"},
+ {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"},
+ {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"},
+ {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"},
+ {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"},
+ {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
+ {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
+]
+
+[[package]]
+name = "requests"
+version = "2.30.0"
+description = "Python HTTP for Humans."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "requests-2.30.0-py3-none-any.whl", hash = "sha256:10e94cc4f3121ee6da529d358cdaeaff2f1c409cd377dbc72b825852f2f7e294"},
+ {file = "requests-2.30.0.tar.gz", hash = "sha256:239d7d4458afcb28a692cdd298d87542235f4ca8d36d03a15bfc128a6559a2f4"},
+]
+
+[package.dependencies]
+certifi = ">=2017.4.17"
+charset-normalizer = ">=2,<4"
+idna = ">=2.5,<4"
+urllib3 = ">=1.21.1,<3"
+
+[package.extras]
+socks = ["PySocks (>=1.5.6,!=1.5.7)"]
+use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
+
+[[package]]
+name = "sniffio"
+version = "1.3.0"
+description = "Sniff out which async library your code is running under"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"},
+ {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"},
+]
+
+[[package]]
+name = "tomli"
+version = "2.0.1"
+description = "A lil' TOML parser"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
+ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
+]
+
+[[package]]
+name = "tqdm"
+version = "4.65.0"
+description = "Fast, Extensible Progress Meter"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"},
+ {file = "tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[package.extras]
+dev = ["py-make (>=0.1.0)", "twine", "wheel"]
+notebook = ["ipywidgets (>=6)"]
+slack = ["slack-sdk"]
+telegram = ["requests"]
+
+[[package]]
+name = "typing-extensions"
+version = "4.5.0"
+description = "Backported and Experimental Type Hints for Python 3.7+"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"},
+ {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"},
+]
+
+[[package]]
+name = "urllib3"
+version = "1.26.16"
+description = "HTTP library with thread-safe connection pooling, file post, and more."
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
+files = [
+ {file = "urllib3-1.26.16-py2.py3-none-any.whl", hash = "sha256:8d36afa7616d8ab714608411b4a3b13e58f463aee519024578e062e141dce20f"},
+ {file = "urllib3-1.26.16.tar.gz", hash = "sha256:8f135f6502756bde6b2a9b28989df5fbe87c9970cecaa69041edcce7f0589b14"},
+]
+
+[package.extras]
+brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"]
+secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"]
+socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
+
+[[package]]
+name = "websockets"
+version = "11.0.3"
+description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3ccc8a0c387629aec40f2fc9fdcb4b9d5431954f934da3eaf16cdc94f67dbfac"},
+ {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d67ac60a307f760c6e65dad586f556dde58e683fab03323221a4e530ead6f74d"},
+ {file = "websockets-11.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84d27a4832cc1a0ee07cdcf2b0629a8a72db73f4cf6de6f0904f6661227f256f"},
+ {file = "websockets-11.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffd7dcaf744f25f82190856bc26ed81721508fc5cbf2a330751e135ff1283564"},
+ {file = "websockets-11.0.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7622a89d696fc87af8e8d280d9b421db5133ef5b29d3f7a1ce9f1a7bf7fcfa11"},
+ {file = "websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bceab846bac555aff6427d060f2fcfff71042dba6f5fca7dc4f75cac815e57ca"},
+ {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:54c6e5b3d3a8936a4ab6870d46bdd6ec500ad62bde9e44462c32d18f1e9a8e54"},
+ {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:41f696ba95cd92dc047e46b41b26dd24518384749ed0d99bea0a941ca87404c4"},
+ {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:86d2a77fd490ae3ff6fae1c6ceaecad063d3cc2320b44377efdde79880e11526"},
+ {file = "websockets-11.0.3-cp310-cp310-win32.whl", hash = "sha256:2d903ad4419f5b472de90cd2d40384573b25da71e33519a67797de17ef849b69"},
+ {file = "websockets-11.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:1d2256283fa4b7f4c7d7d3e84dc2ece74d341bce57d5b9bf385df109c2a1a82f"},
+ {file = "websockets-11.0.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e848f46a58b9fcf3d06061d17be388caf70ea5b8cc3466251963c8345e13f7eb"},
+ {file = "websockets-11.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa5003845cdd21ac0dc6c9bf661c5beddd01116f6eb9eb3c8e272353d45b3288"},
+ {file = "websockets-11.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b58cbf0697721120866820b89f93659abc31c1e876bf20d0b3d03cef14faf84d"},
+ {file = "websockets-11.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:660e2d9068d2bedc0912af508f30bbeb505bbbf9774d98def45f68278cea20d3"},
+ {file = "websockets-11.0.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c1f0524f203e3bd35149f12157438f406eff2e4fb30f71221c8a5eceb3617b6b"},
+ {file = "websockets-11.0.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:def07915168ac8f7853812cc593c71185a16216e9e4fa886358a17ed0fd9fcf6"},
+ {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b30c6590146e53149f04e85a6e4fcae068df4289e31e4aee1fdf56a0dead8f97"},
+ {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:619d9f06372b3a42bc29d0cd0354c9bb9fb39c2cbc1a9c5025b4538738dbffaf"},
+ {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:01f5567d9cf6f502d655151645d4e8b72b453413d3819d2b6f1185abc23e82dd"},
+ {file = "websockets-11.0.3-cp311-cp311-win32.whl", hash = "sha256:e1459677e5d12be8bbc7584c35b992eea142911a6236a3278b9b5ce3326f282c"},
+ {file = "websockets-11.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:e7837cb169eca3b3ae94cc5787c4fed99eef74c0ab9506756eea335e0d6f3ed8"},
+ {file = "websockets-11.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9f59a3c656fef341a99e3d63189852be7084c0e54b75734cde571182c087b152"},
+ {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2529338a6ff0eb0b50c7be33dc3d0e456381157a31eefc561771ee431134a97f"},
+ {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34fd59a4ac42dff6d4681d8843217137f6bc85ed29722f2f7222bd619d15e95b"},
+ {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:332d126167ddddec94597c2365537baf9ff62dfcc9db4266f263d455f2f031cb"},
+ {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:6505c1b31274723ccaf5f515c1824a4ad2f0d191cec942666b3d0f3aa4cb4007"},
+ {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f467ba0050b7de85016b43f5a22b46383ef004c4f672148a8abf32bc999a87f0"},
+ {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:9d9acd80072abcc98bd2c86c3c9cd4ac2347b5a5a0cae7ed5c0ee5675f86d9af"},
+ {file = "websockets-11.0.3-cp37-cp37m-win32.whl", hash = "sha256:e590228200fcfc7e9109509e4d9125eace2042fd52b595dd22bbc34bb282307f"},
+ {file = "websockets-11.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:b16fff62b45eccb9c7abb18e60e7e446998093cdcb50fed33134b9b6878836de"},
+ {file = "websockets-11.0.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fb06eea71a00a7af0ae6aefbb932fb8a7df3cb390cc217d51a9ad7343de1b8d0"},
+ {file = "websockets-11.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8a34e13a62a59c871064dfd8ffb150867e54291e46d4a7cf11d02c94a5275bae"},
+ {file = "websockets-11.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4841ed00f1026dfbced6fca7d963c4e7043aa832648671b5138008dc5a8f6d99"},
+ {file = "websockets-11.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a073fc9ab1c8aff37c99f11f1641e16da517770e31a37265d2755282a5d28aa"},
+ {file = "websockets-11.0.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68b977f21ce443d6d378dbd5ca38621755f2063d6fdb3335bda981d552cfff86"},
+ {file = "websockets-11.0.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1a99a7a71631f0efe727c10edfba09ea6bee4166a6f9c19aafb6c0b5917d09c"},
+ {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bee9fcb41db2a23bed96c6b6ead6489702c12334ea20a297aa095ce6d31370d0"},
+ {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4b253869ea05a5a073ebfdcb5cb3b0266a57c3764cf6fe114e4cd90f4bfa5f5e"},
+ {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1553cb82942b2a74dd9b15a018dce645d4e68674de2ca31ff13ebc2d9f283788"},
+ {file = "websockets-11.0.3-cp38-cp38-win32.whl", hash = "sha256:f61bdb1df43dc9c131791fbc2355535f9024b9a04398d3bd0684fc16ab07df74"},
+ {file = "websockets-11.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:03aae4edc0b1c68498f41a6772d80ac7c1e33c06c6ffa2ac1c27a07653e79d6f"},
+ {file = "websockets-11.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:777354ee16f02f643a4c7f2b3eff8027a33c9861edc691a2003531f5da4f6bc8"},
+ {file = "websockets-11.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8c82f11964f010053e13daafdc7154ce7385ecc538989a354ccc7067fd7028fd"},
+ {file = "websockets-11.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3580dd9c1ad0701169e4d6fc41e878ffe05e6bdcaf3c412f9d559389d0c9e016"},
+ {file = "websockets-11.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f1a3f10f836fab6ca6efa97bb952300b20ae56b409414ca85bff2ad241d2a61"},
+ {file = "websockets-11.0.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df41b9bc27c2c25b486bae7cf42fccdc52ff181c8c387bfd026624a491c2671b"},
+ {file = "websockets-11.0.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:279e5de4671e79a9ac877427f4ac4ce93751b8823f276b681d04b2156713b9dd"},
+ {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1fdf26fa8a6a592f8f9235285b8affa72748dc12e964a5518c6c5e8f916716f7"},
+ {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:69269f3a0b472e91125b503d3c0b3566bda26da0a3261c49f0027eb6075086d1"},
+ {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:97b52894d948d2f6ea480171a27122d77af14ced35f62e5c892ca2fae9344311"},
+ {file = "websockets-11.0.3-cp39-cp39-win32.whl", hash = "sha256:c7f3cb904cce8e1be667c7e6fef4516b98d1a6a0635a58a57528d577ac18a128"},
+ {file = "websockets-11.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:c792ea4eabc0159535608fc5658a74d1a81020eb35195dd63214dcf07556f67e"},
+ {file = "websockets-11.0.3-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f2e58f2c36cc52d41f2659e4c0cbf7353e28c8c9e63e30d8c6d3494dc9fdedcf"},
+ {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de36fe9c02995c7e6ae6efe2e205816f5f00c22fd1fbf343d4d18c3d5ceac2f5"},
+ {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0ac56b661e60edd453585f4bd68eb6a29ae25b5184fd5ba51e97652580458998"},
+ {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e052b8467dd07d4943936009f46ae5ce7b908ddcac3fda581656b1b19c083d9b"},
+ {file = "websockets-11.0.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:42cc5452a54a8e46a032521d7365da775823e21bfba2895fb7b77633cce031bb"},
+ {file = "websockets-11.0.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e6316827e3e79b7b8e7d8e3b08f4e331af91a48e794d5d8b099928b6f0b85f20"},
+ {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8531fdcad636d82c517b26a448dcfe62f720e1922b33c81ce695d0edb91eb931"},
+ {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c114e8da9b475739dde229fd3bc6b05a6537a88a578358bc8eb29b4030fac9c9"},
+ {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e063b1865974611313a3849d43f2c3f5368093691349cf3c7c8f8f75ad7cb280"},
+ {file = "websockets-11.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:92b2065d642bf8c0a82d59e59053dd2fdde64d4ed44efe4870fa816c1232647b"},
+ {file = "websockets-11.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0ee68fe502f9031f19d495dae2c268830df2760c0524cbac5d759921ba8c8e82"},
+ {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcacf2c7a6c3a84e720d1bb2b543c675bf6c40e460300b628bab1b1efc7c034c"},
+ {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b67c6f5e5a401fc56394f191f00f9b3811fe843ee93f4a70df3c389d1adf857d"},
+ {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d5023a4b6a5b183dc838808087033ec5df77580485fc533e7dab2567851b0a4"},
+ {file = "websockets-11.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ed058398f55163a79bb9f06a90ef9ccc063b204bb346c4de78efc5d15abfe602"},
+ {file = "websockets-11.0.3-py3-none-any.whl", hash = "sha256:6681ba9e7f8f3b19440921e99efbb40fc89f26cd71bf539e45d8c8a25c976dc6"},
+ {file = "websockets-11.0.3.tar.gz", hash = "sha256:88fc51d9a26b10fc331be344f1781224a375b78488fc343620184e95a4b27016"},
+]
+
+[metadata]
+lock-version = "2.0"
+python-versions = "^3.8"
+content-hash = "80634bedd72b53e96d00fe6cbad0d9bfbbdda1e017c24f19d6de41d046f566c7"
diff --git a/client/poetry.toml b/client/poetry.toml
new file mode 100644
index 0000000000000000000000000000000000000000..975eeee69606d1db8e9fb3bd4807215461b56fbe
--- /dev/null
+++ b/client/poetry.toml
@@ -0,0 +1 @@
+virtualenvs.in-project = true
diff --git a/client/pyproject.toml b/client/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..c039f82109b6d48b394e4348471e8d1b4caf587d
--- /dev/null
+++ b/client/pyproject.toml
@@ -0,0 +1,41 @@
+[tool.poetry]
+name = "h2ogpt-client"
+version = "0.1.0"
+description = ""
+authors = []
+readme = "README.md"
+include = ["h2ogpt_client/_h2ogpt*"]
+
+[tool.poetry.dependencies]
+python = "^3.8"
+gradio-client = "^0.6.1"
+
+[tool.poetry.group.test.dependencies]
+pytest = "7.2.2"
+pytest-asyncio = "^0.21.0"
+
+[tool.poetry.group.dev.dependencies]
+mypy = "^1.3.0"
+black = "^23.3.0"
+flake8 = "5.0.4"
+isort = "^5.12.0"
+flake8-pyproject = "^1.2.3"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.isort]
+profile = "black"
+py_version = "auto"
+
+[tool.flake8]
+max-line-length = 88
+
+[tool.mypy]
+python_version = "3.8"
+
+[tool.pytest.ini_options]
+pythonpath = "h2ogpt_client"
+log_cli = true
+log_cli_level = "INFO"
diff --git a/client/tests/__init__.py b/client/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/client/tests/conftest.py b/client/tests/conftest.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b497e3c7aa4c044a93e237e4b738027e4d962ed
--- /dev/null
+++ b/client/tests/conftest.py
@@ -0,0 +1,57 @@
+import importlib.util
+import logging
+import os
+import sys
+from pathlib import Path
+from types import ModuleType
+
+import pytest
+
+LOGGER = logging.getLogger(__name__)
+
+
+@pytest.fixture(scope="module")
+def server_url():
+ server_url = os.getenv("H2OGPT_SERVER")
+ if not server_url:
+        LOGGER.info("Couldn't find a running h2oGPT server. Starting one.")
+
+ generate = _import_module_from_h2ogpt("generate.py")
+ generate.main(
+ base_model="h2oai/h2ogpt-oig-oasst1-512-6_9b",
+ prompt_type="human_bot",
+ chat=False,
+ stream_output=False,
+ gradio=True,
+ num_beams=1,
+ block_gradio_exit=False,
+ )
+ server_url = "http://0.0.0.0:7860" # assume server started
+ LOGGER.info(f"h2oGPT server started at '{server_url}'.")
+ return server_url
+
+
+@pytest.fixture(scope="module")
+def h2ogpt_key():
+ return os.getenv("H2OGPT_KEY") or os.getenv("H2OGPT_H2OGPT_KEY")
+
+
+@pytest.fixture(scope="module")
+def eval_func_param_names():
+ parameters = _import_module_from_h2ogpt("src/evaluate_params.py")
+ return parameters.eval_func_param_names
+
+
+def _import_module_from_h2ogpt(file_name: str) -> ModuleType:
+ h2ogpt_dir = Path(__file__).parent.parent.parent
+ file_path = (h2ogpt_dir / file_name).absolute()
+ module_name = file_path.stem
+
+ LOGGER.info(f"Loading module '{module_name}' from '{file_path}'.")
+ spec = importlib.util.spec_from_file_location(module_name, file_path)
+ if not spec:
+ raise Exception(f"Couldn't load module '{module_name}' from '{file_path}'.")
+ module = importlib.util.module_from_spec(spec)
+ sys.modules[module_name] = module
+ spec.loader.exec_module(module) # type: ignore
+ return module
diff --git a/client/tests/test_client.py b/client/tests/test_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..2173aac29066b1b4db8a7c92d5b96d719fc4cd5b
--- /dev/null
+++ b/client/tests/test_client.py
@@ -0,0 +1,156 @@
+import platform
+
+import pytest
+
+from h2ogpt_client import Client
+
+platform.python_version()
+
+
+@pytest.fixture
+def client(server_url, h2ogpt_key) -> Client:
+ return Client(server_url, h2ogpt_key=h2ogpt_key)
+
+
+def _create_text_completion(client):
+ model = client.models.list()[-1]
+ return client.text_completion.create(model=model)
+
+
+@pytest.mark.asyncio
+async def test_text_completion(client):
+ text_completion = _create_text_completion(client)
+ response = await text_completion.complete(prompt="Hello world")
+ assert response
+ print(response)
+
+
+@pytest.mark.asyncio
+async def test_text_completion_stream(client):
+ text_completion = _create_text_completion(client)
+ response = await text_completion.complete(
+ prompt="Write a poem about the Amazon rainforest. End it with an emoji.",
+ enable_streaming=True,
+ )
+ async for token in response:
+ assert token
+ print(token, end="")
+
+
+def test_text_completion_sync(client):
+ text_completion = _create_text_completion(client)
+ response = text_completion.complete_sync(prompt="Hello world")
+ assert response
+ print(response)
+
+
+def test_text_completion_sync_stream(client):
+ text_completion = _create_text_completion(client)
+ response = text_completion.complete_sync(
+ prompt="Write a poem about the Amazon rainforest. End it with an emoji.",
+ enable_streaming=True,
+ )
+ for token in response:
+ assert token
+ print(token, end="")
+
+
+def _create_chat_completion(client):
+ model = client.models.list()[-1]
+ return client.chat_completion.create(model=model)
+
+
+@pytest.mark.asyncio
+async def test_chat_completion(client):
+ chat_completion = _create_chat_completion(client)
+
+ chat1 = await chat_completion.chat(prompt="Hey!")
+ assert chat1["user"] == "Hey!"
+ assert chat1["gpt"]
+
+ chat2 = await chat_completion.chat(prompt="What is the capital of USA?")
+ assert chat2["user"] == "What is the capital of USA?"
+ assert chat2["gpt"]
+
+ chat3 = await chat_completion.chat(prompt="What is the population in there?")
+ assert chat3["user"] == "What is the population in there?"
+ assert chat3["gpt"]
+
+ chat_history = chat_completion.chat_history()
+ assert chat_history == [chat1, chat2, chat3]
+ print(chat_history)
+
+
+def test_chat_completion_sync(client):
+ chat_completion = _create_chat_completion(client)
+
+ chat1 = chat_completion.chat_sync(prompt="What is UNESCO?")
+ assert chat1["user"] == "What is UNESCO?"
+ assert chat1["gpt"]
+
+ chat2 = chat_completion.chat_sync(prompt="Is it a part of the UN?")
+ assert chat2["user"] == "Is it a part of the UN?"
+ assert chat2["gpt"]
+
+ chat3 = chat_completion.chat_sync(prompt="Where is the headquarters?")
+ assert chat3["user"] == "Where is the headquarters?"
+ assert chat3["gpt"]
+
+ chat_history = chat_completion.chat_history()
+ assert chat_history == [chat1, chat2, chat3]
+ print(chat_history)
+
+
+def test_available_models(client):
+ models = client.models.list()
+ assert len(models)
+ print(models)
+
+
+def test_server_properties(client, server_url):
+ assert client.server.address.startswith(server_url)
+ assert client.server.hash
+
+
+def test_parameters_order(client, eval_func_param_names):
+ text_completion = client.text_completion.create()
+ assert eval_func_param_names == list(text_completion._parameters.keys())
+ chat_completion = client.chat_completion.create()
+ assert eval_func_param_names == list(chat_completion._parameters.keys())
+
+
+@pytest.mark.parametrize("local_server", [True, False])
+def test_readme_example(local_server):
+ # self-contained example used for readme,
+ # to be copied to client/README.md if changed, setting local_server = True at first
+ import asyncio
+ import os
+
+ from h2ogpt_client import Client
+
+ if local_server:
+ client = Client("http://0.0.0.0:7860")
+ else:
+ h2ogpt_key = os.getenv("H2OGPT_KEY") or os.getenv("H2OGPT_H2OGPT_KEY")
+ if h2ogpt_key is None:
+ return
+ # if you have API key for public instance:
+ client = Client("https://gpt.h2o.ai", h2ogpt_key=h2ogpt_key)
+
+ # Text completion
+ text_completion = client.text_completion.create()
+ response = asyncio.run(text_completion.complete("Hello world"))
+ print("asyncio text completion response: %s" % response)
+ # Text completion: synchronous
+ response = text_completion.complete_sync("Hello world")
+ print("sync text completion response: %s" % response)
+
+ # Chat completion
+ chat_completion = client.chat_completion.create()
+ reply = asyncio.run(chat_completion.chat("Hey!"))
+ print("asyncio text completion user: %s gpt: %s" % (reply["user"], reply["gpt"]))
+ chat_history = chat_completion.chat_history()
+ print("chat_history: %s" % chat_history)
+ # Chat completion: synchronous
+ reply = chat_completion.chat_sync("Hey!")
+ print("sync chat completion gpt: %s" % reply["gpt"])
diff --git a/cloud/packer/Jenkinsfile b/cloud/packer/Jenkinsfile
new file mode 100644
index 0000000000000000000000000000000000000000..a71ab8f75cfc3d9445604abd468709f478a17aa8
--- /dev/null
+++ b/cloud/packer/Jenkinsfile
@@ -0,0 +1,80 @@
+import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
+
+properties(
+ [
+ parameters(
+ [
+ string(name: 'BRANCH_TAG', defaultValue: 'origin/main'),
+ booleanParam(name: 'AZURE', defaultValue: true, description: 'Make Azure Machine Image/Not?'),
+ booleanParam(name: 'GCP', defaultValue: true, description: 'Make GCP Image/Not?'),
+ string(name: 'H2OGPT_VERSION', defaultValue: "010", description: 'Example: for version 1.10.5 use 1105')
+ ]
+ )
+ ]
+)
+
+node('linux && docker') {
+ stage('Init') {
+ cleanWs()
+ currentBuild.displayName = "#${BUILD_NUMBER} - Rel:${H2OGPT_VERSION}"
+ checkout scm
+ sh('ls -al')
+ }
+
+ stage('Build Images') {
+ try {
+ docker.image('harbor.h2o.ai/opsh2oai/h2oai-packer-build:2').inside {
+ parallel([
+ "GCP Ubuntu 20.04": {
+ withCredentials([file(credentialsId: 'GCP_MARKETPLACE_SERVICE_ACCOUNT', variable: 'GCP_ACCOUNT_FILE')]) {
+ dir('cloud/packer') {
+ if (params.GCP) {
+ sh("packer build \
+ --force \
+ -var 'project_id=h2o-gce' \
+ -var 'account_file=$GCP_ACCOUNT_FILE' \
+ -var 'h2ogpt_version=${H2OGPT_VERSION}' \
+ -var 'branch_tag=${BRANCH_TAG}' \
+ h2ogpt-gcp.json"
+ )
+ archiveArtifacts artifacts: '*-image-info.json'
+ }else {
+ Utils.markStageSkippedForConditional('GCP Ubuntu 20.04')
+ }
+ }
+ }
+ },
+
+ "AZURE Ubuntu 20.04": {
+ withCredentials([string(credentialsId: "AZURE_MARKETPLACE_CLIENT_ID", variable: "AZURE_CLIENT_ID"),
+ string(credentialsId: "AZURE_MARKETPLACE_CLIENT_SECRET", variable: "AZURE_CLIENT_SECRET"),
+ string(credentialsId: "AZURE_MARKETPLACE_SUBSCRIPTION_ID", variable: "AZURE_SUBSCRIPTION_ID"),
+ string(credentialsId: "AZURE_MARKETPLACE_TENANT_ID", variable: "AZURE_TENANT_ID")]) {
+ dir('cloud/packer') {
+ if (params.AZURE) {
+ sh("packer build \
+ --force \
+ -var 'client_id=$AZURE_CLIENT_ID' \
+ -var 'client_secret=$AZURE_CLIENT_SECRET' \
+ -var 'managed_image_resource_group_name=H2OIMAGES' \
+ -var 'subscription_id=$AZURE_SUBSCRIPTION_ID' \
+ -var 'tenant_id=$AZURE_TENANT_ID' \
+ -var 'h2ogpt_version=${H2OGPT_VERSION}' \
+ -var 'branch_tag=${BRANCH_TAG}' \
+ h2ogpt-azure.json"
+ )
+ archiveArtifacts artifacts: '*-image-info.json'
+ }else {
+ Utils.markStageSkippedForConditional('AZURE Ubuntu 20.04')
+ }
+ }
+ }
+ },
+
+ ])
+ }
+ } finally {
+ cleanWs()
+ }
+ }
+}
diff --git a/cloud/packer/README.md b/cloud/packer/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..866283fe0e00b2e82ea8a422e6bdccf4e7ba4c6d
--- /dev/null
+++ b/cloud/packer/README.md
@@ -0,0 +1,22 @@
+# h2oGPT Packer Templates
+
+These scripts help create images in public clouds that can then be submitted to the Azure/GCP Marketplaces for commercial use.
+
+### Packer Scripts
+- Azure - `h2ogpt-azure.json`
+- GCP - `h2ogpt-gcp.json`
+
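+Both templates are driven by a plain `packer build` with a handful of variables. As a rough local sketch (the Jenkins pipeline below does the equivalent; the service-account path and the version/branch values here are placeholders):
+
+```bash
+# Build the GCP image from the repository root (sketch; values are placeholders).
+cd cloud/packer
+packer build \
+  --force \
+  -var 'project_id=h2o-gce' \
+  -var 'account_file=/path/to/gcp-service-account.json' \
+  -var 'h2ogpt_version=010' \
+  -var 'branch_tag=main' \
+  h2ogpt-gcp.json
+```
+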
+### Provisioning Scripts
+- `setup_environment.sh`
+  - Responsible for setting up CUDA, GCC, Nginx, and Python
+- `install_h2ogpt.sh`
+  - Responsible for setting up h2oGPT with its dependencies
+- `h2oai-h2ogpt-4096-llama2-13b-chat.sh`
+  - Responsible for setting up the default model h2oai-h2ogpt-4096-llama2-13b-chat with vLLM on port 80 via Nginx
+  - vLLM, h2oGPT, and Nginx run as systemd services
+  - The model is downloaded at runtime
+
+__Jenkins Pipeline__: http://jenkins.h2o.local:8080/job/build-h2ogpt-cloud-images/
+
+### Notes
+- Since the model is downloaded at runtime after the VM is provisioned, it takes around 5-10 minutes for h2oGPT to start correctly.
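+
+If h2oGPT is not reachable yet, the service state can be inspected on the VM along these lines (a sketch; the unit names match the services enabled by `h2oai-h2ogpt-4096-llama2-13b-chat.sh`):
+
+```bash
+# Check the vLLM backend, the h2oGPT UI, and the Nginx front end.
+sudo systemctl status vllm.service h2ogpt.service h2ogpt_nginx.service
+# Follow the vLLM log while the model weights download on first boot.
+sudo journalctl -u vllm.service -f
+```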
diff --git a/cloud/packer/h2oai-h2ogpt-4096-llama2-13b-chat.sh b/cloud/packer/h2oai-h2ogpt-4096-llama2-13b-chat.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d9dd6bf3fd6669643fdd9b4672af71fb773a7435
--- /dev/null
+++ b/cloud/packer/h2oai-h2ogpt-4096-llama2-13b-chat.sh
@@ -0,0 +1,11 @@
+#!/bin/bash -e
+
+sudo systemctl daemon-reload
+sudo systemctl enable h2ogpt_nginx.service
+sudo systemctl enable vllm.service
+sudo systemctl enable h2ogpt.service
+
+cd "$HOME"
+# sudo rm -rf "$HOME"/.cache/huggingface/hub/
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y autoremove
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y clean
diff --git a/cloud/packer/h2ogpt-azure.json b/cloud/packer/h2ogpt-azure.json
new file mode 100644
index 0000000000000000000000000000000000000000..6f516a5ba1c942b5017662f72b7d129d9c19b597
--- /dev/null
+++ b/cloud/packer/h2ogpt-azure.json
@@ -0,0 +1,123 @@
+{
+ "variables": {
+ "client_id": "",
+ "client_secret": "",
+ "subscription_id": "92429150-401a-431f-8955-e69c0c119e68",
+ "tenant_id": "840229f2-c911-49e6-a73d-5b3a4311835a",
+ "managed_image_resource_group_name": "H2OIMAGES",
+ "h2ogpt_version": "010",
+ "branch_tag": "main",
+ "base_model": "h2oai-h2ogpt-4096-llama2-13b-chat"
+ },
+ "builders": [
+ {
+ "type": "azure-arm",
+ "client_id": "{{user `client_id`}}",
+ "client_secret": "{{user `client_secret`}}",
+ "subscription_id": "{{user `subscription_id`}}",
+ "tenant_id": "{{user `tenant_id`}}",
+ "capture_container_name": "h2ovhdimages",
+ "capture_name_prefix": "h2ogpt-{{user `h2ogpt_version`}}",
+ "resource_group_name": "{{user `managed_image_resource_group_name`}}",
+ "temp_resource_group_name": "Engineering_DevOps_h2oGPT-Ubuntu",
+ "storage_account": "h2ovhdimages",
+ "os_type": "Linux",
+ "image_publisher": "Canonical",
+ "image_offer": "0001-com-ubuntu-server-focal",
+ "image_sku": "20_04-lts",
+ "os_disk_size_gb": 512,
+ "azure_tags": {
+ "dept": "Engineering",
+ "task": "Image deployment",
+ "Name": "H2OGPT-CLOUD-IMAGES",
+ "Owner": "ops@h2o.ai",
+ "Project": "DevOps",
+ "Department": "Engineering",
+ "Environment": "Dev",
+ "Scheduling": "self-managed"
+ },
+ "location": "East US",
+ "vm_size": "Standard_NC24s_v3",
+ "ssh_username": "ubuntu"
+ }
+ ],
+ "post-processors": [
+ {
+ "type": "manifest",
+ "output": "azure-ubuntu-image-info.json",
+ "strip_path": true,
+ "custom_data": {
+ "base_image": "AZURE Ubuntu 20.04",
+ "h2ogpt_version": "{{user `h2ogpt_version`}}"
+ }
+ }
+ ],
+ "provisioners": [
+ {
+ "type": "shell",
+ "script": "setup_environment.sh",
+ "pause_before": "10s",
+ "pause_after": "10s"
+ },
+ {
+ "type": "shell",
+ "inline": ["sudo reboot now"],
+ "pause_after": "10s",
+ "expect_disconnect": true
+ },
+ {
+ "type": "shell",
+ "environment_vars": ["BRANCH_TAG={{user `branch_tag`}}"],
+ "script": "install_h2ogpt.sh",
+ "pause_after": "10s"
+ },
+ {
+ "type": "shell",
+ "inline": [
+ "sudo chown -R ubuntu:ubuntu /etc/nginx/conf.d",
+ "sudo chown -R ubuntu:ubuntu /etc/systemd/system/"
+ ],
+ "pause_before": "10s"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/run_nginx.sh",
+ "destination": "/workspace/run_nginx.sh"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/run_vllm.sh",
+ "destination": "/workspace/run_vllm.sh"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/run_h2ogpt.sh",
+ "destination": "/workspace/run_h2ogpt.sh"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/h2ogpt_nginx.service",
+ "destination": "/etc/systemd/system/h2ogpt_nginx.service"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/vllm.service",
+ "destination": "/etc/systemd/system/vllm.service"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/h2ogpt.service",
+ "destination": "/etc/systemd/system/h2ogpt.service"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/temp.conf",
+ "destination": "/workspace/temp.conf"
+ },
+ {
+ "type": "shell",
+ "script": "{{user `base_model`}}.sh",
+ "pause_after": "10s"
+ }
+ ]
+}
diff --git a/cloud/packer/h2ogpt-gcp.json b/cloud/packer/h2ogpt-gcp.json
new file mode 100644
index 0000000000000000000000000000000000000000..63321892b10458a27073d6bd823be070968e295f
--- /dev/null
+++ b/cloud/packer/h2ogpt-gcp.json
@@ -0,0 +1,107 @@
+{
+ "variables": {
+ "project_id": "eng-llm",
+ "account_file": "",
+ "h2ogpt_version": "010",
+ "branch_tag": "main",
+ "base_model": "h2oai-h2ogpt-4096-llama2-13b-chat"
+ },
+ "builders": [
+ {
+ "type": "googlecompute",
+ "project_id": "{{user `project_id`}}",
+ "account_file": "{{user `account_file`}}",
+ "machine_type": "n1-standard-8",
+ "on_host_maintenance": "TERMINATE",
+ "accelerator_type": "projects/{{user `project_id`}}/zones/us-west1-b/acceleratorTypes/nvidia-tesla-t4",
+ "accelerator_count": "4",
+ "source_image_family": "ubuntu-2004-lts",
+ "zone": "us-west1-b",
+ "image_description": "h2ogpt using Packer",
+ "image_name": "h2ogpt-{{user `h2ogpt_version`}}",
+ "disk_size": 512,
+ "disk_type": "pd-ssd",
+ "ssh_username": "ubuntu",
+ "tags": ["h2ogpt"]
+ }
+ ],
+ "post-processors": [
+ {
+ "type": "manifest",
+ "output": "gcp-image-info.json",
+ "strip_path": true,
+ "custom_data": {
+ "base_image": "GCP Ubuntu 20.04",
+ "h2ogpt_version": "{{user `h2ogpt_version`}}"
+ }
+ }
+ ],
+ "provisioners": [
+ {
+ "type": "shell",
+ "script": "setup_environment.sh",
+ "pause_before": "10s",
+ "pause_after": "10s"
+ },
+ {
+ "type": "shell",
+ "inline": ["sudo reboot now"],
+ "pause_after": "10s",
+ "expect_disconnect": true
+ },
+ {
+ "type": "shell",
+ "environment_vars": ["BRANCH_TAG={{user `branch_tag`}}"],
+ "script": "install_h2ogpt.sh",
+ "pause_after": "10s"
+ },
+ {
+ "type": "shell",
+ "inline": [
+ "sudo chown -R ubuntu:ubuntu /etc/nginx/conf.d",
+ "sudo chown -R ubuntu:ubuntu /etc/systemd/system/"
+ ],
+ "pause_before": "10s"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/run_nginx.sh",
+ "destination": "/workspace/run_nginx.sh"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/run_vllm.sh",
+ "destination": "/workspace/run_vllm.sh"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/run_h2ogpt.sh",
+ "destination": "/workspace/run_h2ogpt.sh"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/h2ogpt_nginx.service",
+ "destination": "/etc/systemd/system/h2ogpt_nginx.service"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/vllm.service",
+ "destination": "/etc/systemd/system/vllm.service"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/h2ogpt.service",
+ "destination": "/etc/systemd/system/h2ogpt.service"
+ },
+ {
+ "type": "file",
+ "source": "./startup-scripts/temp.conf",
+ "destination": "/workspace/temp.conf"
+ },
+ {
+ "type": "shell",
+ "script": "{{user `base_model`}}.sh",
+ "pause_after": "10s"
+ }
+ ]
+}
diff --git a/cloud/packer/install_h2ogpt.sh b/cloud/packer/install_h2ogpt.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2d2ed8b1b7629bf51cee70f3a61a30e5ed6d961c
--- /dev/null
+++ b/cloud/packer/install_h2ogpt.sh
@@ -0,0 +1,19 @@
+#!/bin/bash -e
+
+export PATH=$PATH:/home/ubuntu/.local/bin
+sudo mkdir -p /workspace && cd /workspace
+sudo chmod a+rwx .
+
+git config --global --add safe.directory /workspace
+git config --global advice.detachedHead false
+git clone https://github.com/h2oai/h2ogpt.git .
+
+if [ -z "$BRANCH_TAG" ]; then
+ echo "BRANCH_TAG environment variable is not set."
+ exit 1
+fi
+
+git checkout "$BRANCH_TAG"
+
+ls -la
+sudo ./docker_build_script_ubuntu.sh
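Packer supplies `BRANCH_TAG` through the provisioner's `environment_vars`; to reproduce the step by hand the variable must be exported first. A sketch, assuming passwordless sudo on the target host:

```bash
# Manual equivalent of the install_h2ogpt.sh provisioner step.
export BRANCH_TAG=main
bash cloud/packer/install_h2ogpt.sh
```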
diff --git a/cloud/packer/setup_environment.sh b/cloud/packer/setup_environment.sh
new file mode 100644
index 0000000000000000000000000000000000000000..997cf193f6944576e6a13bf3cef9b6ea4b5a5637
--- /dev/null
+++ b/cloud/packer/setup_environment.sh
@@ -0,0 +1,46 @@
+#!/bin/bash -e
+
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y update
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y --no-install-recommends install \
+ git \
+ software-properties-common \
+ pandoc \
+ curl \
+ apt-utils \
+ make \
+ build-essential \
+ wget \
+ gnupg2 \
+ ca-certificates \
+ lsb-release \
+ ubuntu-keyring
+
+curl https://nginx.org/keys/nginx_signing.key | gpg --dearmor | sudo tee /usr/share/keyrings/nginx-archive-keyring.gpg >/dev/null
+gpg --dry-run --quiet --no-keyring --import --import-options import-show /usr/share/keyrings/nginx-archive-keyring.gpg
+echo "deb [signed-by=/usr/share/keyrings/nginx-archive-keyring.gpg] http://nginx.org/packages/ubuntu `lsb_release -cs` nginx" sudo tee /etc/apt/sources.list.d/nginx.list
+echo -e "Package: *\nPin: origin nginx.org\nPin: release o=nginx\nPin-Priority: 900\n" sudo tee /etc/apt/preferences.d/99nginx
+
+sudo DEBIAN_FRONTEND=noninteractive apt -y update
+sudo DEBIAN_FRONTEND=noninteractive apt -y install nginx
+
+MAX_GCC_VERSION=11
+sudo DEBIAN_FRONTEND=noninteractive add-apt-repository -y ppa:ubuntu-toolchain-r/test
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y install gcc-$MAX_GCC_VERSION g++-$MAX_GCC_VERSION
+
+sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-$MAX_GCC_VERSION 100
+sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-$MAX_GCC_VERSION 100
+sudo update-alternatives --set gcc /usr/bin/gcc-$MAX_GCC_VERSION
+sudo update-alternatives --set g++ /usr/bin/g++-$MAX_GCC_VERSION
+
+wget --quiet https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
+sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
+wget --quiet https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda-repo-ubuntu2004-11-8-local_11.8.0-520.61.05-1_amd64.deb
+sudo dpkg -i cuda-repo-ubuntu2004-11-8-local_11.8.0-520.61.05-1_amd64.deb
+sudo cp /var/cuda-repo-ubuntu2004-11-8-local/cuda-*-keyring.gpg /usr/share/keyrings/
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y update
+sudo DEBIAN_FRONTEND=noninteractive apt-get -y install cuda
+sudo rm -rf *.deb
+
+sudo echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.8/lib64/" >> ~/.bashrc
+sudo echo "export CUDA_HOME=/usr/local/cuda-11.8" >> ~/.bashrc
+sudo echo "export PATH=$PATH:/h2ogpt_conda/bin:/usr/local/cuda-11.8/bin/" >> ~/.bashrc
diff --git a/cloud/packer/startup-scripts/h2ogpt.service b/cloud/packer/startup-scripts/h2ogpt.service
new file mode 100644
index 0000000000000000000000000000000000000000..4308d989550d9643c2d4519dbb8437ef006947c0
--- /dev/null
+++ b/cloud/packer/startup-scripts/h2ogpt.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=h2oGPT Server
+After=network.target
+
+[Service]
+Type=simple
+User=ubuntu
+WorkingDirectory=/workspace
+ExecStart=/usr/bin/bash /workspace/run_h2ogpt.sh
+
+[Install]
+WantedBy=multi-user.target
diff --git a/cloud/packer/startup-scripts/h2ogpt_nginx.service b/cloud/packer/startup-scripts/h2ogpt_nginx.service
new file mode 100644
index 0000000000000000000000000000000000000000..7efff486a88c5e33b8bcd7ef3dd558cb9c7d604b
--- /dev/null
+++ b/cloud/packer/startup-scripts/h2ogpt_nginx.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=h2oGPT Nginx Server
+After=network.target
+
+[Service]
+Type=simple
+User=ubuntu
+WorkingDirectory=/workspace
+ExecStart=/usr/bin/bash /workspace/run_nginx.sh
+
+[Install]
+WantedBy=multi-user.target
diff --git a/cloud/packer/startup-scripts/run_h2ogpt.sh b/cloud/packer/startup-scripts/run_h2ogpt.sh
new file mode 100644
index 0000000000000000000000000000000000000000..735a6a6d3a4fef870776b407474bc5c4822bd766
--- /dev/null
+++ b/cloud/packer/startup-scripts/run_h2ogpt.sh
@@ -0,0 +1,26 @@
+#!/bin/bash -e
+
+while true; do
+ http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:5000/v1/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "prompt": "San Francisco is a",
+ "max_tokens": 7,
+ "temperature": 0
+ }')
+
+ if [ "$http_code" -eq 200 ]; then
+ echo "Received HTTP 200 status code. Starting h2ogpt service"
+ CUDA_VISIBLE_DEVICES=$(seq -s, $(($(nvidia-smi -L | wc -l) / 2)) $(($(nvidia-smi -L | wc -l) - 1))) /h2ogpt_conda/bin/python3.10 \
+ /workspace/generate.py \
+ --inference_server="vllm:0.0.0.0:5000" \
+ --base_model=h2oai/h2ogpt-4096-llama2-13b-chat \
+ --langchain_mode=UserData
+ break
+ else
+ echo "Received HTTP $http_code status code. Retrying in 5 seconds..."
+ sleep 5
+ fi
+done
+
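The `CUDA_VISIBLE_DEVICES` expression above hands the upper half of the GPUs to h2oGPT, while run_vllm.sh keeps the lower half. A worked example for a hypothetical 4-GPU host:

```bash
# 4 GPUs: nvidia-smi -L | wc -l -> 4
# Upper half for h2oGPT: seq -s, $((4 / 2)) $((4 - 1)) -> "2,3"
echo "h2oGPT GPUs: $(seq -s, $((4 / 2)) $((4 - 1)))"   # prints: h2oGPT GPUs: 2,3
```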
diff --git a/cloud/packer/startup-scripts/run_nginx.sh b/cloud/packer/startup-scripts/run_nginx.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e1f253914702cf3b8f05b15cf0bf8d3538e6a1f9
--- /dev/null
+++ b/cloud/packer/startup-scripts/run_nginx.sh
@@ -0,0 +1,23 @@
+#!/bin/bash -e
+
+while true; do
+ http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:5000/v1/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "h2oai/h2ogpt-4096-llama2-13b-chat",
+ "prompt": "San Francisco is a",
+ "max_tokens": 7,
+ "temperature": 0
+ }')
+
+ if [ "$http_code" -eq 200 ]; then
+ echo "Received HTTP 200 status code. Restarting Nginx for h2oGPT"
+ ip=$(dig +short myip.opendns.com @resolver1.opendns.com)
+ sed "s/<|_SUBST_PUBLIC_IP|>;/$ip;/g" /workspace/temp.conf > /etc/nginx/conf.d/h2ogpt.conf
+ sudo systemctl restart nginx.service
+ break
+ else
+ echo "Received HTTP $http_code status code. Retrying in 5 seconds..."
+ sleep 5
+ fi
+done
diff --git a/cloud/packer/startup-scripts/run_vllm.sh b/cloud/packer/startup-scripts/run_vllm.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3ddbebd7d57b743cc25b43044ebd0ab446b42042
--- /dev/null
+++ b/cloud/packer/startup-scripts/run_vllm.sh
@@ -0,0 +1,10 @@
+#!/bin/bash -e
+
+tps=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | wc -l | awk '{if ($1 > 1) print int($1/2); else print 1}')
+NCCL_IGNORE_DISABLED_P2P=1 CUDA_VISIBLE_DEVICES=$(seq -s, 0 $(($(nvidia-smi -L | wc -l) > 1 ? $(nvidia-smi -L | wc -l) / 2 - 1 : 0))) \
+/h2ogpt_conda/vllm_env/bin/python3.10 -m vllm.entrypoints.openai.api_server \
+ --port=5000 \
+ --host=0.0.0.0 \
+ --model h2oai/h2ogpt-4096-llama2-13b-chat \
+ --tokenizer=hf-internal-testing/llama-tokenizer \
+ --tensor-parallel-size=$tps --seed 1234
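A sketch reproducing the sizing arithmetic above for a hypothetical 4-GPU host, with `n` standing in for the `nvidia-smi` GPU count: the awk step yields a tensor-parallel size of 2, and the device list becomes `0,1`, leaving the upper half of the GPUs (`2,3`) for run_h2ogpt.sh. On a single-GPU host both expressions collapse to `1` and `0`.

```bash
# Reproduce the GPU sizing logic for an assumed 4-GPU host.
n=4
tps=$(echo "$n" | awk '{if ($1 > 1) print int($1/2); else print 1}')
devs=$(seq -s, 0 $((n > 1 ? n / 2 - 1 : 0)))
echo "tensor-parallel-size=$tps  CUDA_VISIBLE_DEVICES=$devs"   # -> 2 and 0,1
```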
diff --git a/cloud/packer/startup-scripts/temp.conf b/cloud/packer/startup-scripts/temp.conf
new file mode 100644
index 0000000000000000000000000000000000000000..563cfd759010440031c6adb34cef668a1216c8f0
--- /dev/null
+++ b/cloud/packer/startup-scripts/temp.conf
@@ -0,0 +1,14 @@
+server {
+ listen 80;
+ listen [::]:80;
+ server_name <|_SUBST_PUBLIC_IP|>; # Change this to your domain name
+
+ location / { # Change this if you'd like to serve your Gradio app on a different path
+ proxy_pass http://0.0.0.0:7860/; # Change this if your Gradio app will be running on a different port
+ proxy_redirect off;
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection "upgrade";
+ proxy_set_header Host $host;
+ }
+}
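run_nginx.sh replaces the `<|_SUBST_PUBLIC_IP|>` placeholder with the instance's public IP before installing the file as `/etc/nginx/conf.d/h2ogpt.conf`. A minimal check using a made-up documentation IP:

```bash
# Substitute a hypothetical public IP the way run_nginx.sh does, then let nginx validate it.
sed 's/<|_SUBST_PUBLIC_IP|>;/203.0.113.10;/g' /workspace/temp.conf | sudo tee /etc/nginx/conf.d/h2ogpt.conf
sudo nginx -t
```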
diff --git a/cloud/packer/startup-scripts/vllm.service b/cloud/packer/startup-scripts/vllm.service
new file mode 100644
index 0000000000000000000000000000000000000000..c516098dab1e563d863e04d28901a67aad2033f3
--- /dev/null
+++ b/cloud/packer/startup-scripts/vllm.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=vLLM Server
+After=network.target
+
+[Service]
+Type=simple
+User=ubuntu
+WorkingDirectory=/workspace
+ExecStart=/usr/bin/bash /workspace/run_vllm.sh
+
+[Install]
+WantedBy=multi-user.target
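On a freshly built image the three units can be registered and started in the usual systemd way; this is a sketch of the expected operator workflow, assuming the units are not already enabled elsewhere (for example by the per-model shell script run at the end of the Packer build):

```bash
# Enable and start the vLLM backend, the h2oGPT UI and the nginx front end.
sudo systemctl daemon-reload
sudo systemctl enable --now vllm.service h2ogpt.service h2ogpt_nginx.service
systemctl --no-pager status vllm.service h2ogpt.service h2ogpt_nginx.service
```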
diff --git a/data/NGSL_1.2_stats.csv.zip b/data/NGSL_1.2_stats.csv.zip
new file mode 100644
index 0000000000000000000000000000000000000000..bb919a514664f3e89b07c4e05e91e56ca6941eed
--- /dev/null
+++ b/data/NGSL_1.2_stats.csv.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34993377b20347d5c8837a1101bde4e403232f5f08c80f9441e16ac7a23228a7
+size 25168
diff --git a/data/README-template.md b/data/README-template.md
new file mode 100644
index 0000000000000000000000000000000000000000..98f33b1c4331835f8b4d15295ee837fd2aa13526
--- /dev/null
+++ b/data/README-template.md
@@ -0,0 +1,23 @@
+---
+license: apache-2.0
+language:
+- en
+thumbnail: https://h2o.ai/etc.clientlibs/h2o/clientlibs/clientlib-site/resources/images/favicon.ico
+tags:
+- gpt
+- llm
+- large language model
+- open-source
+---
+# h2oGPT Data Card
+## Summary
+
+H2O.ai's `<>` is an open-source instruct-type dataset for fine-tuning of large language models, licensed for commercial use.
+
+- Number of rows: `<>`
+- Number of columns: `<>`
+- Column names: `<>`
+
+## Source
+
+<>
diff --git a/data/Sample-Invoice-printable.png b/data/Sample-Invoice-printable.png
new file mode 100644
index 0000000000000000000000000000000000000000..5ba4d7a562d24053f6036ad43833ccb8bd16474d
Binary files /dev/null and b/data/Sample-Invoice-printable.png differ
diff --git a/data/censor_words.txt b/data/censor_words.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5a5451a2f0fa0fa354200741d33fef7c7fd2bf49
--- /dev/null
+++ b/data/censor_words.txt
@@ -0,0 +1,10 @@
+fuck-buddy
+fuck-buddys
+clusterfuck
+fuckup
+fuckups
+dumbfuck
+dumbfucks
+mindfuck
+*fucking
+fuckin'
diff --git a/data/config.json b/data/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2876f929cf982a2e9068368bb57339c49f136886
--- /dev/null
+++ b/data/config.json
@@ -0,0 +1,57630 @@
+[
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does max_runtime_minutes do? : max runtime minutes config.toml: If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active is not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain max_runtime_minutes. : max runtime minutes config.toml: If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active is not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Max. runtime in minutes before triggering the 'Finish' button. Approximately enforced. (0 = disabled): . : Set the max runtime minutes config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "max_runtime_minutes",
+ "output": "max runtime minutes config.toml: If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active is not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "max_runtime_minutes",
+ "output": "max runtime minutes config.toml: Max. runtime in minutes before triggering the 'Finish' button. Approximately enforced. (0 = disabled): If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active is not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "max runtime minutes",
+ "output": "max runtime minutes config.toml: Max. runtime in minutes before triggering the 'Finish' button. Approximately enforced. (0 = disabled): If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active is not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Max. runtime in minutes before triggering the 'Finish' button. Approximately enforced. (0 = disabled): ",
+ "output": "max runtime minutes config.toml: Max. runtime in minutes before triggering the 'Finish' button. Approximately enforced. (0 = disabled): If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active is not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting max_runtime_minutes",
+ "output": "max runtime minutes config.toml: If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active is not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting max_runtime_minutes",
+ "output": "max runtime minutes config.toml: Max. runtime in minutes before triggering the 'Finish' button. Approximately enforced. (0 = disabled): If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active is not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does min_auto_runtime_minutes do? : min auto runtime minutes config.toml: if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain min_auto_runtime_minutes. : min auto runtime minutes config.toml: if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Min. runtime in minutes for automatic runtime control (0 = disabled): . : Set the min auto runtime minutes config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "min_auto_runtime_minutes",
+ "output": "min auto runtime minutes config.toml: if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "min_auto_runtime_minutes",
+ "output": "min auto runtime minutes config.toml: Min. runtime in minutes for automatic runtime control (0 = disabled): if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "min auto runtime minutes",
+ "output": "min auto runtime minutes config.toml: Min. runtime in minutes for automatic runtime control (0 = disabled): if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Min. runtime in minutes for automatic runtime control (0 = disabled): ",
+ "output": "min auto runtime minutes config.toml: Min. runtime in minutes for automatic runtime control (0 = disabled): if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting min_auto_runtime_minutes",
+ "output": "min auto runtime minutes config.toml: if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting min_auto_runtime_minutes",
+ "output": "min auto runtime minutes config.toml: Min. runtime in minutes for automatic runtime control (0 = disabled): if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does max_runtime_minutes_smart do? : max runtime minutes smart config.toml: Whether to tune max_runtime_minutes based upon final number of base models,so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes.Note: If the time given is short enough that tuning models are reduced belowfinal model expectations, the final model may be shorter than expected leadingto an overall shorter experiment time."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain max_runtime_minutes_smart. : max runtime minutes smart config.toml: Whether to tune max_runtime_minutes based upon final number of base models,so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes.Note: If the time given is short enough that tuning models are reduced belowfinal model expectations, the final model may be shorter than expected leadingto an overall shorter experiment time."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Smart runtime mode: . : Set the max runtime minutes smart config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "max_runtime_minutes_smart",
+ "output": "max runtime minutes smart config.toml: Whether to tune max_runtime_minutes based upon final number of base models,so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes.Note: If the time given is short enough that tuning models are reduced belowfinal model expectations, the final model may be shorter than expected leadingto an overall shorter experiment time."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "max_runtime_minutes_smart",
+ "output": "max runtime minutes smart config.toml: Smart runtime mode: Whether to tune max_runtime_minutes based upon final number of base models,so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes.Note: If the time given is short enough that tuning models are reduced belowfinal model expectations, the final model may be shorter than expected leadingto an overall shorter experiment time."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "max runtime minutes smart",
+ "output": "max runtime minutes smart config.toml: Smart runtime mode: Whether to tune max_runtime_minutes based upon final number of base models,so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes.Note: If the time given is short enough that tuning models are reduced belowfinal model expectations, the final model may be shorter than expected leadingto an overall shorter experiment time."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Smart runtime mode: ",
+ "output": "max runtime minutes smart config.toml: Smart runtime mode: Whether to tune max_runtime_minutes based upon final number of base models,so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes.Note: If the time given is short enough that tuning models are reduced belowfinal model expectations, the final model may be shorter than expected leadingto an overall shorter experiment time."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting max_runtime_minutes_smart",
+ "output": "max runtime minutes smart config.toml: Whether to tune max_runtime_minutes based upon final number of base models,so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes.Note: If the time given is short enough that tuning models are reduced belowfinal model expectations, the final model may be shorter than expected leadingto an overall shorter experiment time."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting max_runtime_minutes_smart",
+ "output": "max runtime minutes smart config.toml: Smart runtime mode: Whether to tune max_runtime_minutes based upon final number of base models,so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes.Note: If the time given is short enough that tuning models are reduced belowfinal model expectations, the final model may be shorter than expected leadingto an overall shorter experiment time."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does max_runtime_minutes_until_abort do? : max runtime minutes until abort config.toml: If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain max_runtime_minutes_until_abort. : max runtime minutes until abort config.toml: If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Max. runtime in minutes before triggering the 'Abort' button.(0 = disabled): . : Set the max runtime minutes until abort config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "max_runtime_minutes_until_abort",
+ "output": "max runtime minutes until abort config.toml: If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "max_runtime_minutes_until_abort",
+ "output": "max runtime minutes until abort config.toml: Max. runtime in minutes before triggering the 'Abort' button.(0 = disabled): If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "max runtime minutes until abort",
+ "output": "max runtime minutes until abort config.toml: Max. runtime in minutes before triggering the 'Abort' button.(0 = disabled): If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Max. runtime in minutes before triggering the 'Abort' button.(0 = disabled): ",
+ "output": "max runtime minutes until abort config.toml: Max. runtime in minutes before triggering the 'Abort' button.(0 = disabled): If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting max_runtime_minutes_until_abort",
+ "output": "max runtime minutes until abort config.toml: If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting max_runtime_minutes_until_abort",
+ "output": "max runtime minutes until abort config.toml: Max. runtime in minutes before triggering the 'Abort' button.(0 = disabled): If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does strict_reproducible_for_max_runtime do? : strict reproducible for max runtime config.toml: If reproducbile is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building are reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain strict_reproducible_for_max_runtime. : strict reproducible for max runtime config.toml: If reproducbile is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building are reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Whether to disable time-based limits when reproducible is set: . : Set the strict reproducible for max runtime config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "strict_reproducible_for_max_runtime",
+ "output": "strict reproducible for max runtime config.toml: If reproducbile is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building are reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "strict_reproducible_for_max_runtime",
+ "output": "strict reproducible for max runtime config.toml: Whether to disable time-based limits when reproducible is set: If reproducbile is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building are reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "strict reproducible for max runtime",
+ "output": "strict reproducible for max runtime config.toml: Whether to disable time-based limits when reproducible is set: If reproducbile is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building are reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Whether to disable time-based limits when reproducible is set: ",
+ "output": "strict reproducible for max runtime config.toml: Whether to disable time-based limits when reproducible is set: If reproducbile is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building are reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting strict_reproducible_for_max_runtime",
+ "output": "strict reproducible for max runtime config.toml: If reproducbile is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building are reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting strict_reproducible_for_max_runtime",
+ "output": "strict reproducible for max runtime config.toml: Whether to disable time-based limits when reproducible is set: If reproducbile is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building are reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does enable_preview_time_estimate do? : enable preview time estimate config.toml: Uses model built on large number of experiments to estimate runtime. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain enable_preview_time_estimate. : enable preview time estimate config.toml: Uses model built on large number of experiments to estimate runtime. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Whether to have preview estimate runtime: . : Set the enable preview time estimate config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "enable_preview_time_estimate",
+ "output": "enable preview time estimate config.toml: Uses model built on large number of experiments to estimate runtime. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "enable_preview_time_estimate",
+ "output": "enable preview time estimate config.toml: Whether to have preview estimate runtime: Uses model built on large number of experiments to estimate runtime. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "enable preview time estimate",
+ "output": "enable preview time estimate config.toml: Whether to have preview estimate runtime: Uses model built on large number of experiments to estimate runtime. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Whether to have preview estimate runtime: ",
+ "output": "enable preview time estimate config.toml: Whether to have preview estimate runtime: Uses model built on large number of experiments to estimate runtime. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting enable_preview_time_estimate",
+ "output": "enable preview time estimate config.toml: Uses model built on large number of experiments to estimate runtime. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting enable_preview_time_estimate",
+ "output": "enable preview time estimate config.toml: Whether to have preview estimate runtime: Uses model built on large number of experiments to estimate runtime. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does enable_preview_mojo_size_estimate do? : enable preview mojo size estimate config.toml: Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain enable_preview_mojo_size_estimate. : enable preview mojo size estimate config.toml: Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Whether to have preview estimate mojo size: . : Set the enable preview mojo size estimate config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "enable_preview_mojo_size_estimate",
+ "output": "enable preview mojo size estimate config.toml: Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "enable_preview_mojo_size_estimate",
+ "output": "enable preview mojo size estimate config.toml: Whether to have preview estimate mojo size: Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "enable preview mojo size estimate",
+ "output": "enable preview mojo size estimate config.toml: Whether to have preview estimate mojo size: Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Whether to have preview estimate mojo size: ",
+ "output": "enable preview mojo size estimate config.toml: Whether to have preview estimate mojo size: Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting enable_preview_mojo_size_estimate",
+ "output": "enable preview mojo size estimate config.toml: Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting enable_preview_mojo_size_estimate",
+ "output": "enable preview mojo size estimate config.toml: Whether to have preview estimate mojo size: Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does enable_preview_cpu_memory_estimate do? : enable preview cpu memory estimate config.toml: Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain enable_preview_cpu_memory_estimate. : enable preview cpu memory estimate config.toml: Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Whether to have preview estimate max cpu memory: . : Set the enable preview cpu memory estimate config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "enable_preview_cpu_memory_estimate",
+ "output": "enable preview cpu memory estimate config.toml: Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "enable_preview_cpu_memory_estimate",
+ "output": "enable preview cpu memory estimate config.toml: Whether to have preview estimate max cpu memory: Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "enable preview cpu memory estimate",
+ "output": "enable preview cpu memory estimate config.toml: Whether to have preview estimate max cpu memory: Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Whether to have preview estimate max cpu memory: ",
+ "output": "enable preview cpu memory estimate config.toml: Whether to have preview estimate max cpu memory: Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting enable_preview_cpu_memory_estimate",
+ "output": "enable preview cpu memory estimate config.toml: Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting enable_preview_cpu_memory_estimate",
+ "output": "enable preview cpu memory estimate config.toml: Whether to have preview estimate max cpu memory: Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does time_abort do? : time abort config.toml: If the experiment is not done by this time, push the abort button. Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S)assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made.NOTE: If start new experiment with same parameters, restart, or refit, thisabsolute time will apply to such experiments or set of leaderboard experiments."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain time_abort. : time abort config.toml: If the experiment is not done by this time, push the abort button. Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S)assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made.NOTE: If start new experiment with same parameters, restart, or refit, thisabsolute time will apply to such experiments or set of leaderboard experiments."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Time to trigger the 'Abort' button.: . : Set the time abort config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "time_abort",
+ "output": "time abort config.toml: If the experiment is not done by this time, push the abort button. Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S)assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made.NOTE: If start new experiment with same parameters, restart, or refit, thisabsolute time will apply to such experiments or set of leaderboard experiments."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "time_abort",
+ "output": "time abort config.toml: Time to trigger the 'Abort' button.: If the experiment is not done by this time, push the abort button. Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S)assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made.NOTE: If start new experiment with same parameters, restart, or refit, thisabsolute time will apply to such experiments or set of leaderboard experiments."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "time abort",
+ "output": "time abort config.toml: Time to trigger the 'Abort' button.: If the experiment is not done by this time, push the abort button. Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S)assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made.NOTE: If start new experiment with same parameters, restart, or refit, thisabsolute time will apply to such experiments or set of leaderboard experiments."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Time to trigger the 'Abort' button.: ",
+ "output": "time abort config.toml: Time to trigger the 'Abort' button.: If the experiment is not done by this time, push the abort button. Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S)assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made.NOTE: If start new experiment with same parameters, restart, or refit, thisabsolute time will apply to such experiments or set of leaderboard experiments."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting time_abort",
+ "output": "time abort config.toml: If the experiment is not done by this time, push the abort button. Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S)assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made.NOTE: If start new experiment with same parameters, restart, or refit, thisabsolute time will apply to such experiments or set of leaderboard experiments."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting time_abort",
+ "output": "time abort config.toml: Time to trigger the 'Abort' button.: If the experiment is not done by this time, push the abort button. Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S)assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but further artifacts are made.NOTE: If start new experiment with same parameters, restart, or refit, thisabsolute time will apply to such experiments or set of leaderboard experiments."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does time_abort_format do? : time abort format config.toml: Any format is allowed as accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain time_abort_format. : time abort format config.toml: Any format is allowed as accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Time string format for time_abort.: . : Set the time abort format config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "time_abort_format",
+ "output": "time abort format config.toml: Any format is allowed as accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "time_abort_format",
+ "output": "time abort format config.toml: Time string format for time_abort.: Any format is allowed as accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "time abort format",
+ "output": "time abort format config.toml: Time string format for time_abort.: Any format is allowed as accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Time string format for time_abort.: ",
+ "output": "time abort format config.toml: Time string format for time_abort.: Any format is allowed as accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting time_abort_format",
+ "output": "time abort format config.toml: Any format is allowed as accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting time_abort_format",
+ "output": "time abort format config.toml: Time string format for time_abort.: Any format is allowed as accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does time_abort_timezone do? : time abort timezone config.toml: Any time zone in format accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain time_abort_timezone. : time abort timezone config.toml: Any time zone in format accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Time zone for time_abort.: . : Set the time abort timezone config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "time_abort_timezone",
+ "output": "time abort timezone config.toml: Any time zone in format accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "time_abort_timezone",
+ "output": "time abort timezone config.toml: Time zone for time_abort.: Any time zone in format accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "time abort timezone",
+ "output": "time abort timezone config.toml: Time zone for time_abort.: Any time zone in format accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Time zone for time_abort.: ",
+ "output": "time abort timezone config.toml: Time zone for time_abort.: Any time zone in format accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting time_abort_timezone",
+ "output": "time abort timezone config.toml: Any time zone in format accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting time_abort_timezone",
+ "output": "time abort timezone config.toml: Time zone for time_abort.: Any time zone in format accepted by datetime.strptime."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does delete_model_dirs_and_files do? : delete model dirs and files config.toml: Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain delete_model_dirs_and_files. : delete model dirs and files config.toml: Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "delete_model_dirs_and_files",
+ "output": "delete model dirs and files config.toml: Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "delete_model_dirs_and_files",
+ "output": "delete model dirs and files config.toml: Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "delete model dirs and files",
+ "output": "delete model dirs and files config.toml: Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "",
+ "output": "delete model dirs and files config.toml: Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting delete_model_dirs_and_files",
+ "output": "delete model dirs and files config.toml: Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting delete_model_dirs_and_files",
+ "output": "delete model dirs and files config.toml: Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does delete_data_dirs_and_files do? : delete data dirs and files config.toml: Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain delete_data_dirs_and_files. : delete data dirs and files config.toml: Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "delete_data_dirs_and_files",
+ "output": "delete data dirs and files config.toml: Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "delete_data_dirs_and_files",
+ "output": "delete data dirs and files config.toml: Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "delete data dirs and files",
+ "output": "delete data dirs and files config.toml: Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "",
+ "output": "delete data dirs and files config.toml: Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting delete_data_dirs_and_files",
+ "output": "delete data dirs and files config.toml: Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting delete_data_dirs_and_files",
+ "output": "delete data dirs and files config.toml: Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does recipe do? : recipe config.toml: # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - *monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target by at least 0.01 (specified by parameter above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train set, with target marked as missing - test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. 
Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer.Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. Changing thepipeline building recipe will reset all pipeline building recipe options back to default and then re-apply thespecific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline buildingrecipe rules.If choose to do new/continued/refitted/retrained experiment from parent experiment, the recipe rules are not re-appliedand any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. Thisway the new child experiment will use the default settings for the chosen recipe."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain recipe. : recipe config.toml: # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - *monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target by at least 0.01 (specified by parameter above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train set, with target marked as missing - test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. 
And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer.Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. Changing thepipeline building recipe will reset all pipeline building recipe options back to default and then re-apply thespecific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline buildingrecipe rules.If choose to do new/continued/refitted/retrained experiment from parent experiment, the recipe rules are not re-appliedand any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. Thisway the new child experiment will use the default settings for the chosen recipe."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Pipeline Building Recipe: . : Set the recipe config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "recipe",
+ "output": "recipe config.toml: # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - *monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target by at least 0.01 (specified by parameter above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train set, with target marked as missing - test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. 
And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer.Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. Changing thepipeline building recipe will reset all pipeline building recipe options back to default and then re-apply thespecific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline buildingrecipe rules.If choose to do new/continued/refitted/retrained experiment from parent experiment, the recipe rules are not re-appliedand any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. Thisway the new child experiment will use the default settings for the chosen recipe."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "recipe",
+ "output": "recipe config.toml: Pipeline Building Recipe: # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - *monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target by at least 0.01 (specified by parameter above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train set, with target marked as missing - test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. 
And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer.Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. Changing thepipeline building recipe will reset all pipeline building recipe options back to default and then re-apply thespecific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline buildingrecipe rules.If choose to do new/continued/refitted/retrained experiment from parent experiment, the recipe rules are not re-appliedand any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. Thisway the new child experiment will use the default settings for the chosen recipe."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "recipe",
+ "output": "recipe config.toml: Pipeline Building Recipe: # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - *monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target by at least 0.01 (specified by parameter above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train set, with target marked as missing - test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. 
And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer.Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. Changing thepipeline building recipe will reset all pipeline building recipe options back to default and then re-apply thespecific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline buildingrecipe rules.If choose to do new/continued/refitted/retrained experiment from parent experiment, the recipe rules are not re-appliedand any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. Thisway the new child experiment will use the default settings for the chosen recipe."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Pipeline Building Recipe: ",
+ "output": "recipe config.toml: Pipeline Building Recipe: # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - *monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target by at least 0.01 (specified by parameter above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train set, with target marked as missing - test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. 
And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer.Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. Changing thepipeline building recipe will reset all pipeline building recipe options back to default and then re-apply thespecific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline buildingrecipe rules.If choose to do new/continued/refitted/retrained experiment from parent experiment, the recipe rules are not re-appliedand any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. Thisway the new child experiment will use the default settings for the chosen recipe."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting recipe",
+ "output": "recipe config.toml: # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - *monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target by at least 0.01 (specified by parameter above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train set, with target marked as missing - test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. 
And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer.Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. Changing thepipeline building recipe will reset all pipeline building recipe options back to default and then re-apply thespecific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline buildingrecipe rules.If choose to do new/continued/refitted/retrained experiment from parent experiment, the recipe rules are not re-appliedand any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. Thisway the new child experiment will use the default settings for the chosen recipe."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting recipe",
+ "output": "recipe config.toml: Pipeline Building Recipe: # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - *monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target by at least 0.01 (specified by parameter above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train set, with target marked as missing - test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. 
And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer.Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. Changing thepipeline building recipe will reset all pipeline building recipe options back to default and then re-apply thespecific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline buildingrecipe rules.If choose to do new/continued/refitted/retrained experiment from parent experiment, the recipe rules are not re-appliedand any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. Thisway the new child experiment will use the default settings for the chosen recipe."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does custom_unsupervised_expert_mode do? : custom unsupervised expert mode config.toml: Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain custom_unsupervised_expert_mode. : custom unsupervised expert mode config.toml: Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Whether to treat custom unsupervised model like UnsupervisedModel: . : Set the custom unsupervised expert mode config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "custom_unsupervised_expert_mode",
+ "output": "custom unsupervised expert mode config.toml: Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "custom_unsupervised_expert_mode",
+ "output": "custom unsupervised expert mode config.toml: Whether to treat custom unsupervised model like UnsupervisedModel: Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "custom unsupervised expert mode",
+ "output": "custom unsupervised expert mode config.toml: Whether to treat custom unsupervised model like UnsupervisedModel: Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Whether to treat custom unsupervised model like UnsupervisedModel: ",
+ "output": "custom unsupervised expert mode config.toml: Whether to treat custom unsupervised model like UnsupervisedModel: Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting custom_unsupervised_expert_mode",
+ "output": "custom unsupervised expert mode config.toml: Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting custom_unsupervised_expert_mode",
+ "output": "custom unsupervised expert mode config.toml: Whether to treat custom unsupervised model like UnsupervisedModel: Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does enable_genetic_algorithm do? : enable genetic algorithm config.toml: Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models.- If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection).- 'auto' is same as 'on' unless pure NLP or Image experiment.- \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). - If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly.Currently does not supported when pre_transformers or multi-layer pipeline used, which must go through at least one round of tuning or evolution. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain enable_genetic_algorithm. : enable genetic algorithm config.toml: Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models.- If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection).- 'auto' is same as 'on' unless pure NLP or Image experiment.- \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). - If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly.Currently does not supported when pre_transformers or multi-layer pipeline used, which must go through at least one round of tuning or evolution. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Enable genetic algorithm for selection and tuning of features and models: . : Set the enable genetic algorithm config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "enable_genetic_algorithm",
+ "output": "enable genetic algorithm config.toml: Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models.- If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection).- 'auto' is same as 'on' unless pure NLP or Image experiment.- \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). - If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly.Currently does not supported when pre_transformers or multi-layer pipeline used, which must go through at least one round of tuning or evolution. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "enable_genetic_algorithm",
+ "output": "enable genetic algorithm config.toml: Enable genetic algorithm for selection and tuning of features and models: Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models.- If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection).- 'auto' is same as 'on' unless pure NLP or Image experiment.- \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). - If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly.Currently does not supported when pre_transformers or multi-layer pipeline used, which must go through at least one round of tuning or evolution. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "enable genetic algorithm",
+ "output": "enable genetic algorithm config.toml: Enable genetic algorithm for selection and tuning of features and models: Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models.- If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection).- 'auto' is same as 'on' unless pure NLP or Image experiment.- \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). - If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly.Currently does not supported when pre_transformers or multi-layer pipeline used, which must go through at least one round of tuning or evolution. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Enable genetic algorithm for selection and tuning of features and models: ",
+ "output": "enable genetic algorithm config.toml: Enable genetic algorithm for selection and tuning of features and models: Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models.- If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection).- 'auto' is same as 'on' unless pure NLP or Image experiment.- \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). - If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly.Currently does not supported when pre_transformers or multi-layer pipeline used, which must go through at least one round of tuning or evolution. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting enable_genetic_algorithm",
+ "output": "enable genetic algorithm config.toml: Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models.- If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection).- 'auto' is same as 'on' unless pure NLP or Image experiment.- \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). - If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly.Currently does not supported when pre_transformers or multi-layer pipeline used, which must go through at least one round of tuning or evolution. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting enable_genetic_algorithm",
+ "output": "enable genetic algorithm config.toml: Enable genetic algorithm for selection and tuning of features and models: Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models.- If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection).- 'auto' is same as 'on' unless pure NLP or Image experiment.- \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). - If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly.Currently does not supported when pre_transformers or multi-layer pipeline used, which must go through at least one round of tuning or evolution. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does feature_engineering_effort do? : feature engineering effort config.toml: How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain feature_engineering_effort. : feature engineering effort config.toml: How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Feature engineering effort (0..10): . : Set the feature engineering effort config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "feature_engineering_effort",
+ "output": "feature engineering effort config.toml: How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "feature_engineering_effort",
+ "output": "feature engineering effort config.toml: Feature engineering effort (0..10): How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "feature engineering effort",
+ "output": "feature engineering effort config.toml: Feature engineering effort (0..10): How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Feature engineering effort (0..10): ",
+ "output": "feature engineering effort config.toml: Feature engineering effort (0..10): How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting feature_engineering_effort",
+ "output": "feature engineering effort config.toml: How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting feature_engineering_effort",
+ "output": "feature engineering effort config.toml: Feature engineering effort (0..10): How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does check_distribution_shift do? : check distribution shift config.toml: Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain check_distribution_shift. : check distribution shift config.toml: Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Data distribution shift detection: . : Set the check distribution shift config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "check_distribution_shift",
+ "output": "check distribution shift config.toml: Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "check_distribution_shift",
+ "output": "check distribution shift config.toml: Data distribution shift detection: Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "check distribution shift",
+ "output": "check distribution shift config.toml: Data distribution shift detection: Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Data distribution shift detection: ",
+ "output": "check distribution shift config.toml: Data distribution shift detection: Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting check_distribution_shift",
+ "output": "check distribution shift config.toml: Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting check_distribution_shift",
+ "output": "check distribution shift config.toml: Data distribution shift detection: Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does check_distribution_shift_transformed do? : check distribution shift transformed config.toml: Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain check_distribution_shift_transformed. : check distribution shift transformed config.toml: Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Data distribution shift detection on transformed features: . : Set the check distribution shift transformed config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "check_distribution_shift_transformed",
+ "output": "check distribution shift transformed config.toml: Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "check_distribution_shift_transformed",
+ "output": "check distribution shift transformed config.toml: Data distribution shift detection on transformed features: Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "check distribution shift transformed",
+ "output": "check distribution shift transformed config.toml: Data distribution shift detection on transformed features: Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Data distribution shift detection on transformed features: ",
+ "output": "check distribution shift transformed config.toml: Data distribution shift detection on transformed features: Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting check_distribution_shift_transformed",
+ "output": "check distribution shift transformed config.toml: Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting check_distribution_shift_transformed",
+ "output": "check distribution shift transformed config.toml: Data distribution shift detection on transformed features: Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does check_distribution_shift_drop do? : check distribution shift drop config.toml: Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain check_distribution_shift_drop. : check distribution shift drop config.toml: Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Data distribution shift detection drop of features: . : Set the check distribution shift drop config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "check_distribution_shift_drop",
+ "output": "check distribution shift drop config.toml: Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "check_distribution_shift_drop",
+ "output": "check distribution shift drop config.toml: Data distribution shift detection drop of features: Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "check distribution shift drop",
+ "output": "check distribution shift drop config.toml: Data distribution shift detection drop of features: Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Data distribution shift detection drop of features: ",
+ "output": "check distribution shift drop config.toml: Data distribution shift detection drop of features: Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting check_distribution_shift_drop",
+ "output": "check distribution shift drop config.toml: Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting check_distribution_shift_drop",
+ "output": "check distribution shift drop config.toml: Data distribution shift detection drop of features: Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does drop_features_distribution_shift_threshold_auc do? : drop features distribution shift threshold auc config.toml: If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) forwhich shift AUC, GINI, or Spearman correlation is above this value(e.g. AUC of a binary classifier that predicts whether given feature valuebelongs to train or test data) "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain drop_features_distribution_shift_threshold_auc. : drop features distribution shift threshold auc config.toml: If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) forwhich shift AUC, GINI, or Spearman correlation is above this value(e.g. AUC of a binary classifier that predicts whether given feature valuebelongs to train or test data) "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Max allowed feature shift (AUC) before dropping feature: . : Set the drop features distribution shift threshold auc config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "drop_features_distribution_shift_threshold_auc",
+ "output": "drop features distribution shift threshold auc config.toml: If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) forwhich shift AUC, GINI, or Spearman correlation is above this value(e.g. AUC of a binary classifier that predicts whether given feature valuebelongs to train or test data) "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "drop_features_distribution_shift_threshold_auc",
+ "output": "drop features distribution shift threshold auc config.toml: Max allowed feature shift (AUC) before dropping feature: If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) forwhich shift AUC, GINI, or Spearman correlation is above this value(e.g. AUC of a binary classifier that predicts whether given feature valuebelongs to train or test data) "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "drop features distribution shift threshold auc",
+ "output": "drop features distribution shift threshold auc config.toml: Max allowed feature shift (AUC) before dropping feature: If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) forwhich shift AUC, GINI, or Spearman correlation is above this value(e.g. AUC of a binary classifier that predicts whether given feature valuebelongs to train or test data) "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Max allowed feature shift (AUC) before dropping feature: ",
+ "output": "drop features distribution shift threshold auc config.toml: Max allowed feature shift (AUC) before dropping feature: If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) forwhich shift AUC, GINI, or Spearman correlation is above this value(e.g. AUC of a binary classifier that predicts whether given feature valuebelongs to train or test data) "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting drop_features_distribution_shift_threshold_auc",
+ "output": "drop features distribution shift threshold auc config.toml: If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) forwhich shift AUC, GINI, or Spearman correlation is above this value(e.g. AUC of a binary classifier that predicts whether given feature valuebelongs to train or test data) "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting drop_features_distribution_shift_threshold_auc",
+ "output": "drop features distribution shift threshold auc config.toml: Max allowed feature shift (AUC) before dropping feature: If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) forwhich shift AUC, GINI, or Spearman correlation is above this value(e.g. AUC of a binary classifier that predicts whether given feature valuebelongs to train or test data) "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does check_leakage do? : check leakage config.toml: Specify whether to check leakage for each feature (``on`` or ``off``).If a fold column is used, this option checks leakage without using the fold column.By default, LightGBM Model is used for leakage detection when possible, unless it isturned off in the Model Expert Settings tab, in which case only the models selected withthe ``included_models`` option are used. Note that this option is always disabled for timeseries experiments. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain check_leakage. : check leakage config.toml: Specify whether to check leakage for each feature (``on`` or ``off``).If a fold column is used, this option checks leakage without using the fold column.By default, LightGBM Model is used for leakage detection when possible, unless it isturned off in the Model Expert Settings tab, in which case only the models selected withthe ``included_models`` option are used. Note that this option is always disabled for timeseries experiments. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Leakage detection: . : Set the check leakage config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "check_leakage",
+ "output": "check leakage config.toml: Specify whether to check leakage for each feature (``on`` or ``off``).If a fold column is used, this option checks leakage without using the fold column.By default, LightGBM Model is used for leakage detection when possible, unless it isturned off in the Model Expert Settings tab, in which case only the models selected withthe ``included_models`` option are used. Note that this option is always disabled for timeseries experiments. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "check_leakage",
+ "output": "check leakage config.toml: Leakage detection: Specify whether to check leakage for each feature (``on`` or ``off``).If a fold column is used, this option checks leakage without using the fold column.By default, LightGBM Model is used for leakage detection when possible, unless it isturned off in the Model Expert Settings tab, in which case only the models selected withthe ``included_models`` option are used. Note that this option is always disabled for timeseries experiments. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "check leakage",
+ "output": "check leakage config.toml: Leakage detection: Specify whether to check leakage for each feature (``on`` or ``off``).If a fold column is used, this option checks leakage without using the fold column.By default, LightGBM Model is used for leakage detection when possible, unless it isturned off in the Model Expert Settings tab, in which case only the models selected withthe ``included_models`` option are used. Note that this option is always disabled for timeseries experiments. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Leakage detection: ",
+ "output": "check leakage config.toml: Leakage detection: Specify whether to check leakage for each feature (``on`` or ``off``).If a fold column is used, this option checks leakage without using the fold column.By default, LightGBM Model is used for leakage detection when possible, unless it isturned off in the Model Expert Settings tab, in which case only the models selected withthe ``included_models`` option are used. Note that this option is always disabled for timeseries experiments. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting check_leakage",
+ "output": "check leakage config.toml: Specify whether to check leakage for each feature (``on`` or ``off``).If a fold column is used, this option checks leakage without using the fold column.By default, LightGBM Model is used for leakage detection when possible, unless it isturned off in the Model Expert Settings tab, in which case only the models selected withthe ``included_models`` option are used. Note that this option is always disabled for timeseries experiments. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting check_leakage",
+ "output": "check leakage config.toml: Leakage detection: Specify whether to check leakage for each feature (``on`` or ``off``).If a fold column is used, this option checks leakage without using the fold column.By default, LightGBM Model is used for leakage detection when possible, unless it isturned off in the Model Expert Settings tab, in which case only the models selected withthe ``included_models`` option are used. Note that this option is always disabled for timeseries experiments. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does drop_features_leakage_threshold_auc do? : drop features leakage threshold auc config.toml: If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. If fold column present, features are not dropped, because leakage test applies without fold column used. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain drop_features_leakage_threshold_auc. : drop features leakage threshold auc config.toml: If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. If fold column present, features are not dropped, because leakage test applies without fold column used. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Leakage detection dropping AUC/R2 threshold: . : Set the drop features leakage threshold auc config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "drop_features_leakage_threshold_auc",
+ "output": "drop features leakage threshold auc config.toml: If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. If fold column present, features are not dropped, because leakage test applies without fold column used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "drop_features_leakage_threshold_auc",
+ "output": "drop features leakage threshold auc config.toml: Leakage detection dropping AUC/R2 threshold: If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. If fold column present, features are not dropped, because leakage test applies without fold column used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "drop features leakage threshold auc",
+ "output": "drop features leakage threshold auc config.toml: Leakage detection dropping AUC/R2 threshold: If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. If fold column present, features are not dropped, because leakage test applies without fold column used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Leakage detection dropping AUC/R2 threshold: ",
+ "output": "drop features leakage threshold auc config.toml: Leakage detection dropping AUC/R2 threshold: If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. If fold column present, features are not dropped, because leakage test applies without fold column used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting drop_features_leakage_threshold_auc",
+ "output": "drop features leakage threshold auc config.toml: If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. If fold column present, features are not dropped, because leakage test applies without fold column used. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting drop_features_leakage_threshold_auc",
+ "output": "drop features leakage threshold auc config.toml: Leakage detection dropping AUC/R2 threshold: If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. If fold column present, features are not dropped, because leakage test applies without fold column used. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does leakage_max_data_size do? : leakage max data size config.toml: Max number of rows x number of columns to trigger (stratified) sampling for leakage checks "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain leakage_max_data_size. : leakage max data size config.toml: Max number of rows x number of columns to trigger (stratified) sampling for leakage checks "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Max rows x columns for leakage: . : Set the leakage max data size config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "leakage_max_data_size",
+ "output": "leakage max data size config.toml: Max number of rows x number of columns to trigger (stratified) sampling for leakage checks "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "leakage_max_data_size",
+ "output": "leakage max data size config.toml: Max rows x columns for leakage: Max number of rows x number of columns to trigger (stratified) sampling for leakage checks "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "leakage max data size",
+ "output": "leakage max data size config.toml: Max rows x columns for leakage: Max number of rows x number of columns to trigger (stratified) sampling for leakage checks "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Max rows x columns for leakage: ",
+ "output": "leakage max data size config.toml: Max rows x columns for leakage: Max number of rows x number of columns to trigger (stratified) sampling for leakage checks "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting leakage_max_data_size",
+ "output": "leakage max data size config.toml: Max number of rows x number of columns to trigger (stratified) sampling for leakage checks "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting leakage_max_data_size",
+ "output": "leakage max data size config.toml: Max rows x columns for leakage: Max number of rows x number of columns to trigger (stratified) sampling for leakage checks "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does max_features_importance do? : max features importance config.toml: Specify the maximum number of features to use and show in importance tables.When Interpretability is set higher than 1,transformed or original features with lower importance than the top max_features_importance features are always removed.Feature importances of transformed or original features correspondingly will be pruned.Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain max_features_importance. : max features importance config.toml: Specify the maximum number of features to use and show in importance tables.When Interpretability is set higher than 1,transformed or original features with lower importance than the top max_features_importance features are always removed.Feature importances of transformed or original features correspondingly will be pruned.Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Max. num. features for variable importance: . : Set the max features importance config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "max_features_importance",
+ "output": "max features importance config.toml: Specify the maximum number of features to use and show in importance tables.When Interpretability is set higher than 1,transformed or original features with lower importance than the top max_features_importance features are always removed.Feature importances of transformed or original features correspondingly will be pruned.Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "max_features_importance",
+ "output": "max features importance config.toml: Max. num. features for variable importance: Specify the maximum number of features to use and show in importance tables.When Interpretability is set higher than 1,transformed or original features with lower importance than the top max_features_importance features are always removed.Feature importances of transformed or original features correspondingly will be pruned.Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "max features importance",
+ "output": "max features importance config.toml: Max. num. features for variable importance: Specify the maximum number of features to use and show in importance tables.When Interpretability is set higher than 1,transformed or original features with lower importance than the top max_features_importance features are always removed.Feature importances of transformed or original features correspondingly will be pruned.Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Max. num. features for variable importance: ",
+ "output": "max features importance config.toml: Max. num. features for variable importance: Specify the maximum number of features to use and show in importance tables.When Interpretability is set higher than 1,transformed or original features with lower importance than the top max_features_importance features are always removed.Feature importances of transformed or original features correspondingly will be pruned.Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting max_features_importance",
+ "output": "max features importance config.toml: Specify the maximum number of features to use and show in importance tables.When Interpretability is set higher than 1,transformed or original features with lower importance than the top max_features_importance features are always removed.Feature importances of transformed or original features correspondingly will be pruned.Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting max_features_importance",
+ "output": "max features importance config.toml: Max. num. features for variable importance: Specify the maximum number of features to use and show in importance tables.When Interpretability is set higher than 1,transformed or original features with lower importance than the top max_features_importance features are always removed.Feature importances of transformed or original features correspondingly will be pruned.Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does make_python_scoring_pipeline do? : make python scoring pipeline config.toml: Whether to create the Python scoring pipeline at the end of each experiment."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain make_python_scoring_pipeline. : make python scoring pipeline config.toml: Whether to create the Python scoring pipeline at the end of each experiment."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Make Python scoring pipeline: . : Set the make python scoring pipeline config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "make_python_scoring_pipeline",
+ "output": "make python scoring pipeline config.toml: Whether to create the Python scoring pipeline at the end of each experiment."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "make_python_scoring_pipeline",
+ "output": "make python scoring pipeline config.toml: Make Python scoring pipeline: Whether to create the Python scoring pipeline at the end of each experiment."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "make python scoring pipeline",
+ "output": "make python scoring pipeline config.toml: Make Python scoring pipeline: Whether to create the Python scoring pipeline at the end of each experiment."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Make Python scoring pipeline: ",
+ "output": "make python scoring pipeline config.toml: Make Python scoring pipeline: Whether to create the Python scoring pipeline at the end of each experiment."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting make_python_scoring_pipeline",
+ "output": "make python scoring pipeline config.toml: Whether to create the Python scoring pipeline at the end of each experiment."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting make_python_scoring_pipeline",
+ "output": "make python scoring pipeline config.toml: Make Python scoring pipeline: Whether to create the Python scoring pipeline at the end of each experiment."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does make_mojo_scoring_pipeline do? : make mojo scoring pipeline config.toml: Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain make_mojo_scoring_pipeline. : make mojo scoring pipeline config.toml: Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Make MOJO scoring pipeline: . : Set the make mojo scoring pipeline config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "make_mojo_scoring_pipeline",
+ "output": "make mojo scoring pipeline config.toml: Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "make_mojo_scoring_pipeline",
+ "output": "make mojo scoring pipeline config.toml: Make MOJO scoring pipeline: Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "make mojo scoring pipeline",
+ "output": "make mojo scoring pipeline config.toml: Make MOJO scoring pipeline: Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Make MOJO scoring pipeline: ",
+ "output": "make mojo scoring pipeline config.toml: Make MOJO scoring pipeline: Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting make_mojo_scoring_pipeline",
+ "output": "make mojo scoring pipeline config.toml: Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting make_mojo_scoring_pipeline",
+ "output": "make mojo scoring pipeline config.toml: Make MOJO scoring pipeline: Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does make_triton_scoring_pipeline do? : make triton scoring pipeline config.toml: Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\". "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain make_triton_scoring_pipeline. : make triton scoring pipeline config.toml: Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\". "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Make Triton scoring pipeline: . : Set the make triton scoring pipeline config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "make_triton_scoring_pipeline",
+ "output": "make triton scoring pipeline config.toml: Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\". "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "make_triton_scoring_pipeline",
+ "output": "make triton scoring pipeline config.toml: Make Triton scoring pipeline: Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\". "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "make triton scoring pipeline",
+ "output": "make triton scoring pipeline config.toml: Make Triton scoring pipeline: Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\". "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Make Triton scoring pipeline: ",
+ "output": "make triton scoring pipeline config.toml: Make Triton scoring pipeline: Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\". "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting make_triton_scoring_pipeline",
+ "output": "make triton scoring pipeline config.toml: Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\". "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting make_triton_scoring_pipeline",
+ "output": "make triton scoring pipeline config.toml: Make Triton scoring pipeline: Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment. If set to \"auto\", will attempt tocreate it if possible (without dropping capabilities). If set to \"on\", might need to drop some models,transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\". "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does auto_deploy_triton_scoring_pipeline do? : auto deploy triton scoring pipeline config.toml: Whether to automatically deploy the model to the Triton inference server at the end of each experiment.\"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local.\"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote).\"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain auto_deploy_triton_scoring_pipeline. : auto deploy triton scoring pipeline config.toml: Whether to automatically deploy the model to the Triton inference server at the end of each experiment.\"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local.\"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote).\"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Whether to automatically deploy every model to built-in or remote Triton inference server.: . : Set the auto deploy triton scoring pipeline config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "auto_deploy_triton_scoring_pipeline",
+ "output": "auto deploy triton scoring pipeline config.toml: Whether to automatically deploy the model to the Triton inference server at the end of each experiment.\"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local.\"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote).\"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "auto_deploy_triton_scoring_pipeline",
+ "output": "auto deploy triton scoring pipeline config.toml: Whether to automatically deploy every model to built-in or remote Triton inference server.: Whether to automatically deploy the model to the Triton inference server at the end of each experiment.\"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local.\"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote).\"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "auto deploy triton scoring pipeline",
+ "output": "auto deploy triton scoring pipeline config.toml: Whether to automatically deploy every model to built-in or remote Triton inference server.: Whether to automatically deploy the model to the Triton inference server at the end of each experiment.\"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local.\"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote).\"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Whether to automatically deploy every model to built-in or remote Triton inference server.: ",
+ "output": "auto deploy triton scoring pipeline config.toml: Whether to automatically deploy every model to built-in or remote Triton inference server.: Whether to automatically deploy the model to the Triton inference server at the end of each experiment.\"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local.\"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote).\"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting auto_deploy_triton_scoring_pipeline",
+ "output": "auto deploy triton scoring pipeline config.toml: Whether to automatically deploy the model to the Triton inference server at the end of each experiment.\"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local.\"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote).\"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting auto_deploy_triton_scoring_pipeline",
+ "output": "auto deploy triton scoring pipeline config.toml: Whether to automatically deploy every model to built-in or remote Triton inference server.: Whether to automatically deploy the model to the Triton inference server at the end of each experiment.\"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local.\"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote).\"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does triton_dedup_local_tmp do? : triton dedup local tmp config.toml: Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain triton_dedup_local_tmp. : triton dedup local tmp config.toml: Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "triton_dedup_local_tmp",
+ "output": "triton dedup local tmp config.toml: Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "triton_dedup_local_tmp",
+ "output": "triton dedup local tmp config.toml: Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "triton dedup local tmp",
+ "output": "triton dedup local tmp config.toml: Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "",
+ "output": "triton dedup local tmp config.toml: Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting triton_dedup_local_tmp",
+ "output": "triton dedup local tmp config.toml: Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting triton_dedup_local_tmp",
+ "output": "triton dedup local tmp config.toml: Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does triton_mini_acceptance_test_local do? : triton mini acceptance test local config.toml: Test local Triton deployments during creation of MOJO pipeline. Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain triton_mini_acceptance_test_local. : triton mini acceptance test local config.toml: Test local Triton deployments during creation of MOJO pipeline. Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Test local Triton deployments during creation of MOJO pipeline.: . : Set the triton mini acceptance test local config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "triton_mini_acceptance_test_local",
+ "output": "triton mini acceptance test local config.toml: Test local Triton deployments during creation of MOJO pipeline. Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "triton_mini_acceptance_test_local",
+ "output": "triton mini acceptance test local config.toml: Test local Triton deployments during creation of MOJO pipeline.: Test local Triton deployments during creation of MOJO pipeline. Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "triton mini acceptance test local",
+ "output": "triton mini acceptance test local config.toml: Test local Triton deployments during creation of MOJO pipeline.: Test local Triton deployments during creation of MOJO pipeline. Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Test local Triton deployments during creation of MOJO pipeline.: ",
+ "output": "triton mini acceptance test local config.toml: Test local Triton deployments during creation of MOJO pipeline.: Test local Triton deployments during creation of MOJO pipeline. Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting triton_mini_acceptance_test_local",
+ "output": "triton mini acceptance test local config.toml: Test local Triton deployments during creation of MOJO pipeline. Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting triton_mini_acceptance_test_local",
+ "output": "triton mini acceptance test local config.toml: Test local Triton deployments during creation of MOJO pipeline.: Test local Triton deployments during creation of MOJO pipeline. Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does triton_mini_acceptance_test_remote do? : triton mini acceptance test remote config.toml: Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain triton_mini_acceptance_test_remote. : triton mini acceptance test remote config.toml: Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Test remote Triton deployments during creation of MOJO pipeline.: . : Set the triton mini acceptance test remote config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "triton_mini_acceptance_test_remote",
+ "output": "triton mini acceptance test remote config.toml: Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "triton_mini_acceptance_test_remote",
+ "output": "triton mini acceptance test remote config.toml: Test remote Triton deployments during creation of MOJO pipeline.: Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "triton mini acceptance test remote",
+ "output": "triton mini acceptance test remote config.toml: Test remote Triton deployments during creation of MOJO pipeline.: Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Test remote Triton deployments during creation of MOJO pipeline.: ",
+ "output": "triton mini acceptance test remote config.toml: Test remote Triton deployments during creation of MOJO pipeline.: Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting triton_mini_acceptance_test_remote",
+ "output": "triton mini acceptance test remote config.toml: Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting triton_mini_acceptance_test_remote",
+ "output": "triton mini acceptance test remote config.toml: Test remote Triton deployments during creation of MOJO pipeline.: Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does mojo_for_predictions_benchmark do? : mojo for predictions benchmark config.toml: Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain mojo_for_predictions_benchmark. : mojo for predictions benchmark config.toml: Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions_benchmark",
+ "output": "mojo for predictions benchmark config.toml: Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions_benchmark",
+ "output": "mojo for predictions benchmark config.toml: Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo for predictions benchmark",
+ "output": "mojo for predictions benchmark config.toml: Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "",
+ "output": "mojo for predictions benchmark config.toml: Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting mojo_for_predictions_benchmark",
+ "output": "mojo for predictions benchmark config.toml: Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions_benchmark",
+ "output": "mojo for predictions benchmark config.toml: Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does mojo_for_predictions_benchmark_slower_than_python_threshold do? : mojo for predictions benchmark slower than python threshold config.toml: Fail hard if MOJO scoring is this many times slower than Python scoring."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain mojo_for_predictions_benchmark_slower_than_python_threshold. : mojo for predictions benchmark slower than python threshold config.toml: Fail hard if MOJO scoring is this many times slower than Python scoring."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions_benchmark_slower_than_python_threshold",
+ "output": "mojo for predictions benchmark slower than python threshold config.toml: Fail hard if MOJO scoring is this many times slower than Python scoring."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions_benchmark_slower_than_python_threshold",
+ "output": "mojo for predictions benchmark slower than python threshold config.toml: Fail hard if MOJO scoring is this many times slower than Python scoring."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo for predictions benchmark slower than python threshold",
+ "output": "mojo for predictions benchmark slower than python threshold config.toml: Fail hard if MOJO scoring is this many times slower than Python scoring."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "",
+ "output": "mojo for predictions benchmark slower than python threshold config.toml: Fail hard if MOJO scoring is this many times slower than Python scoring."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting mojo_for_predictions_benchmark_slower_than_python_threshold",
+ "output": "mojo for predictions benchmark slower than python threshold config.toml: Fail hard if MOJO scoring is this many times slower than Python scoring."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions_benchmark_slower_than_python_threshold",
+ "output": "mojo for predictions benchmark slower than python threshold config.toml: Fail hard if MOJO scoring is this many times slower than Python scoring."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does mojo_for_predictions_benchmark_slower_than_python_min_rows do? : mojo for predictions benchmark slower than python min rows config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain mojo_for_predictions_benchmark_slower_than_python_min_rows. : mojo for predictions benchmark slower than python min rows config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions_benchmark_slower_than_python_min_rows",
+ "output": "mojo for predictions benchmark slower than python min rows config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions_benchmark_slower_than_python_min_rows",
+ "output": "mojo for predictions benchmark slower than python min rows config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo for predictions benchmark slower than python min rows",
+ "output": "mojo for predictions benchmark slower than python min rows config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "",
+ "output": "mojo for predictions benchmark slower than python min rows config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting mojo_for_predictions_benchmark_slower_than_python_min_rows",
+ "output": "mojo for predictions benchmark slower than python min rows config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions_benchmark_slower_than_python_min_rows",
+ "output": "mojo for predictions benchmark slower than python min rows config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does mojo_for_predictions_benchmark_slower_than_python_min_seconds do? : mojo for predictions benchmark slower than python min seconds config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain mojo_for_predictions_benchmark_slower_than_python_min_seconds. : mojo for predictions benchmark slower than python min seconds config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions_benchmark_slower_than_python_min_seconds",
+ "output": "mojo for predictions benchmark slower than python min seconds config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions_benchmark_slower_than_python_min_seconds",
+ "output": "mojo for predictions benchmark slower than python min seconds config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo for predictions benchmark slower than python min seconds",
+ "output": "mojo for predictions benchmark slower than python min seconds config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "",
+ "output": "mojo for predictions benchmark slower than python min seconds config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting mojo_for_predictions_benchmark_slower_than_python_min_seconds",
+ "output": "mojo for predictions benchmark slower than python min seconds config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions_benchmark_slower_than_python_min_seconds",
+ "output": "mojo for predictions benchmark slower than python min seconds config.toml: Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does inject_mojo_for_predictions do? : inject mojo for predictions config.toml: Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain inject_mojo_for_predictions. : inject mojo for predictions config.toml: Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "inject_mojo_for_predictions",
+ "output": "inject mojo for predictions config.toml: Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "inject_mojo_for_predictions",
+ "output": "inject mojo for predictions config.toml: Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "inject mojo for predictions",
+ "output": "inject mojo for predictions config.toml: Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "",
+ "output": "inject mojo for predictions config.toml: Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting inject_mojo_for_predictions",
+ "output": "inject mojo for predictions config.toml: Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting inject_mojo_for_predictions",
+ "output": "inject mojo for predictions config.toml: Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does mojo_for_predictions do? : mojo for predictions config.toml: Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain mojo_for_predictions. : mojo for predictions config.toml: Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Allow use of MOJO for making predictions: . : Set the mojo for predictions config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions",
+ "output": "mojo for predictions config.toml: Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions",
+ "output": "mojo for predictions config.toml: Allow use of MOJO for making predictions: Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo for predictions",
+ "output": "mojo for predictions config.toml: Allow use of MOJO for making predictions: Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Allow use of MOJO for making predictions: ",
+ "output": "mojo for predictions config.toml: Allow use of MOJO for making predictions: Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting mojo_for_predictions",
+ "output": "mojo for predictions config.toml: Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions",
+ "output": "mojo for predictions config.toml: Allow use of MOJO for making predictions: Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does mojo_for_predictions_max_rows do? : mojo for predictions max rows config.toml: For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain mojo_for_predictions_max_rows. : mojo for predictions max rows config.toml: For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Max number of rows for C++ MOJO predictions: . : Set the mojo for predictions max rows config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions_max_rows",
+ "output": "mojo for predictions max rows config.toml: For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions_max_rows",
+ "output": "mojo for predictions max rows config.toml: Max number of rows for C++ MOJO predictions: For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo for predictions max rows",
+ "output": "mojo for predictions max rows config.toml: Max number of rows for C++ MOJO predictions: For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Max number of rows for C++ MOJO predictions: ",
+ "output": "mojo for predictions max rows config.toml: Max number of rows for C++ MOJO predictions: For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting mojo_for_predictions_max_rows",
+ "output": "mojo for predictions max rows config.toml: For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions_max_rows",
+ "output": "mojo for predictions max rows config.toml: Max number of rows for C++ MOJO predictions: For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does mojo_for_predictions_batch_size do? : mojo for predictions batch size config.toml: Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain mojo_for_predictions_batch_size. : mojo for predictions batch size config.toml: Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Batch size for C++ MOJO predictions.: . : Set the mojo for predictions batch size config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions_batch_size",
+ "output": "mojo for predictions batch size config.toml: Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_for_predictions_batch_size",
+ "output": "mojo for predictions batch size config.toml: Batch size for C++ MOJO predictions.: Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo for predictions batch size",
+ "output": "mojo for predictions batch size config.toml: Batch size for C++ MOJO predictions.: Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Batch size for C++ MOJO predictions.: ",
+ "output": "mojo for predictions batch size config.toml: Batch size for C++ MOJO predictions.: Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting mojo_for_predictions_batch_size",
+ "output": "mojo for predictions batch size config.toml: Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions_batch_size",
+ "output": "mojo for predictions batch size config.toml: Batch size for C++ MOJO predictions.: Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does mojo_acceptance_test_rtol do? : mojo acceptance test rtol config.toml: Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain mojo_acceptance_test_rtol. : mojo acceptance test rtol config.toml: Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Relative tolerance for mini MOJO acceptance test.: . : Set the mojo acceptance test rtol config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_acceptance_test_rtol",
+ "output": "mojo acceptance test rtol config.toml: Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_acceptance_test_rtol",
+ "output": "mojo acceptance test rtol config.toml: Relative tolerance for mini MOJO acceptance test.: Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo acceptance test rtol",
+ "output": "mojo acceptance test rtol config.toml: Relative tolerance for mini MOJO acceptance test.: Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Relative tolerance for mini MOJO acceptance test.: ",
+ "output": "mojo acceptance test rtol config.toml: Relative tolerance for mini MOJO acceptance test.: Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting mojo_acceptance_test_rtol",
+ "output": "mojo acceptance test rtol config.toml: Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting mojo_acceptance_test_rtol",
+ "output": "mojo acceptance test rtol config.toml: Relative tolerance for mini MOJO acceptance test.: Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does mojo_acceptance_test_atol do? : mojo acceptance test atol config.toml: Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain mojo_acceptance_test_atol. : mojo acceptance test atol config.toml: Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Absolute tolerance for mini MOJO acceptance test.: . : Set the mojo acceptance test atol config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_acceptance_test_atol",
+ "output": "mojo acceptance test atol config.toml: Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo_acceptance_test_atol",
+ "output": "mojo acceptance test atol config.toml: Absolute tolerance for mini MOJO acceptance test.: Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "mojo acceptance test atol",
+ "output": "mojo acceptance test atol config.toml: Absolute tolerance for mini MOJO acceptance test.: Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Absolute tolerance for mini MOJO acceptance test.: ",
+ "output": "mojo acceptance test atol config.toml: Absolute tolerance for mini MOJO acceptance test.: Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting mojo_acceptance_test_atol",
+ "output": "mojo acceptance test atol config.toml: Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting mojo_acceptance_test_atol",
+ "output": "mojo acceptance test atol config.toml: Absolute tolerance for mini MOJO acceptance test.: Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does reduce_mojo_size do? : reduce mojo size config.toml: Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead toless memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, andhence can affect the predictive accuracy of the model. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain reduce_mojo_size. : reduce mojo size config.toml: Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead toless memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, andhence can affect the predictive accuracy of the model. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": How can I do this: Attempt to reduce the size of the MOJO: . : Set the reduce mojo size config.toml"
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "reduce_mojo_size",
+ "output": "reduce mojo size config.toml: Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead toless memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, andhence can affect the predictive accuracy of the model. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "reduce_mojo_size",
+ "output": "reduce mojo size config.toml: Attempt to reduce the size of the MOJO: Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead toless memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, andhence can affect the predictive accuracy of the model. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "reduce mojo size",
+ "output": "reduce mojo size config.toml: Attempt to reduce the size of the MOJO: Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead toless memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, andhence can affect the predictive accuracy of the model. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Explain the following expert setting for Driverless AI",
+ "input": "Attempt to reduce the size of the MOJO: ",
+ "output": "reduce mojo size config.toml: Attempt to reduce the size of the MOJO: Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead toless memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, andhence can affect the predictive accuracy of the model. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a short explanation of the expert setting reduce_mojo_size",
+ "output": "reduce mojo size config.toml: Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead toless memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, andhence can affect the predictive accuracy of the model. "
+ },
+ {
+ "prompt_type": "human_bot",
+ "instruction": "Provide a detailed explanation of the expert setting reduce_mojo_size",
+ "output": "reduce mojo size config.toml: Attempt to reduce the size of the MOJO: Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead toless memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, andhence can affect the predictive accuracy of the model. "
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": What does make_pipeline_visualization do? : make pipeline visualization config.toml: Whether to create the pipeline visualization at the end of each experiment.Uses MOJO to show pipeline, input features, transformers, model, and outputs of model. MOJO-capable tree models show first tree."
+ },
+ {
+ "prompt_type": "plain",
+ "instruction": ": Explain make_pipeline_visualization.