|
import os |
|
import socket |
|
from types import SimpleNamespace |
|
|
|
|
|
def get_size(x):
    """Convert a human-readable size string such as ``"2GB"`` into bytes.

    Recognized unit suffixes are ``TB``, ``GB``, ``MB``, ``KB`` and ``B``
    (binary multiples, i.e. ``1KB == 2**10`` bytes). Matching ignores case
    and surrounding whitespace.

    Args:
        x: Size specification, e.g. ``"2GB"`` or ``"1.5TB"``.

    Returns:
        The size in bytes as a ``float``. If ``x`` is not a string, has no
        recognized unit suffix, or its numeric part cannot be parsed, the
        default of ``2**31`` bytes (2 GiB) is returned instead.
    """
    fallback = 2**31
    # Longer suffixes come first so that "GB" is not mistaken for plain "B".
    units = (
        ("TB", 2**40),
        ("GB", 2**30),
        ("MB", 2**20),
        ("KB", 2**10),
        ("B", 1),
    )
    try:
        text = x.strip().upper()
        for suffix, factor in units:
            if text.endswith(suffix):
                # Strip only the trailing unit; the remainder must be a number.
                return float(text[: -len(suffix)]) * factor
    except (AttributeError, TypeError, ValueError):
        # Non-string input or a malformed number: use the default below.
        pass
    return fallback
|
|
|
|
|
version = "1.6.0-dev"

# Determine the local address of the outbound network interface by
# "connecting" a datagram socket to a public address and reading back the
# local endpoint chosen for it. The context manager guarantees the socket is
# closed even when connect()/getsockname() raises.
try:
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(("8.8.8.8", 80))
        host = s.getsockname()[0]
except OSError:
    # No usable route/interface: fall back to the loopback name.
    host = "localhost"

port = "10101"
url = f"http://{host}:{port}/"
|
|
|
|
|
def _env_int(name, default):
    """Return environment variable *name* parsed as an int.

    Falls back to *default* when the variable is unset or is not a valid
    integer, so the resulting setting always has an integer type.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        return default


# Bucket name shared by the S3 defaults below; read from the environment once.
_aws_bucket = os.getenv("AWS_BUCKET", "bucket_name")

# Application-wide default settings, exposed as attributes of a namespace.
# Values read through os.getenv can be overridden via environment variables.
default_cfg = SimpleNamespace(
    url=url,
    name="H2O LLM Studio",
    version=version,
    github="https://github.com/h2oai/h2o-llmstudio",
    # Parsed into bytes by get_size; unparseable values use its default.
    min_experiment_disk_space=get_size(
        os.getenv("MIN_DISK_SPACE_FOR_EXPERIMENTS", "2GB")
    ),
    # Comma-separated list in the environment variable.
    allowed_file_extensions=os.getenv(
        "ALLOWED_FILE_EXTENSIONS", ".zip,.csv,.pq,.parquet"
    ).split(","),
    llm_studio_workdir=os.getenv("H2O_LLM_STUDIO_WORKDIR", os.getcwd()),
    # Enabled only by the exact string "True".
    heap_mode=os.getenv("H2O_LLM_STUDIO_ENABLE_HEAP", "False") == "True",
    data_folder="data/",
    output_folder="output/",
    s3_bucket=_aws_bucket,
    s3_filename=os.path.join(_aws_bucket, "default.zip"),
    cfg_file="text_causal_language_modeling_config",
    start_page="home",
    kaggle_command="kaggle competitions download -c dataset",
    problem_types=[
        "text_causal_language_modeling_config",
        "text_dpo_modeling_config",
        "text_sequence_to_sequence_modeling_config",
        "text_causal_classification_modeling_config",
    ],
    problem_categories=["text"],
    dataset_keys=[
        "train_dataframe",
        "validation_dataframe",
        "prompt_column",
        "answer_column",
        "parent_id_column",
    ],
    dataset_trigger_keys=[
        "train_dataframe",
        "validation_dataframe",
    ],
    dataset_extra_keys=[
        "validation_strategy",
        "data_sample",
        "data_sample_choice",
    ],
    dataset_folder_keys=[
        "train_dataframe",
        "validation_dataframe",
    ],
    user_settings={
        "theme_dark": True,
        "credential_saver": ".env File",
        "default_aws_bucket_name": _aws_bucket,
        "default_aws_access_key": os.getenv("AWS_ACCESS_KEY_ID", ""),
        "default_aws_secret_key": os.getenv("AWS_SECRET_ACCESS_KEY", ""),
        "default_azure_conn_string": "",
        "default_azure_container": "",
        "default_kaggle_username": "",
        "default_kaggle_secret_key": "",
        "set_max_epochs": 50,
        "set_max_batch_size": 256,
        "set_max_gradient_clip": 10,
        "set_max_lora_r": 256,
        "set_max_lora_alpha": 256,
        "gpu_used_for_chat": 1,
        "default_number_of_workers": 8,
        "default_logger": "None",
        "default_neptune_project": os.getenv("NEPTUNE_PROJECT", ""),
        "default_neptune_api_token": os.getenv("NEPTUNE_API_TOKEN", ""),
        "default_huggingface_api_token": os.getenv("HUGGINGFACE_TOKEN", ""),
        "default_openai_azure": os.getenv("OPENAI_API_TYPE", "open_ai") == "azure",
        "default_openai_api_token": os.getenv("OPENAI_API_KEY", ""),
        "default_openai_api_base": os.getenv(
            "OPENAI_API_BASE", "https://example-endpoint.openai.azure.com"
        ),
        "default_openai_api_deployment_id": os.getenv(
            "OPENAI_API_DEPLOYMENT_ID", "deployment-name"
        ),
        "default_openai_api_version": os.getenv("OPENAI_API_VERSION", "2023-05-15"),
        # Always an int: os.getenv alone would return a str whenever the
        # variable is set and the int default only when it is unset.
        "default_gpt_eval_max": _env_int("GPT_EVAL_MAX", 100),
        "default_safe_serialization": True,
        "delete_dialogs": True,
        "chart_plot_max_points": 1000,
    },
)
|
|