# API key and model name for each provider.
# Keys are stored here in plain text; keep real keys out of version control.
[API]
anthropic_api_key =
anthropic_model = claude-3-5-sonnet-20240620
cohere_api_key =
cohere_model = command-r-plus
groq_api_key =
groq_model = llama3-70b-8192
openai_api_key =
openai_model = gpt-4o
huggingface_api_key =
huggingface_model = CohereForAI/c4ai-command-r-plus
openrouter_api_key =
openrouter_model = mistralai/mistral-7b-instruct:free
deepseek_api_key =
deepseek_model = deepseek-chat
mistral_model = mistral-large-latest
mistral_api_key =
custom_openai_api =
custom_openai_api_ip =

# Endpoint URLs (all on 127.0.0.1), keys and model names for the local backends.
# NOTE(review): ooba and tabby are set to the same URL (port 5000), and llama
# and aphrodite are set to the same URL (port 8080) - change one of each pair
# if both backends are meant to run at the same time.
[Local-API]
kobold_api_key =
kobold_api_IP = http://127.0.0.1:5001/api/v1/generate
llama_api_key =
llama_api_IP = http://127.0.0.1:8080/completion
ooba_api_key =
ooba_api_IP = http://127.0.0.1:5000/v1/chat/completions
tabby_api_IP = http://127.0.0.1:5000/v1/chat/completions
tabby_api_key =
vllm_api_IP = http://127.0.0.1:8000/v1/chat/completions
vllm_model =
ollama_api_IP = http://127.0.0.1:11434/api/generate
ollama_api_key =
ollama_model =
aphrodite_api_IP = http://127.0.0.1:8080/completion
aphrodite_api_key =

# Output and log locations.
[Paths]
output_path = Results
logging_file = Logs

[Processing]
# Can swap 'cuda' with 'cpu' if you want to use your CPU for processing
processing_choice = cuda

[Settings]
# NOTE(review): units are not stated in this file (chunk_duration is presumably seconds) - confirm.
chunk_duration = 30
words_per_second = 3

# Prompt texts. The surrounding double quotes are written as part of each value;
# whether they are stripped depends on the code that reads this file.
[Prompts]
prompt_sample = "What is the meaning of life?"
# NOTE(review): the prompt says "inside tags" without naming a tag - confirm
# whether a tag name is missing from the text.
video_summarize_prompt = "Above is the transcript of a video. Please read through the transcript carefully. Identify the main topics that are discussed over the course of the transcript. Then, summarize the key points about each main topic in bullet points. The bullet points should cover the key information conveyed about each topic in the video, but should be much shorter than the full transcript. Please output your bullet point summary inside tags. Do not repeat yourself while writing the summary."
# Database backend selection.
# To use Elasticsearch instead of SQLite, set `type` to `elasticsearch` and set
# `elasticsearch_host` and `elasticsearch_port` to those of your configured ES instance.
[Database]
type = sqlite
# NOTE(review): the leading '/' makes this an absolute path on POSIX systems -
# confirm that is intended rather than a path relative to the project.
sqlite_path = /Databases/media_summary.db
elasticsearch_host = localhost
elasticsearch_port = 9200
chroma_db_path = chroma_db

[Embeddings]
# 'embedding_provider' can be 'openai', 'local', or 'huggingface'
embedding_provider = openai
# `embedding_model`: set to the model name you want to use for embeddings.
# For OpenAI, this can be 'text-embedding-3-small' or 'text-embedding-3-large'.
# huggingface: model = dunzhang/stella_en_400M_v5
embedding_model = text-embedding-3-small
embedding_api_url = http://localhost:8080/v1/embeddings
# Placeholder value - replace with a real key if the chosen provider needs one.
embedding_api_key = your_api_key_here
chunk_size = 400
overlap = 200

[Chunking]
# 'method' can be 'words' / 'sentences' / 'paragraphs' / 'semantic' / 'tokens'
method = words
max_size = 400
overlap = 200
# adaptive: use nltk+punkt to split text into sentences, then identify the
# average sentence length and set that as the chunk size
adaptive = false
multi_level = false
language = english

# Reference notes only: the header below is commented out, so this is not a live section.
# NOTE(review): the 'f' entries look like unfilled placeholders - fill in or remove.
#[Comments]
#OpenAI Models:
#   f
#Anthropic Models:
#   f
#Cohere Models:
#   f
#DeepSeek Models:
#   f
#Groq Models:
#   f
#Mistral Models:
#   mistral-large-latest
#   open-mistral-nemo
#   codestral-latest
#   mistral-embed
#   open-mistral-7b
#   open-mixtral-8x7b
#   open-mixtral-8x22b
#   open-codestral-mamba