# Python dependencies for this project, in pip requirements-file format.
# One requirement (or one pip option) per line.

arxiv
beautifulsoup4
bert_score
chardet
chromadb
docx2txt
EbookLib
elasticsearch
fastapi
faster_whisper
fire
FlashRank
fugashi
genanki
gradio==4.44.1
html2text
jieba
Jinja2
joblib
langchain
langdetect
mwparserfromhell
mwxml
nltk
numpy
onnxruntime
openai
pandas
Pillow
playwright
psycopg2-binary
pyannote.audio
pyaudio
pydub
pymupdf
pymupdf4llm
docling
# NOTE(review): pypandoc and pypandoc_binary look like two distributions of the
# same library (the latter presumably bundling the pandoc executable) --
# confirm that both are really needed.
pypandoc
pypandoc_binary
pytest
python-dotenv
python-json-logger
PyYAML
Requests
rouge_score
sacrebleu
scikit_learn
sentence_transformers
SQLAlchemy
streamlit
tenacity
textstat
tiktoken
toml
tqdm
trafilatura
transformers
urllib3
yt_dlp
datasets
lxml_html_clean

# Package index configuration. pip treats these as file-wide options: they
# affect how every requirement in this file is resolved, not only the torch
# packages listed below. The PyTorch CUDA 12.4 wheel index is the primary index
# and PyPI is added as an extra index.
--index-url https://download.pytorch.org/whl/cu124
--extra-index-url https://pypi.org/simple
torch
torchaudio
torchvision