ucare

Runtime error

App Files Files Community

aperrot42wq committed on Sep 12, 2023

Commit

fc9da2a

•

1 Parent(s): 7cce5b7

- basic diarization

Browse files

- basic transcript
- basic summary

Files changed (13) hide show

.dockerignore +2 -0
.gitignore +163 -0
.idea/.gitignore +8 -0
.idea/Seamlessm4t_diarization_VAD.iml +8 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/modules.xml +8 -0
.idea/vcs.xml +6 -0
Dockerfile +59 -0
app.py +252 -0
docker-compose.yaml +30 -0
packages.txt +1 -0
requirements.txt +6 -0
style.css +3 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ torch/
2	+ cache/

.gitignore ADDED Viewed

	@@ -0,0 +1,163 @@

+# venv
+.venv
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

.idea/Seamlessm4t_diarization_VAD.iml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/Seamlessm4t_diarization_VAD.iml" filepath="$PROJECT_DIR$/.idea/Seamlessm4t_diarization_VAD.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

Dockerfile ADDED Viewed

	@@ -0,0 +1,59 @@

+# Base image: CUDA 11.7.1 + cuDNN 8 development image on Ubuntu 22.04.
+FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
+# Keep apt from prompting during the image build.
+ENV DEBIAN_FRONTEND=noninteractive
+# System packages: VCS/download tools, the libraries needed to compile Python,
+# ffmpeg and libsndfile; the apt lists are removed afterwards.
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
+    git \
+    git-lfs \
+    wget \
+    curl \
+    # python build dependencies \
+    build-essential \
+    libssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    libncursesw5-dev \
+    xz-utils \
+    tk-dev \
+    libxml2-dev \
+    libxmlsec1-dev \
+    libffi-dev \
+    liblzma-dev \
+    # gradio dependencies \
+    ffmpeg \
+    # fairseq2 dependencies \
+    libsndfile-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+# Create a user with uid 1000 and run the remaining instructions as that user.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:${PATH}
+WORKDIR ${HOME}/app
+# Install pyenv for this user and put its shims/binaries first on PATH.
+RUN curl https://pyenv.run | bash
+ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
+# Python version built by pyenv; can be overridden with --build-arg PYTHON_VERSION=...
+ARG PYTHON_VERSION=3.10.12
+RUN pyenv install ${PYTHON_VERSION} && \
+    pyenv global ${PYTHON_VERSION} && \
+    pyenv rehash && \
+    pip install --no-cache-dir -U pip setuptools wheel
+# requirements.txt is copied and installed before the rest of the sources are copied.
+COPY --chown=1000 ./requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
+# Copy the application sources (filtered by .dockerignore) into the working directory.
+COPY --chown=1000 . ${HOME}/app
+# Runtime settings: Gradio is configured through environment variables to listen
+# on 0.0.0.0:7860.
+ENV PYTHONPATH=${HOME}/app \
+    PYTHONUNBUFFERED=1 \
+    GRADIO_ALLOW_FLAGGING=never \
+    GRADIO_NUM_PORTS=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_THEME=huggingface \
+    SYSTEM=spaces \
+    GRADIO_SERVER_PORT=7860
+EXPOSE 7860
+# Start the Gradio application.
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,252 @@

+## Dirty one-file implementation, for experimental (and fun) purposes only
+import os
+import gradio as gr
+from dotenv import load_dotenv
+from pydub import AudioSegment
+from tqdm.auto import tqdm
+print('starting')
+load_dotenv()
+from gradio_client import Client
+HF_API = os.getenv("HF_API")
+SEAMLESS_API_URL = os.getenv("SEAMLESS_API_URL")  # path to Seamlessm4t API endpoint
+GPU_AVAILABLE = os.getenv("GPU_AVAILABLE")
+DEFAULT_TARGET_LANGUAGE = "French"
+MISTRAL_SUMMARY_URL= "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
+LLAMA_SUMMARY_URL="https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
+print('env setup ok')
+DESCRIPTION = """
+# Transcribe and create a summary of a conversation.
+"""
+DUPLICATE = """
+To duplicate this repo, you have to give permission from three reopsitories and accept all user conditions:
+1- https://huggingface.co/pyannote/voice-activity-detection
+2- https://hf.co/pyannote/segmentation
+3- https://hf.co/pyannote/speaker-diarization
+"""
+from pyannote.audio import Pipeline
+#initialize diarization pipeline
+diarizer = Pipeline.from_pretrained(
+    "pyannote/speaker-diarization-3.1",
+    use_auth_token=HF_API)
+# send pipeline to GPU (when available)
+import torch
+diarizer.to(torch.device(GPU_AVAILABLE))
+print('diarizer setup ok')
+# predict is a generator that incrementally yields recognized text with speaker label
+def predict(target_language, input_audio):
+    print('->predict started')
+    print(target_language, type(input_audio), input_audio)
+    print('-->diarization')
+    diarized = diarizer(input_audio, min_speakers=2, max_speakers=5)
+    print('-->automatic speech recognition')
+    # split audio according to diarization
+    song = AudioSegment.from_wav(input_audio)
+    client = Client(SEAMLESS_API_URL, hf_token=HF_API)
+    output_text = ""
+    for turn, _, speaker in diarized.itertracks(yield_label=True):
+        print(speaker, turn)
+        try:
+            clipped = song[turn.start * 1000 : turn.end * 1000]
+            clipped.export(f"my.wav", format="wav", bitrate=16000)
+            result = client.predict(
+		        f"my.wav",
+		        target_language,
+                api_name="/asr"
+            )
+            current_text = f"speaker: {speaker} text: {result} "
+            print(current_text)
+            if current_text is not None:
+                output_text = output_text + "\n" + current_text
+            yield output_text
+        except Exception as e:
+            print(e)
+import requests
+def generate_summary_llama3(language, transcript):
+    queryTxt = f'''
+<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+You are a helpful and truthful patient-doctor encounter summary writer.
+Users sends you transcripts of patient-doctor encounter and you create accurate and concise summaries.
+The summary only contains informations from the transcript.
+Your summary is written in {language}.
+The summary only includes relevant sections.
+    <template>
+    # Chief Complaint
+    # History of Present Illness (HPI)
+    # Relevant Past Medical History
+    # Physical Examination
+    # Assessment and Plan
+    # Follow-up
+    # Additional Notes
+    </template> <|eot_id|>
+<|begin_of_text|><|start_header_id|>user<|end_header_id|>
+<transcript>
+{transcript}
+</transcript><|eot_id|>
+<|start_header_id|>assistant<|end_header_id|>
+'''
+    payload = {
+	"inputs": queryTxt,
+	"parameters": {
+		"return_full_text": False,
+		"wait_for_model": True,
+		"min_length": 1000
+	},
+	"options": {
+		"use_cache": False
+	}
+    }
+    response = requests.post(LLAMA_SUMMARY_URL, headers = {"Authorization": f"Bearer {HF_API}"}, json=payload)
+    print(response.json())
+    return response.json()[0]['generated_text'][len('<summary>'):]
+def generate_summary_mistral(language, transcript):
+    sysPrompt = f'''<s>[INST]
+You are a helpful and truthful patient-doctor encounter summary writer.
+Users sends you transcripts of patient-doctor encounter and you create accurate and concise summaries.
+The summary only contains informations from the transcript.
+Your summary is written in {language}.
+The summary only includes relevant sections.
+    <template>
+    # Chief Complaint
+    # History of Present Illness (HPI)
+    # Relevant Past Medical History
+    # Physical Examination
+    # Assessment and Plan
+    # Follow-up
+    # Additional Notes
+    </template>
+'''
+    queryTxt=f'''
+<transcript>
+{transcript}
+</transcript>
+[/INST]
+'''
+    payload = {
+	"inputs": sysPrompt + queryTxt,
+	"parameters": {
+		"return_full_text": False,
+		"wait_for_model": True,
+		"min_length": 1000
+	},
+	"options": {
+		"use_cache": False
+	}
+    }
+    response = requests.post(MISTRAL_SUMMARY_URL, headers = {"Authorization": f"Bearer {HF_API}"}, json=payload)
+    print(response.json())
+    return response.json()[0]['generated_text'][len('<summary>'):]
+def generate_summary(model, language, transcript):
+    match model:
+        case "Mistral-7B":
+            print("-> summarize with mistral")
+            return generate_summary_mistral( language, transcript)
+        case "LLAMA3":
+            print("-> summarize with llama3")
+            return generate_summary_llama3(language, transcript)
+        case _:
+            return f"Unknown model {model}"
+def update_audio_ui(audio_source: str) -> tuple[dict, dict]:
+    mic = audio_source == "microphone"
+    return (
+        gr.update(visible=mic, value=None),  # input_audio_mic
+        gr.update(visible=not mic, value=None),  # input_audio_file
+    )
+with gr.Blocks() as demo:
+    gr.Markdown(DESCRIPTION)
+    with gr.Group():
+        with gr.Row():
+            target_language = gr.Dropdown(
+                choices= ["French", "English"],
+                label="Output Language",
+                value="French",
+                interactive=True,
+                info="Select your target language",
+            )
+        with gr.Row() as audio_box:
+            input_audio = gr.Audio(
+                type="filepath"
+            )
+        submit = gr.Button("Transcribe")
+        transcribe_output = gr.Textbox(
+            label="Transcribed Text",
+            value="",
+            interactive=False,
+            lines=10,
+            scale=10,
+            max_lines=100,
+        )
+        submit.click(
+            fn=predict,
+            inputs=[
+                target_language,
+                input_audio
+            ],
+            outputs=[transcribe_output],
+            api_name="predict",
+        )
+        with gr.Row():
+            sumary_model = gr.Dropdown(
+                choices= ["Mistral-7B", "LLAMA3"],
+                label="Summary model",
+                value="Mistral-7B",
+                interactive=True,
+                info="Select your summary model",
+            )
+        summarize = gr.Button("Summarize")
+        summary_output = gr.Textbox(
+            label="Summarized Text",
+            value="",
+            interactive=False,
+            lines=10,
+            scale=10,
+            max_lines=100,
+        )
+        summarize.click(
+            fn=generate_summary,
+            inputs=[
+                sumary_model,
+                target_language,
+                transcribe_output
+            ],
+            outputs=[summary_output],
+            api_name="predict",
+        )
+    gr.Markdown(DUPLICATE)
+demo.queue(max_size=50).launch()

docker-compose.yaml ADDED Viewed

	@@ -0,0 +1,30 @@

+version: "3.7"
+services:
+  # Builds the image from the local Dockerfile and runs the Gradio app on one GPU.
+  seamless_diarization_VAD_service:
+    image: seamless_diarization_vad:v0.1
+    build:
+      context: .
+    container_name: seamless_diarization_VAD
+    # NOTE(review): this overrides the non-root `user` account the Dockerfile
+    # switches to — confirm that running as root is really required.
+    user: root
+    environment:
+      - GIT_PYTHON_REFRESH=quiet
+      - HUGGINGFACE_HUB_CACHE=/home/user/temp/.cache/HUGGINGFACE_HUB_CACHE
+      - TRANSFORMERS_CACHE=/home/user/temp/.cache/TRANSFORMERS_CACHE
+      - HF_HOME=/home/user/temp/.cache/HF_HOME
+      - TORCH_HOME=/home/user/temp/torch
+      #- TRANSFORMERS_OFFLINE=1
+    tty: true
+    stdin_open: true
+    volumes:
+      # backs TORCH_HOME
+      - ./torch/:/home/user/temp/torch
+      # All Hugging Face cache variables above point below /home/user/temp/.cache,
+      # so the host cache directory has to be mounted there for downloads to
+      # persist across container restarts.
+      - ./cache/:/home/user/temp/.cache/
+    ports:
+      # host 8005 -> Gradio port 7860; quoted so YAML always reads it as a string
+      - "8005:7860"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ["4"]
+              capabilities: [gpu]

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ffmpeg

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+pyannote.audio
+pydub
+gradio_client==0.16.0
+gradio==4.28.3
+python-dotenv==1.0.0
+torch

style.css ADDED Viewed

	@@ -0,0 +1,3 @@

+h1 {
+  text-align: center;
+}