# Captured from a Hugging Face Spaces page; Space status at capture time: Runtime error.
import streamlit as st | |
import os | |
import json | |
from PIL import Image | |
from urllib.parse import quote # Ensure this import is included | |
import base64 | |
import glob | |
import json | |
import math | |
import openai | |
import os | |
import pytz | |
import re | |
import requests | |
import textract | |
import time | |
import zipfile | |
import huggingface_hub | |
import dotenv | |
from audio_recorder_streamlit import audio_recorder | |
from bs4 import BeautifulSoup | |
from collections import deque | |
from datetime import datetime | |
from dotenv import load_dotenv | |
from huggingface_hub import InferenceClient | |
from io import BytesIO | |
from openai import ChatCompletion | |
from PyPDF2 import PdfReader | |
#from templates import bot_template, css, user_template -- pattern with content | |
from xml.etree import ElementTree as ET | |
import streamlit.components.v1 as components # Import Streamlit Components for HTML5 | |
# Set page configuration with a title and favicon | |
# Page chrome: browser-tab title/icon, wide layout, sidebar open by default,
# and the three links shown in the app menu.
st.set_page_config(
    page_title="🌌🚀 Mixable AI - Voice Search",
    page_icon="🌠",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        "Get Help": "https://huggingface.co/awacke1",
        "Report a bug": "https://huggingface.co/spaces/awacke1/WebDataDownload",
        "About": "# Midjourney: https://discord.com/channels/@me/997514686608191558",
    },
)
# Optional reference table: the markdown below is rendered only while the
# 'Show Anatomy Table' checkbox is ticked.
# NOTE(review): every line in this capture ends with extraction residue and the
# table's original spacing is lost - confirm the literal against the real file.
if st.checkbox('Show Anatomy Table'): | |
st.markdown(""" | |
## Anatomy Head to Toe Table with Body Organs Costly Conditions, Spending, CPT Codes and Frequency | |
| Table Num | Body Part | Organ/Part | Description | 📈 Costly Condition | 💰 Spending (billions) | CPT Range Start | CPT Range Finish | Frequency | |
|-----------|------------------|----------------------|-------------------------------|------------------------------|------------------------|-----------------|------------------|----------------| |
| 1 | 🧠 Head | 🧠 Brain | Controls mental processes | 😨 Anxiety & Depression | 210 | 90791 | 90899 | 1 in 5 | |
| 2 | 👀 Eyes | 👁️ Optic Nerve | Vision | 👓 Cataracts | 10.7 | 92002 | 92499 | 1 in 6 (over 40 years) | |
| 3 | 👂 Ears | 🐚 Cochlea | Hearing | 📢 Hearing Loss | 7.1 | 92502 | 92700 | 1 in 8 (over 12 years) | |
| 4 | 👃 Nose | 👃 Olfactory Bulb | Smell | 🤧 Allergies | 25 | 31231 | 31294 | 1 in 3 | |
| 5 | 👄 Mouth | 👅 Tongue | Taste | 🦷 Dental Issues | 130 | 00100 | 00192 | 1 in 2 | |
| 6 | 🫁 Neck | 🦋 Thyroid | Metabolism | 🦠 Hypothyroidism | 3.1 | 60210 | 60271 | 1 in 20 | |
| 7 | 💪 Upper Body | ❤️ Heart | Circulation | 💔 Heart Disease | 230 | 92920 | 93799 | 1 in 4 (over 65 years) | |
| 8 | 💪 Upper Body | 🫁 Lungs | Respiration | 😷 Chronic Obstructive Pulmonary Disease | 70 | 94002 | 94799 | 1 in 20 (over 45 years) | |
| 9 | 💪 Upper Body | 🍷 Liver | Detoxification | 🍺 Liver Disease | 40 | 47000 | 47999 | 1 in 10 | |
| 10 | 💪 Upper Body | 🍹 Kidneys | Filtration | 🌊 Chronic Kidney Disease | 110 | 50010 | 50999 | 1 in 7 | |
| 11 | 💪 Upper Body | 💉 Pancreas | Insulin secretion | 🍬 Diabetes | 327 | 48100 | 48999 | 1 in 10 | |
| 12 | 💪 Upper Body | 🍽️ Stomach | Digestion | 🔥 Gastroesophageal Reflux Disease | 17 | 43200 | 43289 | 1 in 5 | |
| 13 | 💪 Upper Body | 🛡️ Spleen | Immune functions | 🩸 Anemia | 5.6 | 38100 | 38199 | 1 in 6 | |
| 14 | 💪 Upper Body | 🫀 Blood Vessels | Circulation of blood | 🚑 Hypertension | 55 | 40110 | 40599 | 1 in 3 | |
| 15 | 🦵 Lower Body | 🍝 Colon | Absorption of water, minerals | 🌟 Colorectal Cancer | 14 | 45378 | 45378 | 1 in 23 | |
| 16 | 🦵 Lower Body | 🚽 Bladder | Urine excretion | 💧 Urinary Incontinence | 8 | 51700 | 51798 | 1 in 4 (over 65 years) | |
| 17 | 🦵 Lower Body | 💞 Reproductive Organs | Sex hormone secretion | 🎗️ Endometriosis | 22 | 56405 | 58999 | 1 in 10 (women) | |
| 18 | 🦶 Feet | 🎯 Nerve endings | Balance and movement | 🤕 Peripheral Neuropathy | 19 | 95900 | 96004 | 1 in 30 | |
| 19 | 🦶 Feet | 🌡️ Skin | Temperature regulation | 🌞 Skin Cancer | 8.1 | 96910 | 96999 | 1 in 5 | |
| 20 | 🦶 Feet | 💪 Muscles | Movement and strength | 🏋️♂️ Musculoskeletal Disorders | 176 | 97110 | 97799 | 1 in 2 | |
""") | |
# Body map: category heading -> organ/topic name -> list of three short
# function descriptions. Drives the glossary grid and the scored buttons.
body_map_data = {
    "🧠 Central Nervous System": {
        "Brain": ["Cognitive functions", "Emotion regulation", "Neural coordination"],
        "Spinal Cord": ["Nerve signal transmission", "Reflex actions", "Connects brain to body"],
    },
    "👀 Sensory Organs": {
        "Eyes": ["Vision", "Light perception", "Color differentiation"],
        "Ears": ["Hearing", "Balance maintenance", "Sound localization"],
        "Nose": ["Smell detection", "Olfactory signaling", "Air filtration"],
        "Tongue": ["Taste perception", "Texture sensing", "Temperature feeling"],
        "Skin": ["Touch sensation", "Temperature regulation", "Protection against pathogens"],
    },
    "🫁 Respiratory System": {
        "Lungs": ["Gas exchange", "Oxygen intake", "Carbon dioxide expulsion"],
        "Trachea": ["Airway protection", "Mucus secretion", "Cough reflex"],
    },
    "❤️ Circulatory System": {
        "Heart": ["Blood pumping", "Circulatory regulation", "Oxygen and nutrients distribution"],
        "Blood Vessels": ["Blood transport", "Nutrient delivery", "Waste removal"],
    },
    "🍽️ Digestive System": {
        "Stomach": ["Food breakdown", "Enzyme secretion", "Nutrient digestion"],
        "Intestines": ["Nutrient absorption", "Waste processing", "Microbiome hosting"],
    },
    "💪 Musculoskeletal System": {
        "Bones": ["Structural support", "Protection of organs", "Mineral storage"],
        "Muscles": ["Movement facilitation", "Posture maintenance", "Heat production"],
    },
    "🚽 Excretory System": {
        "Kidneys": ["Waste filtration", "Water balance", "Electrolyte regulation"],
        "Bladder": ["Urine storage", "Excretion control", "Toxin removal"],
    },
    "💞 Endocrine System": {
        "Thyroid": ["Metabolic regulation", "Hormone secretion", "Energy management"],
        "Adrenal Glands": ["Stress response", "Metabolism control", "Immune system regulation"],
    },
    "🧬 Reproductive System": {
        "Male Reproductive Organs": ["Sperm production", "Sexual function", "Hormone synthesis"],
        "Female Reproductive Organs": ["Egg production", "Fetus gestation", "Hormone regulation"],
    },
    "🩸 Immune System": {
        "White Blood Cells": ["Pathogen defense", "Infection response", "Immunity maintenance"],
        "Lymphatic System": ["Fluid balance", "Waste removal", "Antibody production"],
    },
    "🧘 Integrative Body Functions": {
        "Sleep Regulation": ["Rest and recovery", "Memory consolidation", "Energy conservation"],
        "Stress Management": ["Coping mechanisms", "Hormonal balance", "Emotional regulation"],
    },
    "🔬 Research and Innovations": {
        "Genetic Studies": ["Disease predisposition", "Trait inheritance", "Gene therapy"],
        "Biomedical Engineering": ["Medical devices", "Prosthetics design", "Healthcare technologies"],
    },
    "🎓 Education and Awareness": {
        "Anatomy and Physiology": ["Body structure", "Function understanding", "Health education"],
        "Public Health Initiatives": ["Disease prevention", "Health promotion", "Community wellness"],
    },
}
# Ensure the directory for storing scores exists | |
# Folder holding one JSON score file per button key; created on import so the
# score helpers can open files in it without checking first.
score_dir = "scores"
os.makedirs(score_dir, exist_ok=True)
# Function to generate a unique key for each button, including an emoji | |
def generate_key(label, header, idx):
    """Build a widget key of the form ``<header>_<label>_<idx>_key``."""
    return "{}_{}_{}_key".format(header, label, idx)
# Function to increment and save score | |
def update_score(key, increment=1):
    """Count one click for ``key`` and add ``increment`` to its saved score.

    The record lives in ``<score_dir>/<key>.json`` as
    ``{"clicks": int, "score": int}``; a missing file starts from zeros.

    Returns:
        The score after the update.
    """
    path = os.path.join(score_dir, f"{key}.json")
    record = {"clicks": 0, "score": 0}
    if os.path.exists(path):
        with open(path, "r") as handle:
            record = json.load(handle)
    record["clicks"] = record["clicks"] + 1
    record["score"] = record["score"] + increment
    with open(path, "w") as handle:
        json.dump(record, handle)
    return record["score"]
# Function to load score | |
def load_score(key):
    """Return the saved score for ``key``, or 0 when no score file exists."""
    path = os.path.join(score_dir, f"{key}.json")
    if not os.path.exists(path):
        return 0
    with open(path, "r") as handle:
        return json.load(handle)["score"]
def search_glossary(query, glossary=None):
    """Answer ``query`` with GPT, save the result, and offer it for read-aloud.

    The Llama pass is disabled in this file (its call is commented out), so the
    combined text contains only the query and the GPT response. The previous
    code still referenced the never-assigned ``response2`` and raised
    ``NameError`` on every call.

    Args:
        query: Prompt text; also written to the page as a heading.
        glossary: Unused. Accepted because an existing caller passes the
            glossary dict as a second positional argument.

    Returns:
        The combined ``# Query: ... # Response: ...`` string.
    """
    st.write('## ' + query)
    st.write('## 🔍 Running with GPT.')
    response = chat_with_model(query)
    filename = generate_filename(query + ' --- ' + response, "md")
    create_file(filename, query, response, should_save)

    # Named `combined` (not `all`) so the builtin is not shadowed.
    combined = '# Query: ' + query + '# Response: ' + response
    combined_filename = generate_filename(query + ' --- ' + combined, "md")
    create_file(combined_filename, query, combined, should_save)
    SpeechSynthesis(combined)
    return combined
# Function to display the glossary in a structured format | |
def display_glossary(glossary, area):
    """Render the numbered term lists for one ``area`` of ``glossary``.

    Does nothing when ``area`` is not a key of ``glossary``.
    """
    if area not in glossary:
        return
    st.subheader(f"📘 Glossary for {area}")
    for game, terms in glossary[area].items():
        st.markdown(f"### {game}")
        position = 0
        for term in terms:
            position += 1
            st.write(f"{position}. {term}")
# Function to display the entire glossary in a grid format with links | |
def display_glossary_grid(body_map_data):
    """Render every category as a row of columns with search links per term.

    Each term is followed by emoji links (Wikipedia, Google, YouTube, Bing and
    the companion Hugging Face Space) whose query is the URL-quoted term.
    """
    link_templates = (
        ("📖", "https://en.wikipedia.org/wiki/{}"),
        ("🔍", "https://www.google.com/search?q={}"),
        ("▶️", "https://www.youtube.com/results?search_query={}"),
        ("🔎", "https://www.bing.com/search?q={}"),
        ("🎲", "https://huggingface.co/spaces/awacke1/AI-ChatGPT-CPT-Body-Map-Cost?q={}"),  # this url plus query!
    )
    for category, details in body_map_data.items():
        st.write(f"### {category}")
        # One column per entry in this category.
        columns = st.columns(len(details))
        for column, (game, terms) in zip(columns, details.items()):
            with column:
                st.markdown(f"#### {game}")
                for term in terms:
                    encoded = quote(term)
                    links_md = ' '.join(
                        f"[{icon}]({template.format(encoded)})"
                        for icon, template in link_templates
                    )
                    st.markdown(f"{term} {links_md}", unsafe_allow_html=True)
# Emoji shown next to a game name; lookups fall back to a default at the call site.
game_emojis = {
    "Dungeons and Dragons": "🐉",
    "Call of Cthulhu": "🐙",
    "GURPS": "🎲",
    "Pathfinder": "🗺️",
    "Kindred of the East": "🌅",
    "Changeling": "🍃",
}

# Emoji shown next to a topic/category heading.
topic_emojis = {
    "Core Rulebooks": "📚",
    "Maps & Settings": "🗺️",
    "Game Mechanics & Tools": "⚙️",
    "Monsters & Adversaries": "👹",
    "Campaigns & Adventures": "📜",
    "Creatives & Assets": "🎨",
    "Game Master Resources": "🛠️",
    "Lore & Background": "📖",
    "Character Development": "🧍",
    "Homebrew Content": "🔧",
    "General Topics": "🌍",
}
# Adjusted display_buttons_with_scores function | |
def display_buttons_with_scores():
    """Render one scored button per term in ``body_map_data``.

    Clicking a button bumps its persisted score and sends a generated prompt
    for that term to ``search_glossary``. The previous code passed
    ``body_map_data`` as an extra positional argument, which the one-parameter
    ``search_glossary`` rejects with ``TypeError``; only the prompt is passed now.
    """
    for category, games in body_map_data.items():
        category_emoji = topic_emojis.get(category, "🔍")  # Default to search icon if no match
        st.markdown(f"## {category_emoji} {category}")
        for game, terms in games.items():
            game_emoji = game_emojis.get(game, "🎮")  # Default to generic game controller if no match
            for term in terms:
                # The key doubles as the Streamlit widget key and the score file name.
                key = f"{category}_{game}_{term}".replace(' ', '_').lower()
                score = load_score(key)
                if not st.button(f"{game_emoji} {term} {score}", key=key):
                    continue
                update_score(key)
                # Create a dynamic query incorporating emojis and formatting for clarity
                query_prefix = f"{category_emoji} {game_emoji} **{game} - {category}:**"
                query_body = f"Create a streamlit python app.py that produces a detailed markdown outline and CSV dataset user interface with an outline for **{term}** with subpoints highlighting key aspects, using emojis for visual engagement. Include step-by-step rules and boldface important entities and ruleset elements."
                search_glossary(query_prefix + query_body)
def fetch_wikipedia_summary(keyword):
    """Return a placeholder summary sentence for ``keyword``.

    No network request is made; a real app might call the Wikipedia API here.
    """
    return "Summary for {}. For more information, visit Wikipedia.".format(keyword)
def create_search_url_youtube(keyword):
    """Return a YouTube results URL for ``keyword``.

    The keyword is form-encoded (spaces become ``+``; ``&``, ``#``, ``+`` and
    ``?`` are percent-escaped) so it cannot break out of the query value.
    """
    from urllib.parse import quote_plus

    base_url = "https://www.youtube.com/results?search_query="
    return base_url + quote_plus(keyword)
def create_search_url_bing(keyword):
    """Return a Bing search URL for ``keyword``.

    The keyword is form-encoded (spaces become ``+``; ``&``, ``#``, ``+`` and
    ``?`` are percent-escaped) so it cannot break out of the query value.
    """
    from urllib.parse import quote_plus

    base_url = "https://www.bing.com/search?q="
    return base_url + quote_plus(keyword)
def create_search_url_wikipedia(keyword):
    """Return a Wikipedia search-redirect URL (English) for ``keyword``.

    The keyword is form-encoded (spaces become ``+``; ``&``, ``#``, ``+`` and
    ``?`` are percent-escaped) so it cannot add or truncate query parameters.
    """
    from urllib.parse import quote_plus

    base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
    return base_url + quote_plus(keyword)
def create_search_url_google(keyword):
    """Return a Google search URL for ``keyword``.

    The keyword is form-encoded (spaces become ``+``; ``&``, ``#``, ``+`` and
    ``?`` are percent-escaped) so it cannot break out of the query value.
    """
    from urllib.parse import quote_plus

    base_url = "https://www.google.com/search?q="
    return base_url + quote_plus(keyword)
def display_images_and_wikipedia_summaries():
    """Show each PNG in the working directory with search links for its name.

    The search keyword is the part of the file name before the first dot.
    """
    st.title('Gallery with Related Stories')
    png_names = [name for name in os.listdir('.') if name.endswith('.png')]
    if len(png_names) == 0:
        st.write("No PNG images found in the current directory.")
        return
    for png_name in png_names:
        st.image(Image.open(png_name), caption=png_name, use_column_width=True)
        keyword = png_name.split('.')[0]  # Assumes keyword is the file name without extension
        # Display Wikipedia and Google search links
        wikipedia_url = create_search_url_wikipedia(keyword)
        google_url = create_search_url_google(keyword)
        youtube_url = create_search_url_youtube(keyword)
        bing_url = create_search_url_bing(keyword)
        links_md = f"""
[Wikipedia]({wikipedia_url}) |
[Google]({google_url}) |
[YouTube]({youtube_url}) |
[Bing]({bing_url})
"""
        st.markdown(links_md)
def get_all_query_params(key):
    """Return every value of URL query parameter ``key`` as a list.

    ``st.query_params`` is a dict-like object, not a function, so the previous
    ``st.query_params()`` call raised ``TypeError``. Its ``get_all`` method
    returns all values for the key, or an empty list when the key is absent.
    Streamlit versions without ``st.query_params`` fall back to the older
    ``st.experimental_get_query_params()``.
    """
    params = getattr(st, "query_params", None)
    if params is not None:
        return params.get_all(key)
    return st.experimental_get_query_params().get(key, [])
def clear_query_params():
    """Remove all query parameters from the app URL.

    The previous body called ``st.query_params()``, which is not callable and
    cleared nothing. Streamlit versions without ``st.query_params`` fall back
    to ``st.experimental_set_query_params()`` with no arguments.
    """
    params = getattr(st, "query_params", None)
    if params is not None:
        params.clear()
    else:
        st.experimental_set_query_params()
# Function to display content or image based on a query | |
def display_content_or_image(query):
    """Show the first glossary term or image matching ``query``.

    Glossary terms are matched case-insensitively by substring; if none match,
    ``images/<query>.png`` is shown when it exists.

    Returns:
        True if something was displayed, False after warning that nothing matched.
    """
    # NOTE(review): `transhuman_glossary` is not defined in the visible part of
    # this file - confirm it exists before relying on this function.
    for category, terms in transhuman_glossary.items():
        first_hit = next((t for t in terms if query.lower() in t.lower()), None)
        if first_hit is not None:
            st.subheader(f"Found in {category}:")
            st.write(first_hit)
            return True  # Stop at the first match

    # Fall back to an image named after the query (adjust path as needed).
    image_dir = "images"
    image_path = f"{image_dir}/{query}.png"
    if not os.path.exists(image_path):
        st.warning("No matching content or image found.")
        return False
    st.image(image_path, caption=f"Image for {query}")
    return True
# 1. Constants and Top Level UI Variables | |
# My Inference API Copy | |
# Llama text-generation endpoint (private Inference Endpoint copy, "Dr Llama").
API_URL = 'https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud'
# Meta's original free hosted model, kept for reference:
# API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
API_KEY = os.getenv('API_KEY')
MODEL1 = "meta-llama/Llama-2-7b-chat-hf"
MODEL1URL = "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf"
HF_KEY = os.getenv('HF_KEY')
# JSON request headers for the Hugging Face endpoint.
headers = {
    "Authorization": f"Bearer {HF_KEY}",
    "Content-Type": "application/json",
}
key = os.getenv('OPENAI_API_KEY')
prompt = "Write instructions to teach discharge planning along with guidelines and patient education. List entities, features and relationships to CCDA and FHIR objects in boldface."
# Sidebar toggle read by the file-writing helpers.
should_save = st.sidebar.checkbox("💾 Save", value=True, help="Save your session data.")
def SpeechSynthesis(result):
    """Embed an HTML widget that reads ``result`` aloud in the browser.

    The text goes into a ``<textarea>`` and a button passes it to the
    browser's ``speechSynthesis`` API. The text is HTML-escaped first, so
    markup in a model reply (for example a literal ``</textarea>``) can no
    longer end the element early or inject HTML; the browser decodes the
    entities, so the spoken text is unchanged.

    Args:
        result: Text to read. ``None`` is treated as empty text; other
            non-string values are converted with ``str``.
    """
    import html

    text = "" if result is None else str(result)
    documentHTML5 = '''
<!DOCTYPE html>
<html>
<head>
<title>Read It Aloud</title>
<script type="text/javascript">
function readAloud() {
const text = document.getElementById("textArea").value;
const speech = new SpeechSynthesisUtterance(text);
window.speechSynthesis.speak(speech);
}
</script>
</head>
<body>
<h1>🔊 Read It Aloud</h1>
<textarea id="textArea" rows="10" cols="80">
'''
    documentHTML5 = documentHTML5 + html.escape(text, quote=False)
    documentHTML5 = documentHTML5 + '''
</textarea>
<br>
<button onclick="readAloud()">🔊 Read Aloud</button>
</body>
</html>
'''
    components.html(documentHTML5, width=1280, height=300)
# 3. Stream Llama Response | |
# @st.cache_resource | |
def StreamLLMChatResponse(prompt):
    """Stream a Llama completion for ``prompt`` into the page.

    Tokens from the endpoint at ``API_URL`` are accumulated and the running
    text is re-rendered in a single placeholder. Special tokens are skipped and
    streaming stops at the first stop sequence.

    Returns:
        The stripped generated text (empty if nothing was streamed), or
        ``None`` when the endpoint call fails, in which case a "model is
        asleep" notice is written instead.
    """
    try:
        endpoint_url = API_URL
        st.write('Running client ' + endpoint_url)
        client = InferenceClient(endpoint_url, token=API_KEY)
        gen_kwargs = dict(
            max_new_tokens=512,
            top_k=30,
            top_p=0.9,
            temperature=0.2,
            repetition_penalty=1.02,
            stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
        )
        stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
        res_box = st.empty()
        pieces = []
        # Initialised up front: previously an empty stream left `result`
        # unbound and the resulting error was reported as "model is asleep".
        result = ''
        for r in stream:
            if r.token.special:
                continue
            if r.token.text in gen_kwargs["stop_sequences"]:
                break
            pieces.append(r.token.text)
            if r.token.text:
                result = "".join(pieces).strip()
                try:
                    res_box.markdown(f'*{result}*')
                except Exception:
                    # A display hiccup should not abort the generation.
                    st.write('Stream llm issue')
        SpeechSynthesis(result)
        return result
    except Exception:
        st.write('Llama model is asleep. Starting up now on A10 - please give 5 minutes then retry as KEDA scales up from zero to activate running container(s).')
        return None
# 4. Run query with payload | |
def query(payload):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded reply.

    The decoded reply is also rendered on the page.
    NOTE(review): a later ``def query(filename)`` in this file rebinds the
    name, so at runtime this version is replaced.
    """
    reply = requests.post(API_URL, headers=headers, json=payload)
    st.markdown(reply.json())
    return reply.json()
def get_output(prompt):
    """Send ``prompt`` to ``query`` wrapped as ``{"inputs": prompt}``."""
    payload = {"inputs": prompt}
    return query(payload)
# 5. Auto name generated output files from time and content | |
def generate_filename(prompt, file_type):
    """Build ``<MMDD_HHMM>_<sanitised prompt>.<file_type>`` (US/Central time).

    Spaces and newlines become underscores, every other character that is not
    alphanumeric or ``_`` is dropped, and the prompt part is capped at 255
    characters (255 is linux max, 260 is windows max).
    """
    stamp = datetime.now(pytz.timezone('US/Central')).strftime("%m%d_%H%M")
    underscored = prompt.replace(" ", "_").replace("\n", "_")
    kept = [ch for ch in underscored if ch.isalnum() or ch == "_"]
    safe_prompt = "".join(kept)[:255]
    return f"{stamp}_{safe_prompt}.{file_type}"
# 6. Speech transcription via OpenAI service | |
def transcribe_audio(openai_key, file_path, model):
    """Transcribe an audio file with OpenAI, then chat about the transcript.

    On HTTP 200 the transcript is sent to ``chat_with_model`` and the exchange
    is saved through ``create_file``.

    Returns:
        The transcript text, or ``None`` when the API call fails.
    """
    openai.api_key = openai_key
    OPENAI_API_URL = "https://api.openai.com/v1/audio/transcriptions"
    auth_headers = {"Authorization": f"Bearer {openai_key}"}
    with open(file_path, 'rb') as audio:
        st.write('STT transcript ' + OPENAI_API_URL)
        response = requests.post(
            OPENAI_API_URL,
            headers=auth_headers,
            files={'file': audio},
            data={'model': model},
        )
    if response.status_code != 200:
        st.write(response.json())
        st.error("Error in API call.")
        return None
    st.write(response.json())
    transcript = response.json().get('text')
    chat_reply = chat_with_model(transcript, '')
    filename = generate_filename(transcript, 'txt')
    create_file(filename, transcript, chat_reply, should_save)
    return transcript
# 7. Auto stop on silence audio control for recording WAV files | |
def save_and_play_audio(audio_recorder):
    """Record audio, save it as a WAV file and play it back.

    Args:
        audio_recorder: Callable invoked with ``key='audio_recorder'`` that
            returns the recorded bytes, or a falsy value if nothing was recorded.

    Returns:
        The name of the written file, or ``None`` if nothing was recorded.
    """
    recording = audio_recorder(key='audio_recorder')
    if not recording:
        return None
    filename = generate_filename("Recording", "wav")
    with open(filename, 'wb') as wav_file:
        wav_file.write(recording)
    st.audio(recording, format="audio/wav")
    return filename
# 8. File creator that interprets type and creates output file for text, markdown and code | |
def create_file(filename, prompt, response, should_save=True):
    """Save a prompt/response pair as ``<base name>.md``.

    Only ``.txt``, ``.htm`` and ``.md`` file names are written (always with a
    ``.md`` suffix); any other extension is ignored. The content is the
    stripped prompt, a CRLF, then the response.

    The file is written as UTF-8 with no newline translation, so emoji-heavy
    content no longer depends on the platform default encoding. The content is
    built before the file is opened, so a failure no longer leaves an empty
    file behind a silent bare ``except``.

    Args:
        filename: Target name; only its base name and extension are used.
        prompt: Prompt text.
        response: Response text; ``None`` is written as empty text.
        should_save: When false, nothing is written.
    """
    if not should_save:
        return
    base_filename, ext = os.path.splitext(filename)
    if ext not in ('.txt', '.htm', '.md'):
        return
    content = prompt.strip() + '\r\n' + (response or '')
    try:
        with open(f"{base_filename}.md", 'w', encoding='utf-8', newline='') as file:
            file.write(content)
    except OSError:
        # Best-effort save: keep the app running if the disk write fails.
        st.write('.')
#has_python_code = re.search(r"```python([\s\S]*?)```", prompt.strip() + '\r\n' + response) | |
#has_python_code = bool(re.search(r"```python([\s\S]*?)```", prompt.strip() + '\r\n' + response)) | |
#if has_python_code: | |
# python_code = re.findall(r"```python([\s\S]*?)```", response)[0].strip() | |
# with open(f"{base_filename}-Code.py", 'w') as file: | |
# file.write(python_code) | |
# with open(f"{base_filename}.md", 'w') as file: | |
# content = prompt.strip() + '\r\n' + response | |
# file.write(content) | |
def truncate_document(document, length):
    """Return the first ``length`` items of ``document`` (plain slice semantics)."""
    head = document[:length]
    return head
def divide_document(document, max_length):
    """Split ``document`` into consecutive slices of at most ``max_length`` items."""
    sections = []
    for start in range(0, len(document), max_length):
        sections.append(document[start:start + max_length])
    return sections
# 9. Sidebar with UI controls to review and re-run prompts and continue responses | |
def get_table_download_link(file_path):
    """Return an HTML ``<a>`` tag that downloads ``file_path`` via a data URI.

    The file is read as raw bytes and base64-encoded. The previous text-mode
    read raised ``UnicodeDecodeError`` on binary files such as ``.wav``, which
    the MIME table explicitly supports, and altered line endings.

    Args:
        file_path: Path of the file to embed.

    Returns:
        The anchor tag; the link text and download name are the file's base name.
    """
    with open(file_path, 'rb') as file:
        b64 = base64.b64encode(file.read()).decode()
    file_name = os.path.basename(file_path)
    ext = os.path.splitext(file_name)[1]  # get the file extension
    mime_types = {
        '.txt': 'text/plain',
        '.py': 'text/plain',
        '.xlsx': 'text/plain',
        '.csv': 'text/plain',
        '.htm': 'text/html',
        '.md': 'text/markdown',
        '.wav': 'audio/wav',
    }
    # Anything unlisted is offered as general binary data.
    mime_type = mime_types.get(ext, 'application/octet-stream')
    href = f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{file_name}">{file_name}</a>'
    return href
def CompressXML(xml_text):
    """Return ``xml_text`` re-serialised without elements whose tag contains 'Comment'.

    ElementTree elements have no ``parent`` attribute, so the previous
    ``elem.parent.remove(elem)`` raised ``AttributeError`` on the first match.
    Matching children are now removed through their parent. The root element
    itself is never removed.

    Args:
        xml_text: XML document as a string.

    Returns:
        The filtered document serialised as a unicode string.
    """
    root = ET.fromstring(xml_text)
    # Snapshot both levels so removal does not disturb iteration.
    for parent in list(root.iter()):
        for child in list(parent):
            if isinstance(child.tag, str) and 'Comment' in child.tag:
                parent.remove(child)
    return ET.tostring(root, encoding='unicode', method="xml")
# 10. Read in and provide UI for past files | |
def read_file_content(file,max_length): | |
if file.type == "application/json": | |
content = json.load(file) | |
return str(content) | |
elif file.type == "text/html" or file.type == "text/htm": | |
content = BeautifulSoup(file, "html.parser") | |
return content.text | |
elif file.type == "application/xml" or file.type == "text/xml": | |
tree = ET.parse(file) | |
root = tree.getroot() | |
xml = CompressXML(ET.tostring(root, encoding='unicode')) | |
return xml | |
elif file.type == "text/markdown" or file.type == "text/md": | |
md = mistune.create_markdown() | |
content = md(file.read().decode()) | |
return content | |
elif file.type == "text/plain": | |
return file.getvalue().decode() | |
else: | |
return "" | |
# 11. Chat with GPT - Caution on quota | |
def chat_with_model(prompt, document_section='', model_choice='gpt-3.5-turbo'):
    """Stream a chat completion for ``prompt`` and return the full reply.

    Fixes over the previous version:
    - ``model_choice`` is now used; it was ignored in favour of a hard-coded
      ``'gpt-3.5-turbo'`` (the default keeps the old behaviour).
    - Chunks without text (role-only or final deltas) are skipped. Previously
      ``None`` was appended to the accumulator, so every later ``join`` failed
      inside a bare ``except`` and the live display stopped updating.

    Args:
        prompt: User message.
        document_section: Optional context, appended as an assistant message
            when non-empty.
        model_choice: Model name passed to ``openai.ChatCompletion.create``.

    Returns:
        The concatenated reply text.
    """
    conversation = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': prompt},
    ]
    if document_section:
        conversation.append({'role': 'assistant', 'content': document_section})
    start_time = time.time()
    pieces = []
    res_box = st.empty()
    st.write('LLM stream ' + model_choice)
    stream = openai.ChatCompletion.create(
        model=model_choice,
        messages=conversation,
        temperature=0.5,
        stream=True,
    )
    for chunk in stream:
        content = chunk["choices"][0].get("delta", {}).get("content")
        if not content:
            continue
        pieces.append(content)
        result = "".join(pieces).strip()
        res_box.markdown(f'*{result}*')
    full_reply_content = ''.join(pieces)
    st.write("Elapsed time:")
    st.write(time.time() - start_time)
    return full_reply_content
def extract_mime_type(file):
    """Return the MIME type of an uploaded file or of its string repr.

    Args:
        file: Either a string containing ``type='...'`` or an uploaded-file
            object exposing a ``type`` attribute.

    Returns:
        The MIME type string.

    Raises:
        ValueError: A string was given but contains no ``type='...'`` part.
        TypeError: The input is neither a string nor an object with ``type``.
    """
    if isinstance(file, str):
        match = re.search(r"type='(.*?)'", file)
        if match:
            return match.group(1)
        raise ValueError(f"Unable to extract MIME type from {file}")
    # Duck-typed: the old isinstance check named `streamlit.UploadedFile`, but
    # `streamlit` is only imported as `st`, so it raised NameError.
    if hasattr(file, "type"):
        return file.type
    raise TypeError("Input should be a string or a streamlit.UploadedFile object")
def extract_file_extension(file):
    """Return the extension of ``file.name`` (text after the last dot).

    The previous regex captured everything after the FIRST dot, so
    ``report.v2.pdf`` yielded ``v2.pdf`` and was not recognised as a PDF.

    Args:
        file: Object with a ``name`` attribute (e.g. an uploaded file).

    Returns:
        The extension without the dot; may be empty for names ending in a dot.

    Raises:
        ValueError: The name contains no dot at all.
    """
    # get the file name directly from the UploadedFile object
    file_name = file.name
    _, dot, extension = file_name.rpartition('.')
    if not dot:
        raise ValueError(f"Unable to extract file extension from {file_name}")
    return extension
# Normalize input as text from PDF and other formats | |
def pdf2txt(docs):
    """Concatenate the text of uploaded files.

    Text-like files (py, txt, html, htm, xml, json) are decoded as UTF-8; PDFs
    are read page by page with PyPDF2. Other extensions contribute nothing.
    """
    text_like = ['py', 'txt', 'html', 'htm', 'xml', 'json']
    text = ""
    for file in docs:
        file_extension = extract_file_extension(file)
        st.write(f"File type extension: {file_extension}")
        extension = file_extension.lower()
        if extension in text_like:
            text += file.getvalue().decode('utf-8')
        elif extension == 'pdf':
            from PyPDF2 import PdfReader
            reader = PdfReader(BytesIO(file.getvalue()))
            for page_number in range(len(reader.pages)):
                text += reader.pages[page_number].extract_text()  # new PyPDF2 syntax
    return text
def txt2chunks(text):
    """Split ``text`` on newlines into 1000-character chunks with 200 overlap."""
    # NOTE(review): `CharacterTextSplitter` is not imported in the visible part
    # of this file - confirm the import exists.
    splitter_options = dict(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
    return CharacterTextSplitter(**splitter_options).split_text(text)
# Vector Store using FAISS | |
def vector_store(text_chunks):
    """Build a FAISS store from ``text_chunks`` using OpenAI embeddings."""
    # NOTE(review): `OpenAIEmbeddings` and `FAISS` are not imported in the
    # visible part of this file - confirm the imports exist.
    embedder = OpenAIEmbeddings(openai_api_key=key)
    store = FAISS.from_texts(texts=text_chunks, embedding=embedder)
    return store
# Memory and Retrieval chains | |
def get_chain(vectorstore):
    """Create a conversational retrieval chain over ``vectorstore`` with chat memory."""
    # NOTE(review): `ChatOpenAI`, `ConversationBufferMemory` and
    # `ConversationalRetrievalChain` are not imported in the visible part of
    # this file - confirm the imports exist.
    llm = ChatOpenAI()
    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    retriever = vectorstore.as_retriever()
    return ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)
def process_user_input(user_question):
    """Run ``user_question`` through the stored conversation chain and render it.

    The chain's chat history is saved in session state; each message is
    rendered with the user or bot template (alternating, user first) and saved
    with ``create_file``.
    """
    # NOTE(review): `user_template` / `bot_template` come from an import that
    # is commented out at the top of this file - confirm they are defined.
    reply = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = reply['chat_history']
    for position, message in enumerate(st.session_state.chat_history):
        if position % 2 == 0:
            template = user_template
        else:
            template = bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
        filename = generate_filename(user_question, 'txt')
        create_file(filename, user_question, message.content, should_save)
def divide_prompt(prompt, max_length):
    """Split ``prompt`` on whitespace into chunks of at most ``max_length`` chars.

    Words are packed greedily and joined with single spaces. A single word
    longer than ``max_length`` becomes its own (over-long) chunk.

    Fixes over the previous version:
    - no empty leading chunk when the first word exceeds ``max_length``;
    - the separating space is always counted, so a chunk can no longer exceed
      ``max_length`` by one character;
    - an empty or whitespace-only prompt returns ``[]`` instead of ``['']``.

    Args:
        prompt: Text to split.
        max_length: Maximum chunk length in characters.

    Returns:
        The list of chunks, in order.
    """
    chunks = []
    current_chunk = []
    current_length = 0
    for word in prompt.split():
        # Cost of adding this word: its length plus a joining space if needed.
        needed = len(word) + (1 if current_chunk else 0)
        if current_chunk and current_length + needed > max_length:
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length += needed
    if current_chunk:
        chunks.append(' '.join(current_chunk))
    return chunks
# 13. Provide way of saving all and deleting all to give way of reviewing output and saving locally before clearing it | |
def create_zip_of_files(files):
    """Write ``files`` into ``all_files.zip`` in the working directory.

    Returns:
        The archive name, ``"all_files.zip"``.
    """
    zip_name = "all_files.zip"
    archive = zipfile.ZipFile(zip_name, 'w')
    with archive:
        for path in files:
            archive.write(path)
    return zip_name
def get_zip_download_link(zip_file):
    """Return a "Download All" anchor tag embedding ``zip_file`` as a base64 data URI."""
    with open(zip_file, 'rb') as archive:
        encoded = base64.b64encode(archive.read()).decode()
    return f'<a href="data:application/zip;base64,{encoded}" download="{zip_file}">Download All</a>'
# 14. Inference Endpoints for Whisper (best fastest STT) on NVIDIA T4 and Llama (best fastest AGI LLM) on NVIDIA A10 | |
# My Inference Endpoint | |
# Private Whisper Inference Endpoint, kept for reference; the next assignment
# replaces it with the public hosted model.
API_URL_IE = 'https://tonpixzfvq3791u9.us-east-1.aws.endpoints.huggingface.cloud'
# Original
API_URL_IE = "https://api-inference.huggingface.co/models/openai/whisper-small.en"
MODEL2 = "openai/whisper-small.en"
MODEL2_URL = "https://huggingface.co/openai/whisper-small.en"
# The token now comes from Streamlit secrets (previously os.getenv('HF_KEY')).
# From here on the module-level `headers` describe a WAV upload, not JSON.
HF_KEY = st.secrets['HF_KEY']
headers = {
    "Authorization": f"Bearer {HF_KEY}",
    "Content-Type": "audio/wav",
}
#@st.cache_resource | |
def query(filename):
    """POST the raw bytes of ``filename`` to ``API_URL_IE`` and return the decoded JSON reply."""
    with open(filename, "rb") as audio:
        payload = audio.read()
    reply = requests.post(API_URL_IE, headers=headers, data=payload)
    return reply.json()
def generate_filename(prompt, file_type):
    """Build ``<MMDD_HHMM>_<sanitised prompt>.<file_type>`` (US/Central time).

    Spaces and newlines become underscores, every other character that is not
    alphanumeric or ``_`` is dropped, and the prompt part is capped at 90
    characters.
    """
    stamp = datetime.now(pytz.timezone('US/Central')).strftime("%m%d_%H%M")
    underscored = prompt.replace(" ", "_").replace("\n", "_")
    kept = [ch for ch in underscored if ch.isalnum() or ch == "_"]
    safe_prompt = "".join(kept)[:90]
    return f"{stamp}_{safe_prompt}.{file_type}"
# 15. Audio recorder to Wav file | |
def save_and_play_audio(audio_recorder):
    """Record audio, save it as a WAV file and play it back.

    Args:
        audio_recorder: Zero-argument callable returning the recorded bytes,
            or a falsy value if nothing was recorded.

    Returns:
        The name of the written file, or ``None`` if nothing was recorded.
    """
    recording = audio_recorder()
    if not recording:
        return None
    filename = generate_filename("Recording", "wav")
    with open(filename, 'wb') as wav_file:
        wav_file.write(recording)
    st.audio(recording, format="audio/wav")
    return filename
# 16. Speech transcription to file output | |
def transcribe_audio(filename):
    """Send the audio file to the Whisper endpoint via ``query`` and return its reply unchanged."""
    return query(filename)
def whisper_main():
    """Record speech, transcribe it with Whisper, then answer it with GPT.

    The recording is saved and played back, the transcript is shown, and the
    GPT reply is displayed and saved. When transcription yields no text (for
    example the endpoint returned an error payload), the raw reply is shown as
    a warning and the GPT step is skipped. Previously an empty prompt was
    still sent to the model, spending quota for nothing.
    """
    # Audio, transcribe, GPT:
    filename = save_and_play_audio(audio_recorder)
    if filename is None:
        return
    transcription = transcribe_audio(filename)
    try:
        transcript = transcription['text']
    except (KeyError, TypeError):
        # Replies without a 'text' field are treated as "no transcript".
        transcript = ''
    st.write(transcript)
    if not transcript:
        st.warning(f"No transcript returned: {transcription}")
        return
    # Whisper to GPT
    st.write('Reasoning with your inputs with GPT..')
    response = chat_with_model(transcript)
    st.write('Response:')
    st.write(response)
    filename = generate_filename(response, "txt")
    create_file(filename, transcript, response, should_save)
# Whisper to Llama: | |
#response = StreamLLMChatResponse(transcript) | |
#filename_txt = generate_filename(transcript, "md") | |
#create_file(filename_txt, transcript, response, should_save) | |
#filename_wav = filename_txt.replace('.txt', '.wav') | |
#import shutil | |
#try: | |
# if os.path.exists(filename): | |
# shutil.copyfile(filename, filename_wav) | |
#except: | |
# st.write('.') | |
#if os.path.exists(filename): | |
# os.remove(filename) | |
#st.experimental_rerun() | |
#except: | |
# st.write('Starting Whisper Model on GPU. Please retry in 30 seconds.') | |
# Sample function to demonstrate a response, replace with your own logic | |
def StreamMedChatResponse(topic):
    """Placeholder response: write a one-line notice naming ``topic``."""
    notice = f"Showing resources or questions related to: {topic}"
    st.write(notice)
# 17. Main | |
def main(): | |
prompt = f"Write ten funny jokes that are tweet length stories that make you laugh. Show as markdown outline with emojis for each." | |
# Add Wit and Humor buttons | |
# add_witty_humor_buttons() | |
# add_medical_exam_buttons() | |
with st.expander("Prompts 📚", expanded=False): | |
example_input = st.text_input("Enter your prompt text for Llama:", value=prompt, help="Enter text to get a response from DromeLlama.") | |
if st.button("Run Prompt With Llama model", help="Click to run the prompt."): | |
try: | |
response=StreamLLMChatResponse(example_input) | |
create_file(filename, example_input, response, should_save) | |
except: | |
st.write('Llama model is asleep. Starting now on A10 GPU. Please wait one minute then retry. KEDA triggered.') | |
openai.api_key = os.getenv('OPENAI_API_KEY') | |
if openai.api_key == None: openai.api_key = st.secrets['OPENAI_API_KEY'] | |
menu = ["txt", "htm", "xlsx", "csv", "md", "py"] | |
choice = st.sidebar.selectbox("Output File Type:", menu) | |
model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301')) | |
user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100) | |
collength, colupload = st.columns([2,3]) # adjust the ratio as needed | |
with collength: | |
max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000) | |
with colupload: | |
uploaded_file = st.file_uploader("Add a file for context:", type=["pdf", "xml", "json", "xlsx", "csv", "html", "htm", "md", "txt"]) | |
document_sections = deque() | |
document_responses = {} | |
if uploaded_file is not None: | |
file_content = read_file_content(uploaded_file, max_length) | |
document_sections.extend(divide_document(file_content, max_length)) | |
if len(document_sections) > 0: | |
if st.button("👁️ View Upload"): | |
st.markdown("**Sections of the uploaded file:**") | |
for i, section in enumerate(list(document_sections)): | |
st.markdown(f"**Section {i+1}**\n{section}") | |
st.markdown("**Chat with the model:**") | |
for i, section in enumerate(list(document_sections)): | |
if i in document_responses: | |
st.markdown(f"**Section {i+1}**\n{document_responses[i]}") | |
else: | |
if st.button(f"Chat about Section {i+1}"): | |
st.write('Reasoning with your inputs...') | |
#response = chat_with_model(user_prompt, section, model_choice) | |
st.write('Response:') | |
st.write(response) | |
document_responses[i] = response | |
filename = generate_filename(f"{user_prompt}_section_{i+1}", choice) | |
create_file(filename, user_prompt, response, should_save) | |
st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True) | |
if st.button('💬 Chat'): | |
st.write('Reasoning with your inputs...') | |
user_prompt_sections = divide_prompt(user_prompt, max_length) | |
full_response = '' | |
for prompt_section in user_prompt_sections: | |
response = chat_with_model(prompt_section, ''.join(list(document_sections)), model_choice) | |
full_response += response + '\n' # Combine the responses | |
response = full_response | |
st.write('Response:') | |
st.write(response) | |
filename = generate_filename(user_prompt, choice) | |
create_file(filename, user_prompt, response, should_save) | |
# Compose a file sidebar of markdown md files: | |
all_files = glob.glob("*.md") | |
all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10] # exclude files with short names | |
all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by file type and file name in descending order | |
if st.sidebar.button("🗑 Delete All Text"): | |
for file in all_files: | |
os.remove(file) | |
st.experimental_rerun() | |
if st.sidebar.button("⬇️ Download All"): | |
zip_file = create_zip_of_files(all_files) | |
st.sidebar.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True) | |
file_contents='' | |
next_action='' | |
for file in all_files: | |
col1, col2, col3, col4, col5 = st.sidebar.columns([1,6,1,1,1]) # adjust the ratio as needed | |
with col1: | |
if st.button("🌐", key="md_"+file): # md emoji button | |
with open(file, 'r') as f: | |
file_contents = f.read() | |
next_action='md' | |
with col2: | |
st.markdown(get_table_download_link(file), unsafe_allow_html=True) | |
with col3: | |
if st.button("📂", key="open_"+file): # open emoji button | |
with open(file, 'r') as f: | |
file_contents = f.read() | |
next_action='open' | |
with col4: | |
if st.button("🔍", key="read_"+file): # search emoji button | |
with open(file, 'r') as f: | |
file_contents = f.read() | |
next_action='search' | |
with col5: | |
if st.button("🗑", key="delete_"+file): | |
os.remove(file) | |
st.experimental_rerun() | |
if len(file_contents) > 0: | |
if next_action=='open': | |
file_content_area = st.text_area("File Contents:", file_contents, height=500) | |
if next_action=='md': | |
st.markdown(file_contents) | |
buttonlabel = '🔍Run with Llama and GPT.' | |
if st.button(key='RunWithLlamaandGPT', label = buttonlabel): | |
user_prompt = file_contents | |
# Llama versus GPT Battle! | |
all="" | |
try: | |
st.write('🔍Running with Llama.') | |
response = StreamLLMChatResponse(file_contents) | |
filename = generate_filename(user_prompt, "md") | |
create_file(filename, file_contents, response, should_save) | |
all=response | |
#SpeechSynthesis(response) | |
except: | |
st.markdown('Llama is sleeping. Restart ETA 30 seconds.') | |
# gpt | |
try: | |
st.write('🔍Running with GPT.') | |
response2 = chat_with_model(user_prompt, file_contents, model_choice) | |
filename2 = generate_filename(file_contents, choice) | |
create_file(filename2, user_prompt, response, should_save) | |
all=all+response2 | |
#SpeechSynthesis(response2) | |
except: | |
st.markdown('GPT is sleeping. Restart ETA 30 seconds.') | |
SpeechSynthesis(all) | |
if next_action=='search': | |
file_content_area = st.text_area("File Contents:", file_contents, height=500) | |
st.write('🔍Running with Llama and GPT.') | |
user_prompt = file_contents | |
# Llama versus GPT Battle! | |
all="" | |
try: | |
st.write('🔍Running with Llama.') | |
response = StreamLLMChatResponse(file_contents) | |
filename = generate_filename(user_prompt, ".md") | |
create_file(filename, file_contents, response, should_save) | |
all=response | |
#SpeechSynthesis(response) | |
except: | |
st.markdown('Llama is sleeping. Restart ETA 30 seconds.') | |
# gpt | |
try: | |
st.write('🔍Running with GPT.') | |
response2 = chat_with_model(user_prompt, file_contents, model_choice) | |
filename2 = generate_filename(file_contents, choice) | |
create_file(filename2, user_prompt, response, should_save) | |
all=all+response2 | |
#SpeechSynthesis(response2) | |
except: | |
st.markdown('GPT is sleeping. Restart ETA 30 seconds.') | |
SpeechSynthesis(all) | |
# Function to encode file to base64 | |
def get_base64_encoded_file(file_path): | |
with open(file_path, "rb") as file: | |
return base64.b64encode(file.read()).decode() | |
# Function to create a download link | |
def get_audio_download_link(file_path): | |
base64_file = get_base64_encoded_file(file_path) | |
return f'<a href="data:file/wav;base64,{base64_file}" download="{os.path.basename(file_path)}">⬇️ Download Audio</a>' | |
# Compose a file sidebar of past encounters | |
all_files = glob.glob("*.wav") | |
all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10] # exclude files with short names | |
all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by file type and file name in descending order | |
filekey = 'delall' | |
if st.sidebar.button("🗑 Delete All Audio", key=filekey): | |
for file in all_files: | |
os.remove(file) | |
st.experimental_rerun() | |
for file in all_files: | |
col1, col2 = st.sidebar.columns([6, 1]) # adjust the ratio as needed | |
with col1: | |
st.markdown(file) | |
if st.button("🎵", key="play_" + file): # play emoji button | |
audio_file = open(file, 'rb') | |
audio_bytes = audio_file.read() | |
st.audio(audio_bytes, format='audio/wav') | |
#st.markdown(get_audio_download_link(file), unsafe_allow_html=True) | |
#st.text_input(label="", value=file) | |
with col2: | |
if st.button("🗑", key="delete_" + file): | |
os.remove(file) | |
st.experimental_rerun() | |
# Feedback | |
# Step: Give User a Way to Upvote or Downvote | |
GiveFeedback=False | |
if GiveFeedback: | |
with st.expander("Give your feedback 👍", expanded=False): | |
feedback = st.radio("Step 8: Give your feedback", ("👍 Upvote", "👎 Downvote")) | |
if feedback == "👍 Upvote": | |
st.write("You upvoted 👍. Thank you for your feedback!") | |
else: | |
st.write("You downvoted 👎. Thank you for your feedback!") | |
load_dotenv() | |
st.write(css, unsafe_allow_html=True) | |
st.header("Chat with documents :books:") | |
user_question = st.text_input("Ask a question about your documents:") | |
if user_question: | |
process_user_input(user_question) | |
with st.sidebar: | |
st.subheader("Your documents") | |
docs = st.file_uploader("import documents", accept_multiple_files=True) | |
with st.spinner("Processing"): | |
raw = pdf2txt(docs) | |
if len(raw) > 0: | |
length = str(len(raw)) | |
text_chunks = txt2chunks(raw) | |
vectorstore = vector_store(text_chunks) | |
st.session_state.conversation = get_chain(vectorstore) | |
st.markdown('# AI Search Index of Length:' + length + ' Created.') # add timing | |
filename = generate_filename(raw, 'txt') | |
create_file(filename, raw, '', should_save) | |
# Relocated! Hope you like your new space - enjoy! | |
# Display instructions and handle query parameters | |
#st.markdown("## Glossary Lookup\nEnter a term in the URL query, like `?q=Nanotechnology` or `?query=Martian Syndicate`.") | |
st.markdown(''' | |
### Mixable AI 🃏🚀📚 | |
''') | |
try: | |
query_params = st.query_params | |
#query = (query_params.get('q') or query_params.get('query') or [''])[0] | |
query = (query_params.get('q') or query_params.get('query') or ['']) | |
st.markdown('# Running query: ' + query) | |
if query: search_glossary(query) | |
except: | |
st.markdown('No glossary lookup') | |
# Display the glossary grid | |
st.title("Body Map Glossary 🎲") | |
display_glossary_grid(body_map_data) | |
st.title("🎲🗺️ Card Game Universe") | |
st.markdown("## Explore the vast universes of Dungeons and Dragons, Call of Cthulhu, GURPS, and more through interactive storytelling and encyclopedic knowledge.🌠") | |
display_buttons_with_scores() | |
display_images_and_wikipedia_summaries() | |
# Assuming the transhuman_glossary and other setup code remains the same | |
#st.write("Current Query Parameters:", st.query_params) | |
#st.markdown("### Query Parameters - These Deep Link Map to Remixable Methods, Navigate or Trigger Functionalities") | |
# Example: Using query parameters to navigate or trigger functionalities | |
if 'action' in st.query_params: | |
action = st.query_params()['action'][0] # Get the first (or only) 'action' parameter | |
if action == 'show_message': | |
st.success("Showing a message because 'action=show_message' was found in the URL.") | |
elif action == 'clear': | |
clear_query_params() | |
st.experimental_rerun() | |
# Handling repeated keys | |
if 'multi' in st.query_params: | |
multi_values = get_all_query_params('multi') | |
st.write("Values for 'multi':", multi_values) | |
# Manual entry for demonstration | |
st.write("Enter query parameters in the URL like this: ?action=show_message&multi=1&multi=2") | |
if 'query' in st.query_params: | |
query = st.query_params['query'][0] # Get the query parameter | |
# Display content or image based on the query | |
display_content_or_image(query) | |
# Add a clear query parameters button for convenience | |
if st.button("Clear Query Parameters", key='ClearQueryParams'): | |
# This will clear the browser URL's query parameters | |
st.experimental_set_query_params | |
st.experimental_rerun() | |
# 18. Run AI Pipeline | |
if __name__ == "__main__":
    # Voice capture renders first, followed by the main page.
    for stage in (whisper_main, main):
        stage()