# Hugging Face Spaces status banner captured with this file: "Runtime error".
import streamlit as st | |
import os | |
import json | |
from PIL import Image | |
from urllib.parse import quote # Ensure this import is included | |
import base64 | |
import glob | |
import json | |
import math | |
import openai | |
import os | |
import pytz | |
import re | |
import requests | |
import textract | |
import time | |
import zipfile | |
import huggingface_hub | |
import dotenv | |
from audio_recorder_streamlit import audio_recorder | |
from bs4 import BeautifulSoup | |
from collections import deque | |
from datetime import datetime | |
from dotenv import load_dotenv | |
from huggingface_hub import InferenceClient | |
from io import BytesIO | |
from openai import ChatCompletion | |
from PyPDF2 import PdfReader | |
#from templates import bot_template, css, user_template -- pattern with content | |
from xml.etree import ElementTree as ET | |
import streamlit.components.v1 as components # Import Streamlit Components for HTML5 | |
# Set page configuration with a title and favicon | |
st.set_page_config( | |
page_title="๐๐ Mixable AI - Voice Search", | |
page_icon="๐ ", | |
layout="wide", | |
initial_sidebar_state="expanded", | |
menu_items={ | |
'Get Help': 'https://huggingface.co/awacke1', | |
'Report a bug': "https://huggingface.co/spaces/awacke1/WebDataDownload", | |
'About': "# Midjourney: https://discord.com/channels/@me/997514686608191558" | |
} | |
) | |
st.markdown(""" | |
## Anatomy Head to Toe Table with Body Organs Costly Conditions, Spending, CPT Codes and Frequency | |
| Table Num | Body Part | Organ/Part | Description | ๐ Costly Condition | ๐ฐ Spending (billions) | CPT Range Start | CPT Range Finish | Frequency | | |
|-----------|------------------|----------------------|-------------------------------|------------------------------|------------------------|-----------------|------------------|----------------| | |
| 1 | ๐ง Head | ๐ง Brain | Controls mental processes | ๐จ Anxiety & Depression | 210 | 90791 | 90899 | 1 in 5 | | |
| 2 | ๐ Eyes | ๐๏ธ Optic Nerve | Vision | ๐ Cataracts | 10.7 | 92002 | 92499 | 1 in 6 (over 40 years) | | |
| 3 | ๐ Ears | ๐ Cochlea | Hearing | ๐ข Hearing Loss | 7.1 | 92502 | 92700 | 1 in 8 (over 12 years) | | |
| 4 | ๐ Nose | ๐ Olfactory Bulb | Smell | ๐คง Allergies | 25 | 31231 | 31294 | 1 in 3 | | |
| 5 | ๐ Mouth | ๐ Tongue | Taste | ๐ฆท Dental Issues | 130 | 00100 | 00192 | 1 in 2 | | |
| 6 | ๐ซ Neck | ๐ฆ Thyroid | Metabolism | ๐ฆ Hypothyroidism | 3.1 | 60210 | 60271 | 1 in 20 | | |
| 7 | ๐ช Upper Body | โค๏ธ Heart | Circulation | ๐ Heart Disease | 230 | 92920 | 93799 | 1 in 4 (over 65 years) | | |
| 8 | ๐ช Upper Body | ๐ซ Lungs | Respiration | ๐ท Chronic Obstructive Pulmonary Disease | 70 | 94002 | 94799 | 1 in 20 (over 45 years) | | |
| 9 | ๐ช Upper Body | ๐ท Liver | Detoxification | ๐บ Liver Disease | 40 | 47000 | 47999 | 1 in 10 | | |
| 10 | ๐ช Upper Body | ๐น Kidneys | Filtration | ๐ Chronic Kidney Disease | 110 | 50010 | 50999 | 1 in 7 | | |
| 11 | ๐ช Upper Body | ๐ Pancreas | Insulin secretion | ๐ฌ Diabetes | 327 | 48100 | 48999 | 1 in 10 | | |
| 12 | ๐ช Upper Body | ๐ฝ๏ธ Stomach | Digestion | ๐ฅ Gastroesophageal Reflux Disease | 17 | 43200 | 43289 | 1 in 5 | | |
| 13 | ๐ช Upper Body | ๐ก๏ธ Spleen | Immune functions | ๐ฉธ Anemia | 5.6 | 38100 | 38199 | 1 in 6 | | |
| 14 | ๐ช Upper Body | ๐ซ Blood Vessels | Circulation of blood | ๐ Hypertension | 55 | 40110 | 40599 | 1 in 3 | | |
| 15 | ๐ฆต Lower Body | ๐ Colon | Absorption of water, minerals | ๐ Colorectal Cancer | 14 | 45378 | 45378 | 1 in 23 | | |
| 16 | ๐ฆต Lower Body | ๐ฝ Bladder | Urine excretion | ๐ง Urinary Incontinence | 8 | 51700 | 51798 | 1 in 4 (over 65 years) | | |
| 17 | ๐ฆต Lower Body | ๐ Reproductive Organs | Sex hormone secretion | ๐๏ธ Endometriosis | 22 | 56405 | 58999 | 1 in 10 (women) | | |
| 18 | ๐ฆถ Feet | ๐ฏ Nerve endings | Balance and movement | ๐ค Peripheral Neuropathy | 19 | 95900 | 96004 | 1 in 30 | | |
| 19 | ๐ฆถ Feet | ๐ก๏ธ Skin | Temperature regulation | ๐ Skin Cancer | 8.1 | 96910 | 96999 | 1 in 5 | | |
| 20 | ๐ฆถ Feet | ๐ช Muscles | Movement and strength | ๐๏ธโโ๏ธ Musculoskeletal Disorders | 176 | 97110 | 97799 | 1 in 2 | | |
""") | |
roleplaying_glossary = { | |
"๐ด Traditional Card Games": { | |
"Bridge": ["Trick-taking", "Bidding and partnership", "Complex scoring"], | |
"Poker": ["Betting/Card ranking", "Bluffing and hand management", "Various play styles"], | |
"Hearts": ["Trick-avoidance", "Passing cards strategy", "Shooting the moon"], | |
"Spades": ["Trick-taking", "Partnership and bidding", "Blind bidding"], | |
"Rummy": ["Matching", "Set and run formation", "Point scoring"], | |
}, | |
"๐ฎ Collectible Card Games (CCGs)": { | |
"Magic: The Gathering": ["Deck building", "Resource management", "Strategic play"], | |
"Yu-Gi-Oh!": ["Dueling", "Summoning strategies", "Trap and spell cards"], | |
"Pokรฉmon TCG": ["Collectible", "Type advantages", "Energy management"], | |
"KeyForge": ["Unique deck", "No deck building", "Chain system"], | |
"Legend of the Five Rings": ["Living Card Game", "Honor and conflict", "Clan loyalty"], | |
}, | |
"๐น๏ธ Digital Card Games": { | |
"Hearthstone": ["Digital CCG", "Hero powers", "Expansive card sets"], | |
"Gwent": ["Strategic depth", "Row-based play", "Witcher universe"], | |
"Slay the Spire": ["Roguelike deck-builder", "Card drafting", "Relic synergies"], | |
"Eternal Card Game": ["Digital CCG", "Cross-platform", "Drafting and events"], | |
}, | |
"๐ป Card Battler Video Games": { | |
"Yu-Gi-Oh! Duel Links": ["Speed Duel format", "Mobile and PC", "Competitive ladder"], | |
"Magic: The Gathering Arena": ["Digital adaptation", "Regular updates", "Esports"], | |
"Monster Train": ["Roguelike", "Multi-tiered defense", "Clan synergies"], | |
"Legends of Runeterra": ["League of Legends universe", "Dynamic combat", "Champion leveling"], | |
}, | |
"๐ง Game Design and Dynamics": { | |
"Deck Building Strategies": ["Card synergy", "Mana curve", "Meta considerations"], | |
"Gameplay Mechanics": ["Turn-based", "Resource management", "Combat dynamics"], | |
"Player Engagement": ["Replayability", "Strategic depth", "Social play"], | |
}, | |
"๐ Lore & Background": { | |
"Magic: The Gathering": ["Rich lore", "Multiverse settings", "Planeswalker stories"], | |
"Yu-Gi-Oh!": ["Anime-based", "Duel Monsters", "Egyptian mythology"], | |
"Legends of Runeterra": ["Expansive lore", "Champion backstories", "Faction conflicts"], | |
}, | |
"๐ ๏ธ Digital Tools & Platforms": { | |
"Online Play": ["Remote gameplay", "Digital tournaments", "Community events"], | |
"Deck Building Tools": ["Card database access", "Deck testing", "Community sharing"], | |
"Strategy Guides": ["Meta analysis", "Deck guides", "Tournament reports"], | |
}, | |
"๐๏ธ Competitive Scene": { | |
"Tournaments": ["Local game stores", "Regional competitions", "World championships"], | |
"Ranking Systems": ["Elo ratings", "Ladder rankings", "Seasonal rewards"], | |
"Esports": ["Live-streamed events", "Professional teams", "Sponsorships"], | |
}, | |
} | |
# Ensure the directory for storing scores exists.
# update_score() and load_score() keep one "<key>.json" file per button in it.
score_dir = "scores"
os.makedirs(score_dir, exist_ok=True)  # exist_ok=True: no error when the directory is already there
# Build the unique widget key for a button from its header, label and index
def generate_key(label, header, idx):
    """Return the key string ``"<header>_<label>_<idx>_key"``."""
    parts = (f"{header}", f"{label}", f"{idx}", "key")
    return "_".join(parts)
# Function to increment and save score
def update_score(key, increment=1):
    """Record one click for *key*, add *increment* to its score and persist it.

    The counters live in ``<score_dir>/<key>.json`` as
    ``{"clicks": int, "score": int}``.

    Path separators in *key* are replaced with ``_`` before the file name is
    built: keys are derived from glossary terms such as
    "Betting/Card ranking", which would otherwise point into a sub-directory
    that does not exist and make the write fail.  Keys without separators map
    to exactly the same file as before.

    Returns the new score.
    """
    safe_key = key.replace("/", "_").replace("\\", "_")
    score_file = os.path.join(score_dir, f"{safe_key}.json")
    score_data = {"clicks": 0, "score": 0}
    if os.path.exists(score_file):
        try:
            with open(score_file, "r") as file:
                loaded = json.load(file)
        except (OSError, json.JSONDecodeError):
            # An unreadable or corrupt score file restarts the counters
            # instead of breaking the button that triggered the update.
            loaded = None
        if isinstance(loaded, dict):
            score_data["clicks"] = loaded.get("clicks", 0)
            score_data["score"] = loaded.get("score", 0)
    score_data["clicks"] += 1
    score_data["score"] += increment
    with open(score_file, "w") as file:
        json.dump(score_data, file)
    return score_data["score"]
# Function to load score
def load_score(key):
    """Return the stored score for *key*, or 0 when none can be read.

    Reads ``<score_dir>/<key>.json``.  Path separators in *key* are replaced
    with ``_`` so that keys built from terms such as "Betting/Card ranking"
    resolve to a file directly inside ``score_dir``; keys without separators
    map to the same file as before.  A missing, unreadable or malformed file
    yields 0.
    """
    safe_key = key.replace("/", "_").replace("\\", "_")
    score_file = os.path.join(score_dir, f"{safe_key}.json")
    if not os.path.exists(score_file):
        return 0
    try:
        with open(score_file, "r") as file:
            score_data = json.load(file)
    except (OSError, json.JSONDecodeError):
        return 0
    if not isinstance(score_data, dict):
        return 0
    return score_data.get("score", 0)
def search_glossary(query, glossary=None):
    """Run *query* through GPT and Llama, save the answers and read them aloud.

    Args:
        query: Prompt text.  It is also matched (case-insensitively, whole
            string) against the entry names of each glossary category; every
            category containing it is announced above the results.
        glossary: Optional mapping ``{category: {entry: [...]}}`` to match
            against.  Defaults to the module-level ``roleplaying_glossary``.
            Accepting it keeps two-argument calls such as the one in
            ``display_buttons_with_scores`` from raising TypeError.

    Returns:
        The combined text ``'# Query: ...# Response: ...# Response2: ...'``.
    """
    if glossary is None:
        glossary = roleplaying_glossary

    needle = query.lower()
    for category, terms in glossary.items():
        if needle in (term.lower() for term in terms):
            st.markdown(f"#### {category}")
            st.write(f"- {query}")

    st.write('## ' + query)

    # The heading glyph was mis-encoded in the source; restored as a search icon.
    st.write('## 🔍 Running with GPT.')
    # Either model call may hand back None on failure; normalise to '' so the
    # string concatenations below cannot raise.
    response = chat_with_model(query) or ''
    filename = generate_filename(query + ' --- ' + response, "md")
    create_file(filename, query, response, should_save)

    st.write('## 🔍 Running with Llama.')
    response2 = StreamLLMChatResponse(query) or ''
    filename_txt = generate_filename(query + ' --- ' + response2, "md")
    create_file(filename_txt, query, response2, should_save)

    # Named `combined` rather than `all` to avoid shadowing the builtin.
    combined = '# Query: ' + query + '# Response: ' + response + '# Response2: ' + response2
    filename_txt2 = generate_filename(query + ' --- ' + combined, "md")
    create_file(filename_txt2, query, combined, should_save)
    SpeechSynthesis(combined)
    return combined
# Function to display the glossary in a structured format
def display_glossary(glossary, area):
    """Render one glossary area as a subheader plus numbered term lists.

    Args:
        glossary: Mapping ``{area: {game: [term, ...]}}``.
        area: Key to render.  Nothing is drawn when it is not in *glossary*.
    """
    if area not in glossary:
        return
    # The subheader glyph was mis-encoded in the source; a book icon is used.
    st.subheader(f"📘 Glossary for {area}")
    for game, terms in glossary[area].items():
        st.markdown(f"### {game}")
        for idx, term in enumerate(terms, start=1):
            st.write(f"{idx}. {term}")
# Function to display the entire glossary in a grid format with links
def display_glossary_grid(roleplaying_glossary):
    """Render every category as a row of columns, one column per entry.

    Each term is followed by emoji links that open a search for it on
    Wikipedia, Google, YouTube, Bing and the MixableCardGameAI space.

    Args:
        roleplaying_glossary: Mapping ``{category: {entry: [term, ...]}}``.
    """
    # The link labels were mis-encoded in the source, leaving several of them
    # visually identical; they are restored as distinct emoji so each search
    # target keeps its own entry in the dict.
    search_urls = {
        "📖": lambda k: f"https://en.wikipedia.org/wiki/{quote(k)}",
        "🔍": lambda k: f"https://www.google.com/search?q={quote(k)}",
        "▶️": lambda k: f"https://www.youtube.com/results?search_query={quote(k)}",
        "🔎": lambda k: f"https://www.bing.com/search?q={quote(k)}",
        "🎲": lambda k: f"https://huggingface.co/spaces/awacke1/MixableCardGameAI?q={quote(k)}",  # this url plus query!
    }
    for category, details in roleplaying_glossary.items():
        st.write(f"### {category}")
        if not details:
            # st.columns() needs at least one column; skip empty categories.
            continue
        cols = st.columns(len(details))  # one column per entry in the category
        for col, (game, terms) in zip(cols, details.items()):
            with col:
                st.markdown(f"#### {game}")
                for term in terms:
                    links_md = ' '.join(f"[{emoji}]({url(term)})" for emoji, url in search_urls.items())
                    st.markdown(f"{term} {links_md}", unsafe_allow_html=True)
game_emojis = { | |
"Dungeons and Dragons": "๐", | |
"Call of Cthulhu": "๐", | |
"GURPS": "๐ฒ", | |
"Pathfinder": "๐บ๏ธ", | |
"Kindred of the East": "๐ ", | |
"Changeling": "๐", | |
} | |
topic_emojis = { | |
"Core Rulebooks": "๐", | |
"Maps & Settings": "๐บ๏ธ", | |
"Game Mechanics & Tools": "โ๏ธ", | |
"Monsters & Adversaries": "๐น", | |
"Campaigns & Adventures": "๐", | |
"Creatives & Assets": "๐จ", | |
"Game Master Resources": "๐ ๏ธ", | |
"Lore & Background": "๐", | |
"Character Development": "๐ง", | |
"Homebrew Content": "๐ง", | |
"General Topics": "๐", | |
} | |
# Adjusted display_buttons_with_scores function
def display_buttons_with_scores():
    """Draw one scored button per glossary term and run a query when clicked.

    For every category / entry / term in ``roleplaying_glossary`` a button
    labelled with the term and its stored score is rendered.  Clicking it
    increments the score and sends a generated prompt to ``search_glossary``.
    """
    for category, games in roleplaying_glossary.items():
        # Default icons were mis-encoded in the source; restored from the
        # original inline comments (search icon / generic game controller).
        category_emoji = topic_emojis.get(category, "🔍")
        st.markdown(f"## {category_emoji} {category}")
        for game, terms in games.items():
            game_emoji = game_emojis.get(game, "🎮")
            for term in terms:
                key = f"{category}_{game}_{term}".replace(' ', '_').lower()
                score = load_score(key)
                if not st.button(f"{game_emoji} {term} {score}", key=key):
                    continue
                update_score(key)
                # Create a dynamic query incorporating emojis and formatting for clarity
                query_prefix = f"{category_emoji} {game_emoji} **{game} - {category}:**"
                query_body = f"Create a streamlit python app.py that produces a detailed markdown outline and CSV dataset user interface with an outline for **{term}** with subpoints highlighting key aspects, using emojis for visual engagement. Include step-by-step rules and boldface important entities and ruleset elements."
                # search_glossary is declared with a single `query` parameter;
                # the former second positional argument raised TypeError.
                search_glossary(query_prefix + query_body)
def fetch_wikipedia_summary(keyword):
    """Return a placeholder summary sentence for *keyword*.

    No network request is made; the text is built locally.
    """
    template = "Summary for {}. For more information, visit Wikipedia."
    return template.format(keyword)
def _encode_search_keyword(keyword):
    """Percent-encode *keyword* for use as a query-string value.

    Spaces still become ``+`` as before, but reserved characters such as
    ``&``, ``#`` and ``?`` are now escaped instead of corrupting the URL.
    """
    from urllib.parse import quote_plus  # local import: only these helpers need it

    return quote_plus(keyword)


def create_search_url_youtube(keyword):
    """Return the YouTube search-results URL for *keyword*."""
    base_url = "https://www.youtube.com/results?search_query="
    return base_url + _encode_search_keyword(keyword)


def create_search_url_bing(keyword):
    """Return the Bing search URL for *keyword*."""
    base_url = "https://www.bing.com/search?q="
    return base_url + _encode_search_keyword(keyword)


def create_search_url_wikipedia(keyword):
    """Return the English Wikipedia search-redirect URL for *keyword*."""
    base_url = "https://www.wikipedia.org/search-redirect.php?family=wikipedia&language=en&search="
    return base_url + _encode_search_keyword(keyword)


def create_search_url_google(keyword):
    """Return the Google search URL for *keyword*."""
    base_url = "https://www.google.com/search?q="
    return base_url + _encode_search_keyword(keyword)
def display_images_and_wikipedia_summaries():
    """Show every PNG in the working directory with search links for its name.

    The keyword used for the links is the file name without its extension.
    """
    st.title('Gallery with Related Stories')
    # Sorted so the gallery order does not depend on directory listing order.
    image_files = sorted(f for f in os.listdir('.') if f.endswith('.png'))
    if not image_files:
        st.write("No PNG images found in the current directory.")
        return
    for image_file in image_files:
        # Context manager releases the file handle once the image is rendered.
        with Image.open(image_file) as image:
            st.image(image, caption=image_file, use_column_width=True)
        # splitext keeps dotted names intact ("my.photo.png" -> "my.photo");
        # split('.')[0] used to cut them down to "my".
        keyword = os.path.splitext(image_file)[0]
        # Display Wikipedia, Google, YouTube and Bing search links
        links = (
            ("Wikipedia", create_search_url_wikipedia(keyword)),
            ("Google", create_search_url_google(keyword)),
            ("YouTube", create_search_url_youtube(keyword)),
            ("Bing", create_search_url_bing(keyword)),
        )
        links_md = " |\n".join(f"[{label}]({url})" for label, url in links)
        st.markdown(links_md)
def get_all_query_params(key):
    """Return every value of URL query parameter *key* as a list.

    ``st.query_params`` is a dict-like object, not a function, so calling it
    raised TypeError.  ``get_all`` returns a list of the values and an empty
    list when the parameter is absent, matching the former ``[]`` default.
    """
    return st.query_params.get_all(key)
def clear_query_params():
    """Remove all query parameters from the app URL.

    ``st.query_params`` is a dict-like object; calling it raised TypeError and
    cleared nothing.
    """
    st.query_params.clear()
# Function to display content or image based on a query
def display_content_or_image(query):
    """Show the first glossary entry or image that matches *query*.

    Entry names and their terms in ``roleplaying_glossary`` are searched with
    a case-insensitive substring match; failing that, ``images/<query>.png``
    is shown when it exists.

    NOTE(review): the original looped over ``transhuman_glossary``, which is
    not defined anywhere in the visible file; ``roleplaying_glossary`` is the
    glossary this module declares - confirm that is the intended source.

    Returns:
        True when something was displayed, False otherwise.
    """
    needle = (query or "").strip().lower()
    if not needle:
        # An empty string is a substring of everything and would "match" the
        # first entry, so treat it as no query at all.
        st.warning("No matching content or image found.")
        return False

    # Check if the query matches any glossary entry or term
    for category, games in roleplaying_glossary.items():
        for game, features in games.items():
            for term in (game, *features):
                if needle in term.lower():
                    st.subheader(f"Found in {category}:")
                    st.write(term)
                    return True  # Return after finding and displaying the first match

    # Check for an image match in a predefined directory (adjust path as needed)
    image_dir = "images"  # Example directory where images are stored
    # basename() keeps a query such as "../secret" inside image_dir.
    image_path = f"{image_dir}/{os.path.basename(query)}.png"
    if os.path.exists(image_path):
        st.image(image_path, caption=f"Image for {query}")
        return True

    # If no content or image is found
    st.warning("No matching content or image found.")
    return False
# 1. Constants and Top Level UI Variables
# My Inference API Copy
API_URL = 'https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud'  # Dr Llama
# Meta's Original - Chat HF Free Version:
#API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
API_KEY = os.getenv('API_KEY')  # token passed to InferenceClient in StreamLLMChatResponse
MODEL1 = "meta-llama/Llama-2-7b-chat-hf"
MODEL1URL = "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf"
HF_KEY = os.getenv('HF_KEY')
# JSON request headers for the Llama endpoint (used by query(payload)).
headers = {
    "Authorization": f"Bearer {HF_KEY}",
    "Content-Type": "application/json"
}
key = os.getenv('OPENAI_API_KEY')
# Plain string: the former f-prefix had no placeholders to interpolate.
prompt = "Write instructions to teach discharge planning along with guidelines and patient education. List entities, features and relationships to CCDA and FHIR objects in boldface."
# The checkbox glyph was mis-encoded in the source; restored as the floppy-disk icon.
should_save = st.sidebar.checkbox("💾 Save", value=True, help="Save your session data.")
def SpeechSynthesis(result):
    """Embed an HTML widget that reads *result* aloud in the browser.

    The text is placed in a ``<textarea>`` next to a button that passes it to
    the browser's ``speechSynthesis`` API.

    Args:
        result: Text to read.  ``None`` is treated as empty text.
    """
    import html  # local import: only this function needs it

    # Escape the text so model output containing "</textarea>" or "<script>"
    # cannot break out of the text area and inject markup into the widget.
    safe_text = html.escape('' if result is None else str(result))

    documentHTML5 = '''
<!DOCTYPE html>
<html>
<head>
    <title>Read It Aloud</title>
    <script type="text/javascript">
        function readAloud() {
            const text = document.getElementById("textArea").value;
            const speech = new SpeechSynthesisUtterance(text);
            window.speechSynthesis.speak(speech);
        }
    </script>
</head>
<body>
    <h1>🔊 Read It Aloud</h1>
    <textarea id="textArea" rows="10" cols="80">
'''
    documentHTML5 = documentHTML5 + safe_text
    documentHTML5 = documentHTML5 + '''
    </textarea>
    <br>
    <button onclick="readAloud()">🔊 Read Aloud</button>
</body>
</html>
'''
    components.html(documentHTML5, width=1280, height=300)
# 3. Stream Llama Response
# @st.cache_resource
def StreamLLMChatResponse(prompt):
    """Stream a completion for *prompt* from the Llama endpoint into the page.

    Tokens are appended to a placeholder as they arrive; when the stream ends
    the accumulated text is passed to ``SpeechSynthesis``.

    Returns:
        The text generated so far (stripped).  This is ``''`` when the
        endpoint could not be reached or produced nothing, so callers can
        concatenate the result without checking for ``None``.
    """
    result = ''  # defined up front: the stream may yield no usable token
    try:
        endpoint_url = API_URL
        hf_token = API_KEY
        st.write('Running client ' + endpoint_url)
        client = InferenceClient(endpoint_url, token=hf_token)
        gen_kwargs = dict(
            max_new_tokens=512,
            top_k=30,
            top_p=0.9,
            temperature=0.2,
            repetition_penalty=1.02,
            stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
        )
        stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
        report = []
        res_box = st.empty()
        for r in stream:
            if r.token.special:
                continue
            if r.token.text in gen_kwargs["stop_sequences"]:
                break
            report.append(r.token.text)
            if not r.token.text:
                continue
            result = "".join(report).strip()
            try:
                res_box.markdown(f'*{result}*')
            except Exception:
                # Rendering is best-effort; keep consuming the stream.
                st.write('Stream llm issue')
        SpeechSynthesis(result)
        return result
    except Exception as err:
        st.write('Llama model is asleep. Starting up now on A10 - please give 5 minutes then retry as KEDA scales up from zero to activate running container(s).')
        # Surface the real cause too: not every failure is a cold start.
        st.write(f'Details: {type(err).__name__}: {err}')
        return result
# 4. Run query with payload
def _post_json_to_llama(payload):
    """POST *payload* as JSON to ``API_URL``, show the reply and return it.

    Headers are built here rather than read from the module-level ``headers``
    because that name is reassigned further down with an ``audio/wav``
    content type for the speech-to-text endpoint.
    """
    json_headers = {
        "Authorization": f"Bearer {HF_KEY}",
        "Content-Type": "application/json",
    }
    response = requests.post(API_URL, headers=json_headers, json=payload)
    result = response.json()
    st.markdown(result)
    return result


def query(payload):
    """Send *payload* to the Llama endpoint and return the decoded JSON reply."""
    return _post_json_to_llama(payload)


def get_output(prompt):
    """Return the Llama endpoint's JSON reply for ``{"inputs": prompt}``.

    Calls the private helper directly: a later ``query(filename)`` definition
    in this module replaces ``query`` at import time and expects a file path,
    so routing through ``query`` would try to open a dict as a file.
    """
    return _post_json_to_llama({"inputs": prompt})
# 5. Auto name generated output files from time and content
def generate_filename(prompt, file_type):
    """Build ``"<MMDD_HHMM>_<sanitised prompt>.<file_type>"``.

    The timestamp is taken in the US/Central time zone.  Spaces and newlines
    in *prompt* become underscores and every other character that is not
    alphanumeric or ``_`` is dropped.

    The complete name is kept within 255 bytes (the usual Linux file-name
    limit).  Previously only the prompt part was capped at 255 characters, so
    the timestamp and extension pushed long names over the limit.
    """
    central = pytz.timezone('US/Central')
    safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
    replaced_prompt = prompt.replace(" ", "_").replace("\n", "_")
    safe_prompt = "".join(ch for ch in replaced_prompt if ch.isalnum() or ch == "_")
    # Budget = limit minus "<stamp>_" and ".<ext>"; measured in UTF-8 bytes
    # because isalnum() also lets multi-byte letters through.
    budget = 255 - len(safe_date_time.encode("utf-8")) - len(str(file_type).encode("utf-8")) - 2
    safe_prompt = safe_prompt.encode("utf-8")[:max(budget, 0)].decode("utf-8", "ignore")
    return f"{safe_date_time}_{safe_prompt}.{file_type}"
# 6. Speech transcription via OpenAI service
def transcribe_audio(openai_key, file_path, model):
    """Transcribe the audio file at *file_path* with OpenAI and chat about it.

    On HTTP 200 the transcript text is sent to ``chat_with_model``, the
    exchange is saved through ``create_file`` and the transcript is returned.
    Any other status shows the response body and an error, and returns None.
    """
    openai.api_key = openai_key
    OPENAI_API_URL = "https://api.openai.com/v1/audio/transcriptions"
    auth_headers = {"Authorization": f"Bearer {openai_key}"}

    with open(file_path, 'rb') as audio_file:
        st.write('STT transcript ' + OPENAI_API_URL)
        response = requests.post(
            OPENAI_API_URL,
            headers=auth_headers,
            files={'file': audio_file},
            data={'model': model},
        )

    if response.status_code != 200:
        st.write(response.json())
        st.error("Error in API call.")
        return None

    st.write(response.json())
    chat_reply = chat_with_model(response.json().get('text'), '')
    transcript = response.json().get('text')
    target_name = generate_filename(transcript, 'txt')
    create_file(target_name, transcript, chat_reply, should_save)
    return transcript
# 7. Auto stop on silence audio control for recording WAV files
def save_and_play_audio(audio_recorder):
    """Record audio, save it as a WAV file, play it back and return its name.

    *audio_recorder* is called with ``key='audio_recorder'``.  Returns None
    when it yields no audio.
    """
    audio_bytes = audio_recorder(key='audio_recorder')
    if not audio_bytes:
        return None
    wav_name = generate_filename("Recording", "wav")
    with open(wav_name, 'wb') as wav_file:
        wav_file.write(audio_bytes)
    st.audio(audio_bytes, format="audio/wav")
    return wav_name
# 8. File creator that interprets type and creates output file for text, markdown and code
def create_file(filename, prompt, response, should_save=True):
    """Save a prompt/response pair as ``<filename without extension>.md``.

    Nothing is written when *should_save* is false or when *filename* does not
    end in ``.txt``, ``.htm`` or ``.md``.

    Args:
        filename: Target name; only its stem and extension are used.
        prompt: Prompt text; surrounding whitespace is stripped.
        response: Response text.  ``None`` (a failed model call) is stored as
            an empty response.
        should_save: Master switch for saving.
    """
    if not should_save:
        return
    base_filename, ext = os.path.splitext(filename)
    if ext not in ('.txt', '.htm', '.md'):
        return

    # Build the content before opening the file so that bad input can no
    # longer leave a freshly truncated, empty file behind.
    prompt_text = '' if prompt is None else str(prompt)
    response_text = '' if response is None else str(response)
    content = prompt_text.strip() + '\r\n' + response_text

    try:
        # utf-8: responses regularly contain emoji.  newline='' writes the
        # '\r\n' separator exactly as given on every platform.
        with open(f"{base_filename}.md", 'w', encoding='utf-8', newline='') as file:
            file.write(content)
    except OSError as err:
        # Saving is best-effort: report the problem instead of aborting the page.
        st.write(f'Could not save {base_filename}.md: {err}')
def truncate_document(document, length):
    """Return the leading *length* items of *document* (plain slice semantics)."""
    head = slice(None, length)
    return document[head]
def divide_document(document, max_length):
    """Split *document* into consecutive slices of at most *max_length* items."""
    sections = []
    for start in range(0, len(document), max_length):
        sections.append(document[start:start + max_length])
    return sections
# 9. Sidebar with UI controls to review and re-run prompts and continue responses
def get_table_download_link(file_path):
    """Return an HTML ``<a>`` tag that downloads *file_path* via a data URI.

    The file is read as bytes and base64-encoded.  It used to be opened in
    text mode, which fails on the binary types this function explicitly
    supports (``.wav``, ``.xlsx``).

    Args:
        file_path: Path of the file to embed.

    Returns:
        The anchor markup; the link text and download name are the file's
        base name.
    """
    with open(file_path, 'rb') as file:
        b64 = base64.b64encode(file.read()).decode()
    file_name = os.path.basename(file_path)
    ext = os.path.splitext(file_name)[1].lower()  # extension decides the MIME type
    mime_types = {
        '.txt': 'text/plain',
        '.py': 'text/plain',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.csv': 'text/csv',
        '.htm': 'text/html',
        '.md': 'text/markdown',
        '.wav': 'audio/wav',
    }
    mime_type = mime_types.get(ext, 'application/octet-stream')  # general binary data type
    href = f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{file_name}">{file_name}</a>'
    return href
def CompressXML(xml_text):
    """Return *xml_text* re-serialised without elements whose tag contains "Comment".

    ElementTree elements carry no ``parent`` attribute, so a child-to-parent
    map is built first and used to detach the matching elements.  A matching
    root element is left in place because it has no parent to be removed from.
    """
    root = ET.fromstring(xml_text)
    parent_of = {child: parent for parent in root.iter() for child in parent}
    for elem in list(root.iter()):
        if isinstance(elem.tag, str) and 'Comment' in elem.tag:
            parent = parent_of.get(elem)
            if parent is not None:
                parent.remove(elem)
    return ET.tostring(root, encoding='unicode', method="xml")
# 10. Read in and provide UI for past files
def read_file_content(file,max_length):
    """Return the text content of an uploaded file, chosen by its MIME type.

    Args:
        file: Uploaded file object; ``type``, ``read()`` and ``getvalue()``
            are used.
        max_length: Accepted for interface compatibility; not used here.

    Returns:
        Text extracted from JSON, HTML, XML, markdown or plain-text uploads,
        or ``""`` for any other MIME type.
    """
    mime = file.type
    if mime == "application/json":
        return str(json.load(file))
    if mime in ("text/html", "text/htm"):
        return BeautifulSoup(file, "html.parser").text
    if mime in ("application/xml", "text/xml"):
        root = ET.parse(file).getroot()
        return CompressXML(ET.tostring(root, encoding='unicode'))
    if mime in ("text/markdown", "text/md"):
        markdown_text = file.read().decode()
        # mistune is not imported at module level, so the markdown branch used
        # to die with NameError.  Import it here and fall back to the raw
        # markdown text when the package is not installed.
        try:
            import mistune
        except ImportError:
            return markdown_text
        return mistune.create_markdown()(markdown_text)
    if mime == "text/plain":
        return file.getvalue().decode()
    return ""
# 11. Chat with GPT - Caution on quota
def chat_with_model(prompt, document_section='', model_choice='gpt-3.5-turbo'):
    """Stream a chat completion for *prompt* and return the full reply text.

    Args:
        prompt: User message.
        document_section: Optional extra context, appended to the conversation
            as an assistant message when non-empty.
        model_choice: Model name passed to the API.  It was previously
            ignored in favour of a hard-coded 'gpt-3.5-turbo'.

    Returns:
        The concatenated content of all streamed chunks.
    """
    model = model_choice
    conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
    conversation.append({'role': 'user', 'content': prompt})
    if document_section:
        conversation.append({'role': 'assistant', 'content': document_section})

    start_time = time.time()
    pieces = []
    res_box = st.empty()
    st.write('LLM stream ' + model)
    for chunk in openai.ChatCompletion.create(model=model, messages=conversation, temperature=0.5, stream=True):
        content = chunk["choices"][0].get("delta", {}).get("content")
        # Role-only and final chunks carry no content.  Appending that None
        # used to make every later "".join() fail, so the live preview never
        # rendered; such chunks are now skipped.
        if not content:
            continue
        pieces.append(content)
        result = "".join(pieces).strip()
        res_box.markdown(f'*{result}*')

    full_reply_content = "".join(pieces)
    st.write("Elapsed time:")
    st.write(time.time() - start_time)
    return full_reply_content
def extract_mime_type(file):
    """Return the MIME type of *file*.

    Args:
        file: Either the string representation of an uploaded file, containing
            ``type='<mime>'``, or an object exposing a ``type`` attribute
            (such as Streamlit's uploaded-file object).

    Raises:
        ValueError: The string does not contain a ``type='...'`` field.
        TypeError: *file* is neither a string nor an object with ``type``.
    """
    if isinstance(file, str):
        match = re.search(r"type='(.*?)'", file)
        if match:
            return match.group(1)
        raise ValueError(f"Unable to extract MIME type from {file}")
    # Duck-typed check: the former isinstance test referenced
    # `streamlit.UploadedFile`, but the module is only bound as `st`, so every
    # non-string argument raised NameError.
    if hasattr(file, "type"):
        return file.type
    raise TypeError("Input should be a string or a streamlit.UploadedFile object")
def extract_file_extension(file):
    """Return the extension of ``file.name`` without the leading dot.

    The text after the *last* dot is returned, so "my.report.pdf" gives "pdf".
    The former lazy regex captured everything after the *first* dot
    ("report.pdf"), which callers comparing against 'pdf' could not match.

    Raises:
        ValueError: The name contains no dot.
    """
    # get the file name directly from the UploadedFile object
    file_name = file.name
    _stem, dot, extension = file_name.rpartition(".")
    if not dot:
        raise ValueError(f"Unable to extract file extension from {file_name}")
    return extension
# Normalize input as text from PDF and other formats
def pdf2txt(docs):
    """Concatenate the text of every uploaded document in *docs*.

    Plain-text style files (py, txt, html, htm, xml, json) are decoded as
    UTF-8; PDFs are read page by page with PyPDF2.  Other extensions add
    nothing to the result.
    """
    plain_text_kinds = ['py', 'txt', 'html', 'htm', 'xml', 'json']
    text = ""
    for doc in docs:
        extension = extract_file_extension(doc)
        st.write(f"File type extension: {extension}")
        kind = extension.lower()
        if kind in plain_text_kinds:
            text += doc.getvalue().decode('utf-8')
            continue
        if kind != 'pdf':
            continue
        from PyPDF2 import PdfReader
        reader = PdfReader(BytesIO(doc.getvalue()))
        for page_number in range(len(reader.pages)):
            text += reader.pages[page_number].extract_text()  # new PyPDF2 syntax
    return text
def txt2chunks(text):
    """Split *text* on newlines into ~1000-character chunks overlapping by 200.

    NOTE(review): ``CharacterTextSplitter`` is not imported in the visible
    part of this file - confirm it is brought into scope elsewhere.
    """
    splitter_options = dict(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
    splitter = CharacterTextSplitter(**splitter_options)
    return splitter.split_text(text)
# Vector Store using FAISS
def vector_store(text_chunks):
    """Embed *text_chunks* with OpenAI embeddings and return a FAISS store.

    NOTE(review): ``OpenAIEmbeddings`` and ``FAISS`` are not imported in the
    visible part of this file - confirm they are brought into scope elsewhere.
    """
    embedder = OpenAIEmbeddings(openai_api_key=key)
    store = FAISS.from_texts(texts=text_chunks, embedding=embedder)
    return store
# Memory and Retrieval chains
def get_chain(vectorstore):
    """Build a conversational retrieval chain over *vectorstore*.

    The chain uses a ``ChatOpenAI`` model and a buffer memory stored under the
    ``chat_history`` key.

    NOTE(review): ``ChatOpenAI``, ``ConversationBufferMemory`` and
    ``ConversationalRetrievalChain`` are not imported in the visible part of
    this file - confirm they are brought into scope elsewhere.
    """
    chat_model = ChatOpenAI()
    history = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    retriever = vectorstore.as_retriever()
    return ConversationalRetrievalChain.from_llm(llm=chat_model, retriever=retriever, memory=history)
def process_user_input(user_question):
    """Ask the stored conversation chain *user_question* and render the history.

    The chain's ``chat_history`` is saved in session state; each message is
    written with the user template (even positions) or the bot template (odd
    positions) and saved through ``create_file``.

    NOTE(review): ``user_template`` / ``bot_template`` come from an import
    that is commented out at the top of this file, and the original
    indentation was lost - the save step is assumed to run once per message.
    """
    chain_reply = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = chain_reply['chat_history']
    for position, message in enumerate(st.session_state.chat_history):
        if position % 2 == 0:
            template = user_template
        else:
            template = bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
        target_name = generate_filename(user_question, 'txt')
        create_file(target_name, user_question, message.content, should_save)
def divide_prompt(prompt, max_length):
    """Split *prompt* into chunks of whole words no longer than *max_length*.

    Words are joined with single spaces.  A word longer than *max_length* is
    never split; it becomes a chunk of its own.

    Fixes over the previous version:
      * no empty leading chunk when the first word exceeds *max_length*;
      * the joining space is always counted, so chunks started after a flush
        can no longer end up one character over the limit;
      * an empty or whitespace-only prompt yields ``[]`` instead of ``['']``.

    Returns:
        List of chunk strings, in order.
    """
    chunks = []
    current_chunk = []
    current_length = 0  # length of ' '.join(current_chunk)
    for word in prompt.split():
        needed = len(word) if not current_chunk else current_length + 1 + len(word)
        if current_chunk and needed > max_length:
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length = needed
    if current_chunk:
        chunks.append(' '.join(current_chunk))
    return chunks
# 13. Provide way of saving all and deleting all to give way of reviewing output and saving locally before clearing it
def create_zip_of_files(files):
    """Write every path in *files* into ``all_files.zip`` and return that name.

    The archive is created in the current working directory and replaces any
    existing file of the same name.
    """
    archive_name = "all_files.zip"
    with zipfile.ZipFile(archive_name, 'w') as archive:
        for member_path in files:
            archive.write(member_path)
    return archive_name
def get_zip_download_link(zip_file):
    """Return an HTML "Download All" link embedding *zip_file* as a data URI."""
    with open(zip_file, 'rb') as archive:
        encoded = base64.b64encode(archive.read()).decode()
    return f'<a href="data:application/zip;base64,{encoded}" download="{zip_file}">Download All</a>'
# 14. Inference Endpoints for Whisper (best fastest STT) on NVIDIA T4 and Llama (best fastest AGI LLM) on NVIDIA A10
# A dedicated inference endpoint was configured here previously and was
# immediately overridden by the hosted model below; kept for reference only:
#   https://tonpixzfvq3791u9.us-east-1.aws.endpoints.huggingface.cloud
API_URL_IE = "https://api-inference.huggingface.co/models/openai/whisper-small.en"
MODEL2 = "openai/whisper-small.en"
MODEL2_URL = "https://huggingface.co/openai/whisper-small.en"
# Prefer the environment variable and fall back to Streamlit secrets - the same
# order main() uses for OPENAI_API_KEY - so a missing secrets entry no longer
# stops the script when the variable is set.
HF_KEY = os.getenv('HF_KEY')
if not HF_KEY:
    HF_KEY = st.secrets['HF_KEY']
# NOTE: this rebinds the module-level `headers`; from here on it carries the
# audio content type expected by the speech-to-text endpoint.
headers = {
    "Authorization": f"Bearer {HF_KEY}",
    "Content-Type": "audio/wav"
}
#@st.cache_resource
def query(filename):
    """POST the bytes of *filename* to ``API_URL_IE`` and return the JSON reply."""
    with open(filename, "rb") as audio_file:
        payload = audio_file.read()
    reply = requests.post(API_URL_IE, headers=headers, data=payload)
    return reply.json()
def generate_filename(prompt, file_type):
    """Build ``"<MMDD_HHMM>_<sanitised prompt>.<file_type>"``.

    The timestamp is taken in the US/Central time zone.  Spaces and newlines
    become underscores, every other character that is not alphanumeric or
    ``_`` is dropped, and the prompt part is cut to 90 characters.
    """
    stamp = datetime.now(pytz.timezone('US/Central')).strftime("%m%d_%H%M")
    underscored = prompt.replace(" ", "_").replace("\n", "_")
    kept = [ch for ch in underscored if ch.isalnum() or ch == "_"]
    stem = "".join(kept)[:90]
    return f"{stamp}_{stem}.{file_type}"
# 15. Audio recorder to Wav file
def save_and_play_audio(audio_recorder):
    """Record audio, save it as a WAV file, play it back and return its name.

    Returns None when *audio_recorder* yields no audio.
    """
    audio_bytes = audio_recorder()
    if not audio_bytes:
        return None
    wav_name = generate_filename("Recording", "wav")
    with open(wav_name, 'wb') as wav_file:
        wav_file.write(audio_bytes)
    st.audio(audio_bytes, format="audio/wav")
    return wav_name
# 16. Speech transcription to file output
def transcribe_audio(filename):
    """Return the speech-to-text endpoint's reply for the audio in *filename*."""
    return query(filename)
def whisper_main():
    """Record speech, transcribe it, and answer it with GPT and Llama.

    Both answers are saved through ``create_file``.  The recording is then
    renamed to sit next to the Llama answer (same stem, ``.wav`` extension).

    Fixes over the previous version: the recording's path was overwritten by
    a generated text file name, so the audio was never kept or cleaned up, and
    the ``.wav`` name was derived with ``.replace('.txt', '.wav')`` from a
    name that ends in ``.md``, which changed nothing.
    """
    # Audio, transcribe, GPT:
    recording = save_and_play_audio(audio_recorder)
    if recording is None:
        return

    transcription = transcribe_audio(recording)
    try:
        transcript = transcription['text']
    except (KeyError, TypeError):
        # Error replies carry no 'text' field; continue with an empty transcript.
        transcript = ''
    st.write(transcript)

    # Whisper to GPT:
    st.write('Reasoning with your inputs with GPT..')
    response = chat_with_model(transcript)
    st.write('Response:')
    st.write(response)
    gpt_filename = generate_filename(response, "txt")
    create_file(gpt_filename, transcript, response, should_save)

    # Whisper to Llama:
    llama_response = StreamLLMChatResponse(transcript)
    filename_txt = generate_filename(transcript, "md")
    create_file(filename_txt, transcript, llama_response, should_save)

    # Keep the audio beside the saved answer.  os.replace() moves the file in
    # one step, replacing the former copy-then-delete pair.
    filename_wav = os.path.splitext(filename_txt)[0] + '.wav'
    try:
        if os.path.exists(recording):
            os.replace(recording, filename_wav)
    except OSError:
        st.write('.')
# Sample function to demonstrate a response, replace with your own logic
def StreamMedChatResponse(topic):
    """Write a one-line notice naming *topic* to the page."""
    notice = f"Showing resources or questions related to: {topic}"
    st.write(notice)
# 17. Main | |
def _run_llama_and_gpt(file_contents, model_choice, choice):
    """Send file_contents to Llama and then GPT, save both answers, and speak them.

    Each model call is best-effort: a failure shows a notice and the other
    model still runs. The combined text of whichever answers succeeded is
    handed to SpeechSynthesis.

    Args:
        file_contents: Text used both as the prompt and as the document context.
        model_choice: GPT model name passed to chat_with_model.
        choice: File type passed to generate_filename for the GPT answer.
    """
    combined = ""

    try:
        st.write('๐Running with Llama.')
        llama_response = StreamLLMChatResponse(file_contents)
        llama_filename = generate_filename(file_contents, "md")
        create_file(llama_filename, file_contents, llama_response, should_save)
        combined = llama_response
    except Exception:
        st.markdown('Llama is sleeping. Restart ETA 30 seconds.')

    try:
        st.write('๐Running with GPT.')
        gpt_response = chat_with_model(file_contents, file_contents, model_choice)
        gpt_filename = generate_filename(file_contents, choice)
        # Save the GPT answer itself; saving the Llama answer here would also
        # fail with a NameError whenever the Llama call above did not succeed.
        create_file(gpt_filename, file_contents, gpt_response, should_save)
        combined = combined + gpt_response
    except Exception:
        st.markdown('GPT is sleeping. Restart ETA 30 seconds.')

    SpeechSynthesis(combined)


def main():
    """Render the main page of the app.

    Sections, in order: a Llama prompt runner, GPT chat with an optional
    uploaded document, a sidebar listing of saved markdown files with
    per-file actions, a sidebar listing of saved recordings, an (disabled)
    feedback/document-chat panel, the glossary views, and URL query
    parameter handling.
    """
    prompt = "Write ten funny jokes that are tweet length stories that make you laugh. Show as markdown outline with emojis for each."

    with st.expander("Prompts ๐", expanded=False):
        example_input = st.text_input("Enter your prompt text for Llama:", value=prompt, help="Enter text to get a response from DromeLlama.")
        if st.button("Run Prompt With Llama model", help="Click to run the prompt."):
            try:
                response = StreamLLMChatResponse(example_input)
                # A file name has to be generated before saving; none exists
                # yet at this point in the function.
                filename = generate_filename(example_input, "md")
                create_file(filename, example_input, response, should_save)
            except Exception:
                st.write('Llama model is asleep. Starting now on A10 GPU. Please wait one minute then retry. KEDA triggered.')

    # Prefer the environment variable; fall back to Streamlit secrets.
    openai.api_key = os.getenv('OPENAI_API_KEY')
    if openai.api_key is None:
        openai.api_key = st.secrets['OPENAI_API_KEY']

    menu = ["txt", "htm", "xlsx", "csv", "md", "py"]
    choice = st.sidebar.selectbox("Output File Type:", menu)
    model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))
    user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)

    collength, colupload = st.columns([2, 3])  # adjust the ratio as needed
    with collength:
        max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
    with colupload:
        uploaded_file = st.file_uploader("Add a file for context:", type=["pdf", "xml", "json", "xlsx", "csv", "html", "htm", "md", "txt"])

    document_sections = deque()
    document_responses = {}
    if uploaded_file is not None:
        file_content = read_file_content(uploaded_file, max_length)
        document_sections.extend(divide_document(file_content, max_length))

    if len(document_sections) > 0:
        if st.button("๐๏ธ View Upload"):
            st.markdown("**Sections of the uploaded file:**")
            for i, section in enumerate(document_sections):
                st.markdown(f"**Section {i+1}**\n{section}")
        st.markdown("**Chat with the model:**")
        for i, section in enumerate(document_sections):
            if i in document_responses:
                st.markdown(f"**Section {i+1}**\n{document_responses[i]}")
            elif st.button(f"Chat about Section {i+1}"):
                st.write('Reasoning with your inputs...')
                # The model call must run here: everything below reads its result.
                response = chat_with_model(user_prompt, section, model_choice)
                st.write('Response:')
                st.write(response)
                document_responses[i] = response
                filename = generate_filename(f"{user_prompt}_section_{i+1}", choice)
                create_file(filename, user_prompt, response, should_save)
                st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)

    if st.button('๐ฌ Chat'):
        st.write('Reasoning with your inputs...')
        user_prompt_sections = divide_prompt(user_prompt, max_length)
        document_text = ''.join(document_sections)  # same context for every prompt section
        section_responses = [
            chat_with_model(prompt_section, document_text, model_choice)
            for prompt_section in user_prompt_sections
        ]
        # Combine the responses, one per line.
        response = ''.join(part + '\n' for part in section_responses)
        st.write('Response:')
        st.write(response)
        filename = generate_filename(user_prompt, choice)
        create_file(filename, user_prompt, response, should_save)

    # Compose a file sidebar of markdown md files:
    all_files = glob.glob("*.md")
    all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10]  # exclude files with short names
    all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order
    if st.sidebar.button("๐ Delete All Text"):
        for file in all_files:
            os.remove(file)
        st.experimental_rerun()
    if st.sidebar.button("โฌ๏ธ Download All"):
        zip_file = create_zip_of_files(all_files)
        st.sidebar.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)

    # One sidebar row per file: render-as-markdown, download link, open, search, delete.
    file_contents = ''
    next_action = ''
    for file in all_files:
        col1, col2, col3, col4, col5 = st.sidebar.columns([1, 6, 1, 1, 1])  # adjust the ratio as needed
        with col1:
            if st.button("๐", key="md_"+file):  # md emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action = 'md'
        with col2:
            st.markdown(get_table_download_link(file), unsafe_allow_html=True)
        with col3:
            if st.button("๐", key="open_"+file):  # open emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action = 'open'
        with col4:
            if st.button("๐", key="read_"+file):  # search emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action = 'search'
        with col5:
            if st.button("๐", key="delete_"+file):
                os.remove(file)
                st.experimental_rerun()

    if len(file_contents) > 0:
        if next_action == 'open':
            st.text_area("File Contents:", file_contents, height=500)
        if next_action == 'md':
            st.markdown(file_contents)
            buttonlabel = '๐Run with Llama and GPT.'
            if st.button(key='RunWithLlamaandGPT', label=buttonlabel):
                _run_llama_and_gpt(file_contents, model_choice, choice)
        if next_action == 'search':
            st.text_area("File Contents:", file_contents, height=500)
            st.write('๐Running with Llama and GPT.')
            _run_llama_and_gpt(file_contents, model_choice, choice)

    # Function to encode file to base64
    def get_base64_encoded_file(file_path):
        with open(file_path, "rb") as file:
            return base64.b64encode(file.read()).decode()

    # Function to create a download link
    def get_audio_download_link(file_path):
        base64_file = get_base64_encoded_file(file_path)
        return f'<a href="data:file/wav;base64,{base64_file}" download="{os.path.basename(file_path)}">โฌ๏ธ Download Audio</a>'

    # Compose a file sidebar of past encounters
    all_files = glob.glob("*.wav")
    all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10]  # exclude files with short names
    all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order
    filekey = 'delall'
    if st.sidebar.button("๐ Delete All Audio", key=filekey):
        for file in all_files:
            os.remove(file)
        st.experimental_rerun()
    for file in all_files:
        col1, col2 = st.sidebar.columns([6, 1])  # adjust the ratio as needed
        with col1:
            st.markdown(file)
            if st.button("๐ต", key="play_" + file):  # play emoji button
                # Read through a context manager so the handle is closed.
                with open(file, 'rb') as audio_file:
                    audio_bytes = audio_file.read()
                st.audio(audio_bytes, format='audio/wav')
        with col2:
            if st.button("๐", key="delete_" + file):
                os.remove(file)
                st.experimental_rerun()

    # Feedback
    # Step: Give User a Way to Upvote or Downvote
    # NOTE(review): the indentation of this file was lost in extraction. The
    # document-chat code below is assumed to belong under this disabled flag
    # because it reads `css`, whose import is commented out at the top of the
    # file - confirm against the original layout.
    GiveFeedback = False
    if GiveFeedback:
        with st.expander("Give your feedback ๐", expanded=False):
            feedback = st.radio("Step 8: Give your feedback", ("๐ Upvote", "๐ Downvote"))
            if feedback == "๐ Upvote":
                st.write("You upvoted ๐. Thank you for your feedback!")
            else:
                st.write("You downvoted ๐. Thank you for your feedback!")

        load_dotenv()
        st.write(css, unsafe_allow_html=True)
        st.header("Chat with documents :books:")
        user_question = st.text_input("Ask a question about your documents:")
        if user_question:
            process_user_input(user_question)
        with st.sidebar:
            st.subheader("Your documents")
            docs = st.file_uploader("import documents", accept_multiple_files=True)
            with st.spinner("Processing"):
                raw = pdf2txt(docs)
                if len(raw) > 0:
                    length = str(len(raw))
                    text_chunks = txt2chunks(raw)
                    vectorstore = vector_store(text_chunks)
                    st.session_state.conversation = get_chain(vectorstore)
                    st.markdown('# AI Search Index of Length:' + length + ' Created.')  # add timing
                    filename = generate_filename(raw, 'txt')
                    create_file(filename, raw, '', should_save)

    # Display instructions and handle query parameters
    st.markdown('\n### Mixable AI ๐๐๐\n')
    try:
        query_params = st.query_params
        # Values are plain strings, so the fallback must be a string as well;
        # a list fallback cannot be concatenated into the heading below.
        glossary_query = query_params.get('q') or query_params.get('query') or ''
        if glossary_query:
            st.markdown('# Running query: ' + glossary_query)
            search_glossary(glossary_query)
        else:
            st.markdown('No glossary lookup')
    except Exception:
        st.markdown('No glossary lookup')

    # Display the glossary grid
    st.title("Card Games Glossary ๐ฒ")
    display_glossary_grid(roleplaying_glossary)
    st.title("๐ฒ๐บ๏ธ Card Game Universe")
    st.markdown("## Explore the vast universes of Dungeons and Dragons, Call of Cthulhu, GURPS, and more through interactive storytelling and encyclopedic knowledge.๐ ")
    display_buttons_with_scores()
    display_images_and_wikipedia_summaries()

    # Example: Using query parameters to navigate or trigger functionalities
    if 'action' in st.query_params:
        # st.query_params is a mapping (not callable) and each value is
        # already a string, so indexing with [0] would keep one character.
        action = st.query_params['action']
        if action == 'show_message':
            st.success("Showing a message because 'action=show_message' was found in the URL.")
        elif action == 'clear':
            clear_query_params()
            st.experimental_rerun()

    # Handling repeated keys
    if 'multi' in st.query_params:
        multi_values = get_all_query_params('multi')
        st.write("Values for 'multi':", multi_values)

    # Manual entry for demonstration
    st.write("Enter query parameters in the URL like this: ?action=show_message&multi=1&multi=2")
    if 'query' in st.query_params:
        content_query = st.query_params['query']  # full value, not its first character
        # Display content or image based on the query
        display_content_or_image(content_query)

    # Add a clear query parameters button for convenience
    if st.button("Clear Query Parameters", key='ClearQueryParams'):
        # Actually clear the browser URL's query parameters; a bare reference
        # to the setter function does nothing.
        st.query_params.clear()
        st.experimental_rerun()
# 18. Run AI Pipeline | |
# Script entry point: run the voice-recording pipeline first, then render
# the rest of the page.
if __name__ == "__main__":
    whisper_main()
    main()