Spaces:
Running
Running
# generate_transcript.py | |
import pickle | |
from tqdm import tqdm | |
import warnings | |
from groq import Groq | |
import os | |
import re | |
warnings.filterwarnings('ignore') | |
class TranscriptProcessor:
    """
    Generate and rewrite podcast-style transcripts using a Groq-hosted language model.

    The workflow has two stages:
      1. ``generate_transcript`` turns the cleaned source text into a raw
         two-speaker dialogue and pickles it to ``transcript_output_path``.
      2. ``rewrite_transcript`` reloads that dialogue, asks the model to
         rewrite it as a list of ``(speaker, line)`` tuples, and pickles the
         extracted list text to ``tts_output_path``.
    """

    def __init__(self, text_file_path, transcript_output_path, tts_output_path, model_name="llama3-70b-8192"):
        """
        Store the file locations, the model name and the two system prompts.

        Args:
            text_file_path (str): Path to the file containing cleaned PDF text.
            transcript_output_path (str): Where the first-pass transcript is pickled.
            tts_output_path (str): Where the TTS-ready rewrite is pickled.
            model_name (str): Name of the language model to use.
        """
        self.text_file_path = text_file_path
        self.transcript_output_path = transcript_output_path
        self.tts_output_path = tts_output_path
        self.model_name = model_name
        # The prompt bodies are kept flush-left so that no code indentation
        # ends up in the text sent to the model.
        self.transcript_prompt = """
You are the a world-class podcast writer, you have worked as a ghost writer for Joe Rogan, Lex Fridman, Ben Shapiro, Tim Ferris.
We are in an alternate universe where actually you have been writing every line they say and they just stream it into their brains.
You have won multiple podcast awards for your writing.
Keep it extremely engaging, the speakers can get derailed now and then but should discuss the topic.
Remember Speaker 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc
Speaker 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes
Speaker 2: Keeps the conversation on track by asking follow up questions. Gets super excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions
Make sure the tangents speaker 2 provides are quite wild or interesting.
It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy and almost borderline click bait
ALWAYS START YOUR RESPONSE DIRECTLY WITH SPEAKER 1:
DO NOT GIVE EPISODE TITLES SEPERATELY, LET SPEAKER 1 TITLE IT IN HER SPEECH
DO NOT GIVE CHAPTER TITLES
IT SHOULD STRICTLY BE THE DIALOGUES
"""
        self.rewrite_prompt = """
You are an international oscar winnning screenwriter
You have been working with multiple award winning podcasters.
Your job is to use the podcast transcript written below to re-write it for an AI Text-To-Speech Pipeline. A very dumb AI had written this so you have to step up for your kind.
Make it as engaging as possible, Speaker 1 and 2 will be simulated by different voice engines
Remember Speaker 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc
Speaker 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes
Speaker 2: Keeps the conversation on track by asking follow up questions. Gets super excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions
Make sure the tangents speaker 2 provides are quite wild or interesting.
REMEMBER THIS WITH YOUR HEART
It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy and almost borderline click bait
Please re-write to make it as characteristic as possible
START YOUR RESPONSE DIRECTLY WITH SPEAKER 1:
STRICTLY RETURN YOUR RESPONSE AS A LIST OF TUPLES OK?
IT WILL START DIRECTLY WITH THE LIST AND END WITH THE LIST NOTHING ELSE
Example of response:
[
("Speaker 1", "Welcome to our podcast, where we explore the latest advancements in AI and technology. I'm your host, and today we're joined by a renowned expert in the field of AI. We're going to dive into the exciting world of Llama 3.2, the latest release from Meta AI."),
("Speaker 2", "Hi, I'm excited to be here! So, what is Llama 3.2?"),
("Speaker 1", "Ah, great question! Llama 3.2 is an open-source AI model that allows developers to fine-tune, distill, and deploy AI models anywhere. It's a significant update from the previous version, with improved performance, efficiency, and customization options."),
("Speaker 2", "That sounds amazing! What are some of the key features of Llama 3.2?")
]
"""

    def load_text(self):
        """
        Read the cleaned text file, trying several encodings in turn.

        Returns:
            str | None: The file content, or None when the file does not
            exist or cannot be decoded with any of the attempted encodings.
        """
        # NOTE: 'latin-1' maps every byte to a character and therefore never
        # raises UnicodeDecodeError; the entries after it are never reached.
        # The order is kept as-is so decoding results do not change.
        encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
        for encoding in encodings:
            try:
                with open(self.text_file_path, 'r', encoding=encoding) as file:
                    content = file.read()
            except UnicodeDecodeError:
                # Wrong guess for this file; fall through to the next encoding.
                continue
            except FileNotFoundError:
                # A missing file cannot be fixed by another encoding, so stop
                # immediately and report the real cause.
                print(f"Error: File '{self.text_file_path}' not found.")
                return None
            print(f"Successfully read file using {encoding} encoding.")
            return content
        print(f"Error: Could not decode file '{self.text_file_path}' with any common encoding.")
        return None

    def _chat_completion(self, system_prompt, user_content):
        """Send one system + user exchange to the model and return the reply text."""
        client = Groq(
            api_key=os.environ.get("GROQ_API_KEY"),
        )
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ],
            model=self.model_name,
        )
        return chat_completion.choices[0].message.content

    def generate_transcript(self):
        """
        Generate a podcast-style transcript and save it as a pickled file.

        Returns:
            str | None: Path to the file where the transcript is saved, or
            None when the input text could not be loaded.
        """
        input_text = self.load_text()
        if input_text is None:
            return None
        transcript = self._chat_completion(self.transcript_prompt, input_text)
        # Save the transcript as a pickle file
        with open(self.transcript_output_path, 'wb') as f:
            pickle.dump(transcript, f)
        return self.transcript_output_path

    def extract_tuple(self, text):
        """
        Extract the bracketed list portion of a model response.

        The match is greedy: it spans from the first '[' to the last ']' in
        ``text``, across newlines.

        Args:
            text (str | None): Raw model output.

        Returns:
            str | None: The matched text (still a string, not a parsed list),
            or None when ``text`` is empty/None or contains no bracketed span.
        """
        if not text:
            # The model may return no content; re.search would raise on None.
            return None
        match = re.search(r'\[.*\]', text, re.DOTALL)
        return match.group(0) if match else None

    def rewrite_transcript(self):
        """
        Rewrite the stored transcript for TTS and pickle the extracted list text.

        The pickled object is the *string* returned by ``extract_tuple`` (the
        textual form of the list of tuples), not a parsed Python list.

        Returns:
            str | None: Path to the file where the TTS-ready transcript is
            saved, or None when the model response contained no list.
        """
        # Load the initial generated transcript.
        # NOTE(security): pickle.load executes arbitrary code from the file;
        # only point transcript_output_path at files written by this class.
        with open(self.transcript_output_path, 'rb') as file:
            input_transcript = pickle.load(file)
        response = self._chat_completion(self.rewrite_prompt, input_transcript)
        rewritten_transcript = self.extract_tuple(response)
        if rewritten_transcript is None:
            # Do not write a pickled None and report success; signal failure
            # the same way generate_transcript does.
            print(f"Error: No list of tuples found in the model response; nothing written to '{self.tts_output_path}'.")
            return None
        # Save the rewritten transcript as a pickle file
        with open(self.tts_output_path, 'wb') as f:
            pickle.dump(rewritten_transcript, f)
        return self.tts_output_path