Spaces:
Runtime error
Runtime error
Eric Botti
committed on
Commit
·
8aa24e3
1
Parent(s):
ea15890
timestamps and markdown formatting
Browse files
.gitignore
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
venv
|
2 |
transcript.txt
|
3 |
notes.txt
|
|
|
4 |
config.ini
|
|
|
1 |
venv
|
2 |
transcript.txt
|
3 |
notes.txt
|
4 |
+
notes.md
|
5 |
config.ini
|
README.md
CHANGED
@@ -1,8 +1,12 @@
|
|
1 |
-
# Transcript
|
2 |
|
3 |
## Description
|
4 |
|
5 |
-
A python script designed to create relevant notes from a transcript of a meeting.
|
|
|
|
|
|
|
|
|
6 |
|
7 |
You will need an OpenAI API key to use this project.
|
8 |
|
|
|
1 |
+
# Google Meet Transcript AI Notes
|
2 |
|
3 |
## Description
|
4 |
|
5 |
+
A Python script designed to create relevant notes from a transcript of a Google Meet meeting. Currently, with the proper
|
6 |
+
options configured, Google Meet will automatically create an AI transcript of your meetings which is saved to Google Drive.
|
7 |
+
Often it is more useful to see just the notes from a meeting rather than the full transcript. This script uses OpenAI
|
8 |
+
prompts to create a detailed summary of the meeting from the transcript, as if it were taking notes in real time during
|
9 |
+
the meeting.
|
10 |
|
11 |
You will need an OpenAI API key to use this project.
|
12 |
|
main.py
CHANGED
@@ -2,10 +2,10 @@
|
|
2 |
import configparser
|
3 |
import os
|
4 |
import time
|
|
|
5 |
# 3rd party
|
6 |
from langchain.llms import OpenAI
|
7 |
from langchain import LLMChain
|
8 |
-
from langchain.document_loaders import UnstructuredFileLoader
|
9 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
10 |
from langchain import PromptTemplate
|
11 |
|
@@ -13,19 +13,18 @@ from langchain import PromptTemplate
|
|
13 |
config = configparser.ConfigParser()
|
14 |
config.read('config.ini')
|
15 |
|
16 |
-
def summarize_chunks(chunks):
|
17 |
-
number_of_chunks = len(chunks)
|
18 |
-
print(f"Summarizing: {number_of_chunks} chunks")
|
19 |
-
chunk_summaries = []
|
20 |
-
start_time = time.time()
|
21 |
-
for i, chunk in enumerate(chunks, 1):
|
22 |
-
chunk_summaries.append(chain.run(chunk))
|
23 |
-
# info
|
24 |
-
elapsed_time = time.time() - start_time
|
25 |
-
minutes = elapsed_time // 60
|
26 |
-
print(f"Completed Summary {i}/{number_of_chunks}, {minutes:.0f} minutes {elapsed_time - 60 * minutes:.2f} seconds elapsed")
|
27 |
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
|
31 |
if __name__ == '__main__':
|
@@ -37,13 +36,16 @@ if __name__ == '__main__':
|
|
37 |
|
38 |
llm = OpenAI(temperature=0)
|
39 |
|
40 |
-
|
41 |
-
transcript = loader.load()
|
42 |
|
43 |
-
#
|
44 |
-
|
45 |
-
|
46 |
|
|
|
|
|
|
|
|
|
47 |
prompt = PromptTemplate(
|
48 |
template="Write a concise summary of the following: {transcript}",
|
49 |
input_variables=['transcript']
|
@@ -55,9 +57,39 @@ if __name__ == '__main__':
|
|
55 |
verbose=False
|
56 |
)
|
57 |
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
|
63 |
-
f.write(meeting_notes)
|
|
|
2 |
import configparser
|
3 |
import os
|
4 |
import time
|
5 |
+
import re
|
6 |
# 3rd party
|
7 |
from langchain.llms import OpenAI
|
8 |
from langchain import LLMChain
|
|
|
9 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
10 |
from langchain import PromptTemplate
|
11 |
|
|
|
13 |
config = configparser.ConfigParser()
|
14 |
config.read('config.ini')
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
+
def load_transcript(path: str, header_lines: int = 5) -> "tuple[list[str], str]":
    """Read a transcript file and separate its header from its body.

    Google Meet transcripts have a header which we don't want to be
    summarized, so the first ``header_lines`` lines are split off and
    returned separately from the rest of the text.

    Args:
        path: Location of the transcript text file.
        header_lines: Number of leading lines treated as the header.
            Defaults to 5.

    Returns:
        A ``(head, transcript)`` pair. ``head`` is the list of header lines
        with their line endings preserved; ``transcript`` is every remaining
        line joined back into a single string.

    Raises:
        ValueError: If ``header_lines`` is negative.
        OSError: If the file cannot be opened or read.
    """
    if header_lines < 0:
        # A negative count would silently slice from the end of the file.
        raise ValueError(f"header_lines must be non-negative, got {header_lines}")

    # Decode explicitly as UTF-8 rather than relying on the platform default,
    # so attendee names and speech with non-ASCII characters load the same
    # way on every operating system.
    with open(path, 'r', encoding='utf-8') as input_file:
        file_text = input_file.readlines()

    head = file_text[:header_lines]
    transcript = "".join(file_text[header_lines:])

    return head, transcript
|
28 |
|
29 |
|
30 |
if __name__ == '__main__':
|
|
|
36 |
|
37 |
llm = OpenAI(temperature=0)
|
38 |
|
39 |
+
head, transcript = load_transcript(transcript_filepath)
|
|
|
40 |
|
41 |
+
# split the transcript on the 5-min timestamps
|
42 |
+
regex_pattern = r"[0-9]{2}:[0-9]{2}:[0-9]{2}"
|
43 |
+
five_min_chunks = re.split(regex_pattern, transcript)
|
44 |
|
45 |
+
# create a textsplitter to subdivide those chunks into appropriately sized chunks.
|
46 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
|
47 |
+
|
48 |
+
# prompt
|
49 |
prompt = PromptTemplate(
|
50 |
template="Write a concise summary of the following: {transcript}",
|
51 |
input_variables=['transcript']
|
|
|
57 |
verbose=False
|
58 |
)
|
59 |
|
60 |
+
# list the meeting time and the chunks associated with it
|
61 |
+
timestamped_summaries = []
|
62 |
+
|
63 |
+
print(f"Summarizing {len(five_min_chunks)*5} minute meeting")
|
64 |
+
start_time = time.time()
|
65 |
+
# summarize each five-minute chunk of the transcript
|
66 |
+
for i, five_minutes_chunk in enumerate(five_min_chunks):
|
67 |
+
timestamp = time.strftime('%H:%M:%S', time.gmtime(60 * 5 * i))
|
68 |
+
sub_chunks = text_splitter.split_text(five_minutes_chunk)
|
69 |
+
|
70 |
+
summaries = []
|
71 |
+
for j, chunk in enumerate(sub_chunks):
|
72 |
+
summaries.append(chain.run(chunk))
|
73 |
+
print(f"{timestamp}: Chunk {j}/{len(sub_chunks)}")
|
74 |
+
|
75 |
+
timestamped_summaries.append((timestamp, summaries))
|
76 |
+
|
77 |
+
elapsed_time = time.time() - start_time
|
78 |
+
minutes = elapsed_time // 60
|
79 |
+
print(f"Summarized first {5 * (i+1)} minutes of meeting, {minutes:.0f} minutes {elapsed_time - 60 * minutes:.2f} seconds elapsed")
|
80 |
+
|
81 |
+
first_line = re.split(r"[()]", head[0])
|
82 |
|
83 |
+
# Write summaries to file
|
84 |
+
with open(notes_filepath, 'w+') as f:
|
85 |
+
f.write(f"# {first_line[0]}\n")
|
86 |
+
f.write(f"{first_line[1]}\n")
|
87 |
+
f.write("## Attendees\n")
|
88 |
+
f.write(f"{head[2]}\n")
|
89 |
+
f.write('## Meeting Notes\n')
|
90 |
+
for timestamp, summaries in timestamped_summaries:
|
91 |
+
f.write(f"### {timestamp}\n")
|
92 |
+
for summary in summaries:
|
93 |
+
f.write(f"- {summary.strip()}\n")
|
94 |
|
95 |
+
print(f"Export to file {notes_filepath} completed")
|
|
setup.py
CHANGED
@@ -19,8 +19,8 @@ config['REQUIRED'] = {
|
|
19 |
# Optional
|
20 |
config['OPTIONAL'] = {
|
21 |
'transcript-filepath': 'transcript.txt',
|
22 |
-
'notes-filepath': 'notes.
|
23 |
}
|
24 |
|
25 |
with open('config.ini', 'w') as configfile:
|
26 |
-
config.write(configfile)
|
|
|
19 |
# Optional
|
20 |
config['OPTIONAL'] = {
|
21 |
'transcript-filepath': 'transcript.txt',
|
22 |
+
'notes-filepath': 'notes.md'
|
23 |
}
|
24 |
|
25 |
with open('config.ini', 'w') as configfile:
|
26 |
+
config.write(configfile)
|