import re
def make_pairs(lst):
    """Group consecutive items of an even-length list into tuple pairs.

    Args:
        lst: Indexable sequence holding an even number of items.

    Returns:
        A list of 2-tuples: ``[(lst[0], lst[1]), (lst[2], lst[3]), ...]``.
        An empty input yields an empty list.

    Raises:
        IndexError: If ``lst`` holds an odd number of items, since the last
            item would have no partner.
    """
    if len(lst) % 2:
        # Same exception type the unchecked ``lst[i + 1]`` lookup raises, but
        # raised up front with a message that names the actual cause.
        raise IndexError(
            f"make_pairs expects an even number of items, got {len(lst)}"
        )
    return [(lst[i], lst[i + 1]) for i in range(0, len(lst), 2)]
def serialize_docs(docs):
    """Convert document objects into plain dictionaries.

    Args:
        docs: Iterable of objects exposing ``page_content`` and ``metadata``
            attributes.

    Returns:
        A list with one dict per document, in input order, holding the keys
        ``"page_content"`` and ``"metadata"``. The metadata value is the same
        object the document holds, not a copy.
    """
    return [
        {"page_content": doc.page_content, "metadata": doc.metadata}
        for doc in docs
    ]
def parse_output_llm_with_sources(output):
    """Replace bracketed ``[Doc N]`` citations in LLM output with their numbers.

    A citation is a bracketed group such as ``[Doc 1]`` or ``[Doc 1, Doc 2]``.
    Each group is replaced by the document numbers it lists, rendered one
    after another; every other piece of text is returned unchanged.

    Args:
        output: Text produced by the LLM.

    Returns:
        ``output`` with every citation group replaced by its rendered numbers.
    """

    def _render_citation(match):
        # Reduce the captured "Doc 1, Doc 2" to the bare numbers "1", "2".
        numbers = [
            ref.lower().replace("doc", "").strip()
            for ref in match.group(1).split(",")
        ]
        # NOTE(review): this template emits only the bare number; presumably
        # per-reference markup is meant to wrap it - confirm.
        return "".join(f"""{number}""" for number in numbers)

    # Substitute on the regex match itself rather than splitting and testing
    # each piece with startswith("Doc"): ordinary text that merely begins
    # with "Doc" (e.g. "Doctors say ...") must not be treated as a citation.
    return re.sub(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', _render_citation, output)
from collections import defaultdict
def generate_html_graphs(graphs):
    """Build one HTML string that groups graph embeds by category.

    Args:
        graphs: Iterable of mappings, each providing
            ``graph['metadata']['category']`` and ``graph['embedding']``.
            Each embedding is a string appended verbatim to the output.

    Returns:
        The concatenated string: a header, one (currently empty) button
        fragment per category, then one section per category containing that
        category's embeds. Categories appear in the order they are first seen
        in ``graphs``.

    NOTE(review): the string templates in this function look like they have
    lost their markup - the per-category button template is empty, and
    ``category`` / ``active_class`` are computed but never interpolated.
    Confirm against the intended page template before relying on the output.
    """
    # Organize graphs by category
    categories = defaultdict(list)
    for graph in graphs:
        categories[graph['metadata']['category']].append(graph['embedding'])

    # Collect fragments and join once instead of repeated string +=.
    pieces = ['\nGraphs by Category\n']

    # Add buttons for each category
    for i, category in enumerate(categories):
        # The first category is the one marked active.
        active_class = 'active' if i == 0 else ''
        pieces.append(f'')
    pieces.append('\n')

    # Add content for each category
    for i, (category, embeds) in enumerate(categories.items()):
        active_class = 'active' if i == 0 else ''
        pieces.append(f'\n')
        pieces.extend(embeds)
        pieces.append('\n')

    pieces.append('\n')
    return ''.join(pieces)
def make_html_source(source,i):
meta = source.metadata
# content = source.page_content.split(":",1)[1].strip()
content = source.page_content.strip()
toc_levels = []
for j in range(2):
level = meta[f"toc_level{j}"]
if level != "N/A":
toc_levels.append(level)
else:
break
toc_levels = " > ".join(toc_levels)
if len(toc_levels) > 0:
name = f"{toc_levels} {meta['name']}"
else:
name = meta['name']
score = meta['reranking_score']
if score > 0.8:
color = "score-green"
elif score > 0.4:
color = "score-orange"
else:
color = "score-red"
relevancy_score = f"