import re

# Matches one bracketed citation group such as "[Doc 1]" or "[Doc 1, Doc 2]".
# The single capturing group makes re.split() return the text found between
# the brackets at every odd index of its result.
_DOC_REFERENCE_PATTERN = re.compile(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]')


def make_pairs(lst):
    """Group consecutive items of an even-length list into 2-tuples.

    Args:
        lst: Indexable sequence holding an even number of items.

    Returns:
        A list ``[(lst[0], lst[1]), (lst[2], lst[3]), ...]``.

    Raises:
        IndexError: If ``lst`` holds an odd number of items.
    """
    return [(lst[i], lst[i + 1]) for i in range(0, len(lst), 2)]


def serialize_docs(docs):
    """Convert document objects into plain dictionaries.

    Args:
        docs: Iterable of objects exposing ``page_content`` and ``metadata``
            attributes.

    Returns:
        A list with one ``{"page_content": ..., "metadata": ...}`` dict per
        document, in the same order as ``docs``.
    """
    return [
        {"page_content": doc.page_content, "metadata": doc.metadata}
        for doc in docs
    ]


def parse_output_llm_with_sources(output):
    """Replace ``[Doc X]`` citation groups in ``output`` by their numbers.

    A group such as ``[Doc 1, Doc 2]`` is replaced by the concatenation of
    the referenced numbers (``"12"``); all other text is left untouched.

    Args:
        output: Text that may contain bracketed ``Doc`` references.

    Returns:
        The text with every citation group replaced.
    """
    pieces = _DOC_REFERENCE_PATTERN.split(output)
    rendered = []
    for index, piece in enumerate(pieces):
        # Even positions are ordinary text. Deciding by position rather than
        # by a "Doc" prefix keeps plain text that merely starts with "Doc"
        # (e.g. "Documents ...") from being rewritten as a reference.
        if index % 2 == 0:
            rendered.append(piece)
            continue
        numbers = [
            reference.lower().replace("doc", "").strip()
            for reference in piece.split(",")
        ]
        rendered.append("".join(numbers))
    return "".join(rendered)


def _relevancy_block(score):
    """Return the relevancy paragraph shared by the source cards."""
    return f"\n\nRelevancy score: {score:.1%}\n\n"


def _figure_title(meta):
    """Return the figure title, prefixed by the figure code when available."""
    if meta["figure_code"] != "N/A":
        return f"{meta['figure_code']} - {meta['short_name']}"
    return f"{meta['short_name']}"


def make_html_source(source, i):
    """Build the card describing one retrieved source.

    Args:
        source: Object exposing ``metadata`` (a mapping) and ``page_content``
            (a string). The metadata keys read are ``reranking_score``,
            ``chunk_type``, ``short_name`` and ``page_number``, plus
            ``figure_code`` when ``chunk_type`` is not ``"text"``.
        i: Number displayed for the source in the card header.

    Returns:
        The card text: a "Doc" card for text chunks, an "Image" card with an
        "AI-generated description" heading for every other chunk type.
    """
    meta = source.metadata
    content = source.page_content.strip()
    relevancy_score = _relevancy_block(meta["reranking_score"])

    if meta["chunk_type"] == "text":
        return (
            f"\n\nDoc {i} - {meta['short_name']}"
            f" - Page {int(meta['page_number'])}\n\n"
            f"{content}\n\n"
            f"{relevancy_score}\n"
        )

    title = _figure_title(meta)
    return (
        f"\n\nImage {i} - {title} - Page {int(meta['page_number'])}\n\n"
        "AI-generated description\n\n"
        f"{content}\n\n"
        f"{relevancy_score}\n"
    )


def make_html_df(df, i):
    """Build the card for row ``i`` of a table of papers.

    Args:
        df: Table indexable as ``df[column][i]``; the ``title`` and
            ``abstract`` columns are read.
        i: Zero-based row index; the card header shows ``i + 1``.

    Returns:
        The card text containing the title and the abstract.
    """
    title = df["title"][i]
    content = df["abstract"][i]
    return f"\n\nDoc {i+1} - {title}\n\n{content}\n\n"


def make_html_figure_sources(source, i, img_str):
    """Build the card describing one retrieved figure.

    Args:
        source: Object exposing ``metadata`` (a mapping) and ``page_content``
            (a string). The metadata keys read are ``reranking_score``,
            ``figure_code``, ``short_name`` and ``page_number``.
        i: Number displayed for the figure in the card header.
        img_str: Accepted for interface compatibility; not used by the card.
            NOTE(review): presumably the encoded image - confirm whether it
            should be part of the output.

    Returns:
        The card text with "Alt text" and "AI-generated description"
        headings followed by the content and the relevancy paragraph.
    """
    meta = source.metadata
    content = source.page_content.strip()
    relevancy_score = _relevancy_block(meta["reranking_score"])
    title = _figure_title(meta)
    return (
        f"\n\nImage {i} - {title} - Page {int(meta['page_number'])}\n\n"
        "Alt text\n\n"
        "AI-generated description\n\n"
        f"{content}\n\n"
        f"{relevancy_score}\n"
    )


def make_toolbox(tool_name, description="", checked=False, elem_id="toggle"):
    """Return the toolbox template.

    NOTE(review): none of the parameters influence the result and the
    template is a single space - confirm whether markup is missing here.

    Args:
        tool_name: Currently unused.
        description: Currently unused.
        checked: Currently unused.
        elem_id: Currently unused.

    Returns:
        The string ``" "``.
    """
    return " "