|
|
|
import re |
|
|
|
def make_pairs(lst):
    """Group consecutive items of an even-length list into tuple pairs.

    ``[a, b, c, d]`` becomes ``[(a, b), (c, d)]``.

    Args:
        lst: A sequence with an even number of items.

    Returns:
        A list of 2-tuples, in the original order.

    Raises:
        IndexError: If ``lst`` has an odd number of items.
    """
    if len(lst) % 2:
        # Fail before building anything, with a message that names the
        # problem. IndexError is kept because that is what indexing past the
        # end of a list raises for the unpaired trailing item.
        raise IndexError(
            f"make_pairs expects an even number of items, got {len(lst)}"
        )
    return [(lst[i], lst[i + 1]) for i in range(0, len(lst), 2)]
|
|
|
|
|
def serialize_docs(docs):
    """Convert document objects into plain dictionaries.

    Each returned dict holds the document's ``page_content`` and ``metadata``
    attributes under keys of the same name. The metadata object is referenced,
    not copied.

    Args:
        docs: Iterable of objects exposing ``page_content`` and ``metadata``.

    Returns:
        A list with one dict per document, in input order.
    """
    return [
        {"page_content": document.page_content, "metadata": document.metadata}
        for document in docs
    ]
|
|
|
|
|
|
|
def parse_output_llm_with_sources(output):
    """Turn ``[Doc N]`` citations in a text into HTML anchor links.

    Bracketed citations such as ``[Doc 1]`` or ``[Doc 1, Doc 2]`` are replaced
    by one superscript link per cited document, each pointing at the
    ``#docN`` fragment. All other text is returned unchanged.

    Args:
        output: Text to scan for citations.

    Returns:
        The text with every citation replaced by its link markup.
    """
    # With a single capturing group, re.split alternates plain text (even
    # indices) and captured citation bodies (odd indices). Deciding by
    # position rather than by a "Doc" prefix keeps ordinary text that merely
    # starts with "Doc" (e.g. "Documents ...") from being rewritten as a link.
    segments = re.split(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', output)

    rendered = []
    for index, segment in enumerate(segments):
        if index % 2 == 0:
            rendered.append(segment)
            continue

        # "Doc 1, Doc 2" -> ["1", "2"]
        numbers = [
            ref.lower().replace("doc", "").strip() for ref in segment.split(",")
        ]
        links = [
            f"""<a href="#doc{number}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{number}</sup></span></a>"""
            for number in numbers
        ]
        rendered.append("".join(links))

    return "".join(rendered)
|
|
|
|
|
def make_html_source(source,i):
    """Build the HTML card for one retrieved source.

    Text chunks (``metadata["chunk_type"] == "text"``) get a "Doc" card with an
    ``id="doc{i}"`` attribute; every other chunk type gets an "Image" card
    labelled as an AI-generated description.

    Args:
        source: Object exposing ``page_content`` (str) and ``metadata`` (a
            mapping with ``toc_level0``, ``toc_level1``, ``name``,
            ``reranking_score``, ``chunk_type``, ``short_name``,
            ``page_number``, ``url`` and, for non-text chunks,
            ``figure_code``).
        i: Number shown in the card heading (and used in the text card id).

    Returns:
        The card as an HTML string.
    """
    metadata = source.metadata
    body = source.page_content.strip()

    # Table-of-contents levels 0 and 1, stopping at the first "N/A".
    breadcrumb = []
    for depth in range(2):
        entry = metadata[f"toc_level{depth}"]
        if entry == "N/A":
            break
        breadcrumb.append(entry)
    breadcrumb = " > ".join(breadcrumb)

    if breadcrumb:
        label = f"<b>{breadcrumb}</b><br/>{metadata['name']}"
    else:
        label = metadata['name']

    score = metadata['reranking_score']
    score_class = (
        "score-green" if score > 0.8
        else "score-orange" if score > 0.5
        else "score-red"
    )
    relevancy_score = f"<p class=relevancy-score>Relevancy score: <span class='{score_class}'>{score:.1%}</span></p>"

    # NOTE(review): the "π" glyph labelled "Open PDF" may be a mis-encoded
    # link icon; kept as found, confirm against the intended UI.
    if metadata["chunk_type"] == "text":
        return f"""
        <div class="card" id="doc{i}">
            <div class="card-content">
                <h2>Doc {i} - {metadata['short_name']} - Page {int(metadata['page_number'])}</h2>
                <p>{body}</p>
                {relevancy_score}
            </div>
            <div class="card-footer">
                <span>{label}</span>
                <a href="{metadata['url']}#page={int(metadata['page_number'])}" target="_blank" class="pdf-link">
                    <span role="img" aria-label="Open PDF">π</span>
                </a>
            </div>
        </div>
        """

    # Non-text chunks: prefix the title with the figure code when one exists.
    if metadata["figure_code"] == "N/A":
        title = f"{metadata['short_name']}"
    else:
        title = f"{metadata['figure_code']} - {metadata['short_name']}"

    return f"""
        <div class="card card-image">
            <div class="card-content">
                <h2>Image {i} - {title} - Page {int(metadata['page_number'])}</h2>
                <p class='ai-generated'>AI-generated description</p>
                <p>{body}</p>

                {relevancy_score}
            </div>
            <div class="card-footer">
                <span>{label}</span>
                <a href="{metadata['url']}#page={int(metadata['page_number'])}" target="_blank" class="pdf-link">
                    <span role="img" aria-label="Open PDF">π</span>
                </a>
            </div>
        </div>
        """
|
|
|
|
|
def make_html_df(df,i):
    """Build the HTML card for entry ``i`` of a table of publications.

    Args:
        df: Column-indexable table (``df[column][i]``) providing the columns
            ``title``, ``abstract``, ``doi`` and ``publication_year``.
        i: Index of the entry to render. The heading shows ``i + 1``.

    Returns:
        The card as an HTML string.
    """
    title = df['title'][i]
    content = df['abstract'][i]
    url = df['doi'][i]
    publication_date = df['publication_year'][i]

    # The footer link was previously left unclosed and empty; an unclosed <a>
    # makes the markup malformed and can leak the link into following content.
    # It is now closed and given the same icon span the other cards use.
    # NOTE(review): the "π" glyph may be a mis-encoded link icon - confirm.
    # NOTE(review): the element id uses ``i`` while the heading shows
    # ``i + 1``; confirm which number in-page "#docN" links should target.
    card = f"""
    <div class="card" id="doc{i}">
        <div class="card-content">
            <h2>Doc {i+1} - {title}</h2>
            <p>{content}</p>
        </div>
        <div class="card-footer">
            <span>{publication_date}</span>
            <a href="{url}" target="_blank" class="pdf-link">
                <span role="img" aria-label="Open PDF">π</span>
            </a>
        </div>
    </div>
    """

    return card
|
|
|
|
|
def make_html_figure_sources(source,i,img_str):
    """Build the HTML card for a figure source with its image embedded.

    Args:
        source: Object exposing ``page_content`` (str) and ``metadata`` (a
            mapping with ``reranking_score``, ``name``, ``figure_code``,
            ``short_name``, ``page_number`` and ``url``).
        i: Number shown in the card heading.
        img_str: Image payload inserted into a ``data:image/png;base64`` URI
            (so presumably base64-encoded PNG data).

    Returns:
        The card as an HTML string.
    """
    meta = source.metadata
    content = source.page_content.strip()

    score = meta['reranking_score']
    if score > 0.8:
        color = "score-green"
    elif score > 0.5:
        color = "score-orange"
    else:
        color = "score-red"

    # The footer label is always the plain source name. An earlier branch
    # that prefixed table-of-contents levels tested a list that was always
    # empty, so it could never run and has been removed.
    name = meta['name']

    relevancy_score = f"<p class=relevancy-score>Relevancy score: <span class='{color}'>{score:.1%}</span></p>"

    # Prefix the title with the figure code when one exists.
    if meta["figure_code"] != "N/A":
        title = f"{meta['figure_code']} - {meta['short_name']}"
    else:
        title = f"{meta['short_name']}"

    # NOTE(review): the "π" glyph labelled "Open PDF" may be a mis-encoded
    # link icon; kept as found, confirm against the intended UI.
    card = f"""
    <div class="card card-image">
        <div class="card-content">
            <h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
            <img src="data:image/png;base64, { img_str }" alt="Alt text" />
            <p class='ai-generated'>AI-generated description</p>

            <p>{content}</p>

            {relevancy_score}
        </div>
        <div class="card-footer">
            <span>{name}</span>
            <a href="{meta['url']}#page={int(meta['page_number'])}" target="_blank" class="pdf-link">
                <span role="img" aria-label="Open PDF">π</span>
            </a>
        </div>
    </div>
    """
    return card
|
|
|
|
|
|
|
def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
    """Render the HTML label for a tool entry with a status indicator.

    Args:
        tool_name: Text shown next to the indicator.
        description: Accepted but not used in the generated markup.
        checked: When truthy a checkmark is shown, otherwise a loader span.
        elem_id: Value of the label's ``for`` attribute.

    Returns:
        The dropdown markup as an HTML string.
    """
    indicator = (
        "<span class='checkmark'>✓</span>"
        if checked
        else "<span class='loader'></span>"
    )

    return f"""
    <div class="dropdown">
        <label for="{elem_id}" class="dropdown-toggle">
            {indicator}
            {tool_name}
        </label>
    </div>
    """
|
|