Spaces:
Running
Running
fix errors
Browse files
common.py
CHANGED
@@ -104,16 +104,6 @@ def dedup_pairs_bands():
|
|
104 |
}
|
105 |
).to_html(index=False, border=0)
|
106 |
|
107 |
-
# Get the HTML table
|
108 |
-
table_html_data = dedup_pairs_bands()
|
109 |
-
|
110 |
-
# Wrap the table in a Div for styling
|
111 |
-
table_div_data = Div(
|
112 |
-
text=table_html_data,
|
113 |
-
style="display: flex; justify-content: center; align-items: center; width: 100%; max-width: 100%; height: auto; overflow-x: auto;"
|
114 |
-
)
|
115 |
-
|
116 |
-
|
117 |
def dup_docs_count_graph():
|
118 |
dup_docs_count = {
|
119 |
"80": 382164413,
|
@@ -288,7 +278,7 @@ pii_table = pd.DataFrame(
|
|
288 |
)
|
289 |
|
290 |
table_html_pii = pii_table.to_html(index=False, border=0)
|
291 |
-
table_div_pii = Div(NotStr(table_html_pii), style="
|
292 |
|
293 |
global_div = Div(
|
294 |
Section(
|
@@ -374,7 +364,7 @@ global_div = Div(
|
|
374 |
P(
|
375 |
"There is a high chance that duplicates from different bands will have the same pairs in the same horizontal partition. Performing the Bloom filter step reduces the number of pairs by nearly ninefold."
|
376 |
),
|
377 |
-
Div(NotStr(dedup_pairs_bands()), style="
|
378 |
P(
|
379 |
"The resulting unique pairs are then used to identify clusters of near-duplicates by finding connected components in a graph, where the vertices represent documents and the edges represent matches."
|
380 |
),
|
|
|
104 |
}
|
105 |
).to_html(index=False, border=0)
|
106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
def dup_docs_count_graph():
|
108 |
dup_docs_count = {
|
109 |
"80": 382164413,
|
|
|
278 |
)
|
279 |
|
280 |
table_html_pii = pii_table.to_html(index=False, border=0)
|
281 |
+
table_div_pii = Div(NotStr(table_html_pii), style="display: flex; justify-content: center; align-items: center; width: 100%; max-width: 100%; height: auto; overflow-x: auto;")
|
282 |
|
283 |
global_div = Div(
|
284 |
Section(
|
|
|
364 |
P(
|
365 |
"There is a high chance that duplicates from different bands will have the same pairs in the same horizontal partition. Performing the Bloom filter step reduces the number of pairs by nearly ninefold."
|
366 |
),
|
367 |
+
Div(NotStr(dedup_pairs_bands()), style="display: flex; justify-content: center; align-items: center; width: 100%; max-width: 100%; height: auto; overflow-x: auto;"),
|
368 |
P(
|
369 |
"The resulting unique pairs are then used to identify clusters of near-duplicates by finding connected components in a graph, where the vertices represent documents and the edges represent matches."
|
370 |
),
|