Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,10 @@ from urllib.parse import quote, urlencode
|
|
10 |
import gradio as gr
|
11 |
from bs4 import BeautifulSoup
|
12 |
import io
|
|
|
|
|
|
|
|
|
13 |
|
14 |
# Configure logging
|
15 |
logging.basicConfig(level=logging.INFO,
|
@@ -280,14 +284,14 @@ class PaperDownloader:
|
|
280 |
bib_content = f.read()
|
281 |
except Exception as e:
|
282 |
logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
|
283 |
-
return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}"
|
284 |
|
285 |
# Parse BibTeX data
|
286 |
try:
|
287 |
bib_database = bibtexparser.loads(bib_content)
|
288 |
except Exception as e:
|
289 |
logger.error(f"Error parsing BibTeX data: {e}")
|
290 |
-
return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}"
|
291 |
|
292 |
# Extract DOIs
|
293 |
dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
|
@@ -334,6 +338,39 @@ class PaperDownloader:
|
|
334 |
|
335 |
return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
|
336 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
337 |
|
338 |
def create_gradio_interface():
|
339 |
"""Create Gradio interface for Paper Downloader"""
|
@@ -355,7 +392,13 @@ def create_gradio_interface():
|
|
355 |
return zip_path, downloaded_dois, failed_dois, None
|
356 |
else:
|
357 |
return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
|
358 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
359 |
|
360 |
# Gradio Interface
|
361 |
interface = gr.Interface(
|
@@ -367,9 +410,26 @@ def create_gradio_interface():
|
|
367 |
],
|
368 |
outputs=[
|
369 |
gr.File(label="Download Papers (ZIP) or Single PDF"),
|
370 |
-
|
371 |
-
|
372 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
373 |
],
|
374 |
title="🔬 Academic Paper Batch Downloader",
|
375 |
description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",
|
@@ -381,20 +441,50 @@ def create_gradio_interface():
|
|
381 |
],
|
382 |
css="""
|
383 |
.gradio-container {
|
384 |
-
background-color:
|
385 |
}
|
386 |
.gr-interface {
|
387 |
max-width: 800px;
|
388 |
margin: 0 auto;
|
389 |
}
|
390 |
.gr-box {
|
391 |
-
background-color:
|
392 |
border-radius: 10px;
|
393 |
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
394 |
}
|
|
|
|
|
|
|
395 |
""",
|
396 |
-
cache_examples = False
|
397 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
|
399 |
return interface
|
400 |
|
|
|
10 |
import gradio as gr
|
11 |
from bs4 import BeautifulSoup
|
12 |
import io
|
13 |
+
from docx import Document
|
14 |
+
from docx.shared import Inches
|
15 |
+
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
16 |
+
|
17 |
|
18 |
# Configure logging
|
19 |
logging.basicConfig(level=logging.INFO,
|
|
|
284 |
bib_content = f.read()
|
285 |
except Exception as e:
|
286 |
logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
|
287 |
+
return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", None
|
288 |
|
289 |
# Parse BibTeX data
|
290 |
try:
|
291 |
bib_database = bibtexparser.loads(bib_content)
|
292 |
except Exception as e:
|
293 |
logger.error(f"Error parsing BibTeX data: {e}")
|
294 |
+
return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}", None
|
295 |
|
296 |
# Extract DOIs
|
297 |
dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
|
|
|
338 |
|
339 |
return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
|
340 |
|
341 |
+
def create_report_docx(self, downloaded_dois, failed_dois):
    """Create a Word document report of downloaded and failed DOIs.

    Args:
        downloaded_dois: Newline-separated string of entries that were
            downloaded successfully. A falsy value (``None`` or ``""``)
            means there are none.
        failed_dois: Newline-separated string of entries that failed to
            download. A falsy value means there are none.

    Returns:
        Path (str) of the saved ``doi_report.docx`` file. The file is
        written into a freshly created temporary directory so that
        concurrent requests never overwrite each other's report.
    """
    # Local imports keep this method self-contained with respect to the
    # module's top-level import block.
    import os
    import tempfile

    document = Document()

    # Centered, bold title line.
    title_paragraph = document.add_paragraph()
    title_run = title_paragraph.add_run("DOI Download Report")
    title_run.bold = True
    title_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Both sections share the same layout: a level-2 heading followed by
    # one paragraph per entry, or a placeholder when there is nothing to list.
    sections = (
        ('Downloaded DOIs', downloaded_dois,
         "No DOIs were successfully downloaded."),
        ('Failed DOIs', failed_dois,
         "No DOIs failed to download."),
    )
    for heading, raw_text, empty_message in sections:
        document.add_heading(heading, level=2)
        # Drop blank lines so input such as "\n" is treated as empty
        # instead of producing a section with no content at all.
        entries = [
            line.strip()
            for line in (raw_text or "").splitlines()
            if line.strip()
        ]
        if entries:
            for entry in entries:
                document.add_paragraph(entry)
        else:
            document.add_paragraph(empty_message)

    # A unique directory per report avoids clobbering a shared
    # "doi_report.docx" in the working directory while keeping a stable,
    # user-friendly file name for the download.
    report_dir = tempfile.mkdtemp(prefix="doi_report_")
    report_path = os.path.join(report_dir, "doi_report.docx")
    document.save(report_path)
    return report_path
|
374 |
|
375 |
def create_gradio_interface():
|
376 |
"""Create Gradio interface for Paper Downloader"""
|
|
|
392 |
return zip_path, downloaded_dois, failed_dois, None
|
393 |
else:
|
394 |
return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
|
395 |
+
|
396 |
+
def create_report(downloaded_dois, failed_dois):
    """Return the path of a generated DOCX report, or None if both inputs are empty."""
    # Guard clause: nothing to report when neither list has content.
    if not (downloaded_dois or failed_dois):
        return None
    return downloader.create_report_docx(downloaded_dois, failed_dois)
|
402 |
|
403 |
# Gradio Interface
|
404 |
interface = gr.Interface(
|
|
|
410 |
],
|
411 |
outputs=[
|
412 |
gr.File(label="Download Papers (ZIP) or Single PDF"),
|
413 |
+
gr.HTML(label="""
|
414 |
+
<div style='padding-bottom: 5px; font-weight: bold;'>
|
415 |
+
Enter Single DOI
|
416 |
+
</div>
|
417 |
+
<div style='border: 1px solid #ddd; padding: 5px; border-radius: 5px;'>
|
418 |
+
<div style='padding-bottom: 5px; font-weight: bold;'>
|
419 |
+
Downloaded DOIs
|
420 |
+
</div>
|
421 |
+
<div id="downloaded-dois"></div>
|
422 |
+
</div>
|
423 |
+
"""),
|
424 |
+
gr.HTML(label="""
|
425 |
+
<div style='border: 1px solid #ddd; padding: 5px; border-radius: 5px;'>
|
426 |
+
<div style='padding-bottom: 5px; font-weight: bold;'>
|
427 |
+
Failed DOIs
|
428 |
+
</div>
|
429 |
+
<div id="failed-dois"></div>
|
430 |
+
</div>
|
431 |
+
"""),
|
432 |
+
gr.File(label="Downloaded Single PDF"),
|
433 |
],
|
434 |
title="🔬 Academic Paper Batch Downloader",
|
435 |
description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",
|
|
|
441 |
],
|
442 |
css="""
|
443 |
.gradio-container {
|
444 |
+
background-color: black;
|
445 |
}
|
446 |
.gr-interface {
|
447 |
max-width: 800px;
|
448 |
margin: 0 auto;
|
449 |
}
|
450 |
.gr-box {
|
451 |
+
background-color: black;
|
452 |
border-radius: 10px;
|
453 |
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
454 |
}
|
455 |
+
.output-text a {
|
456 |
+
color: #007bff; /* Blue color for hyperlinks */
|
457 |
+
}
|
458 |
""",
|
459 |
+
cache_examples = False,
|
460 |
)
|
461 |
+
|
462 |
+
# Add Javascript to update HTML
|
463 |
+
interface.load = """
|
464 |
+
function(downloaded_dois, failed_dois){
|
465 |
+
let downloaded_html = '<ul>';
|
466 |
+
downloaded_dois.split('\\n').filter(Boolean).forEach(doi => {
|
467 |
+
downloaded_html += '<li>' + doi + '</li>';
|
468 |
+
});
|
469 |
+
downloaded_html += '</ul>';
|
470 |
+
document.querySelector("#downloaded-dois").innerHTML = downloaded_html;
|
471 |
+
|
472 |
+
let failed_html = '<ul>';
|
473 |
+
failed_dois.split('\\n').filter(Boolean).forEach(doi => {
|
474 |
+
failed_html += '<li>' + doi + '</li>';
|
475 |
+
});
|
476 |
+
failed_html += '</ul>';
|
477 |
+
document.querySelector("#failed-dois").innerHTML = failed_html;
|
478 |
+
return [downloaded_html, failed_html];
|
479 |
+
|
480 |
+
}
|
481 |
+
"""
|
482 |
+
|
483 |
+
# Add the report button
|
484 |
+
with gr.Row():
|
485 |
+
report_button = gr.Button("Create Report")
|
486 |
+
report_output = gr.File(label="Download Report")
|
487 |
+
report_button.click(create_report, inputs = [interface.outputs[1],interface.outputs[2]], outputs=report_output)
|
488 |
|
489 |
return interface
|
490 |
|