Update app.py #1
opened by C2MV

app.py CHANGED
@@ -436,17 +436,17 @@ class PaperDownloader:
                 with open(filepath, 'wb') as f:
                     f.write(pdf_content)
                 logger.info(f"Successfully downloaded: {filename}")
-                return filepath, f'<
+                return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', ""
             else:
                 logger.warning(f"Could not download: {doi}")
-                return None, f"Could not download {doi}", f'<
+                return None, f"Could not download {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
 
         except Exception as e:
             logger.error(f"Error processing {doi}: {e}")
             return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
 
-    def download_multiple_dois(self, dois_text):
-        """Downloads multiple papers from a list of DOIs"""
+    def download_multiple_dois(self, dois_text, progress_callback=None):
+        """Downloads multiple papers from a list of DOIs with progress updates and single copy button"""
         if not dois_text:
             return None, "Error: No DOIs provided", "Error: No DOIs provided"
 
@@ -457,7 +457,9 @@ class PaperDownloader:
         downloaded_files = []
         failed_dois = []
         downloaded_links = []
-
+        total_dois = len(dois)
+
+        for i, doi in enumerate(dois):
             filepath, success_message, fail_message = self.download_single_doi(doi)
             if filepath:
                 # Unique filename for zip
@@ -465,10 +467,14 @@ class PaperDownloader:
                 filepath_unique = os.path.join(self.output_dir, filename)
                 os.rename(filepath, filepath_unique)
                 downloaded_files.append(filepath_unique)
-                downloaded_links.append(f'<
-
+                downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
             else:
-                failed_dois.append(f'<
+                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
+
+            if progress_callback:
+                progress = int(((i + 1) / total_dois) * 100)
+                progress_callback(progress)
+
 
         if downloaded_files:
             zip_filename = 'papers.zip'
@@ -477,9 +483,17 @@ class PaperDownloader:
                     zipf.write(file_path, arcname=os.path.basename(file_path))
             logger.info(f"ZIP file created: {zip_filename}")
 
-
+
+
+
+        # Combine all links into a single string
+        all_links_html = "<br>".join(downloaded_links)
+        copy_button_html = f'<button onclick="copyAllLinks(\'{all_links_html}\')">Copy All Links</button>' if all_links_html else ""
 
-
+
+        return zip_filename if downloaded_files else None, f"{all_links_html} {copy_button_html}", "\n".join(failed_dois)
+
+    def process_bibtex(self, bib_file, progress_callback=None):
         """Process BibTeX file and download papers with multiple strategies"""
         # Read BibTeX file content from the uploaded object
         try:
@@ -504,9 +518,10 @@ class PaperDownloader:
         downloaded_files = []
         failed_dois = []
         downloaded_links = []
+        total_dois = len(dois)
 
         # Download PDFs
-        for doi in
+        for i, doi in enumerate(dois):
             try:
                 # Try to download with multiple methods with retries
                 pdf_content = self.download_with_retry(doi)
@@ -522,14 +537,17 @@ class PaperDownloader:
                         f.write(pdf_content)
 
                     downloaded_files.append(filepath)
-                    downloaded_links.append(f'<
+                    downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
                     logger.info(f"Successfully downloaded: {filename}")
                 else:
-                    failed_dois.append(f'<
-
+                    failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
             except Exception as e:
-                failed_dois.append(f'<
+                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
                 logger.error(f"Error processing {doi}: {e}")
+
+            if progress_callback:
+                progress = int(((i + 1) / total_dois) * 100)
+                progress_callback(progress)
 
         # Create ZIP of downloaded papers
         if downloaded_files:
@@ -538,10 +556,16 @@ class PaperDownloader:
                 for file_path in downloaded_files:
                     zipf.write(file_path, arcname=os.path.basename(file_path))
             logger.info(f"ZIP file created: {zip_filename}")
+
+
+        # Combine all links into a single string
+        all_links_html = "<br>".join(downloaded_links)
+        copy_button_html = f'<button onclick="copyAllLinks(\'{all_links_html}\')">Copy All Links</button>' if all_links_html else ""
 
-        return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
 
-
+        return zip_filename, f"{all_links_html} {copy_button_html}", "\n".join(failed_dois), None
+
+    async def process_bibtex_async(self, bib_file, progress_callback=None):
         """Process BibTeX file and download papers with multiple strategies"""
         # Read BibTeX file content from the uploaded object
         try:
@@ -566,9 +590,10 @@ class PaperDownloader:
         downloaded_files = []
         failed_dois = []
         downloaded_links = []
+        total_dois = len(dois)
 
         # Download PDFs
-        for doi in
+        for i, doi in enumerate(dois):
             try:
                 # Try to download with multiple methods with retries
                 pdf_content = await self.download_with_retry_async(doi)
@@ -584,14 +609,17 @@ class PaperDownloader:
                         f.write(pdf_content)
 
                     downloaded_files.append(filepath)
-                    downloaded_links.append(f'<
+                    downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
                     logger.info(f"Successfully downloaded: {filename}")
                 else:
-                    failed_dois.append(f'<
-
+                    failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
             except Exception as e:
-                failed_dois.append(f'<
+                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
                 logger.error(f"Error processing {doi}: {e}")
+
+            if progress_callback:
+                progress = int(((i + 1) / total_dois) * 100)
+                progress_callback(progress)
 
         # Create ZIP of downloaded papers
         if downloaded_files:
@@ -600,26 +628,34 @@ class PaperDownloader:
                 for file_path in downloaded_files:
                     zipf.write(file_path, arcname=os.path.basename(file_path))
             logger.info(f"ZIP file created: {zip_filename}")
+
+
+        # Combine all links into a single string
+        all_links_html = "<br>".join(downloaded_links)
+        copy_button_html = f'<button onclick="copyAllLinks(\'{all_links_html}\')">Copy All Links</button>' if all_links_html else ""
 
-
+
+        return zip_filename, f"{all_links_html} {copy_button_html}", "\n".join(failed_dois), None
 
 def create_gradio_interface():
     """Create Gradio interface for Paper Downloader"""
     downloader = PaperDownloader()
 
-    async def download_papers(bib_file, doi_input, dois_input):
+    async def download_papers(bib_file, doi_input, dois_input, progress=gr.Progress()):
+        progress_callback = lambda p: progress(p, desc="Downloading Papers")
+
         if bib_file:
            # Check file type
            if not bib_file.name.lower().endswith('.bib'):
                return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
 
-            zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file)
+            zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file, progress_callback)
            return zip_path, downloaded_dois, failed_dois, None
        elif doi_input:
            filepath, message, failed_doi = downloader.download_single_doi(doi_input)
            return None, message, failed_doi, filepath
        elif dois_input:
-            zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input)
+            zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input, progress_callback)
            return zip_path, downloaded_dois, failed_dois, None
        else:
            return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
@@ -650,7 +686,7 @@ def create_gradio_interface():
             <div id="failed-dois"></div>
         </div>
         """),
-
+        gr.File(label="Downloaded Single PDF")
         ],
         title="🔬 Academic Paper Batch Downloader",
         description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",
@@ -700,18 +736,17 @@ def create_gradio_interface():
 
     interface.head = """
     <script>
-
-        const
-
-
+        function copyAllLinks(linksHTML) {
+            const tempElement = document.createElement('div');
+            tempElement.innerHTML = linksHTML;
+            const links = Array.from(tempElement.querySelectorAll('a')).map(a => a.href).join('\\n');
+
+            navigator.clipboard.writeText(links)
             .then(() => {
-
-                setTimeout(() => {
-                    button.innerText = 'Copy';
-                }, 2000);
+                alert('All links copied to clipboard!');
             })
             .catch(err => {
-
+                console.error('Failed to copy links: ', err);
             });
         }
    </script>
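
The core of this patch is a plain progress-callback pattern threaded through download_multiple_dois, process_bibtex, and process_bibtex_async. A minimal runnable sketch of that pattern: the percentage arithmetic matches the patch, while the function name, the sample DOI list, and the print-based callback are illustrative only.

# Sketch of the progress-callback pattern introduced by this patch.
# Only the percentage arithmetic is taken from app.py; the rest is illustrative.
def download_many(dois, progress_callback=None):
    downloaded, failed = [], []
    total_dois = len(dois)
    for i, doi in enumerate(dois):
        downloaded.append(doi)  # a real implementation would attempt the download here
        if progress_callback:
            progress = int(((i + 1) / total_dois) * 100)
            progress_callback(progress)
    return downloaded, failed

if __name__ == "__main__":
    download_many(["10.1000/xyz123", "10.1000/abc456"],
                  progress_callback=lambda p: print(f"{p}%"))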
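
On the UI side, download_papers picks the callback up through Gradio's injected tracker: declaring progress=gr.Progress() as a default argument makes Gradio supply a live progress object when the event fires, and calling it updates the bar. A self-contained sketch of that mechanism, assuming a recent Gradio version; note that gr.Progress conventionally takes a 0-1 fraction, whereas the patch forwards 0-100 integers.

import time
import gradio as gr

# Sketch only: a stand-in task demonstrating gr.Progress injection, not app.py code.
def slow_task(n_steps, progress=gr.Progress()):
    n = int(n_steps)
    for i in range(n):
        time.sleep(0.2)  # stand-in for one download attempt
        progress((i + 1) / n, desc="Downloading Papers")  # fraction in [0, 1]
    return f"finished {n} steps"

demo = gr.Interface(fn=slow_task, inputs=gr.Number(value=5), outputs="text")

if __name__ == "__main__":
    demo.launch()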
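
One caveat on the new copy button: all_links_html contains double quotes from the href="..." anchors, so interpolating it straight into the double-quoted onclick attribute can terminate the attribute early. A hedged sketch of one conventional way to build the same button safely (json.dumps for the JS string literal, html.escape for the attribute); this is a suggestion, not what the patch does.

import html
import json

# Illustrative only: assemble the Copy All Links button with proper escaping.
dois = ["10.1000/xyz123", "10.1000/abc456"]  # sample DOIs
all_links_html = "<br>".join(f'<a href="https://doi.org/{d}">{d}</a>' for d in dois)
js_call = f"copyAllLinks({json.dumps(all_links_html)})"  # valid JS string literal
copy_button_html = f'<button onclick="{html.escape(js_call, quote=True)}">Copy All Links</button>'
print(copy_button_html)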