Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import logging
|
|
5 |
import zipfile
|
6 |
import requests
|
7 |
import bibtexparser
|
|
|
8 |
from urllib.parse import quote, urlencode
|
9 |
import gradio as gr
|
10 |
from bs4 import BeautifulSoup
|
@@ -419,8 +420,8 @@ class PaperDownloader:
|
|
419 |
|
420 |
return None
|
421 |
|
422 |
-
def download_single_doi(self, doi
|
423 |
-
"""Downloads a single paper using a DOI
|
424 |
if not doi:
|
425 |
return None, "Error: DOI not provided", "Error: DOI not provided"
|
426 |
|
@@ -435,20 +436,17 @@ class PaperDownloader:
|
|
435 |
with open(filepath, 'wb') as f:
|
436 |
f.write(pdf_content)
|
437 |
logger.info(f"Successfully downloaded: {filename}")
|
438 |
-
progress(1)
|
439 |
return filepath, f'<div style="display: flex; align-items: center;">✓ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>', ""
|
440 |
else:
|
441 |
logger.warning(f"Could not download: {doi}")
|
442 |
-
progress(1)
|
443 |
return None, f"Could not download {doi}", f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>'
|
444 |
|
445 |
except Exception as e:
|
446 |
logger.error(f"Error processing {doi}: {e}")
|
447 |
-
progress(1)
|
448 |
return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
|
449 |
|
450 |
-
def download_multiple_dois(self, dois_text
|
451 |
-
"""Downloads multiple papers from a list of DOIs
|
452 |
if not dois_text:
|
453 |
return None, "Error: No DOIs provided", "Error: No DOIs provided"
|
454 |
|
@@ -456,13 +454,11 @@ class PaperDownloader:
|
|
456 |
if not dois:
|
457 |
return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided"
|
458 |
|
459 |
-
total_dois = len(dois)
|
460 |
downloaded_files = []
|
461 |
failed_dois = []
|
462 |
downloaded_links = []
|
463 |
-
|
464 |
-
|
465 |
-
filepath, success_message, fail_message = self.download_single_doi(doi, progress=progress)
|
466 |
if filepath:
|
467 |
# Unique filename for zip
|
468 |
filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
|
@@ -473,8 +469,6 @@ class PaperDownloader:
|
|
473 |
|
474 |
else:
|
475 |
failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
|
476 |
-
progress((i + 1) / total_dois)
|
477 |
-
|
478 |
|
479 |
if downloaded_files:
|
480 |
zip_filename = 'papers.zip'
|
@@ -485,8 +479,8 @@ class PaperDownloader:
|
|
485 |
|
486 |
return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois)
|
487 |
|
488 |
-
def process_bibtex(self, bib_file
|
489 |
-
"""Process BibTeX file and download papers with multiple strategies
|
490 |
# Read BibTeX file content from the uploaded object
|
491 |
try:
|
492 |
with open(bib_file.name, 'r', encoding='utf-8') as f:
|
@@ -510,11 +504,9 @@ class PaperDownloader:
|
|
510 |
downloaded_files = []
|
511 |
failed_dois = []
|
512 |
downloaded_links = []
|
513 |
-
|
514 |
-
total_dois = len(dois)
|
515 |
|
516 |
# Download PDFs
|
517 |
-
for
|
518 |
try:
|
519 |
# Try to download with multiple methods with retries
|
520 |
pdf_content = self.download_with_retry(doi)
|
@@ -538,7 +530,6 @@ class PaperDownloader:
|
|
538 |
except Exception as e:
|
539 |
failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
|
540 |
logger.error(f"Error processing {doi}: {e}")
|
541 |
-
progress((i + 1) / total_dois)
|
542 |
|
543 |
# Create ZIP of downloaded papers
|
544 |
if downloaded_files:
|
@@ -550,7 +541,7 @@ class PaperDownloader:
|
|
550 |
|
551 |
return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
|
552 |
|
553 |
-
async def process_bibtex_async(self, bib_file
|
554 |
"""Process BibTeX file and download papers with multiple strategies"""
|
555 |
# Read BibTeX file content from the uploaded object
|
556 |
try:
|
@@ -575,10 +566,9 @@ class PaperDownloader:
|
|
575 |
downloaded_files = []
|
576 |
failed_dois = []
|
577 |
downloaded_links = []
|
578 |
-
total_dois = len(dois)
|
579 |
|
580 |
# Download PDFs
|
581 |
-
for
|
582 |
try:
|
583 |
# Try to download with multiple methods with retries
|
584 |
pdf_content = await self.download_with_retry_async(doi)
|
@@ -602,8 +592,6 @@ class PaperDownloader:
|
|
602 |
except Exception as e:
|
603 |
failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
|
604 |
logger.error(f"Error processing {doi}: {e}")
|
605 |
-
progress((i + 1) / total_dois)
|
606 |
-
|
607 |
|
608 |
# Create ZIP of downloaded papers
|
609 |
if downloaded_files:
|
@@ -619,19 +607,19 @@ def create_gradio_interface():
|
|
619 |
"""Create Gradio interface for Paper Downloader"""
|
620 |
downloader = PaperDownloader()
|
621 |
|
622 |
-
async def download_papers(bib_file, doi_input, dois_input
|
623 |
if bib_file:
|
624 |
# Check file type
|
625 |
if not bib_file.name.lower().endswith('.bib'):
|
626 |
return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
|
627 |
|
628 |
-
zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file
|
629 |
return zip_path, downloaded_dois, failed_dois, None
|
630 |
elif doi_input:
|
631 |
-
filepath, message, failed_doi = downloader.download_single_doi(doi_input
|
632 |
return None, message, failed_doi, filepath
|
633 |
elif dois_input:
|
634 |
-
zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input
|
635 |
return zip_path, downloaded_dois, failed_dois, None
|
636 |
else:
|
637 |
return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
|
|
|
5 |
import zipfile
|
6 |
import requests
|
7 |
import bibtexparser
|
8 |
+
from tqdm import tqdm
|
9 |
from urllib.parse import quote, urlencode
|
10 |
import gradio as gr
|
11 |
from bs4 import BeautifulSoup
|
|
|
420 |
|
421 |
return None
|
422 |
|
423 |
+
def download_single_doi(self, doi):
|
424 |
+
"""Downloads a single paper using a DOI"""
|
425 |
if not doi:
|
426 |
return None, "Error: DOI not provided", "Error: DOI not provided"
|
427 |
|
|
|
436 |
with open(filepath, 'wb') as f:
|
437 |
f.write(pdf_content)
|
438 |
logger.info(f"Successfully downloaded: {filename}")
|
|
|
439 |
return filepath, f'<div style="display: flex; align-items: center;">✓ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>', ""
|
440 |
else:
|
441 |
logger.warning(f"Could not download: {doi}")
|
|
|
442 |
return None, f"Could not download {doi}", f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>'
|
443 |
|
444 |
except Exception as e:
|
445 |
logger.error(f"Error processing {doi}: {e}")
|
|
|
446 |
return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
|
447 |
|
448 |
+
def download_multiple_dois(self, dois_text):
|
449 |
+
"""Downloads multiple papers from a list of DOIs"""
|
450 |
if not dois_text:
|
451 |
return None, "Error: No DOIs provided", "Error: No DOIs provided"
|
452 |
|
|
|
454 |
if not dois:
|
455 |
return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided"
|
456 |
|
|
|
457 |
downloaded_files = []
|
458 |
failed_dois = []
|
459 |
downloaded_links = []
|
460 |
+
for i, doi in enumerate(tqdm(dois, desc="Downloading papers")):
|
461 |
+
filepath, success_message, fail_message = self.download_single_doi(doi)
|
|
|
462 |
if filepath:
|
463 |
# Unique filename for zip
|
464 |
filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
|
|
|
469 |
|
470 |
else:
|
471 |
failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
|
|
|
|
|
472 |
|
473 |
if downloaded_files:
|
474 |
zip_filename = 'papers.zip'
|
|
|
479 |
|
480 |
return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois)
|
481 |
|
482 |
+
def process_bibtex(self, bib_file):
|
483 |
+
"""Process BibTeX file and download papers with multiple strategies"""
|
484 |
# Read BibTeX file content from the uploaded object
|
485 |
try:
|
486 |
with open(bib_file.name, 'r', encoding='utf-8') as f:
|
|
|
504 |
downloaded_files = []
|
505 |
failed_dois = []
|
506 |
downloaded_links = []
|
|
|
|
|
507 |
|
508 |
# Download PDFs
|
509 |
+
for doi in tqdm(dois, desc="Downloading papers"):
|
510 |
try:
|
511 |
# Try to download with multiple methods with retries
|
512 |
pdf_content = self.download_with_retry(doi)
|
|
|
530 |
except Exception as e:
|
531 |
failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
|
532 |
logger.error(f"Error processing {doi}: {e}")
|
|
|
533 |
|
534 |
# Create ZIP of downloaded papers
|
535 |
if downloaded_files:
|
|
|
541 |
|
542 |
return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
|
543 |
|
544 |
+
async def process_bibtex_async(self, bib_file):
|
545 |
"""Process BibTeX file and download papers with multiple strategies"""
|
546 |
# Read BibTeX file content from the uploaded object
|
547 |
try:
|
|
|
566 |
downloaded_files = []
|
567 |
failed_dois = []
|
568 |
downloaded_links = []
|
|
|
569 |
|
570 |
# Download PDFs
|
571 |
+
for doi in tqdm(dois, desc="Downloading papers"):
|
572 |
try:
|
573 |
# Try to download with multiple methods with retries
|
574 |
pdf_content = await self.download_with_retry_async(doi)
|
|
|
592 |
except Exception as e:
|
593 |
failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
|
594 |
logger.error(f"Error processing {doi}: {e}")
|
|
|
|
|
595 |
|
596 |
# Create ZIP of downloaded papers
|
597 |
if downloaded_files:
|
|
|
607 |
"""Create Gradio interface for Paper Downloader"""
|
608 |
downloader = PaperDownloader()
|
609 |
|
610 |
+
async def download_papers(bib_file, doi_input, dois_input):
|
611 |
if bib_file:
|
612 |
# Check file type
|
613 |
if not bib_file.name.lower().endswith('.bib'):
|
614 |
return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
|
615 |
|
616 |
+
zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file)
|
617 |
return zip_path, downloaded_dois, failed_dois, None
|
618 |
elif doi_input:
|
619 |
+
filepath, message, failed_doi = downloader.download_single_doi(doi_input)
|
620 |
return None, message, failed_doi, filepath
|
621 |
elif dois_input:
|
622 |
+
zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input)
|
623 |
return zip_path, downloaded_dois, failed_dois, None
|
624 |
else:
|
625 |
return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
|