C2MV committed on
Commit
d416d78
·
verified ·
1 Parent(s): 93f088b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -28
app.py CHANGED
@@ -5,6 +5,7 @@ import logging
5
  import zipfile
6
  import requests
7
  import bibtexparser
 
8
  from urllib.parse import quote, urlencode
9
  import gradio as gr
10
  from bs4 import BeautifulSoup
@@ -419,8 +420,8 @@ class PaperDownloader:
419
 
420
  return None
421
 
422
- def download_single_doi(self, doi, progress=gr.Progress()):
423
- """Downloads a single paper using a DOI with progress bar"""
424
  if not doi:
425
  return None, "Error: DOI not provided", "Error: DOI not provided"
426
 
@@ -435,20 +436,17 @@ class PaperDownloader:
435
  with open(filepath, 'wb') as f:
436
  f.write(pdf_content)
437
  logger.info(f"Successfully downloaded: {filename}")
438
- progress(1)
439
  return filepath, f'<div style="display: flex; align-items: center;">✓ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>', ""
440
  else:
441
  logger.warning(f"Could not download: {doi}")
442
- progress(1)
443
  return None, f"Could not download {doi}", f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>'
444
 
445
  except Exception as e:
446
  logger.error(f"Error processing {doi}: {e}")
447
- progress(1)
448
  return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
449
 
450
- def download_multiple_dois(self, dois_text, progress=gr.Progress()):
451
- """Downloads multiple papers from a list of DOIs with progress bar"""
452
  if not dois_text:
453
  return None, "Error: No DOIs provided", "Error: No DOIs provided"
454
 
@@ -456,13 +454,11 @@ class PaperDownloader:
456
  if not dois:
457
  return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided"
458
 
459
- total_dois = len(dois)
460
  downloaded_files = []
461
  failed_dois = []
462
  downloaded_links = []
463
-
464
- for i, doi in enumerate(dois):
465
- filepath, success_message, fail_message = self.download_single_doi(doi, progress=progress)
466
  if filepath:
467
  # Unique filename for zip
468
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
@@ -473,8 +469,6 @@ class PaperDownloader:
473
 
474
  else:
475
  failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
476
- progress((i + 1) / total_dois)
477
-
478
 
479
  if downloaded_files:
480
  zip_filename = 'papers.zip'
@@ -485,8 +479,8 @@ class PaperDownloader:
485
 
486
  return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois)
487
 
488
- def process_bibtex(self, bib_file, progress=gr.Progress()):
489
- """Process BibTeX file and download papers with multiple strategies with progress bar"""
490
  # Read BibTeX file content from the uploaded object
491
  try:
492
  with open(bib_file.name, 'r', encoding='utf-8') as f:
@@ -510,11 +504,9 @@ class PaperDownloader:
510
  downloaded_files = []
511
  failed_dois = []
512
  downloaded_links = []
513
-
514
- total_dois = len(dois)
515
 
516
  # Download PDFs
517
- for i, doi in enumerate(dois):
518
  try:
519
  # Try to download with multiple methods with retries
520
  pdf_content = self.download_with_retry(doi)
@@ -538,7 +530,6 @@ class PaperDownloader:
538
  except Exception as e:
539
  failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
540
  logger.error(f"Error processing {doi}: {e}")
541
- progress((i + 1) / total_dois)
542
 
543
  # Create ZIP of downloaded papers
544
  if downloaded_files:
@@ -550,7 +541,7 @@ class PaperDownloader:
550
 
551
  return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
552
 
553
- async def process_bibtex_async(self, bib_file, progress=gr.Progress()):
554
  """Process BibTeX file and download papers with multiple strategies"""
555
  # Read BibTeX file content from the uploaded object
556
  try:
@@ -575,10 +566,9 @@ class PaperDownloader:
575
  downloaded_files = []
576
  failed_dois = []
577
  downloaded_links = []
578
- total_dois = len(dois)
579
 
580
  # Download PDFs
581
- for i, doi in enumerate(dois):
582
  try:
583
  # Try to download with multiple methods with retries
584
  pdf_content = await self.download_with_retry_async(doi)
@@ -602,8 +592,6 @@ class PaperDownloader:
602
  except Exception as e:
603
  failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
604
  logger.error(f"Error processing {doi}: {e}")
605
- progress((i + 1) / total_dois)
606
-
607
 
608
  # Create ZIP of downloaded papers
609
  if downloaded_files:
@@ -619,19 +607,19 @@ def create_gradio_interface():
619
  """Create Gradio interface for Paper Downloader"""
620
  downloader = PaperDownloader()
621
 
622
- async def download_papers(bib_file, doi_input, dois_input, progress=gr.Progress()):
623
  if bib_file:
624
  # Check file type
625
  if not bib_file.name.lower().endswith('.bib'):
626
  return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
627
 
628
- zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file, progress)
629
  return zip_path, downloaded_dois, failed_dois, None
630
  elif doi_input:
631
- filepath, message, failed_doi = downloader.download_single_doi(doi_input, progress)
632
  return None, message, failed_doi, filepath
633
  elif dois_input:
634
- zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input, progress)
635
  return zip_path, downloaded_dois, failed_dois, None
636
  else:
637
  return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
 
5
  import zipfile
6
  import requests
7
  import bibtexparser
8
+ from tqdm import tqdm
9
  from urllib.parse import quote, urlencode
10
  import gradio as gr
11
  from bs4 import BeautifulSoup
 
420
 
421
  return None
422
 
423
+ def download_single_doi(self, doi):
424
+ """Downloads a single paper using a DOI"""
425
  if not doi:
426
  return None, "Error: DOI not provided", "Error: DOI not provided"
427
 
 
436
  with open(filepath, 'wb') as f:
437
  f.write(pdf_content)
438
  logger.info(f"Successfully downloaded: {filename}")
 
439
  return filepath, f'<div style="display: flex; align-items: center;">✓ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>', ""
440
  else:
441
  logger.warning(f"Could not download: {doi}")
 
442
  return None, f"Could not download {doi}", f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>'
443
 
444
  except Exception as e:
445
  logger.error(f"Error processing {doi}: {e}")
 
446
  return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
447
 
448
+ def download_multiple_dois(self, dois_text):
449
+ """Downloads multiple papers from a list of DOIs"""
450
  if not dois_text:
451
  return None, "Error: No DOIs provided", "Error: No DOIs provided"
452
 
 
454
  if not dois:
455
  return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided"
456
 
 
457
  downloaded_files = []
458
  failed_dois = []
459
  downloaded_links = []
460
+ for i, doi in enumerate(tqdm(dois, desc="Downloading papers")):
461
+ filepath, success_message, fail_message = self.download_single_doi(doi)
 
462
  if filepath:
463
  # Unique filename for zip
464
  filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
 
469
 
470
  else:
471
  failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
 
 
472
 
473
  if downloaded_files:
474
  zip_filename = 'papers.zip'
 
479
 
480
  return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois)
481
 
482
+ def process_bibtex(self, bib_file):
483
+ """Process BibTeX file and download papers with multiple strategies"""
484
  # Read BibTeX file content from the uploaded object
485
  try:
486
  with open(bib_file.name, 'r', encoding='utf-8') as f:
 
504
  downloaded_files = []
505
  failed_dois = []
506
  downloaded_links = []
 
 
507
 
508
  # Download PDFs
509
+ for doi in tqdm(dois, desc="Downloading papers"):
510
  try:
511
  # Try to download with multiple methods with retries
512
  pdf_content = self.download_with_retry(doi)
 
530
  except Exception as e:
531
  failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
532
  logger.error(f"Error processing {doi}: {e}")
 
533
 
534
  # Create ZIP of downloaded papers
535
  if downloaded_files:
 
541
 
542
  return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
543
 
544
+ async def process_bibtex_async(self, bib_file):
545
  """Process BibTeX file and download papers with multiple strategies"""
546
  # Read BibTeX file content from the uploaded object
547
  try:
 
566
  downloaded_files = []
567
  failed_dois = []
568
  downloaded_links = []
 
569
 
570
  # Download PDFs
571
+ for doi in tqdm(dois, desc="Downloading papers"):
572
  try:
573
  # Try to download with multiple methods with retries
574
  pdf_content = await self.download_with_retry_async(doi)
 
592
  except Exception as e:
593
  failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
594
  logger.error(f"Error processing {doi}: {e}")
 
 
595
 
596
  # Create ZIP of downloaded papers
597
  if downloaded_files:
 
607
  """Create Gradio interface for Paper Downloader"""
608
  downloader = PaperDownloader()
609
 
610
+ async def download_papers(bib_file, doi_input, dois_input):
611
  if bib_file:
612
  # Check file type
613
  if not bib_file.name.lower().endswith('.bib'):
614
  return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
615
 
616
+ zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file)
617
  return zip_path, downloaded_dois, failed_dois, None
618
  elif doi_input:
619
+ filepath, message, failed_doi = downloader.download_single_doi(doi_input)
620
  return None, message, failed_doi, filepath
621
  elif dois_input:
622
+ zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input)
623
  return zip_path, downloaded_dois, failed_dois, None
624
  else:
625
  return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None