Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -219,42 +219,42 @@ class PaperDownloader:
|
|
219 |
def download_single_doi(self, doi):
    """Download a single paper identified by a DOI and save it as a PDF.

    Args:
        doi: DOI of the paper to fetch. An empty or ``None`` value is
            rejected before any download is attempted.

    Returns:
        A 3-tuple ``(filepath, success_message, fail_message)``:

        * success: the saved PDF path, an HTML link to the DOI, and ``""``.
        * nothing downloaded: ``None``, a "Could not download" message, and
          an HTML link to the DOI.
        * missing DOI or any exception: ``None`` and the same error text in
          both message slots.

        Every path returns exactly three values because the callers in this
        file unpack three.
    """
    if not doi:
        return None, "Error: DOI not provided", "Error: DOI not provided"

    # NOTE: the original re-checked ``doi is None`` after a successful
    # download. That branch was unreachable (the guard above already rejects
    # falsy DOIs) and held an unterminated string literal, so it is removed.
    try:
        # presumably returns the raw PDF bytes or a falsy value -- confirm
        # against download_with_retry, which is defined outside this block
        pdf_content = self.download_with_retry(doi)

        if pdf_content:
            # '/' and '.' are replaced so the DOI becomes one flat file name.
            filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
            filepath = os.path.join(self.output_dir, filename)
            with open(filepath, 'wb') as f:
                f.write(pdf_content)
            logger.info(f"Successfully downloaded: {filename}")
            return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', ""
        else:
            logger.warning(f"Could not download: {doi}")
            return None, f"Could not download {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'

    except Exception as e:
        # Best-effort boundary: report the failure to the caller rather than
        # letting one bad DOI abort a whole batch.
        logger.error(f"Error processing {doi}: {e}")
        return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
|
243 |
|
244 |
def download_multiple_dois(self, dois_text):
|
245 |
"""Downloads multiple papers from a list of DOIs"""
|
246 |
if not dois_text:
|
247 |
-
return None, "Error: No DOIs provided", "Error: No DOIs provided"
|
248 |
|
249 |
dois = [doi.strip() for doi in dois_text.split('\n') if doi.strip()]
|
250 |
if not dois:
|
251 |
-
return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided"
|
252 |
|
253 |
downloaded_files = []
|
254 |
failed_dois = []
|
255 |
downloaded_links = []
|
256 |
for i, doi in enumerate(tqdm(dois, desc="Downloading papers")):
|
257 |
-
filepath, success_message, fail_message = self.download_single_doi(doi)
|
258 |
if filepath:
|
259 |
# Unique filename for zip
|
260 |
filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
|
@@ -273,7 +273,7 @@ class PaperDownloader:
|
|
273 |
zipf.write(file_path, arcname=os.path.basename(file_path))
|
274 |
logger.info(f"ZIP file created: {zip_filename}")
|
275 |
|
276 |
-
return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois)
|
277 |
|
278 |
|
279 |
def process_bibtex(self, bib_file):
|
@@ -284,14 +284,14 @@ class PaperDownloader:
|
|
284 |
bib_content = f.read()
|
285 |
except Exception as e:
|
286 |
logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
|
287 |
-
return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", None
|
288 |
|
289 |
# Parse BibTeX data
|
290 |
try:
|
291 |
bib_database = bibtexparser.loads(bib_content)
|
292 |
except Exception as e:
|
293 |
logger.error(f"Error parsing BibTeX data: {e}")
|
294 |
-
return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}", None
|
295 |
|
296 |
# Extract DOIs
|
297 |
dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
|
@@ -311,7 +311,7 @@ class PaperDownloader:
|
|
311 |
# Save PDF
|
312 |
if pdf_content:
|
313 |
if doi is None:
|
314 |
-
return None, "Error: DOI not provided", "
|
315 |
filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
|
316 |
filepath = os.path.join(self.output_dir, filename)
|
317 |
|
@@ -336,7 +336,7 @@ class PaperDownloader:
|
|
336 |
zipf.write(file_path, arcname=os.path.basename(file_path))
|
337 |
logger.info(f"ZIP file created: {zip_filename}")
|
338 |
|
339 |
-
return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
|
340 |
|
341 |
def create_report_docx(self, downloaded_dois, failed_dois):
|
342 |
"""Creates a Word document report of downloaded and failed DOIs."""
|
@@ -380,18 +380,18 @@ def create_gradio_interface():
|
|
380 |
if bib_file:
|
381 |
# Check file type
|
382 |
if not bib_file.name.lower().endswith('.bib'):
|
383 |
-
return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
|
384 |
|
385 |
-
zip_path, downloaded_dois, failed_dois, _ = downloader.process_bibtex(bib_file)
|
386 |
-
return zip_path, downloaded_dois, failed_dois, None
|
387 |
elif doi_input:
|
388 |
-
filepath, message, failed_doi = downloader.download_single_doi(doi_input)
|
389 |
-
return None, message, failed_doi, filepath
|
390 |
elif dois_input:
|
391 |
-
zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input)
|
392 |
-
return zip_path, downloaded_dois, failed_dois, None
|
393 |
else:
|
394 |
-
return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
|
395 |
|
396 |
def create_report(downloaded_dois_html, failed_dois_html):
|
397 |
"""Creates a report and returns the report path."""
|
|
|
219 |
def download_single_doi(self, doi):
    """Download a single paper identified by a DOI and save it as a PDF.

    Args:
        doi: DOI of the paper to fetch. An empty or ``None`` value is
            rejected before any download is attempted.

    Returns:
        A 4-tuple ``(filepath, success_message, fail_message, extra)``.
        ``extra`` is always ``None``.

        * success: the saved PDF path, an HTML link to the DOI, and ``""``.
        * nothing downloaded: ``None``, a "Could not download" message, and
          an HTML link to the DOI.
        * missing DOI or any exception: ``None`` and the same error text in
          both message slots.

        Every path returns exactly four values. The callers in this file
        unpack four, so the previous five-value error returns raised
        ``ValueError`` on every failure.
    """
    if not doi:
        return None, "Error: DOI not provided", "Error: DOI not provided", None

    # NOTE: the original re-checked ``doi is None`` after a successful
    # download. That branch was unreachable (the guard above already rejects
    # falsy DOIs), so it is removed.
    try:
        # presumably returns the raw PDF bytes or a falsy value -- confirm
        # against download_with_retry, which is defined outside this block
        pdf_content = self.download_with_retry(doi)

        if pdf_content:
            # '/' and '.' are replaced so the DOI becomes one flat file name.
            filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
            filepath = os.path.join(self.output_dir, filename)
            with open(filepath, 'wb') as f:
                f.write(pdf_content)
            logger.info(f"Successfully downloaded: {filename}")
            return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', "", None
        else:
            logger.warning(f"Could not download: {doi}")
            return None, f"Could not download {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>', None

    except Exception as e:
        # Best-effort boundary: report the failure to the caller rather than
        # letting one bad DOI abort a whole batch.
        logger.error(f"Error processing {doi}: {e}")
        return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}", None
|
243 |
|
244 |
def download_multiple_dois(self, dois_text):
|
245 |
"""Downloads multiple papers from a list of DOIs"""
|
246 |
if not dois_text:
|
247 |
+
return None, "Error: No DOIs provided", "Error: No DOIs provided", None, None
|
248 |
|
249 |
dois = [doi.strip() for doi in dois_text.split('\n') if doi.strip()]
|
250 |
if not dois:
|
251 |
+
return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided", None, None
|
252 |
|
253 |
downloaded_files = []
|
254 |
failed_dois = []
|
255 |
downloaded_links = []
|
256 |
for i, doi in enumerate(tqdm(dois, desc="Downloading papers")):
|
257 |
+
filepath, success_message, fail_message, _ = self.download_single_doi(doi)
|
258 |
if filepath:
|
259 |
# Unique filename for zip
|
260 |
filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
|
|
|
273 |
zipf.write(file_path, arcname=os.path.basename(file_path))
|
274 |
logger.info(f"ZIP file created: {zip_filename}")
|
275 |
|
276 |
+
return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois), None, None
|
277 |
|
278 |
|
279 |
def process_bibtex(self, bib_file):
|
|
|
284 |
bib_content = f.read()
|
285 |
except Exception as e:
|
286 |
logger.error(f"Error reading uploaded file {bib_file.name}: {e}")
|
287 |
+
return None, f"Error reading uploaded file {bib_file.name}: {e}", f"Error reading uploaded file {bib_file.name}: {e}", None, None
|
288 |
|
289 |
# Parse BibTeX data
|
290 |
try:
|
291 |
bib_database = bibtexparser.loads(bib_content)
|
292 |
except Exception as e:
|
293 |
logger.error(f"Error parsing BibTeX data: {e}")
|
294 |
+
return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}", None, None
|
295 |
|
296 |
# Extract DOIs
|
297 |
dois = [entry.get('doi') for entry in bib_database.entries if entry.get('doi')]
|
|
|
311 |
# Save PDF
|
312 |
if pdf_content:
|
313 |
if doi is None:
|
314 |
+
return None, "Error: DOI not provided", "", None, None
|
315 |
filename = f"{str(doi).replace('/', '_').replace('.', '_')}.pdf"
|
316 |
filepath = os.path.join(self.output_dir, filename)
|
317 |
|
|
|
336 |
zipf.write(file_path, arcname=os.path.basename(file_path))
|
337 |
logger.info(f"ZIP file created: {zip_filename}")
|
338 |
|
339 |
+
return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None, None
|
340 |
|
341 |
def create_report_docx(self, downloaded_dois, failed_dois):
|
342 |
"""Creates a Word document report of downloaded and failed DOIs."""
|
|
|
380 |
if bib_file:
|
381 |
# Check file type
|
382 |
if not bib_file.name.lower().endswith('.bib'):
|
383 |
+
return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None, None
|
384 |
|
385 |
+
zip_path, downloaded_dois, failed_dois, _ , _= downloader.process_bibtex(bib_file)
|
386 |
+
return zip_path, downloaded_dois, failed_dois, None, None
|
387 |
elif doi_input:
|
388 |
+
filepath, message, failed_doi, _ = downloader.download_single_doi(doi_input)
|
389 |
+
return None, message, failed_doi, filepath, None
|
390 |
elif dois_input:
|
391 |
+
zip_path, downloaded_dois, failed_dois, _ , _= downloader.download_multiple_dois(dois_input)
|
392 |
+
return zip_path, downloaded_dois, failed_dois, None, None
|
393 |
else:
|
394 |
+
return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None, None
|
395 |
|
396 |
def create_report(downloaded_dois_html, failed_dois_html):
|
397 |
"""Creates a report and returns the report path."""
|