Browse files
@@ -1,826 +1,729 @@
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
import gradio as gr
10 |
import io
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
self.x3_levels = x3_levels
35 |
36 |
def get_levels(self, variable_name):
    """
    Return the configured levels for one factor.

    Args:
        variable_name: Factor name; expected to equal ``self.x1_name``,
            ``self.x2_name`` or ``self.x3_name``.

    Returns:
        The matching ``self.x1_levels`` / ``self.x2_levels`` /
        ``self.x3_levels`` attribute, returned as stored.

    Raises:
        ValueError: If ``variable_name`` matches none of the three factors.
    """
    if variable_name == self.x1_name:
        return self.x1_levels
    if variable_name == self.x2_name:
        return self.x2_levels
    if variable_name == self.x3_name:
        return self.x3_levels
    raise ValueError(f"Variable desconocida: {variable_name}")
48 |
49 |
def fit_model(self):
50 |
51 |
Ajusta el modelo de segundo orden completo a los datos.
52 |
53 |
formula = f'{self.y_name} ~ {self.x1_name} + {self.x2_name} + {self.x3_name} + ' \
54 |
f'I({self.x1_name}**2) + I({self.x2_name}**2) + I({self.x3_name}**2) + ' \
55 |
f'{self.x1_name}:{self.x2_name} + {self.x1_name}:{self.x3_name} + {self.x2_name}:{self.x3_name}'
56 |
self.model = smf.ols(formula,
57 |
print("Modelo Completo:")
58 |
59 |
return self.model, self.pareto_chart(self.model, "Pareto - Modelo Completo")
60 |
61 |
def fit_simplified_model(self):
62 |
63 |
Ajusta el modelo de segundo orden a los datos, eliminando términos no significativos.
64 |
65 |
formula = f'{self.y_name} ~ {self.x1_name} + {self.x2_name} + ' \
66 |
f'I({self.x1_name}**2) + I({self.x2_name}**2) + I({self.x3_name}**2)'
67 |
self.model_simplified = smf.ols(formula,
68 |
print("\nModelo Simplificado:")
69 |
70 |
return self.model_simplified, self.pareto_chart(self.model_simplified, "Pareto - Modelo Simplificado")
71 |
72 |
def optimize(self, method='Nelder-Mead'):
    """
    Find the coded factor levels that maximise the simplified model's response.

    The search starts at the coded centre point ``[0, 0, 0]`` and is bounded
    to ``[-1, 1]`` on every factor. The raw optimiser result is stored in
    ``self.optimized_results`` and the coded optimum in ``self.optimal_levels``.

    Args:
        method: Solver name forwarded to ``scipy.optimize.minimize``.

    Returns:
        A DataFrame with one row per factor and the columns ``Variable``,
        ``Nivel Óptimo (Natural)`` and ``Nivel Óptimo (Codificado)``, rounded
        to 3 decimals; ``None`` when the simplified model has not been fitted.
    """
    if self.model_simplified is None:
        print("Error: Ajusta el modelo simplificado primero.")
        return None

    factor_names = [self.x1_name, self.x2_name, self.x3_name]

    def objective_function(x):
        # Negate the prediction so that minimising it maximises the response.
        prediction = self.model_simplified.predict(pd.DataFrame({
            name: [value] for name, value in zip(factor_names, x)
        }))
        # minimize() needs a plain scalar; predict() presumably returns a
        # one-element array-like, so unwrap it explicitly.
        return -float(np.asarray(prediction).ravel()[0])

    bounds = [(-1, 1), (-1, 1), (-1, 1)]
    x0 = [0, 0, 0]

    self.optimized_results = minimize(objective_function, x0, method=method, bounds=bounds)
    self.optimal_levels = self.optimized_results.x

    # Convert the optimum from coded to natural units, factor by factor.
    optimal_levels_natural = [
        self.coded_to_natural(level, name)
        for level, name in zip(self.optimal_levels, factor_names)
    ]

    # Build the optimisation summary table.
    optimization_table = pd.DataFrame({
        'Variable': factor_names,
        'Nivel Óptimo (Natural)': optimal_levels_natural,
        'Nivel Óptimo (Codificado)': self.optimal_levels
    })

    return optimization_table.round(3)
107 |
108 |
def plot_rsm_individual(self, fixed_variable, fixed_level):
109 |
110 |
Genera un gráfico de superficie de respuesta (RSM) individual para una configuración específica.
111 |
112 |
if self.model_simplified is None:
113 |
print("Error: Ajusta el modelo simplificado primero.")
114 |
return None
115 |
116 |
# Determinar las variables que varían y sus niveles naturales
117 |
varying_variables = [var for var in [self.x1_name, self.x2_name, self.x3_name] if var != fixed_variable]
118 |
119 |
# Establecer los niveles naturales para las variables que varían
120 |
x_natural_levels = self.get_levels(varying_variables[0])
121 |
y_natural_levels = self.get_levels(varying_variables[1])
122 |
123 |
# Crear una malla de puntos para las variables que varían (en unidades naturales)
124 |
x_range_natural = np.linspace(x_natural_levels[0], x_natural_levels[-1], 100)
125 |
y_range_natural = np.linspace(y_natural_levels[0], y_natural_levels[-1], 100)
126 |
x_grid_natural, y_grid_natural = np.meshgrid(x_range_natural, y_range_natural)
127 |
128 |
# Convertir la malla de variables naturales a codificadas
129 |
x_grid_coded = self.natural_to_coded(x_grid_natural, varying_variables[0])
130 |
y_grid_coded = self.natural_to_coded(y_grid_natural, varying_variables[1])
131 |
132 |
# Crear un DataFrame para la predicción con variables codificadas
133 |
prediction_data = pd.DataFrame({
134 |
varying_variables[0]: x_grid_coded.flatten(),
135 |
varying_variables[1]: y_grid_coded.flatten(),
136 |
137 |
prediction_data[fixed_variable] = self.natural_to_coded(fixed_level, fixed_variable)
138 |
139 |
# Calcular los valores predichos
140 |
z_pred = self.model_simplified.predict(prediction_data).values.reshape(x_grid_coded.shape)
141 |
142 |
# Filtrar por el nivel de la variable fija (en codificado)
143 |
fixed_level_coded = self.natural_to_coded(fixed_level, fixed_variable)
144 |
subset_data =[np.isclose([fixed_variable], fixed_level_coded)]
145 |
146 |
# Filtrar por niveles válidos en las variables que varían
147 |
valid_levels = [-1, 0, 1]
148 |
experiments_data = subset_data[
149 |
subset_data[varying_variables[0]].isin(valid_levels) &
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
fig = go.Figure(data=[go.Surface(z=z_pred, x=x_grid_natural, y=y_grid_natural, colorscale='Viridis', opacity=0.7, showscale=True)])
159 |
160 |
# --- Añadir cuadrícula a la superficie ---
161 |
# Líneas en la dirección x
162 |
for i in range(x_grid_natural.shape[0]):
163 |
164 |
x=x_grid_natural[i, :],
165 |
y=y_grid_natural[i, :],
166 |
z=z_pred[i, :],
167 |
168 |
line=dict(color='gray', width=2),
169 |
170 |
171 |
172 |
# Líneas en la dirección y
173 |
for j in range(x_grid_natural.shape[1]):
174 |
175 |
x=x_grid_natural[:, j],
176 |
y=y_grid_natural[:, j],
177 |
z=z_pred[:, j],
178 |
179 |
line=dict(color='gray', width=2),
180 |
181 |
182 |
183 |
184 |
# --- Fin de la adición de la cuadrícula ---
185 |
186 |
# Añadir los puntos de los experimentos en la superficie de respuesta con diferentes colores y etiquetas
187 |
colors = px.colors.qualitative.Safe
188 |
point_labels = [f"{row[self.y_name]:.3f}" for _, row in experiments_data.iterrows()]
189 |
190 |
191 |
192 |
193 |
194 |
195 |
marker=dict(size=4, color=colors[:len(experiments_x_natural)]),
196 |
197 |
textposition='top center',
198 |
199 |
200 |
201 |
# Añadir etiquetas y título con variables naturales
202 |
203 |
204 |
xaxis_title=f"{varying_variables[0]} ({self.get_units(varying_variables[0])})",
205 |
yaxis_title=f"{varying_variables[1]} ({self.get_units(varying_variables[1])})",
206 |
207 |
208 |
title=f"{self.y_name} vs {varying_variables[0]} y {varying_variables[1]}<br><sup>{fixed_variable} fijo en {fixed_level:.3f} ({self.get_units(fixed_variable)}) (Modelo Simplificado)</sup>",
209 |
210 |
211 |
212 |
213 |
return fig
214 |
215 |
def get_units(self, variable_name):
    """
    Return the unit label used for a variable in plot axes and titles.

    The mapping is hard-coded; edit it to suit other variables.

    Args:
        variable_name: Name of the variable to look up.

    Returns:
        The unit string, or ``''`` when the variable is not in the mapping.
    """
    units = {
        'Glucosa': 'g/L',
        'Extracto_de_Levadura': 'g/L',
        'Triptofano': 'g/L',
        'AIA_ppm': 'ppm',
    }
    return units.get(variable_name, '')
227 |
228 |
def generate_all_plots(self):
229 |
230 |
Genera todas las gráficas de RSM, variando la variable fija y sus niveles usando el modelo simplificado.
231 |
Almacena las figuras en self.all_figures.
232 |
233 |
if self.model_simplified is None:
234 |
print("Error: Ajusta el modelo simplificado primero.")
235 |
236 |
237 |
self.all_figures = [] # Resetear la lista de figuras
238 |
239 |
# Niveles naturales para graficar
240 |
levels_to_plot_natural = {
241 |
self.x1_name: self.x1_levels,
242 |
self.x2_name: self.x2_levels,
243 |
self.x3_name: self.x3_levels
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
sorted_tvalues = abs_tvalues[sorted_idx]
273 |
sorted_names = tvalues.index[sorted_idx]
274 |
275 |
# Calcular el valor crítico de t para la línea de significancia
276 |
alpha = 0.05 # Nivel de significancia
277 |
dof = model.df_resid # Grados de libertad residuales
278 |
t_critical = t.ppf(1 - alpha / 2, dof)
279 |
280 |
# Crear el diagrama de Pareto
281 |
fig =
282 |
283 |
284 |
285 |
labels={'x': 'Efecto Estandarizado', 'y': 'Término'},
286 |
287 |
288 |
289 |
290 |
# Agregar la línea de significancia
291 |
fig.add_vline(x=t_critical, line_dash="dot",
292 |
annotation_text=f"t crítico = {t_critical:.3f}",
293 |
annotation_position="bottom right")
294 |
295 |
return fig
296 |
297 |
def get_simplified_equation(self):
298 |
299 |
Imprime la ecuación del modelo simplificado.
300 |
301 |
if self.model_simplified is None:
302 |
print("Error: Ajusta el modelo simplificado primero.")
303 |
return None
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
return None
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
if self.model_simplified is None:
343 |
print("Error: Ajusta el modelo simplificado primero.")
344 |
return None
345 |
346 |
347 |
348 |
349 |
350 |
ss_total = anova_table['sum_sq'].sum()
351 |
352 |
# Crear tabla de contribución
353 |
contribution_table = pd.DataFrame({
354 |
'Factor': [],
355 |
'Suma de Cuadrados': [],
356 |
'% Contribución': []
357 |
358 |
359 |
# Calcular porcentaje de contribución para cada factor
360 |
for index, row in anova_table.iterrows():
361 |
if index != 'Residual':
362 |
factor_name = index
363 |
if factor_name == f'I({self.x1_name} ** 2)':
364 |
factor_name = f'{self.x1_name}^2'
365 |
elif factor_name == f'I({self.x2_name} ** 2)':
366 |
factor_name = f'{self.x2_name}^2'
367 |
elif factor_name == f'I({self.x3_name} ** 2)':
368 |
factor_name = f'{self.x3_name}^2'
369 |
370 |
ss_factor = row['sum_sq']
371 |
contribution_percentage = (ss_factor / ss_total) * 100
372 |
373 |
contribution_table = pd.concat([contribution_table, pd.DataFrame({
374 |
'Factor': [factor_name],
375 |
'Suma de Cuadrados': [ss_factor],
376 |
'% Contribución': [contribution_percentage]
377 |
})], ignore_index=True)
378 |
379 |
return contribution_table.round(3)
380 |
381 |
def calculate_detailed_anova(self):
382 |
383 |
Calcula la tabla ANOVA detallada con la descomposición del error residual.
384 |
385 |
if self.model_simplified is None:
386 |
print("Error: Ajusta el modelo simplificado primero.")
387 |
return None
388 |
389 |
390 |
391 |
392 |
f'I({self.x1_name}**2) + I({self.x2_name}**2) + I({self.x3_name}**2)'
393 |
model_reduced = smf.ols(formula_reduced,
394 |
395 |
396 |
anova_reduced = sm.stats.anova_lm(model_reduced, typ=2)
397 |
398 |
399 |
ss_total = np.sum(([self.y_name] -[self.y_name].mean())**2)
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |
410 |
411 |
412 |
413 |
414 |
415 |
416 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
426 |
427 |
# 10. Cuadrados medios
428 |
ms_regression = ss_regression / df_regression
429 |
ms_residual = ss_residual / df_residual
430 |
ms_lack_of_fit = ss_lack_of_fit / df_lack_of_fit if not np.isnan(ss_lack_of_fit) else np.nan
431 |
ms_pure_error = ss_pure_error / df_pure_error if not np.isnan(ss_pure_error) else np.nan
432 |
433 |
# 11. Estadístico F y valor p para la falta de ajuste
434 |
f_lack_of_fit = ms_lack_of_fit / ms_pure_error if not np.isnan(ms_lack_of_fit) else np.nan
435 |
p_lack_of_fit = 1 - f.cdf(f_lack_of_fit, df_lack_of_fit, df_pure_error) if not np.isnan(f_lack_of_fit) else np.nan
436 |
437 |
# 12. Crear la tabla ANOVA detallada
438 |
detailed_anova_table = pd.DataFrame({
439 |
'Fuente de Variación': ['Regresión', 'Residual', 'Falta de Ajuste', 'Error Puro', 'Total'],
440 |
'Suma de Cuadrados': [ss_regression, ss_residual, ss_lack_of_fit, ss_pure_error, ss_total],
441 |
'Grados de Libertad': [df_regression, df_residual, df_lack_of_fit, df_pure_error, df_total],
442 |
'Cuadrado Medio': [ms_regression, ms_residual, ms_lack_of_fit, ms_pure_error, np.nan],
443 |
'F': [np.nan, np.nan, f_lack_of_fit, np.nan, np.nan],
444 |
'Valor p': [np.nan, np.nan, p_lack_of_fit, np.nan, np.nan]
445 |
446 |
447 |
# Calcular la suma de cuadrados y grados de libertad para la curvatura
448 |
ss_curvature = anova_reduced['sum_sq'][f'I({self.x1_name} ** 2)'] + anova_reduced['sum_sq'][f'I({self.x2_name} ** 2)'] + anova_reduced['sum_sq'][f'I({self.x3_name} ** 2)']
449 |
df_curvature = 3
450 |
451 |
# Añadir la fila de curvatura a la tabla ANOVA
452 |
detailed_anova_table.loc[len(detailed_anova_table)] = ['Curvatura', ss_curvature, df_curvature, ss_curvature / df_curvature, np.nan, np.nan]
453 |
454 |
# Reorganizar las filas para que la curvatura aparezca después de la regresión
455 |
detailed_anova_table = detailed_anova_table.reindex([0, 5, 1, 2, 3, 4])
456 |
457 |
# Resetear el índice para que sea consecutivo
458 |
detailed_anova_table = detailed_anova_table.reset_index(drop=True)
459 |
460 |
return detailed_anova_table.round(3)
461 |
462 |
def get_all_tables(self):
    """
    Collect every generated table so they can be exported to Excel.

    Returns:
        A dict keyed by table title holding, in this order, the results of
        ``generate_prediction_table()``, ``calculate_contribution_percentage()``
        and ``calculate_detailed_anova()``.
    """
    prediction_table = self.generate_prediction_table()
    contribution_table = self.calculate_contribution_percentage()
    detailed_anova_table = self.calculate_detailed_anova()

    return {
        'Predicciones': prediction_table,
        '% Contribución': contribution_table,
        'ANOVA Detallada': detailed_anova_table
    }
475 |
476 |
477 |
478 |
479 |
480 |
if not
481 |
return None
482 |
483 |
484 |
485 |
486 |
487 |
488 |
489 |
490 |
491 |
492 |
493 |
494 |
495 |
496 |
497 |
498 |
499 |
500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
508 |
509 |
510 |
511 |
512 |
513 |
514 |
515 |
516 |
517 |
518 |
519 |
520 |
521 |
return None
522 |
523 |
524 |
525 |
526 |
527 |
528 |
529 |
530 |
531 |
532 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
533 |
534 |
temp_path =
535 |
536 |
537 |
538 |
539 |
540 |
541 |
542 |
543 |
544 |
545 |
# Convertir los niveles a listas de números
546 |
x1_levels = [float(x.strip()) for x in x1_levels_str.split(',')]
547 |
x2_levels = [float(x.strip()) for x in x2_levels_str.split(',')]
548 |
x3_levels = [float(x.strip()) for x in x3_levels_str.split(',')]
549 |
550 |
# Crear DataFrame a partir de la cadena de datos
551 |
data_list = [row.split(',') for row in data_str.strip().split('\n')]
552 |
column_names = ['Exp.', x1_name, x2_name, x3_name, y_name]
553 |
data = pd.DataFrame(data_list, columns=column_names)
554 |
data = data.apply(pd.to_numeric, errors='coerce') # Convertir a numérico
555 |
556 |
# Validar que el DataFrame tenga las columnas correctas
557 |
if not all(col in data.columns for col in column_names):
558 |
raise ValueError("El formato de los datos no es correcto.")
559 |
560 |
# Crear la instancia de RSM_BoxBehnken
561 |
global rsm
562 |
rsm = RSM_BoxBehnken(data, x1_name, x2_name, x3_name, y_name, x1_levels, x2_levels, x3_levels)
563 |
564 |
return data.round(3), x1_name, x2_name, x3_name, y_name, x1_levels, x2_levels, x3_levels, gr.update(visible=True)
565 |
566 |
except Exception as e:
567 |
# Mostrar mensaje de error
568 |
error_message = f"Error al cargar los datos: {str(e)}"
569 |
570 |
return None, "", "", "", "", [], [], [], gr.update(visible=False)
571 |
572 |
def fit_and_optimize_model():
573 |
if 'rsm' not in globals():
574 |
return [None]*10
575 |
576 |
# Ajustar modelos y optimizar
577 |
model_completo, pareto_completo = rsm.fit_model()
578 |
model_simplificado, pareto_simplificado = rsm.fit_simplified_model()
579 |
optimization_table = rsm.optimize()
580 |
equation = rsm.get_simplified_equation()
581 |
prediction_table = rsm.generate_prediction_table()
582 |
contribution_table = rsm.calculate_contribution_percentage()
583 |
anova_table = rsm.calculate_detailed_anova()
584 |
585 |
# Generar todas las figuras y almacenarlas
586 |
587 |
588 |
# Formatear la ecuación para que se vea mejor en Markdown
589 |
equation_formatted = equation.replace(" + ", "<br>+ ").replace(" ** ", "^").replace("*", " × ")
590 |
equation_formatted = f"### Ecuación del Modelo Simplificado:<br>{equation_formatted}"
591 |
592 |
# Guardar las tablas en Excel temporal
593 |
excel_path = rsm.save_tables_to_excel()
594 |
595 |
# Guardar todas las figuras en un ZIP temporal
596 |
zip_path = rsm.save_figures_to_zip()
597 |
598 |
return (
599 |
600 |
601 |
602 |
603 |
604 |
605 |
606 |
607 |
608 |
zip_path, # Ruta del ZIP de gráficos
609 |
excel_path # Ruta del Excel de tablas
610 |
611 |
612 |
613 |
if not all_figures:
614 |
return None, "No hay gráficos disponibles.", current_index
615 |
selected_fig = all_figures[current_index]
616 |
plot_info_text = f"Gráfico {current_index + 1} de {len(all_figures)}"
617 |
return selected_fig, plot_info_text, current_index
618 |
619 |
620 |
621 |
Navega entre los gráficos.
622 |
623 |
if not all_figures:
624 |
return None, "No hay gráficos disponibles.", current_index
625 |
626 |
if direction == 'left':
627 |
new_index = (current_index - 1) % len(all_figures)
628 |
elif direction == 'right':
629 |
new_index = (current_index + 1) % len(all_figures)
630 |
631 |
new_index = current_index
632 |
633 |
selected_fig = all_figures[new_index]
634 |
plot_info_text = f"Gráfico {new_index + 1} de {len(all_figures)}"
635 |
636 |
return selected_fig, plot_info_text, new_index
637 |
638 |
639 |
640 |
641 |
642 |
643 |
return None
644 |
fig = all_figures[current_index]
645 |
img_bytes = rsm.save_fig_to_bytes(fig)
646 |
filename = f"Grafico_RSM_{current_index + 1}.png"
647 |
648 |
# Crear un archivo temporal
649 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
650 |
651 |
temp_path =
652 |
653 |
return temp_path # Retornar solo la ruta
654 |
655 |
656 |
657 |
658 |
659 |
660 |
return None
661 |
zip_path = rsm.save_figures_to_zip()
662 |
filename = f"Graficos_RSM_{'%Y%m%d_%H%M%S')}.zip"
663 |
return zip_path # Retornar solo la ruta
664 |
665 |
666 |
667 |
668 |
669 |
670 |
return None
671 |
excel_path = rsm.save_tables_to_excel()
672 |
filename = f"Tablas_RSM_{'%Y%m%d_%H%M%S')}.xlsx"
673 |
return excel_path # Retornar solo la ruta
674 |
675 |
676 |
677 |
678 |
679 |
680 |
681 |
682 |
683 |
684 |
685 |
686 |
687 |
688 |
689 |
690 |
691 |
692 |
693 |
694 |
695 |
696 |
697 |
698 |
699 |
700 |
701 |
702 |
703 |
704 |
705 |
706 |
707 |
708 |
709 |
710 |
711 |
712 |
713 |
714 |
715 |
716 |
717 |
718 |
719 |
720 |
721 |
722 |
723 |
724 |
725 |
726 |
727 |
728 |
729 |
730 |
731 |
732 |
733 |
734 |
735 |
736 |
737 |
738 |
739 |
740 |
741 |
742 |
743 |
744 |
745 |
746 |
747 |
748 |
749 |
750 |
751 |
752 |
753 |
754 |
755 |
756 |
757 |
758 |
759 |
760 |
761 |
762 |
763 |
764 |
765 |
766 |
767 |
768 |
769 |
770 |
771 |
772 |
773 |
774 |
775 |
776 |
777 |
778 |
779 |
780 |
781 |
782 |
783 |
784 |
785 |
786 |
787 |
788 |
789 |
790 |
791 |
792 |
793 |
794 |
795 |
796 |
797 |
798 |
799 |
800 |
801 |
802 |
803 |
804 |
805 |
# Descargar todas las tablas en Excel
806 |
807 |
808 |
809 |
810 |
811 |
812 |
813 |
814 |
815 |
816 |
817 |
818 |
819 |
820 |
821 |
822 |
823 |
824 |
825 |
826 |
1 |
import os
2 |
import re
3 |
import time
4 |
import logging
5 |
import zipfile
6 |
import requests
7 |
import bibtexparser
8 |
from tqdm import tqdm
9 |
from urllib.parse import quote, urlencode
10 |
import gradio as gr
11 |
from bs4 import BeautifulSoup
12 |
import io
13 |
import asyncio
14 |
import aiohttp
15 |
16 |
# Configure logging
17 |
18 |
format='%(asctime)s - %(levelname)s: %(message)s')
19 |
logger = logging.getLogger(__name__)
20 |
21 |
22 |
class PaperDownloader:
23 |
def __init__(self, output_dir='papers'):
24 |
self.output_dir = output_dir
25 |
os.makedirs(output_dir, exist_ok=True)
26 |
27 |
# Updated download sources
28 |
self.download_sources = [
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
# Request headers
39 |
self.headers = {
40 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/ Safari/537.36',
41 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
42 |
'Accept-Language': 'en-US,en;q=0.9',
43 |
44 |
45 |
def clean_doi(self, doi):
    """Clean and percent-encode a DOI for use in a URL.

    Args:
        doi: The DOI to encode.

    Returns:
        The DOI with surrounding whitespace removed and percent-encoded
        (``/`` is left as-is), or ``None`` when ``doi`` is not a string or is
        empty once stripped.
    """
    if not isinstance(doi, str):
        return None
    # Strip before the emptiness check so a whitespace-only value yields
    # None instead of an empty string that would build a DOI-less URL.
    stripped = doi.strip()
    return quote(stripped) if stripped else None
50 |
51 |
async def fetch_with_headers(self, session, url, timeout=10):
    """Fetch a URL with the instance's request headers and a timeout.

    Args:
        session: Client session exposing ``get(...)`` as an async context
            manager (presumably an ``aiohttp.ClientSession`` — confirm).
        url: Address to request; redirects are followed.
        timeout: Timeout value forwarded to ``session.get``.

    Returns:
        ``(text, headers)`` of the response, or ``(None, None)`` when the
        request fails for any reason. The failure is logged at debug level.
    """
    try:
        async with session.get(url, headers=self.headers, timeout=timeout, allow_redirects=True) as response:
            # Treat HTTP error statuses as failures so callers never parse
            # an error page as if it were the requested document.
            response.raise_for_status()
            return await response.text(), response.headers
    except Exception as e:
        logger.debug("Error fetching %s: %s", url, e)
        return None, None
60 |
61 |
62 |
async def download_paper_direct_doi_async(self, session, doi):
63 |
"""Attempt to download the pdf from the landing page of the doi"""
64 |
if not doi:
65 |
return None
66 |
67 |
68 |
doi_url = f"{self.clean_doi(doi)}"
69 |
text, headers = await self.fetch_with_headers(session, doi_url, timeout=15)
70 |
if not text:
71 |
return None
72 |
73 |
pdf_patterns = [
74 |
75 |
76 |
77 |
78 |
79 |
pdf_urls = []
80 |
for pattern in pdf_patterns:
81 |
pdf_urls.extend(re.findall(pattern, text))
82 |
83 |
for pdf_url in pdf_urls:
84 |
85 |
pdf_response = await session.get(pdf_url, headers=self.headers, timeout=10)
86 |
if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
87 |
logger.debug(f"Found PDF from: {pdf_url}")
88 |
return await
89 |
except Exception as e:
90 |
logger.debug(f"Error downloading PDF from {pdf_url}: {e}")
91 |
92 |
93 |
except Exception as e:
94 |
logger.debug(f"Error trying to get the PDF from {doi}: {e}")
95 |
96 |
return None
97 |
98 |
async def download_paper_scihub_async(self, session, doi):
99 |
"""Improved method to download paper from Sci-Hub using async requests"""
100 |
if not doi:
101 |
logger.warning("DOI not provided")
102 |
return None
103 |
104 |
for base_url in self.download_sources:
105 |
106 |
scihub_url = f"{base_url}{self.clean_doi(doi)}"
107 |
text, headers = await self.fetch_with_headers(session, scihub_url, timeout=15)
108 |
if not text:
109 |
110 |
111 |
# Search for multiple PDF URL patterns
112 |
pdf_patterns = [
113 |
114 |
115 |
116 |
117 |
118 |
pdf_urls = []
119 |
for pattern in pdf_patterns:
120 |
pdf_urls.extend(re.findall(pattern, text))
121 |
122 |
# Try downloading from found URLs
123 |
for pdf_url in pdf_urls:
124 |
125 |
pdf_response = await session.get(pdf_url, headers=self.headers, timeout=10)
126 |
# Verify if it's a PDF
127 |
if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
128 |
logger.debug(f"Found PDF from: {pdf_url}")
129 |
return await
130 |
except Exception as e:
131 |
logger.debug(f"Error downloading PDF from {pdf_url}: {e}")
132 |
133 |
except Exception as e:
134 |
logger.debug(f"Error trying to download {doi} from {base_url}: {e}")
135 |
136 |
return None
137 |
138 |
async def download_paper_libgen_async(self, session, doi):
139 |
"""Download from Libgen, handles the query and the redirection"""
140 |
if not doi:
141 |
return None
142 |
143 |
base_url = ''
144 |
145 |
search_url = f"{base_url}?q={self.clean_doi(doi)}"
146 |
text, headers = await self.fetch_with_headers(session, search_url, timeout=10)
147 |
148 |
if not text or "No results" in text:
149 |
logger.debug(f"No results for DOI: {doi} on libgen")
150 |
return None
151 |
152 |
soup = BeautifulSoup(text, 'html.parser')
153 |
154 |
links ='table.c > tbody > tr:nth-child(2) > td:nth-child(1) > a')
155 |
156 |
if links:
157 |
link = links[0]
158 |
pdf_url = link['href']
159 |
pdf_response = await session.get(pdf_url, headers=self.headers, allow_redirects=True, timeout=10)
160 |
if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
161 |
logger.debug(f"Found PDF from: {pdf_url}")
162 |
return await
163 |
except Exception as e:
164 |
logger.debug(f"Error trying to download {doi} from libgen: {e}")
165 |
return None
166 |
167 |
async def download_paper_google_scholar_async(self, session, doi):
    """Search Google Scholar for the DOI and try to fetch the first [PDF] link.

    Args:
        session: Client session used for both requests.
        doi: DOI to search for.

    Returns:
        The PDF bytes when the first ``[PDF]`` link on the result page serves
        ``application/pdf``; otherwise ``None``. Errors are logged at debug
        level and also yield ``None``.
    """
    if not doi:
        return None

    try:
        query = f'doi:"{doi}"'
        params = {'q': query}
        # NOTE(review): the endpoint prefix was missing from the reviewed
        # text; restored to the public Google Scholar search URL — confirm.
        url = f'https://scholar.google.com/scholar?{urlencode(params)}'

        text, _ = await self.fetch_with_headers(session, url, timeout=10)
        if not text:
            return None

        soup = BeautifulSoup(text, 'html.parser')

        # Find any links whose text is [PDF]
        links = soup.find_all('a', string=re.compile(r'\[PDF\]', re.IGNORECASE))

        if links:
            pdf_url = links[0]['href']
            # Use the response as a context manager so the connection is
            # released whether or not the body turns out to be a PDF.
            async with session.get(pdf_url, headers=self.headers, timeout=10) as pdf_response:
                if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
                    logger.debug(f"Found PDF from: {pdf_url}")
                    return await pdf_response.read()
    except Exception as e:
        logger.debug(f"Google Scholar error for {doi}: {e}")

    return None
196 |
197 |
async def download_paper_crossref_async(self, session, doi):
    """Look the DOI up on Crossref and download the first working PDF link.

    Args:
        session: Client session used for the metadata and PDF requests.
        doi: DOI to look up.

    Returns:
        The PDF bytes from the first ``link`` entry of the Crossref record
        that is declared as ``application/pdf`` and actually serves that
        content type; otherwise ``None``. Errors are logged at debug level
        and also yield ``None``.
    """
    if not doi:
        return None

    try:
        # NOTE(review): the endpoint prefix was missing from the reviewed
        # text; restored to the Crossref REST "works" endpoint, which matches
        # the message/link structure parsed below — confirm.
        url = f"https://api.crossref.org/works/{self.clean_doi(doi)}"
        async with session.get(url, headers=self.headers, timeout=10) as response:
            if response.status != 200:
                return None
            data = await response.json()

        work = data.get('message', {})

        # Search for open access links
        for link in work.get('link', []):
            if link.get('content-type') != 'application/pdf':
                continue
            pdf_url = link.get('URL')
            if not pdf_url:
                continue
            # Context manager releases the connection for non-PDF responses.
            async with session.get(pdf_url, headers=self.headers) as pdf_response:
                if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
                    logger.debug(f"Found PDF from: {pdf_url}")
                    return await pdf_response.read()

    except Exception as e:
        logger.debug(f"Crossref error for {doi}: {e}")

    return None
226 |
227 |
async def download_with_retry_async(self, doi, max_retries=3, initial_delay=2):
    """Download a paper, trying every strategy per attempt with exponential backoff.

    Each attempt tries, in order: the DOI landing page, Sci-Hub, Libgen,
    Google Scholar and Crossref, stopping at the first that returns content.
    All attempts share one ``aiohttp.ClientSession``.

    Args:
        doi: DOI of the paper.
        max_retries: Maximum number of attempts.
        initial_delay: Seconds to wait after the first failed attempt; the
            wait doubles after every further failure.

    Returns:
        The PDF content from the first successful strategy, or ``None`` when
        every attempt fails.
    """
    delay = initial_delay

    async with aiohttp.ClientSession() as session:
        for attempt in range(1, max_retries + 1):
            pdf_content = None
            try:
                pdf_content = (
                    await self.download_paper_direct_doi_async(session, doi) or
                    await self.download_paper_scihub_async(session, doi) or
                    await self.download_paper_libgen_async(session, doi) or
                    await self.download_paper_google_scholar_async(session, doi) or
                    await self.download_paper_crossref_async(session, doi)
                )
            except Exception as e:
                logger.error(f"Error in download attempt {attempt} for DOI {doi}: {e}")

            if pdf_content:
                return pdf_content

            # Back off only when another attempt follows; waiting after the
            # final failure just delays returning None.
            if attempt < max_retries:
                logger.warning(f"Retry attempt {attempt} for DOI: {doi} after {delay} seconds")
                await asyncio.sleep(delay)
                delay *= 2  # Exponential backoff

    return None
256 |
257 |
def download_paper_scihub(self, doi):
258 |
"""Improved method to download paper from Sci-Hub"""
259 |
if not doi:
260 |
logger.warning("DOI not provided")
261 |
return None
262 |
263 |
for base_url in self.download_sources:
264 |
265 |
scihub_url = f"{base_url}{self.clean_doi(doi)}"
266 |
267 |
# Request with more tolerance
268 |
response = requests.get(scihub_url,
269 |
270 |
271 |
272 |
273 |
# Search for multiple PDF URL patterns
274 |
pdf_patterns = [
275 |
276 |
277 |
278 |
279 |
280 |
pdf_urls = []
281 |
for pattern in pdf_patterns:
282 |
pdf_urls.extend(re.findall(pattern, response.text))
283 |
284 |
# Try downloading from found URLs
285 |
for pdf_url in pdf_urls:
286 |
287 |
pdf_response = requests.get(pdf_url,
288 |
289 |
290 |
291 |
# Verify if it's a PDF
292 |
if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
293 |
logger.debug(f"Found PDF from: {pdf_url}")
294 |
return pdf_response.content
295 |
except Exception as e:
296 |
logger.debug(f"Error downloading PDF from {pdf_url}: {e}")
297 |
298 |
except Exception as e:
299 |
logger.debug(f"Error trying to download {doi} from {base_url}: {e}")
300 |
301 |
return None
302 |
303 |
def download_paper_libgen(self, doi):
304 |
"""Download from Libgen, handles the query and the redirection"""
305 |
if not doi:
306 |
return None
307 |
308 |
base_url = ''
309 |
310 |
search_url = f"{base_url}?q={self.clean_doi(doi)}"
311 |
response = requests.get(search_url, headers=self.headers, allow_redirects=True, timeout=10)
312 |
313 |
314 |
if "No results" in response.text:
315 |
logger.debug(f"No results for DOI: {doi} on libgen")
316 |
return None
317 |
318 |
soup = BeautifulSoup(response.text, 'html.parser')
319 |
320 |
# Find the link using a specific selector
321 |
links ='table.c > tbody > tr:nth-child(2) > td:nth-child(1) > a')
322 |
323 |
if links:
324 |
link = links[0]
325 |
pdf_url = link['href']
326 |
pdf_response = requests.get(pdf_url, headers=self.headers, allow_redirects=True, timeout=10)
327 |
if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
328 |
logger.debug(f"Found PDF from: {pdf_url}")
329 |
return pdf_response.content
330 |
331 |
except Exception as e:
332 |
logger.debug(f"Error trying to download {doi} from libgen: {e}")
333 |
return None
334 |
335 |
def download_paper_google_scholar(self, doi):
    """Search Google Scholar for the DOI and try to fetch the first [PDF] link.

    Args:
        doi: DOI to search for.

    Returns:
        The PDF bytes when the first ``[PDF]`` link on the result page serves
        ``application/pdf``; otherwise ``None``. Errors are logged at debug
        level and also yield ``None``.
    """
    if not doi:
        return None

    try:
        query = f'doi:"{doi}"'
        params = {'q': query}
        # NOTE(review): the endpoint prefix was missing from the reviewed
        # text; restored to the public Google Scholar search URL — confirm.
        url = f'https://scholar.google.com/scholar?{urlencode(params)}'

        response = requests.get(url, headers=self.headers, timeout=10)
        # Do not parse an error or block page as if it were search results.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Find any links whose text is [PDF]
        links = soup.find_all('a', string=re.compile(r'\[PDF\]', re.IGNORECASE))

        if links:
            pdf_url = links[0]['href']
            pdf_response = requests.get(pdf_url, headers=self.headers, timeout=10)
            if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
                logger.debug(f"Found PDF from: {pdf_url}")
                return pdf_response.content
    except Exception as e:
        logger.debug(f"Google Scholar error for {doi}: {e}")

    return None
363 |
364 |
def download_paper_crossref(self, doi):
    """Look the DOI up on Crossref and download the first working PDF link.

    Args:
        doi: DOI to look up.

    Returns:
        The PDF bytes from the first ``link`` entry of the Crossref record
        that is declared as ``application/pdf`` and actually serves that
        content type; otherwise ``None``. Errors are logged at debug level
        and also yield ``None``.
    """
    if not doi:
        return None

    try:
        # NOTE(review): the endpoint prefix was missing from the reviewed
        # text; restored to the Crossref REST "works" endpoint, which matches
        # the message/link structure parsed below — confirm.
        url = f"https://api.crossref.org/works/{self.clean_doi(doi)}"
        response = requests.get(url, headers=self.headers, timeout=10)

        if response.status_code == 200:
            data = response.json()
            work = data.get('message', {})

            # Search for open access links
            for link in work.get('link', []):
                if link.get('content-type') != 'application/pdf':
                    continue
                pdf_url = link.get('URL')
                if not pdf_url:
                    continue
                # A timeout keeps one stalled publisher host from blocking
                # the whole download chain indefinitely.
                pdf_response = requests.get(pdf_url, headers=self.headers, timeout=10)
                if 'application/pdf' in pdf_response.headers.get('Content-Type', ''):
                    logger.debug(f"Found PDF from: {pdf_url}")
                    return pdf_response.content

    except Exception as e:
        logger.debug(f"Crossref error for {doi}: {e}")

    return None
393 |
394 |
def download_with_retry(self, doi, max_retries=3, initial_delay=2):
    """Download a paper, trying every strategy per attempt with exponential backoff.

    Each attempt tries, in order: Sci-Hub, Libgen, Google Scholar and
    Crossref, stopping at the first that returns content.

    Args:
        doi: DOI of the paper.
        max_retries: Maximum number of attempts.
        initial_delay: Seconds to wait after the first failed attempt; the
            wait doubles after every further failure.

    Returns:
        The PDF content from the first successful strategy, or ``None`` when
        every attempt fails.
    """
    delay = initial_delay

    for attempt in range(1, max_retries + 1):
        pdf_content = None
        try:
            # NOTE(review): the last strategy line was missing from the
            # reviewed text; Crossref is restored to mirror the async variant.
            pdf_content = (
                self.download_paper_scihub(doi) or
                self.download_paper_libgen(doi) or
                self.download_paper_google_scholar(doi) or
                self.download_paper_crossref(doi)
            )
        except Exception as e:
            logger.error(f"Error in download attempt {attempt} for DOI {doi}: {e}")

        if pdf_content:
            return pdf_content

        # Back off only when another attempt follows; waiting after the
        # final failure just delays returning None.
        if attempt < max_retries:
            logger.warning(f"Retry attempt {attempt} for DOI: {doi} after {delay} seconds")
            time.sleep(delay)
            delay *= 2  # Exponential backoff

    return None
422 |
423 |
def download_single_doi(self, doi):
    """Download one paper by DOI and save it under ``self.output_dir``.

    Args:
        doi: DOI of the paper; surrounding whitespace is ignored.

    Returns:
        A 3-tuple ``(filepath, found_message, missed_message)``:

        * success: path of the saved PDF, an HTML link row, and ``""``;
        * not found: ``None``, a plain-text notice, and an HTML link row;
        * missing DOI or unexpected error: ``None`` and the same error text
          in both message slots.
    """
    doi = str(doi).strip() if doi else ""
    if not doi:
        return None, "Error: DOI not provided", "Error: DOI not provided"

    from html import escape
    from urllib.parse import quote

    def link_row(mark):
        # DOIs may legally contain characters such as < > & so they are
        # escaped for display and percent-encoded for the resolver URL.
        return (
            f'<div style="display: flex; align-items: center;">{mark} '
            f'<a href="https://doi.org/{quote(doi)}">{escape(doi)}</a> '
            f'<button onclick="copyLink(this)">Copy</button></div>'
        )

    try:
        pdf_content = self.download_with_retry(doi)

        if pdf_content:
            # '/' and '.' are replaced so the DOI is usable as a file name.
            filename = f"{doi.replace('/', '_').replace('.', '_')}.pdf"
            filepath = os.path.join(self.output_dir, filename)
            with open(filepath, 'wb') as f:
                f.write(pdf_content)
            logger.info(f"Successfully downloaded: {filename}")
            return filepath, link_row("✓"), ""

        logger.warning(f"Could not download: {doi}")
        return None, f"Could not download {doi}", link_row("❌")

    except Exception as e:
        logger.error(f"Error processing {doi}: {e}")
        return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
447 |
448 |
def download_multiple_dois(self, dois_text):
    """Download several papers from a newline-separated list of DOIs.

    Args:
        dois_text: Text with one DOI per line; blank lines are ignored.

    Returns:
        A 3-tuple ``(zip_path, found_html, missed_html)``. ``zip_path`` is
        ``None`` when nothing could be downloaded. The two HTML strings hold
        one link row per DOI, joined with newlines. With no usable input the
        tuple is ``(None, error_text, error_text)``.
    """
    if not dois_text:
        return None, "Error: No DOIs provided", "Error: No DOIs provided"

    dois = [doi.strip() for doi in dois_text.split('\n') if doi.strip()]
    if not dois:
        return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided"

    from html import escape
    from urllib.parse import quote

    def link_row(mark, doi):
        # Escape for display, percent-encode for the resolver URL.
        return (
            f'<div style="display: flex; align-items: center;">{mark} '
            f'<a href="https://doi.org/{quote(doi)}">{escape(doi)}</a> '
            f'<button onclick="copyLink(this)">Copy</button></div>'
        )

    downloaded_files = []
    failed_dois = []
    downloaded_links = []
    for i, doi in enumerate(tqdm(dois, desc="Downloading papers")):
        filepath, _, _ = self.download_single_doi(doi)
        if filepath:
            # Unique filename for zip: the index keeps a DOI listed twice
            # from colliding inside the archive.
            filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
            filepath_unique = os.path.join(self.output_dir, filename)
            # os.replace overwrites a leftover from a previous run, where
            # os.rename would fail on Windows.
            os.replace(filepath, filepath_unique)
            downloaded_files.append(filepath_unique)
            downloaded_links.append(link_row("✓", doi))
        else:
            failed_dois.append(link_row("❌", doi))

    zip_filename = None
    if downloaded_files:
        # ZipFile cannot open an empty path, so the archive gets a real name
        # next to the PDFs it bundles.
        zip_filename = os.path.join(self.output_dir, 'papers.zip')
        with zipfile.ZipFile(zip_filename, 'w') as zipf:
            for file_path in downloaded_files:
                zipf.write(file_path, arcname=os.path.basename(file_path))
        logger.info(f"ZIP file created: {zip_filename}")

    return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois)
481 |
482 |
def process_bibtex(self, bib_file):
    """Download every paper whose DOI appears in an uploaded BibTeX file.

    Args:
        bib_file: The uploaded file, given either as an object exposing the
            path in ``.name`` or directly as a path string.

    Returns:
        A 4-tuple ``(zip_path, found_html, missed_html, None)``. ``zip_path``
        is ``None`` when nothing was downloaded. The fourth slot is always
        ``None``. If the file cannot be read or parsed, the tuple is
        ``(None, error_text, error_text, None)``.
    """
    from html import escape
    from urllib.parse import quote

    def link_row(mark, doi):
        # Escape for display, percent-encode for the resolver URL.
        return (
            f'<div style="display: flex; align-items: center;">{mark} '
            f'<a href="https://doi.org/{quote(doi)}">{escape(doi)}</a> '
            f'<button onclick="copyLink(this)">Copy</button></div>'
        )

    # Read BibTeX file content from the uploaded object
    bib_path = getattr(bib_file, 'name', bib_file)
    try:
        with open(bib_path, 'r', encoding='utf-8') as f:
            bib_content = f.read()
    except Exception as e:
        logger.error(f"Error reading uploaded file {bib_path}: {e}")
        message = f"Error reading uploaded file {bib_path}: {e}"
        return None, message, message, None

    # Parse BibTeX data
    try:
        bib_database = bibtexparser.loads(bib_content)
    except Exception as e:
        logger.error(f"Error parsing BibTeX data: {e}")
        return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}", None

    # Extract DOIs; dict.fromkeys drops repeats while keeping file order, so
    # a DOI cited twice is fetched and archived only once.
    raw_dois = (str(entry.get('doi')).strip() for entry in bib_database.entries if entry.get('doi'))
    dois = list(dict.fromkeys(doi for doi in raw_dois if doi))
    logger.info(f"Found {len(dois)} DOIs to download")

    # Result lists
    downloaded_files = []
    failed_dois = []
    downloaded_links = []

    # Download PDFs
    for doi in tqdm(dois, desc="Downloading papers"):
        try:
            # Try to download with multiple methods with retries
            pdf_content = self.download_with_retry(doi)

            # Save PDF
            if pdf_content:
                filename = f"{doi.replace('/', '_').replace('.', '_')}.pdf"
                filepath = os.path.join(self.output_dir, filename)

                with open(filepath, 'wb') as f:
                    f.write(pdf_content)

                downloaded_files.append(filepath)
                downloaded_links.append(link_row("✓", doi))
                logger.info(f"Successfully downloaded: {filename}")
            else:
                failed_dois.append(link_row("❌", doi))

        except Exception as e:
            # One bad entry must not abort the rest of the batch.
            failed_dois.append(link_row("❌", doi))
            logger.error(f"Error processing {doi}: {e}")

    # Create ZIP of downloaded papers. The name is bound up front so the
    # return below is valid even when nothing was downloaded.
    zip_filename = None
    if downloaded_files:
        zip_filename = os.path.join(self.output_dir, 'papers.zip')
        with zipfile.ZipFile(zip_filename, 'w') as zipf:
            for file_path in downloaded_files:
                zipf.write(file_path, arcname=os.path.basename(file_path))
        logger.info(f"ZIP file created: {zip_filename}")

    return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
543 |
544 |
async def process_bibtex_async(self, bib_file):
    """Download every paper whose DOI appears in an uploaded BibTeX file.

    Coroutine counterpart of the BibTeX batch download: each paper is
    fetched by awaiting ``self.download_with_retry_async``.

    Args:
        bib_file: The uploaded file, given either as an object exposing the
            path in ``.name`` or directly as a path string.

    Returns:
        A 4-tuple ``(zip_path, found_html, missed_html, None)``. ``zip_path``
        is ``None`` when nothing was downloaded. The fourth slot is always
        ``None``. If the file cannot be read or parsed, the tuple is
        ``(None, error_text, error_text, None)``.
    """
    from html import escape
    from urllib.parse import quote

    def link_row(mark, doi):
        # Escape for display, percent-encode for the resolver URL.
        return (
            f'<div style="display: flex; align-items: center;">{mark} '
            f'<a href="https://doi.org/{quote(doi)}">{escape(doi)}</a> '
            f'<button onclick="copyLink(this)">Copy</button></div>'
        )

    # Read BibTeX file content from the uploaded object
    bib_path = getattr(bib_file, 'name', bib_file)
    try:
        with open(bib_path, 'r', encoding='utf-8') as f:
            bib_content = f.read()
    except Exception as e:
        logger.error(f"Error reading uploaded file {bib_path}: {e}")
        message = f"Error reading uploaded file {bib_path}: {e}"
        return None, message, message, None

    # Parse BibTeX data
    try:
        bib_database = bibtexparser.loads(bib_content)
    except Exception as e:
        logger.error(f"Error parsing BibTeX data: {e}")
        return None, f"Error parsing BibTeX data: {e}", f"Error parsing BibTeX data: {e}", None

    # Extract DOIs; dict.fromkeys drops repeats while keeping file order, so
    # a DOI cited twice is fetched and archived only once.
    raw_dois = (str(entry.get('doi')).strip() for entry in bib_database.entries if entry.get('doi'))
    dois = list(dict.fromkeys(doi for doi in raw_dois if doi))
    logger.info(f"Found {len(dois)} DOIs to download")

    # Result lists
    downloaded_files = []
    failed_dois = []
    downloaded_links = []

    # Download PDFs
    for doi in tqdm(dois, desc="Downloading papers"):
        try:
            # Try to download with multiple methods with retries
            pdf_content = await self.download_with_retry_async(doi)

            # Save PDF
            if pdf_content:
                filename = f"{doi.replace('/', '_').replace('.', '_')}.pdf"
                filepath = os.path.join(self.output_dir, filename)

                with open(filepath, 'wb') as f:
                    f.write(pdf_content)

                downloaded_files.append(filepath)
                downloaded_links.append(link_row("✓", doi))
                logger.info(f"Successfully downloaded: {filename}")
            else:
                failed_dois.append(link_row("❌", doi))

        except Exception as e:
            # One bad entry must not abort the rest of the batch.
            failed_dois.append(link_row("❌", doi))
            logger.error(f"Error processing {doi}: {e}")

    # Create ZIP of downloaded papers. The name is bound up front so the
    # return below is valid even when nothing was downloaded.
    zip_filename = None
    if downloaded_files:
        zip_filename = os.path.join(self.output_dir, 'papers.zip')
        with zipfile.ZipFile(zip_filename, 'w') as zipf:
            for file_path in downloaded_files:
                zipf.write(file_path, arcname=os.path.basename(file_path))
        logger.info(f"ZIP file created: {zip_filename}")

    return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
605 |
606 |
def create_gradio_interface():
    """Create Gradio interface for Paper Downloader.

    Builds a ``gr.Interface`` with three alternative inputs (a ``.bib``
    upload, a single DOI, a newline-separated DOI list) and four outputs
    (ZIP archive, HTML list of found DOIs, HTML list of missed DOIs,
    single PDF).

    Returns:
        The configured, not yet launched, ``gr.Interface``.
    """
    import asyncio  # local import keeps this block self-contained

    downloader = PaperDownloader()

    async def download_papers(bib_file, doi_input, dois_input):
        """Dispatch on the first supplied input: .bib file, DOI, DOI list."""
        if bib_file:
            # Check file type. The upload may arrive as an object exposing
            # `.name` or as a plain path string.
            bib_path = str(getattr(bib_file, 'name', bib_file))
            if not bib_path.lower().endswith('.bib'):
                return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None

            zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file)
            return zip_path, downloaded_dois, failed_dois, None

        # The two DOI paths are synchronous and may sleep between retries;
        # run them in the default executor so the event loop stays free.
        loop = asyncio.get_running_loop()
        if doi_input:
            filepath, message, failed_doi = await loop.run_in_executor(
                None, downloader.download_single_doi, doi_input
            )
            return None, message, failed_doi, filepath
        if dois_input:
            zip_path, downloaded_dois, failed_dois = await loop.run_in_executor(
                None, downloader.download_multiple_dois, dois_input
            )
            return zip_path, downloaded_dois, failed_dois, None

        return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None

    # Gradio Interface
    interface = gr.Interface(
        fn=download_papers,
        inputs=[
            gr.File(file_types=['.bib'], label="Upload BibTeX File"),
            gr.Textbox(label="Enter Single DOI", placeholder="10.xxxx/xxxx"),
            gr.Textbox(label="Enter Multiple DOIs (one per line)", placeholder="10.xxxx/xxxx\n10.yyyy/yyyy\n..."),
        ],
        outputs=[
            gr.File(label="Download Papers (ZIP) or Single PDF"),
            # The handler returns ready-made HTML rows, which gr.HTML renders
            # directly; no client-side script is needed to fill these in.
            gr.HTML(label="Found DOIs"),
            gr.HTML(label="Missed DOIs"),
            gr.File(label="Downloaded Single PDF"),
        ],
        title="🔬 Academic Paper Batch Downloader",
        description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",
        examples=[
            ["example.bib", None, None],  # Bibtex File
            [None, "10.1038/nature12373", None],  # Single DOI
            [None, None, "10.1109/5.771073\n10.3390/horticulturae8080677"],  # Multiple DOIs
        ],
        # Running the examples at start-up would trigger real downloads.
        cache_examples=False,
        css="""
        .gradio-container {
            background-color: black;
        }
        .gr-interface {
            max-width: 800px;
            margin: 0 auto;
        }
        .gr-box {
            background-color: black;
            border-radius: 10px;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        }
        .output-text a {
            color: #007bff; /* Blue color for hyperlinks */
        }
        """,
    )

    # NOTE: the previous `interface.load = "<javascript>"` assignment is
    # intentionally gone. `load` is the event-registration method, so
    # assigning a string to it ran nothing and shadowed the method.

    # Script behind the "Copy" button that every DOI row carries.
    # NOTE(review): set as an attribute after construction, as before;
    # confirm the installed Gradio version injects it, else pass `head=`.
    interface.head = """
    <script>
        function copyLink(button) {
            const linkElement = button.previousElementSibling;
            const link = linkElement.href;
            navigator.clipboard.writeText(link)
                .then(() => {
                    button.innerText = '✓ Copied';
                    button.style.color = 'green';
                    setTimeout(() => {
                        button.innerText = 'Copy';
                        button.style.color = '';
                    }, 2000);
                })
                .catch(err => {
                    console.error('Failed to copy link: ', err);
                });
        }
    </script>
    """
    return interface
721 |
722 |
723 |
def main():
    """Build the Gradio interface and start serving it."""
    interface = create_gradio_interface()
    # Without launch() the interface is constructed and then discarded.
    interface.launch()
725 |
726 |
727 |
728 |
if __name__ == "__main__":
729 |