Spaces: Running
Update extract_text_from_pdf.py
Browse files - extract_text_from_pdf.py +3 -1
extract_text_from_pdf.py
CHANGED
@@ -19,7 +19,7 @@ class PDFTextExtractor:
|
|
19 |
A class to handle PDF text extraction and preprocessing for podcast preparation.
|
20 |
"""
|
21 |
@spaces.GPU
|
22 |
-
def __init__(self, pdf_path, output_path
|
23 |
"""
|
24 |
Initialize the PDFTextExtractor with paths and model details.
|
25 |
|
@@ -28,6 +28,8 @@ class PDFTextExtractor:
|
|
28 |
output_path (str): Path to save the cleaned text file.
|
29 |
model_name (str): Name of the model to use for text processing.
|
30 |
"""
|
|
|
|
|
31 |
self.pdf_path = pdf_path
|
32 |
self.output_path = output_path
|
33 |
self.max_chars = 100000
|
|
|
19 |
A class to handle PDF text extraction and preprocessing for podcast preparation.
|
20 |
"""
|
21 |
@spaces.GPU
|
22 |
+
def __init__(self, pdf_path, output_path):
|
23 |
"""
|
24 |
Initialize the PDFTextExtractor with paths and model details.
|
25 |
|
|
|
28 |
output_path (str): Path to save the cleaned text file.
|
29 |
model_name (str): Name of the model to use for text processing.
|
30 |
"""
|
31 |
+
|
32 |
+
model_name="meta-llama/Llama-3.2-1B-Instruct"
|
33 |
self.pdf_path = pdf_path
|
34 |
self.output_path = output_path
|
35 |
self.max_chars = 100000
|