Spaces:
Paused
Paused
File size: 973 Bytes
bc453aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
import asyncio
import logging
from enum import Enum
from typing import List

from langchain_community.document_loaders import PyMuPDFLoader
from langchain_core.documents import Document
class PDFLoaderWrapper():
class LoaderType(str, Enum):
PYMUPDF = "pymupdf"
def __init__(self, file_path: str | List[str] , loader_type: LoaderType = LoaderType.PYMUPDF):
self.file_path = file_path if isinstance(file_path, list) else [file_path]
self.loader_type = loader_type
async def aload(self) -> List[Document]:
all_docs = []
for file_path in self.file_path:
if self.loader_type == self.LoaderType.PYMUPDF:
try:
loader = PyMuPDFLoader(file_path)
docs = await loader.aload()
all_docs.extend(docs)
except Exception as e:
print(f"Error loading file {file_path}: {e}")
continue
return all_docs
|