|
|
|
"""pdf chat.ipynb |
|
|
|
Automatically generated by Colaboratory. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/1RXTs4FPcFCVb9_ZAWBBxLoYQEcKz37x9 |
|
""" |
|
|
|
!pip install langchain |
|
!pip install unstructured |
|
!pip install openai |
|
!pip install pybind11 |
|
!pip install chromadb |
|
!pip install Cython |
|
!pip3 install "git+https://github.com/philferriere/cocoapi.git#egg=pycocotools&subdirectory=PythonAPI" |
|
!pip install unstructured[local-inference] |
|
!CC=clang CXX=clang++ ARCHFLAGS="-arch x86_64" pip install 'git+https://github.com/facebookresearch/detectron2.git' |
|
!pip install layoutparser[layoutmodels,tesseract] |
|
!pip install pytesseract |
|
!pip install Pillow==9.0.0 |
|
!pip install tiktoken |
|
!pip install --upgrade Pillow |
|
|
|
import os
from getpass import getpass

# Expose the OpenAI API key through the OPENAI_API_KEY environment variable
# (presumably where the langchain/openai clients look for it).
# A secret must never be hard-coded in a notebook or committed to source
# control: reuse a key that is already present in the environment, otherwise
# ask for it interactively without echoing it to the output.
# NOTE(review): the key that was previously embedded in this file has been
# exposed and should be revoked.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')
|
|
|
from langchain.document_loaders import UnstructuredPDFLoader |
|
from langchain.indexes import VectorstoreIndexCreator |
|
|
|
from detectron2.config import get_cfg

# Start from detectron2's default configuration and select the GPU.
# 'gpu' is not a device name PyTorch understands; the GPU backend is spelled
# 'cuda' (use 'cpu' on a runtime without a GPU).
# NOTE(review): cfg is not passed to anything visible in this file -- confirm
# that this configuration is actually consumed somewhere.
cfg = get_cfg()
cfg.MODEL.DEVICE = 'cuda'
|
|
|
!wget https://pgcag.files.wordpress.com/2010/01/48lawsofpower.pdf |
|
|
|
!mkdir docs |
|
!mv 48lawsofpower.pdf docs |
|
|
|
# Folder that holds the documents to index.
text_folder = 'docs'

# One loader per PDF found in the folder.
# Only "*.pdf" entries are used so that stray files or sub-directories
# (e.g. notebook checkpoint folders) are never handed to the PDF loader, and
# the names are sorted because os.listdir() returns them in arbitrary order.
loaders = [
    UnstructuredPDFLoader(os.path.join(text_folder, fn))
    for fn in sorted(os.listdir(text_folder))
    if fn.lower().endswith('.pdf')
]
|
|
|
!apt-get install poppler-utils |
|
|
|
index = VectorstoreIndexCreator().from_loaders(loaders) |
|
|
|
# Questions asked against the indexed book, in the original order.
questions = [
    "Can you give me an example from history where the enemy was crushed totally from the book?",
    "What's the point of making myself less accessible?",
    "Can you tell me the story of Queen Elizabeth I from this 48 laws of power book?",
    "State the names of 5 laws?",
]

# Each answer is printed explicitly: a bare `index.query(...)` expression only
# shows its result when it is the last statement of an interactive cell, so
# the answers were lost when the file was run as a script.
for query in questions:
    print(query)
    print(index.query(query))
    print()
|
|
|
|