karthik1362
/

48-Laws-of-Power

Model card Files Files and versions Community

karthik1362 committed on Jan 28

Commit

c60e255

•

1 Parent(s): c6639dd

Upload 2 files

Browse files

Files changed (2) hide show

pdf_chat.ipynb +0 -0
pdf_chat.py +58 -0

pdf_chat.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

pdf_chat.py ADDED Viewed

	@@ -0,0 +1,58 @@

+# -*- coding: utf-8 -*-
+"""pdf chat.ipynb
+Automatically generated by Colaboratory.
+Original file is located at
+    https://colab.research.google.com/drive/1RXTs4FPcFCVb9_ZAWBBxLoYQEcKz37x9
+"""
+# Install the packages this notebook needs, using notebook shell commands (lines starting with `!`).
+!pip install langchain
+!pip install unstructured # The unstructured library provides open-source components for pre-processing text documents such as PDFs, HTML and Word Documents.
+!pip install openai
+!pip install pybind11 # pybind11 is a lightweight header-only library that exposes C++ types in Python
+!pip install chromadb # the AI-native open-source embedding database
+!pip install Cython # Cython is an optimising static compiler for both the Python programming language and the extended Cython language
+!pip3 install "git+https://github.com/philferriere/cocoapi.git#egg=pycocotools&subdirectory=PythonAPI" # pycocotools: Python API for COCO, a large image dataset designed for object detection, segmentation, person keypoints detection, stuff segmentation, and caption generation
+!pip install unstructured[local-inference]
+!CC=clang CXX=clang++ ARCHFLAGS="-arch x86_64" pip install 'git+https://github.com/facebookresearch/detectron2.git' # Detectron2 is Facebook AI Research's next generation library that provides state-of-the-art detection and segmentation algorithms.
+!pip install layoutparser[layoutmodels,tesseract] # A Unified Toolkit for Deep Learning Based Document Image Analysis
+!pip install pytesseract # Python-tesseract is an optical character recognition (OCR) tool for python.
+# NOTE(review): Pillow is pinned to 9.0.0 on the next line, but the final command in this
+# group upgrades Pillow again, which undoes the pin - confirm which version is actually required.
+!pip install Pillow==9.0.0 # The Python Imaging Library adds image processing capabilities to your Python interpreter. Need this version, otherwise errors occur.
+!pip install tiktoken
+!pip install --upgrade Pillow
+import os
+os.environ['OPENAI_API_KEY'] = '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'
+from langchain.document_loaders import UnstructuredPDFLoader
+from langchain.indexes import VectorstoreIndexCreator
+from detectron2.config import get_cfg
+cfg = get_cfg()
+cfg.MODEL.DEVICE = 'gpu' #GPU is recommended
+!wget https://pgcag.files.wordpress.com/2010/01/48lawsofpower.pdf #meta earnings; replace with any pdf
+!mkdir docs
+!mv 48lawsofpower.pdf docs
+text_folder = 'docs'
+loaders = [UnstructuredPDFLoader(os.path.join(text_folder, fn)) for fn in os.listdir(text_folder)]
+!apt-get install poppler-utils # error occurs without this, pdf rendering library
+index = VectorstoreIndexCreator().from_loaders(loaders)
+query = "Can you give me an example from history where the enemy was crushed totally from the book?"
+index.query(query)
+query = "What's the point of making myself less accessible?"
+index.query(query)
+query = "Can you tell me the story of Queen Elizabeth I from this 48 laws of power book?"
+index.query(query)
+query = "State the names of 5 laws?"
+index.query(query)