Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -6,8 +6,20 @@ from langchain.chains import RetrievalQA
|
|
6 |
from transformers import AutoTokenizer
|
7 |
import pickle
|
8 |
import os
|
|
|
9 |
from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
bshtml_dir_loader = DirectoryLoader('./data/', loader_cls = BSHTMLLoader)
|
12 |
|
13 |
data = bshtml_dir_loader.load()
|
|
|
6 |
from transformers import AutoTokenizer
|
7 |
import pickle
|
8 |
import os
|
9 |
+
import shutil
|
10 |
from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
|
11 |
|
12 |
+
# "!cmd" is IPython/Jupyter shell syntax and a SyntaxError in a plain .py file,
# so run git through subprocess instead. The clone is skipped when the target
# directory already exists, because `git clone` refuses a non-empty destination.
import subprocess
if not os.path.isdir("./shakespeare"):
    subprocess.run(
        ["git", "clone", "https://github.com/TheMITTech/shakespeare"],
        check=True,  # fail loudly here rather than later on an empty corpus
    )
|
13 |
+
|
14 |
+
from glob import glob
|
15 |
+
# Collect the HTML files from the cloned repository.
# NOTE(review): without recursive=True, "**" behaves like "*", so this matches
# .html files exactly one directory below ./shakespeare — confirm that is intended.
files = glob("./shakespeare/**/*.html")
|
16 |
+
|
17 |
+
# Create the corpus directory; exist_ok avoids FileExistsError when ./data is
# already present (e.g. on a restart, or if the directory ships with the app).
os.makedirs('./data', exist_ok=True)
|
18 |
+
# Target directory for the moved HTML files. The trailing slash matters: the
# move loop below builds destination paths by plain string concatenation.
destination_folder = './data/'
|
19 |
+
|
20 |
+
for html_file in files:
|
21 |
+
# Move the file into the destination folder under its base name. The previous
# expression, html_file.split("/"[-1]), indexed the separator literal instead
# of the split result, so it concatenated str + list and raised TypeError.
# NOTE(review): files sharing a base name in different source directories will
# collide at the destination — confirm whether that can happen in this corpus.
shutil.move(html_file, os.path.join(destination_folder, os.path.basename(html_file)))
|
22 |
+
|
23 |
# Build a directory loader over ./data/ that uses BSHTMLLoader for each file.
bshtml_dir_loader = DirectoryLoader('./data/', loader_cls = BSHTMLLoader)
|
24 |
|
25 |
# Run the loader over ./data/; presumably returns the parsed documents as a
# list — verify against the langchain version in use.
data = bshtml_dir_loader.load()
|