Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,80 @@
|
|
1 |
-
|
|
|
|
|
|
|
2 |
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from docx import Document
|
2 |
+
# from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments
|
3 |
+
# import torch
|
4 |
+
# import gradio as gr
|
5 |
|
6 |
+
# # Load the Word document
|
7 |
+
# docx_file_path = "Our Leadership.docx"
|
8 |
+
# doc = Document(docx_file_path)
|
9 |
+
|
10 |
+
# # Extract text from the document
|
11 |
+
# text = ""
|
12 |
+
# for paragraph in doc.paragraphs:
|
13 |
+
# text += paragraph.text + "\n"
|
14 |
+
|
15 |
+
# # Save the extracted text to a text file
|
16 |
+
# txt_file_path = "extracted_text.txt"
|
17 |
+
# with open(txt_file_path, "w", encoding="utf-8") as file:
|
18 |
+
# file.write(text)
|
19 |
+
|
20 |
+
# # Load the pre-trained GPT-2 model and tokenizer
|
21 |
+
# tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
22 |
+
# model = GPT2LMHeadModel.from_pretrained("gpt2")
|
23 |
+
|
24 |
+
# # Tokenize the training data
|
25 |
+
# input_ids = tokenizer(text, return_tensors="pt", padding=True, truncation=True)["input_ids"]
|
26 |
+
|
27 |
+
# # Define the training arguments
|
28 |
+
# training_args = TrainingArguments(
|
29 |
+
# per_device_train_batch_size=4,
|
30 |
+
# num_train_epochs=3,
|
31 |
+
# logging_dir='./logs',
|
32 |
+
# )
|
33 |
+
|
34 |
+
# # Define a dummy data collator (required by Trainer)
|
35 |
+
# class DummyDataCollator:
|
36 |
+
# def __call__(self, features):
|
37 |
+
# return features
|
38 |
+
|
39 |
+
# # Define a Trainer instance
|
40 |
+
# trainer = Trainer(
|
41 |
+
# model=model,
|
42 |
+
# args=training_args,
|
43 |
+
# data_collator=DummyDataCollator(),
|
44 |
+
# train_dataset=input_ids
|
45 |
+
# )
|
46 |
+
|
47 |
+
# # Train the model
|
48 |
+
# trainer.train()
|
49 |
+
|
50 |
+
# # Define the chatbot function
|
51 |
+
# def chatbot(input_text):
|
52 |
+
# # Tokenize input text
|
53 |
+
# input_ids = tokenizer.encode(input_text, return_tensors="pt")
|
54 |
+
|
55 |
+
# # Generate response from the model
|
56 |
+
# output_ids = model.generate(input_ids, max_length=50, pad_token_id=tokenizer.eos_token_id)
|
57 |
+
|
58 |
+
# # Decode the generated response
|
59 |
+
# response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
60 |
+
|
61 |
+
# return response
|
62 |
+
|
63 |
+
# # Create the Gradio interface
|
64 |
+
# chatbot_interface = gr.Interface(chatbot, "textbox", "textbox", title="Chatbot")
|
65 |
+
|
66 |
+
# # Launch the Gradio interface
|
67 |
+
# chatbot_interface.launch()
|
68 |
+
|
69 |
+
|
70 |
+
import os

# Resolve the location of the leadership document relative to the directory
# the process was started from, and report it on stdout.
#
# NOTE: this only builds and prints the path; it does not check that the
# file actually exists at that location.

# Directory the interpreter is currently running in.
current_directory = os.getcwd()

# Name of the Word document expected to sit in that directory.
docx_file_name = "Our Leadership.docx"

# Absolute path obtained by joining the working directory and the file name.
full_file_path = os.path.join(current_directory, docx_file_name)

# Emit the resolved path so it can be seen in the application's output/logs.
print("File path:", full_file_path)
|