# from docx import Document
# from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments
# import torch
# import gradio as gr
# # Load the Word document
# docx_file_path = "Our Leadership.docx"
# doc = Document(docx_file_path)
# # Extract text from the document
# text = ""
# for paragraph in doc.paragraphs:
#     text += paragraph.text + "\n"
# # Save the extracted text to a text file
# txt_file_path = "extracted_text.txt"
# with open(txt_file_path, "w", encoding="utf-8") as file:
#     file.write(text)
# # Load the pre-trained GPT-2 model and tokenizer
# tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# model = GPT2LMHeadModel.from_pretrained("gpt2")
# # Tokenize the training data
# input_ids = tokenizer(text, return_tensors="pt", padding=True, truncation=True)["input_ids"]
# # Define the training arguments
# training_args = TrainingArguments(
#     per_device_train_batch_size=4,
#     num_train_epochs=3,
#     logging_dir='./logs',
# )
# # Define a dummy data collator (required by Trainer)
# class DummyDataCollator:
#     def __call__(self, features):
#         return features
# # Define a Trainer instance
# trainer = Trainer(
#     model=model,
#     args=training_args,
#     data_collator=DummyDataCollator(),
#     train_dataset=input_ids
# )
# # Train the model
# trainer.train()
# # Define the chatbot function
# def chatbot(input_text):
#     # Tokenize input text
#     input_ids = tokenizer.encode(input_text, return_tensors="pt")
#     # Generate response from the model
#     output_ids = model.generate(input_ids, max_length=50, pad_token_id=tokenizer.eos_token_id)
#     # Decode the generated response
#     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
#     return response
# # Create the Gradio interface
# chatbot_interface = gr.Interface(chatbot, "textbox", "textbox", title="Chatbot")
# # Launch the Gradio interface
# chatbot_interface.launch()
import os

# Build the absolute path of the Word document, resolved against the
# process's current working directory (os.getcwd()), and print it.

# Get the current working directory
current_directory = os.getcwd()

# Construct the full file path
docx_file_name = "Our Leadership.docx"
full_file_path = os.path.join(current_directory, docx_file_name)

# Print the file path
print("File path:", full_file_path)