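# NOTE: the lines below are hosting-page metadata captured with the file, not Python code.
# Text2Text Generation
# fastText
# English
# i2ebuddy-gpt2 / app.py
# SoulXS2's picture
# Update app.py
# 06a6a7a verified
# raw
# history blame
# 2.23 kB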
# from docx import Document
# from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments
# import torch
# import gradio as gr
# # Load the Word document
# docx_file_path = "Our Leadership.docx"
# doc = Document(docx_file_path)
# # Extract text from the document
# text = ""
# for paragraph in doc.paragraphs:
#     text += paragraph.text + "\n"
# # Save the extracted text to a text file
# txt_file_path = "extracted_text.txt"
# with open(txt_file_path, "w", encoding="utf-8") as file:
#     file.write(text)
# # Load the pre-trained GPT-2 model and tokenizer
# tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# model = GPT2LMHeadModel.from_pretrained("gpt2")
# # Tokenize the training data
# input_ids = tokenizer(text, return_tensors="pt", padding=True, truncation=True)["input_ids"]
# # Define the training arguments
# training_args = TrainingArguments(
#     per_device_train_batch_size=4,
#     num_train_epochs=3,
#     logging_dir='./logs',
# )
# # Define a dummy data collator (required by Trainer)
# class DummyDataCollator:
#     def __call__(self, features):
#         return features
# # Define a Trainer instance
# trainer = Trainer(
#     model=model,
#     args=training_args,
#     data_collator=DummyDataCollator(),
#     train_dataset=input_ids
# )
# # Train the model
# trainer.train()
# # Define the chatbot function
# def chatbot(input_text):
#     # Tokenize input text
#     input_ids = tokenizer.encode(input_text, return_tensors="pt")
#     # Generate response from the model
#     output_ids = model.generate(input_ids, max_length=50, pad_token_id=tokenizer.eos_token_id)
#     # Decode the generated response
#     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
#     return response
# # Create the Gradio interface
# chatbot_interface = gr.Interface(chatbot, "textbox", "textbox", title="Chatbot")
# # Launch the Gradio interface
# chatbot_interface.launch()
import os
# Diagnostic: show where the Word document is looked up at runtime.
# File name of the document; it is resolved against the working directory below.
docx_file_name = "Our Leadership.docx"
# Working directory of the running process at the time this module executes.
current_directory = os.getcwd()
# Full path = working directory joined with the document's file name.
# Note: this only builds the path; it does not check that the file exists.
full_file_path = os.path.join(current_directory, docx_file_name)
# Emit the resolved path on stdout.
print("File path:", full_file_path)