pip install transformers datasets torch

from datasets import load_dataset

# Load your custom dataset (ensure it's in the proper format)

# Map each split name to the plain-text file that backs it in the dataset repo.
split_files = {'train': 'train.txt', 'test': 'test.txt'}
dataset = load_dataset('Hamses/EU_Regulation_261_2004', data_files=split_files)

# Load the GPT-2 tokenizer

from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# GPT-2 ships without a padding token, so the padding='max_length' request in
# the preprocessing step would raise. Reuse the end-of-text token as padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Preprocess the dataset

def preprocess_function(examples):
    """Tokenize a batch of examples and attach causal-LM training labels.

    Args:
        examples: A batch from the dataset; ``examples['text']`` holds the
            raw strings to tokenize.

    Returns:
        The tokenizer output (``input_ids``, ``attention_mask``) with an added
        ``labels`` entry. Labels mirror ``input_ids`` except at padded
        positions, which are set to -100 so the loss ignores them.
    """
    encoded = tokenizer(examples['text'], padding='max_length', truncation=True)
    # Without a ``labels`` column the model returns no loss and the Trainer
    # cannot train. For causal language modelling the targets are the inputs
    # themselves (the model shifts them internally).
    encoded['labels'] = [
        [token_id if is_real else -100 for token_id, is_real in zip(ids, mask)]
        for ids, mask in zip(encoded['input_ids'], encoded['attention_mask'])
    ]
    return encoded

# batched=True hands preprocess_function a batch of examples per call, which
# is why it indexes examples['text'] and passes the result to the tokenizer.
encoded_dataset = dataset.map(preprocess_function, batched=True)

from transformers import GPT2LMHeadModel, TrainingArguments, Trainer

# Load the GPT-2 model

# Start from the pretrained 'gpt2' checkpoint; GPT2LMHeadModel is the variant
# with a language-modeling head, which is what gets fine-tuned below.
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Define training arguments

# Hyperparameters and output locations for the fine-tuning run.
training_args = TrainingArguments(
    output_dir='./results',          # checkpoints and final outputs
    logging_dir='./logs',            # training logs
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=500,
    weight_decay=0.01,
)

# Initialize the Trainer

# Wire the model, run configuration, and tokenized splits together.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset['train'],
    eval_dataset=encoded_dataset['test'],
)

# Train the model

# Runs fine-tuning on encoded_dataset['train'] using training_args.
trainer.train()

# Evaluate the model

# Compute evaluation metrics on the eval split given to the Trainer and show them.
results = trainer.evaluate()
print(results)

# Save the model

# Save the model weights and the tokenizer files to the same directory so the
# fine-tuned checkpoint can be reloaded from one path.
model.save_pretrained('./gpt2-finetuned')
tokenizer.save_pretrained('./gpt2-finetuned')

Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.

Dataset used to train Hamses/EU_Regulation_261_2004