Yhhxhfh committed on
Commit
83a5134
1 Parent(s): a3b6708

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -21
app.py CHANGED
@@ -8,10 +8,13 @@ import time
8
  import uvicorn
9
  from fastapi import FastAPI
10
  import threading
11
- import spaces
12
 
13
  load_dotenv()
14
- login(token=os.getenv('HUGGINGFACE_TOKEN'))
 
 
 
 
15
 
16
  app = FastAPI()
17
 
@@ -19,7 +22,6 @@ app = FastAPI()
19
  async def root():
20
  return {"message": "Modelo entrenado y en ejecución."}
21
 
22
- @spaces.GPU()
23
  def load_and_train():
24
  model_name = 'gpt2'
25
  tokenizer = GPT2Tokenizer.from_pretrained(model_name)
@@ -28,31 +30,31 @@ def load_and_train():
28
  dataset_humanizado = load_dataset('daily_dialog', split='train', trust_remote_code=True)
29
  dataset_codigo = load_dataset('code_search_net', split='train', trust_remote_code=True)
30
 
31
- combined_dataset = concatenate_datasets([
32
- dataset_humanizado,
33
- dataset_codigo
34
- ])
 
 
35
 
36
  def tokenize_function(examples):
37
- return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=512)
38
 
39
  tokenized_dataset = combined_dataset.map(tokenize_function, batched=True)
40
 
41
  training_args = TrainingArguments(
42
  output_dir='./results',
43
- per_device_train_batch_size=100,
44
- per_device_eval_batch_size=100,
45
- num_train_epochs=0,
46
  learning_rate=1e-5,
47
- logging_steps=-1,
48
- max_grad_norm=1,
49
  save_total_limit=1,
50
  seed=42,
51
- weight_decay=0,
52
- warmup_ratio=0.0,
53
- evaluation_strategy="no",
54
- optim="adamw_torch",
55
- lr_scheduler_type="constant",
56
  )
57
 
58
  trainer = Trainer(
@@ -64,9 +66,9 @@ def load_and_train():
64
  while True:
65
  try:
66
  trainer.train()
67
- model.push_to_hub('Yhhxhfh/nombre_de_tu_modelo', repo_type='model', use_temp_dir=True, commit_message="Actualización del modelo")
68
- tokenizer.push_to_hub('Yhhxhfh/nombre_de_tu_modelo', repo_type='model', use_temp_dir=True, commit_message="Actualización del tokenizador")
69
- time.sleep(5)
70
  except Exception as e:
71
  print(f"Error durante el entrenamiento: {e}. Reiniciando el proceso de entrenamiento...")
72
  time.sleep(10)
 
8
  import uvicorn
9
  from fastapi import FastAPI
10
  import threading
 
11
 
12
  load_dotenv()
13
+ huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
14
+ if huggingface_token is None:
15
+ raise ValueError("HUGGINGFACE_TOKEN not found in environment variables.")
16
+
17
+ login(token=huggingface_token)
18
 
19
  app = FastAPI()
20
 
 
22
  async def root():
23
  return {"message": "Modelo entrenado y en ejecución."}
24
 
 
25
  def load_and_train():
26
  model_name = 'gpt2'
27
  tokenizer = GPT2Tokenizer.from_pretrained(model_name)
 
30
  dataset_humanizado = load_dataset('daily_dialog', split='train', trust_remote_code=True)
31
  dataset_codigo = load_dataset('code_search_net', split='train', trust_remote_code=True)
32
 
33
+ print("Daily Dialog columns:", dataset_humanizado.column_names)
34
+ print("Code Search Net columns:", dataset_codigo.column_names)
35
+
36
+ combined_dataset = concatenate_datasets([dataset_humanizado, dataset_codigo])
37
+
38
+ print("Combined dataset columns:", combined_dataset.column_names)
39
 
40
  def tokenize_function(examples):
41
+ return tokenizer(examples['actual_column_name'], truncation=True, padding='max_length', max_length=512)
42
 
43
  tokenized_dataset = combined_dataset.map(tokenize_function, batched=True)
44
 
45
  training_args = TrainingArguments(
46
  output_dir='./results',
47
+ per_device_train_batch_size=4,
48
+ per_device_eval_batch_size=4,
49
+ num_train_epochs=1,
50
  learning_rate=1e-5,
51
+ logging_steps=100,
 
52
  save_total_limit=1,
53
  seed=42,
54
+ weight_decay=0.01,
55
+ warmup_ratio=0.1,
56
+ evaluation_strategy="epoch",
57
+ lr_scheduler_type="linear",
 
58
  )
59
 
60
  trainer = Trainer(
 
66
  while True:
67
  try:
68
  trainer.train()
69
+ model.push_to_hub('Yhhxhfh/nombre_de_tu_modelo', repo_type='model', commit_message="Actualización del modelo")
70
+ tokenizer.push_to_hub('Yhhxhfh/nombre_de_tu_modelo', repo_type='model', commit_message="Actualización del tokenizador")
71
+ time.sleep(300)
72
  except Exception as e:
73
  print(f"Error durante el entrenamiento: {e}. Reiniciando el proceso de entrenamiento...")
74
  time.sleep(10)