Update app.py
app.py CHANGED
@@ -8,10 +8,13 @@ import time
 import uvicorn
 from fastapi import FastAPI
 import threading
-import spaces
 
 load_dotenv()
-
+huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
+if huggingface_token is None:
+    raise ValueError("HUGGINGFACE_TOKEN not found in environment variables.")
+
+login(token=huggingface_token)
 
 app = FastAPI()
 
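Note for readers: the new login block leans on names defined above this hunk's visible context (only `import time` and lines 8 onward are shown). A minimal sketch of the header the file presumably carries — an assumption, not the actual lines 1-7:

import os
import time

from dotenv import load_dotenv
from huggingface_hub import login
from datasets import load_dataset, concatenate_datasets
from transformers import GPT2Tokenizer, GPT2LMHeadModel, TrainingArguments, Trainer

GPT2LMHeadModel is a guess inferred from the later model.push_to_hub call; adjust if the original instantiates a different class.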
@@ -19,7 +22,6 @@ app = FastAPI()
 async def root():
     return {"message": "Modelo entrenado y en ejecución."}
 
-@spaces.GPU()
 def load_and_train():
     model_name = 'gpt2'
     tokenizer = GPT2Tokenizer.from_pretrained(model_name)
@@ -28,31 +30,31 @@ def load_and_train():
     dataset_humanizado = load_dataset('daily_dialog', split='train', trust_remote_code=True)
     dataset_codigo = load_dataset('code_search_net', split='train', trust_remote_code=True)
 
-    combined_dataset = concatenate_datasets([
-        dataset_humanizado,
-        dataset_codigo
-    ])
+    print("Daily Dialog columns:", dataset_humanizado.column_names)
+    print("Code Search Net columns:", dataset_codigo.column_names)
+
+    combined_dataset = concatenate_datasets([dataset_humanizado, dataset_codigo])
+
+    print("Combined dataset columns:", combined_dataset.column_names)
 
     def tokenize_function(examples):
-        return tokenizer(examples['
+        return tokenizer(examples['actual_column_name'], truncation=True, padding='max_length', max_length=512)
 
     tokenized_dataset = combined_dataset.map(tokenize_function, batched=True)
 
     training_args = TrainingArguments(
         output_dir='./results',
-        per_device_train_batch_size=
-        per_device_eval_batch_size=
-        num_train_epochs=
+        per_device_train_batch_size=4,
+        per_device_eval_batch_size=4,
+        num_train_epochs=1,
         learning_rate=1e-5,
-        logging_steps
-        max_grad_norm=1,
+        logging_steps=100,
         save_total_limit=1,
         seed=42,
-        weight_decay=0,
-        warmup_ratio=0.
-        evaluation_strategy="
-
-        lr_scheduler_type="constant",
+        weight_decay=0.01,
+        warmup_ratio=0.1,
+        evaluation_strategy="epoch",
+        lr_scheduler_type="linear",
     )
 
     trainer = Trainer(
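Two reviewer caveats on this hunk. First, concatenate_datasets only accepts datasets whose features match, and daily_dialog and code_search_net have different schemas — presumably why the column_names prints were added; as committed, the concatenation will raise. Second, examples['actual_column_name'] is still a placeholder, and GPT-2's tokenizer ships without a pad token, so padding='max_length' will also raise. A minimal sketch of one way to reconcile both, assuming the documented 'dialog' and 'func_code_string' columns (verify against the printed column names):

tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default

def to_text(dataset, column):
    # Collapse each dataset to a single shared 'text' column so the schemas match.
    return dataset.map(
        lambda batch: {'text': [' '.join(x) if isinstance(x, list) else x for x in batch[column]]},
        batched=True,
        remove_columns=dataset.column_names,
    )

dataset_humanizado = to_text(dataset_humanizado, 'dialog')        # each row: a list of utterances
dataset_codigo = to_text(dataset_codigo, 'func_code_string')      # each row: one code string

combined_dataset = concatenate_datasets([dataset_humanizado, dataset_codigo])

def tokenize_function(examples):
    return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=512)

Note also that evaluation_strategy="epoch" makes Trainer expect an eval_dataset, which the visible Trainer( call does not confirm.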
@@ -64,9 +66,9 @@ def load_and_train():
     while True:
         try:
             trainer.train()
-            model.push_to_hub('Yhhxhfh/nombre_de_tu_modelo', repo_type='model',
-            tokenizer.push_to_hub('Yhhxhfh/nombre_de_tu_modelo', repo_type='model',
-            time.sleep(
+            model.push_to_hub('Yhhxhfh/nombre_de_tu_modelo', repo_type='model', commit_message="Actualización del modelo")
+            tokenizer.push_to_hub('Yhhxhfh/nombre_de_tu_modelo', repo_type='model', commit_message="Actualización del tokenizador")
+            time.sleep(300)
         except Exception as e:
             print(f"Error durante el entrenamiento: {e}. Reiniciando el proceso de entrenamiento...")
             time.sleep(10)
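None of the hunks show how load_and_train is launched, but the threading and uvicorn imports suggest the file's tail starts training in a background thread next to the API server. A sketch of what that tail presumably looks like; host and port are assumptions (7860 is the Hugging Face Spaces default), since those lines fall outside the diff context:

if __name__ == '__main__':
    # Keep the FastAPI endpoint responsive while the training loop runs forever.
    threading.Thread(target=load_and_train, daemon=True).start()
    uvicorn.run(app, host='0.0.0.0', port=7860)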