Yhhxhfh committed on
Commit
16372c9
1 Parent(s): 57c7d28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -27
app.py CHANGED
@@ -104,33 +104,20 @@ def load_and_train():
104
  """
105
  texts = []
106
  # Determinar el tamaño del lote
107
- batch_size = len(next(iter(examples.values())))
108
 
109
- for i in range(batch_size):
110
  text = ''
111
- if 'dialog' in examples and examples['dialog'][i]:
112
- # Verificar si el campo es una lista y concatenar si es necesario
113
- dialog = examples['dialog'][i]
114
- if isinstance(dialog, list):
115
- dialog = ' '.join(dialog)
116
- text = dialog
117
- elif 'whole_func_string' in examples and examples['whole_func_string'][i]:
118
- whole_func = examples['whole_func_string'][i]
119
- if isinstance(whole_func, list):
120
- whole_func = ' '.join(whole_func)
121
- text = whole_func
122
- elif 'func_documentation_string' in examples and examples['func_documentation_string'][i]:
123
- func_doc = examples['func_documentation_string'][i]
124
- if isinstance(func_doc, list):
125
- func_doc = ' '.join(func_doc)
126
- text = func_doc
127
- else:
128
- text = '' # Asignar cadena vacía si no hay texto disponible
129
-
130
- # Asegurar que 'text' es una cadena de texto
131
- if not isinstance(text, str):
132
- text = str(text)
133
-
134
  texts.append(text)
135
 
136
  examples['text'] = texts
@@ -145,8 +132,8 @@ def load_and_train():
145
  examples['text'],
146
  truncation=True,
147
  padding='max_length',
148
- max_length=512,
149
- clean_up_tokenization_spaces=True # Para evitar la advertencia de FutureWarning
150
  )
151
 
152
  # Tokenizar el dataset
 
104
  """
105
  texts = []
106
  # Determinar el tamaño del lote
107
+ num_examples = len(next(iter(examples.values()))) # Obtener el tamaño del lote
108
 
109
+ for i in range(num_examples):
110
  text = ''
111
+ # Procesar 'dialog'
112
+ if 'dialog' in examples and i < len(examples['dialog']) and isinstance(examples['dialog'][i], str) and examples['dialog'][i]:
113
+ text = examples['dialog'][i]
114
+ # Procesar 'whole_func_string'
115
+ elif 'whole_func_string' in examples and i < len(examples['whole_func_string']) and isinstance(examples['whole_func_string'][i], str) and examples['whole_func_string'][i]:
116
+ text = examples['whole_func_string'][i]
117
+ # Procesar 'func_documentation_string'
118
+ elif 'func_documentation_string' in examples and i < len(examples['func_documentation_string']) and isinstance(examples['func_documentation_string'][i], str) and examples['func_documentation_string'][i]:
119
+ text = examples['func_documentation_string'][i]
120
+ # Puedes añadir más campos si es necesario
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  texts.append(text)
122
 
123
  examples['text'] = texts
 
132
  examples['text'],
133
  truncation=True,
134
  padding='max_length',
135
+ max_length=512
136
+ # clean_up_tokenization_spaces=True # Eliminado porque no es reconocido
137
  )
138
 
139
  # Tokenizar el dataset