# Experiment configuration and exam-data loading.
import json

import pandas as pd

from codeScripts.Dependencies.SentenceTransformer2 import *
from codeScripts.utils import load_json, save_json, create_file_path
class GetSettings():
    """
    Stores the selected settings for the current experiment.

    On construction it loads the analysis/rubric parameters from the config
    json and then loads the exam data (statement, sub-questions, professor
    sub-answers) either from the file path given in the config or from data
    supplied by the api.
    """

    def __init__(self, config_settings, studentsData):
        """
        Inputs:
            -config_settings: mapping with the config json data
            -studentsData: sequence whose first element is None (load exam
             data from the path in the config) or the api-supplied exam data
        """
        # extracting the settings from the configuration document
        self.__getConfigSettings(config_settings)
        # getting the responses to study
        self.__getDatatoStudy(studentsData)

    def __getDatatoStudy(self, data):
        """
        Choose the exam-data source: config-file path or api payload.
        """
        # identity comparison with None (PEP 8) instead of `== None`
        if data[0] is None:
            # extracting the info from the path in the config json
            self.__getData(self.json_file_in)
        else:
            # extracting the info from the selected file in the api
            self.__getApiData(data)

    def setApiSettings(self, api_settings):
        """
        Overwrite the parameters with the selected values from the api.

        Inputs:
            -api_settings: dictionary with the stored parameters from the api
        """
        self.PesoOrtografia = api_settings["ortographyPercentage"]
        self.PesoSintaxis = api_settings["syntaxPercentage"]
        self.PesoSemantics = api_settings["semanticPercentage"]
        self.rango_ID = api_settings["students"]

    def __getConfigSettings(self, df):
        """
        Import the settings from the config json.

        Inputs:
            -df: mapping where the config json data is loaded
        """
        # +++ General settings +++
        # path where the dataset is stored
        self.json_file_in = df["ruta_fichero_entrada"]
        # path where output is to be stored
        self.json_file_out = df["ruta_fichero_salida"]
        # paths to access hunspell components
        self.hunspell_aff = df["ruta_hunspell"]["aff"]
        self.hunspell_dic = df["ruta_hunspell"]["dic"]

        # range of students to study ---- will be overwritten from the api
        if df["Parametros_Analisis"]["estudiantes"]["Todos"]:
            self.rango_ID = "All"
        else:
            self.rango_ID = df["Parametros_Analisis"]["estudiantes"]["ID_rango"]

        # min/max sentence-grouping sizes (max is exclusive, hence the +1)
        self.minAgrupation = int(df["Parametros_Analisis"]["Semantica"]["frases"]["Agrupacion"]["Minimo"])
        self.maxAgrupation = int(df["Parametros_Analisis"]["Semantica"]["frases"]["Agrupacion"]["Maximo"] + 1)

        # +++ Ortography +++
        # whether the ortographic level is activated
        self.Ortografia = df["Parametros_Analisis"]["Ortografia"]["Activado"]
        # max number of permitted errors
        self.NMaxErrores = df["Parametros_Rubrica"]["Ortografia"]["NMaxErrores"]
        # number of tolerated errors before beginning to subtract
        self.FaltasSalvaguarda = df["Parametros_Rubrica"]["Ortografia"]["FaltasSalvaguarda"]
        # level weight (rubrics)
        self.PesoOrtografia = df["Parametros_Rubrica"]["Ortografia"]["Peso"]

        # +++ Syntax +++
        # whether the syntactic level is activated
        self.Sintaxis = df["Parametros_Analisis"]["Sintaxis"]["Activado"]
        # max number of sentences and words permitted
        self.NMaxFrases = df["Parametros_Rubrica"]["Sintaxis"]["NMaxFrases"]
        self.NMaxPalabras = df["Parametros_Rubrica"]["Sintaxis"]["NMaxPalabras"]
        # level weight (rubrics)
        self.PesoSintaxis = df["Parametros_Rubrica"]["Sintaxis"]["Peso"]

        # +++ Semantics +++
        # whether the semantic level is activated
        self.Semantica = df["Parametros_Analisis"]["Semantica"]["Activado"]
        # level weight (rubrics)
        self.PesoSemantics = df["Parametros_Rubrica"]["Semantica"]["Peso"]

        # --- Similarity ---
        SpacyPackage = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Spacy"]["Package"]
        self.spacy_package = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Spacy"][SpacyPackage]
        print("spacy_package", self.spacy_package)
        # minimum value to select one line of response as similar
        # (0.615 sm - 0.875 md and lg)
        self.LofRespThreshold = df["Parametros_Rubrica"]["Semantica"]["LineaRespuesta"]["ThresholdToConsiderCeroValue"][SpacyPackage]
        print("lofThreshold", self.LofRespThreshold)
        # the different thresholds (min-max) to adapt the similarity score
        self.UmbralesSimilitud = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["UmbralesSimilitud"][SpacyPackage]
        print("self.UmbralesSimilitud", self.UmbralesSimilitud)

        # configure the bert model parameters only once
        model_name = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Bert"]["model_path"]
        self.model_path = create_file_path('', doctype=4) + model_name
        print("self.model_path", self.model_path)
        self.modelr = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Bert"]["model"]
        print("self.modelr", self.modelr)
        self.epochr = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Bert"]["epoch"]
        print("self.epochr", self.epochr)
        self.BertModels_glbl = SentTransf_test([self.modelr], [self.epochr])

        # variables to store some per-student values during the analysis
        self.studentID = ""
        self.faltasOrto = 0
        self.palabrasPorFrase = 0
        self.minipreguntasMalSpacy = ""
        self.minipreguntasMalBert = ""

    def __extractExamInfo(self, minipreguntas_src):
        """
        Fill the sub-question containers from an iterable of entries shaped
        like {"minipregunta": ..., "minirespuesta": ...} and build the full
        professor answer by concatenating the sub-answers.

        Also persists the [sub-question, sub-answer] pairs to
        MinirespuestasProfesor.json (shared by both data sources).
        """
        self.minipreguntas = []
        self.minirespuestas = []
        self.indice_minipreguntas = []
        self.respuesta_prof = ""
        try:
            for i, entry in enumerate(minipreguntas_src):
                self.minirespuestas.append(entry['minirespuesta'])
                self.minipreguntas.append(entry['minipregunta'])
                self.indice_minipreguntas.append("minipregunta" + str(i))
                if i == 0:
                    self.respuesta_prof = self.minirespuestas[i]
                else:
                    self.respuesta_prof = self.respuesta_prof + ' ' + self.minirespuestas[i]
        except (KeyError, TypeError, IndexError):
            # malformed entry: keep whatever was gathered so far
            # (legacy behavior of the old bare `except: pass`)
            pass

        info_profesor = [[minipregunta, minirespuesta]
                         for minipregunta, minirespuesta
                         in zip(self.minipreguntas, self.minirespuestas)]
        save_json(create_file_path("MinirespuestasProfesor.json", 2), info_profesor)

    def __getApiData(self, json_file):
        """
        Extract the data and format of the exam from the api
        (statement, keywords, sub-questions, sub-answers).
        """
        self.answersDF = pd.DataFrame(json_file[0])
        self.id_number = 0
        self.enunciado = json_file[1]['enunciado']
        self.prof_keywords = json_file[1]['keywords']
        self.__extractExamInfo(json_file[1].get('minipreguntas', []))

    def __getData(self, json_file):
        """
        Extract the data and format of the exam from the path that appears
        in the config json (statement, keywords, sub-questions, sub-answers).
        """
        self.answersDF = pd.DataFrame(load_json(json_file))
        self.id_number = 0
        metadata = self.answersDF['metadata'][0]
        self.enunciado = metadata['enunciado']
        self.prof_keywords = metadata['keywords']
        self.__extractExamInfo(metadata.get('minipreguntas', []))