import pandas as pd
import json
from datetime import datetime


def process_csv_to_json():
    # Read the CSV file
    df = pd.read_csv('src/record.csv')

    # Clean the data: drop fully empty rows, then normalize column names
    # (columns whose names already match the target names are left untouched)
    df = df.dropna(how='all')
    df = df.rename(columns={
        'dataset': 'Dataset',
        'llm': 'LLM',
        'score\n(EM)': 'Score',
        'pass rate': 'Pass rate',
        'framework': 'Framework',
        'Nums': 'Samples'
    })

    # Helper: parse numeric strings that may contain thousands separators,
    # e.g. "1,319" -> 1319; missing values and '-' placeholders become 0
    def parse_number(value):
        if pd.isna(value) or value == '-':
            return 0
        # Remove commas, convert to float first (handles "1,319.0"), then to int
        return int(float(str(value).replace(',', '')))

    # Result skeleton: a timestamp plus a nested algorithm -> LLM -> dataset map
    result = {
        "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": {}
    }

    # All unique LLMs in the sheet
    llms = df['LLM'].dropna().unique()

    # Iterate through each algorithm
    for algorithm in df['Algorithm'].dropna().unique():
        if not isinstance(algorithm, str):
            continue
        result['results'][algorithm] = {}

        # Process each LLM evaluated with this algorithm
        for llm in llms:
            llm_data = df[(df['Algorithm'] == algorithm) & (df['LLM'] == llm)]
            if llm_data.empty:
                continue

            # Per-LLM entry, starting with its META block
            result['results'][algorithm][llm] = {
                'META': {
                    'Algorithm': str(algorithm),
                    'LLM': str(llm),
                    'Eval Date': str(llm_data['Eval Date'].iloc[0])
                }
            }

            # Process each dataset
            for dataset in df['Dataset'].dropna().unique():
                if not isinstance(dataset, str):
                    continue
                dataset_data = llm_data[llm_data['Dataset'] == dataset]
                if dataset_data.empty:
                    continue
                data_row = dataset_data.iloc[0]
                result['results'][algorithm][llm][dataset] = {
                    # Keep two decimal places; '-' or missing scores become 0
                    'Score': round(float(data_row['Score']), 2)
                             if pd.notnull(data_row['Score']) and data_row['Score'] != '-' else 0,
                    # Convert the percentage to a fraction, four decimal places
                    'Pass rate': round(float(data_row['Pass rate']) / 100, 4)
                                 if pd.notnull(data_row['Pass rate']) and data_row['Pass rate'] != '-' else 0.0,
                    'Cost($)': float(data_row['Cost($)'])
                               if pd.notnull(data_row['Cost($)']) and data_row['Cost($)'] != '-' else 0.0,
                    'Framework': str(data_row['Framework'])
                                 if 'Framework' in data_row and pd.notnull(data_row['Framework']) else '',
                    'X-shot': str(data_row['X-shot']) if pd.notnull(data_row['X-shot']) else '',
                    'Samples': parse_number(data_row['Samples']),
                    'All tokens': parse_number(data_row['All tokens']),
                    'Total input tokens': parse_number(data_row['Total input tokens']),
                    'Average input tokens': parse_number(data_row['Average input tokens']),
                    'Total output tokens': parse_number(data_row['Total output tokens']),
                    'Average output tokens': parse_number(data_row['Average output tokens'])
                }

    # Sanity check: warn about any entry that is missing expected fields
    required_fields = ['Score', 'Pass rate', 'Cost($)', 'Framework', 'X-shot', 'Samples',
                       'All tokens', 'Total input tokens', 'Average input tokens',
                       'Total output tokens', 'Average output tokens']
    for key, value in result['results'].items():
        for llm, datasets in value.items():
            # Check META information
            meta = datasets.get('META', {})
            if 'LLM' not in meta or 'Eval Date' not in meta:
                print(f"Missing META fields in algorithm '{key}' for LLM '{llm}'")
            for dataset, data in datasets.items():
                if dataset == 'META':
                    continue
                missing_fields = [field for field in required_fields if field not in data]
                if missing_fields:
                    print(f"Missing fields {missing_fields} in dataset '{dataset}' "
                          f"for LLM '{llm}' in algorithm '{key}'")

    # Save as JSON
    with open('src/detail_math_score.json', 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4, ensure_ascii=False)
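

# For reference, the shape of src/detail_math_score.json as produced above
# (reconstructed from the code; placeholder values are illustrative only):
#
# {
#   "time": "YYYY-MM-DD HH:MM:SS",
#   "results": {
#     "<Algorithm>": {
#       "<LLM>": {
#         "META": {"Algorithm": "...", "LLM": "...", "Eval Date": "..."},
#         "<Dataset>": {
#           "Score": 0.0, "Pass rate": 0.0, "Cost($)": 0.0,
#           "Framework": "", "X-shot": "", "Samples": 0,
#           "All tokens": 0, "Total input tokens": 0, "Average input tokens": 0,
#           "Total output tokens": 0, "Average output tokens": 0
#         }
#       }
#     }
#   }
# }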


def process_csv_to_overall_json():
    # Read the CSV file
    df = pd.read_csv('src/record.csv')

    # Clean the data: drop fully empty rows, then normalize column names
    # (columns whose names already match the target names are left untouched)
    df = df.dropna(how='all')
    df = df.rename(columns={
        'dataset': 'Dataset',
        'llm': 'LLM',
        'score\n(EM)': 'Score'
    })

    # Result skeleton: a timestamp plus a flat algorithm-key -> dataset map
    result = {
        "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "results": {}
    }

    # All unique LLMs in the sheet
    llms = df['LLM'].dropna().unique()

    for llm in llms:
        # Process base algorithms
        for algorithm in df['Algorithm'].dropna().unique():
            if not isinstance(algorithm, str):
                continue
            # gpt-3.5-turbo keeps the bare algorithm name; every other model
            # (e.g. the llama variants) gets an "-{llm}" suffix so keys stay unique
            algo_key = algorithm if llm == 'gpt-3.5-turbo' else f"{algorithm}-{llm}"

            # Skip algorithm-LLM combinations with no rows
            algo_data = df[(df['Algorithm'] == algorithm) & (df['LLM'] == llm)]
            if algo_data.empty:
                print(f"No data found for algorithm '{algorithm}' and LLM '{llm}'")
                continue

            result['results'][algo_key] = {
                "META": {
                    "Algorithm": algorithm,
                    "LLM": llm,
                    "Eval Date": str(algo_data['Eval Date'].iloc[0])
                }
            }

            # Process each dataset
            for dataset in ['gsm8k', 'AQuA', 'MATH-500']:
                dataset_data = df[(df['Algorithm'] == algorithm) &
                                  (df['Dataset'] == dataset) &
                                  (df['LLM'] == llm)]
                if not dataset_data.empty:
                    score = dataset_data['Score'].iloc[0]
                    cost = dataset_data['Cost($)'].iloc[0]
                    result['results'][algo_key][dataset] = {
                        "Score": float(score) if pd.notnull(score) and score != '-' else 0.0,
                        "Cost($)": float(cost) if pd.notnull(cost) and cost != '-' else 0.0
                    }
                else:
                    # No rows for this dataset: keep the key with default values
                    result['results'][algo_key][dataset] = {
                        "Score": 0.0,
                        "Cost($)": 0.0
                    }

    # Save as JSON
    with open('src/overall_math_score.json', 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4, ensure_ascii=False)


if __name__ == "__main__":
    # Generate JSON files in two formats
    process_csv_to_json()
    process_csv_to_overall_json()
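

# Optional smoke test -- a minimal sketch, not part of the original pipeline.
# It assumes both generators above have already run so the two JSON files
# exist; it reloads each file (using the json import at the top of this file)
# and prints how many result entries it holds. Call smoke_check() manually.
def smoke_check():
    for path in ('src/detail_math_score.json', 'src/overall_math_score.json'):
        with open(path, encoding='utf-8') as f:
            data = json.load(f)
        print(f"{path}: {len(data.get('results', {}))} result entries, "
              f"generated at {data.get('time')}")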