Spaces:

LLM360
/

TxT360

Running

App Files Files Community

TxT360 / eval_result_figures.py

hunterhector

add figure to early sections too

a4dc57a about 2 months ago

raw

history blame

2.71 kB

	import os
	from plotly import graph_objects as go
	import pandas as pd

	## Evaluation Graphs

	# Load the data
	# Directory holding one CSV of checkpoint-evaluation results per metric.
	EVAL_DATA_DIR = "data/txt360_eval"

	# Dataset label -> positional column index of that dataset's scores in each CSV.
	# NOTE: a SlimPajama series was once read here as well; it is currently disabled.
	EVAL_DATASET_COLUMNS = {
	    "fineweb": 1,
	    "txt360-dedup-only": 2,
	    "txt360-web-only-upsampled": 3,
	    "txt360-all-upsampled + stackv2": 4,
	}

	# metric name -> {dataset label: Series of float scores, "token": x-axis values}
	all_eval_results = {}
	# os.listdir() returns entries in arbitrary order; sort so the metric order
	# (and therefore the figure order built from it) is deterministic.
	for fname in sorted(os.listdir(EVAL_DATA_DIR)):
	    if not fname.endswith(".csv"):
	        continue

	    # The metric name is the file name without its export prefix and extension.
	    metric_name = fname.replace("CKPT Eval - ", "").replace(".csv", "")
	    df = pd.read_csv(os.path.join(EVAL_DATA_DIR, fname))

	    metric_results = {}
	    for dataset_label, column_idx in EVAL_DATASET_COLUMNS.items():
	        # Scores start at the third parsed row (the first two are skipped,
	        # presumably extra header rows -- confirm against the CSV export).
	        # Missing checkpoints are back-filled from the next available value.
	        # Series.bfill() replaces the deprecated fillna(method="bfill").
	        metric_results[dataset_label] = (
	            df.iloc[2:, column_idx].astype(float).bfill()
	        )

	    # each row is 20B tokens.
	    num_checkpoints = len(metric_results["fineweb"])
	    metric_results["token"] = [20 * step for step in range(num_checkpoints)]

	    all_eval_results[metric_name] = metric_results


	# Eval Result Plots
	# metric name -> plotly Figure with one line per dataset.
	all_eval_res_figs = {}

	# (key in the per-metric results, legend label), listed in drawing order.
	trace_specs = (
	    ("fineweb", "FineWeb"),
	    ("txt360-web-only-upsampled", "TxT360 - CC Data Upsampled"),
	    ("txt360-dedup-only", "TxT360 - CC Data Dedup"),
	    ("txt360-all-upsampled + stackv2", "TxT360 - Full Upsampled + Stack V2"),
	)

	for metric_name, res in all_eval_results.items():
	    fig_res = go.Figure()

	    # Every line shares the same x-axis: the token counts for this metric.
	    tokens = res["token"]
	    for series_key, legend_label in trace_specs:
	        line = go.Scatter(
	            x=tokens,
	            y=res[series_key],
	            mode='lines',
	            name=legend_label,
	        )
	        fig_res.add_trace(line)

	    # Titles and axis labels; title_x=0.5 centers the title horizontally.
	    layout_options = {
	        "title": f"{metric_name} Performance",
	        "title_x": 0.5,
	        "xaxis_title": "Billion Tokens",
	        "yaxis_title": metric_name,
	        "legend_title": "Dataset",
	    }
	    fig_res.update_layout(**layout_options)

	    all_eval_res_figs[metric_name] = fig_res