Text Generation
Transformers
Safetensors
Spanish
llama_longbel
biomedical-entity-linking
entity-linking
entity-disambiguation
named-entity-linking
biomedical
healthcare
snomed
spaccc
medprocner
symptemist
distemist
constrained-decoding
causal-lm
llm
conversational
custom_code
Eval Results (legacy)
Instructions to use Aremaki/LongBEL_1B_SPACCC with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Aremaki/LongBEL_1B_SPACCC with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Aremaki/LongBEL_1B_SPACCC", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("Aremaki/LongBEL_1B_SPACCC", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use Aremaki/LongBEL_1B_SPACCC with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "Aremaki/LongBEL_1B_SPACCC"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Aremaki/LongBEL_1B_SPACCC",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/Aremaki/LongBEL_1B_SPACCC
- SGLang
How to use Aremaki/LongBEL_1B_SPACCC with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Aremaki/LongBEL_1B_SPACCC" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Aremaki/LongBEL_1B_SPACCC",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Aremaki/LongBEL_1B_SPACCC" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Aremaki/LongBEL_1B_SPACCC",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use Aremaki/LongBEL_1B_SPACCC with Docker Model Runner:
docker model run hf.co/Aremaki/LongBEL_1B_SPACCC
| { | |
| "best_global_step": 18252, | |
| "best_metric": 0.8571, | |
| "best_model_checkpoint": "models/NED/SPACCC_human_only_tfidf_hybrid_long_v2_addheaders/Llama-3.2-1B-Instruct/checkpoint-18252", | |
| "epoch": 50.0, | |
| "eval_steps": 500, | |
| "global_step": 304200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "entropy": 2.1268050391018822, | |
| "epoch": 1.0, | |
| "grad_norm": 71.5, | |
| "learning_rate": 1.999671268902038e-05, | |
| "loss": 1.1856, | |
| "mean_token_accuracy": 0.7717264709329229, | |
| "num_tokens": 23190253.0, | |
| "step": 6084 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_entropy": 2.1406238079071045, | |
| "eval_loss": 0.4971363842487335, | |
| "eval_mean_token_accuracy": 0.9041083097457886, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 23190253.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.1758, | |
| "eval_samples_per_second": 79.649, | |
| "eval_steps_per_second": 28.446, | |
| "step": 6084 | |
| }, | |
| { | |
| "entropy": 1.9585949286358049, | |
| "epoch": 2.0, | |
| "grad_norm": 32.5, | |
| "learning_rate": 2.9690823318896277e-05, | |
| "loss": 0.654, | |
| "mean_token_accuracy": 0.8564344449832124, | |
| "num_tokens": 46380506.0, | |
| "step": 12168 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_entropy": 1.99363112449646, | |
| "eval_loss": 0.4503121078014374, | |
| "eval_mean_token_accuracy": 0.901867413520813, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 46380506.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1565, | |
| "eval_samples_per_second": 89.474, | |
| "eval_steps_per_second": 31.955, | |
| "step": 12168 | |
| }, | |
| { | |
| "entropy": 1.6538763878221656, | |
| "epoch": 3.0, | |
| "grad_norm": 163.0, | |
| "learning_rate": 2.9072266617865347e-05, | |
| "loss": 0.3304, | |
| "mean_token_accuracy": 0.9195070968381204, | |
| "num_tokens": 69570759.0, | |
| "step": 18252 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_entropy": 1.6258357524871827, | |
| "eval_loss": 0.527018129825592, | |
| "eval_mean_token_accuracy": 0.9253968238830567, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 69570759.0, | |
| "eval_recall": 0.8571, | |
| "eval_runtime": 0.1557, | |
| "eval_samples_per_second": 89.896, | |
| "eval_steps_per_second": 32.106, | |
| "step": 18252 | |
| }, | |
| { | |
| "entropy": 1.458226298067148, | |
| "epoch": 4.0, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 2.845370991683442e-05, | |
| "loss": 0.1547, | |
| "mean_token_accuracy": 0.9605004129383926, | |
| "num_tokens": 92761012.0, | |
| "step": 24336 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_entropy": 1.5267343997955323, | |
| "eval_loss": 0.43075770139694214, | |
| "eval_mean_token_accuracy": 0.9333333253860474, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 92761012.0, | |
| "eval_recall": 0.7857, | |
| "eval_runtime": 0.1616, | |
| "eval_samples_per_second": 86.629, | |
| "eval_steps_per_second": 30.939, | |
| "step": 24336 | |
| }, | |
| { | |
| "entropy": 1.3115615775560094, | |
| "epoch": 5.0, | |
| "grad_norm": 0.0126953125, | |
| "learning_rate": 2.7835153215803492e-05, | |
| "loss": 0.0645, | |
| "mean_token_accuracy": 0.9830160907994439, | |
| "num_tokens": 115951265.0, | |
| "step": 30420 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_entropy": 1.3103554964065551, | |
| "eval_loss": 0.6741493940353394, | |
| "eval_mean_token_accuracy": 0.8914098978042603, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 115951265.0, | |
| "eval_recall": 0.6429, | |
| "eval_runtime": 0.1548, | |
| "eval_samples_per_second": 90.451, | |
| "eval_steps_per_second": 32.304, | |
| "step": 30420 | |
| }, | |
| { | |
| "entropy": 1.2145140489255966, | |
| "epoch": 6.0, | |
| "grad_norm": 0.051513671875, | |
| "learning_rate": 2.7216596514772566e-05, | |
| "loss": 0.0227, | |
| "mean_token_accuracy": 0.9940000502178341, | |
| "num_tokens": 139141518.0, | |
| "step": 36504 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_entropy": 1.287993311882019, | |
| "eval_loss": 0.6404770612716675, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 139141518.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1536, | |
| "eval_samples_per_second": 91.127, | |
| "eval_steps_per_second": 32.545, | |
| "step": 36504 | |
| }, | |
| { | |
| "entropy": 1.1663444105885987, | |
| "epoch": 7.0, | |
| "grad_norm": 0.177734375, | |
| "learning_rate": 2.659803981374164e-05, | |
| "loss": 0.0072, | |
| "mean_token_accuracy": 0.9983230459012145, | |
| "num_tokens": 162331771.0, | |
| "step": 42588 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_entropy": 1.220553469657898, | |
| "eval_loss": 0.7181684374809265, | |
| "eval_mean_token_accuracy": 0.9009337067604065, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 162331771.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1554, | |
| "eval_samples_per_second": 90.075, | |
| "eval_steps_per_second": 32.17, | |
| "step": 42588 | |
| }, | |
| { | |
| "entropy": 1.1479155327304267, | |
| "epoch": 8.0, | |
| "grad_norm": 0.0177001953125, | |
| "learning_rate": 2.597948311271071e-05, | |
| "loss": 0.0018, | |
| "mean_token_accuracy": 0.9995507592730896, | |
| "num_tokens": 185522024.0, | |
| "step": 48672 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_entropy": 1.2225802183151244, | |
| "eval_loss": 0.8296155333518982, | |
| "eval_mean_token_accuracy": 0.9009337067604065, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 185522024.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1539, | |
| "eval_samples_per_second": 90.954, | |
| "eval_steps_per_second": 32.484, | |
| "step": 48672 | |
| }, | |
| { | |
| "entropy": 1.1291984924158716, | |
| "epoch": 9.0, | |
| "grad_norm": 0.04833984375, | |
| "learning_rate": 2.5360926411679782e-05, | |
| "loss": 0.0006, | |
| "mean_token_accuracy": 0.9999462363752858, | |
| "num_tokens": 208712277.0, | |
| "step": 54756 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_entropy": 1.1875993251800536, | |
| "eval_loss": 0.8487787842750549, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 208712277.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1586, | |
| "eval_samples_per_second": 88.285, | |
| "eval_steps_per_second": 31.53, | |
| "step": 54756 | |
| }, | |
| { | |
| "entropy": 1.1119468484776809, | |
| "epoch": 10.0, | |
| "grad_norm": 0.05029296875, | |
| "learning_rate": 2.4742369710648856e-05, | |
| "loss": 0.0003, | |
| "mean_token_accuracy": 0.9999655756920596, | |
| "num_tokens": 231902530.0, | |
| "step": 60840 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_entropy": 1.1920786857604981, | |
| "eval_loss": 0.8497948050498962, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 231902530.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1574, | |
| "eval_samples_per_second": 88.919, | |
| "eval_steps_per_second": 31.757, | |
| "step": 60840 | |
| }, | |
| { | |
| "entropy": 1.1129700567262255, | |
| "epoch": 11.0, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 2.4123813009617927e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 255092783.0, | |
| "step": 66924 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_entropy": 1.1868632793426515, | |
| "eval_loss": 0.8353831171989441, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 255092783.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1588, | |
| "eval_samples_per_second": 88.164, | |
| "eval_steps_per_second": 31.487, | |
| "step": 66924 | |
| }, | |
| { | |
| "entropy": 1.1077099534769266, | |
| "epoch": 12.0, | |
| "grad_norm": 0.07568359375, | |
| "learning_rate": 2.3505256308586998e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 0.9999945211437954, | |
| "num_tokens": 278283036.0, | |
| "step": 73008 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_entropy": 1.1834761381149292, | |
| "eval_loss": 0.8413147330284119, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 278283036.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1558, | |
| "eval_samples_per_second": 89.841, | |
| "eval_steps_per_second": 32.086, | |
| "step": 73008 | |
| }, | |
| { | |
| "entropy": 1.1058313136701126, | |
| "epoch": 13.0, | |
| "grad_norm": 0.0159912109375, | |
| "learning_rate": 2.2886699607556072e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 301473289.0, | |
| "step": 79092 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_entropy": 1.1844575166702271, | |
| "eval_loss": 0.8453251123428345, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 301473289.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1604, | |
| "eval_samples_per_second": 87.297, | |
| "eval_steps_per_second": 31.177, | |
| "step": 79092 | |
| }, | |
| { | |
| "entropy": 1.106135592432809, | |
| "epoch": 14.0, | |
| "grad_norm": 0.11328125, | |
| "learning_rate": 2.2268142906525143e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 324663542.0, | |
| "step": 85176 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_entropy": 1.1829971313476562, | |
| "eval_loss": 0.8412962555885315, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 324663542.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1597, | |
| "eval_samples_per_second": 87.671, | |
| "eval_steps_per_second": 31.311, | |
| "step": 85176 | |
| }, | |
| { | |
| "entropy": 1.1052913846525214, | |
| "epoch": 15.0, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 2.1649586205494213e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 347853795.0, | |
| "step": 91260 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_entropy": 1.183160948753357, | |
| "eval_loss": 0.8468051552772522, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 347853795.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1548, | |
| "eval_samples_per_second": 90.433, | |
| "eval_steps_per_second": 32.298, | |
| "step": 91260 | |
| }, | |
| { | |
| "entropy": 1.1050384549973906, | |
| "epoch": 16.0, | |
| "grad_norm": 0.061279296875, | |
| "learning_rate": 2.1031029504463287e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 371044048.0, | |
| "step": 97344 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_entropy": 1.183035182952881, | |
| "eval_loss": 0.8468660712242126, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 371044048.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1566, | |
| "eval_samples_per_second": 89.398, | |
| "eval_steps_per_second": 31.928, | |
| "step": 97344 | |
| }, | |
| { | |
| "entropy": 1.1046691402685165, | |
| "epoch": 17.0, | |
| "grad_norm": 0.0390625, | |
| "learning_rate": 2.0412472803432358e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 394234301.0, | |
| "step": 103428 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_entropy": 1.1834327936172486, | |
| "eval_loss": 0.8357403874397278, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 394234301.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1667, | |
| "eval_samples_per_second": 83.986, | |
| "eval_steps_per_second": 29.995, | |
| "step": 103428 | |
| }, | |
| { | |
| "entropy": 1.1045704223289057, | |
| "epoch": 18.0, | |
| "grad_norm": 0.0034637451171875, | |
| "learning_rate": 1.979391610240143e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 417424554.0, | |
| "step": 109512 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_entropy": 1.1848695039749146, | |
| "eval_loss": 0.8536882400512695, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 417424554.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1648, | |
| "eval_samples_per_second": 84.942, | |
| "eval_steps_per_second": 30.337, | |
| "step": 109512 | |
| }, | |
| { | |
| "entropy": 1.1047596051495456, | |
| "epoch": 19.0, | |
| "grad_norm": 0.10498046875, | |
| "learning_rate": 1.9175359401370503e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 440614807.0, | |
| "step": 115596 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_entropy": 1.1839102029800415, | |
| "eval_loss": 0.8556452989578247, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 440614807.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1644, | |
| "eval_samples_per_second": 85.182, | |
| "eval_steps_per_second": 30.422, | |
| "step": 115596 | |
| }, | |
| { | |
| "entropy": 1.1042878528436024, | |
| "epoch": 20.0, | |
| "grad_norm": 0.07373046875, | |
| "learning_rate": 1.8556802700339577e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 463805060.0, | |
| "step": 121680 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_entropy": 1.183977437019348, | |
| "eval_loss": 0.8611962199211121, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 463805060.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1597, | |
| "eval_samples_per_second": 87.654, | |
| "eval_steps_per_second": 31.305, | |
| "step": 121680 | |
| }, | |
| { | |
| "entropy": 1.104654125815391, | |
| "epoch": 21.0, | |
| "grad_norm": 0.0478515625, | |
| "learning_rate": 1.793824599930865e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 486995313.0, | |
| "step": 127764 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_entropy": 1.1869411706924438, | |
| "eval_loss": 0.8467808961868286, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 486995313.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1555, | |
| "eval_samples_per_second": 90.033, | |
| "eval_steps_per_second": 32.155, | |
| "step": 127764 | |
| }, | |
| { | |
| "entropy": 1.1043770736785095, | |
| "epoch": 22.0, | |
| "grad_norm": 0.040771484375, | |
| "learning_rate": 1.7319689298277722e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 510185566.0, | |
| "step": 133848 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_entropy": 1.1831506013870239, | |
| "eval_loss": 0.8473494648933411, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 510185566.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.153, | |
| "eval_samples_per_second": 91.486, | |
| "eval_steps_per_second": 32.674, | |
| "step": 133848 | |
| }, | |
| { | |
| "entropy": 1.1042385918384003, | |
| "epoch": 23.0, | |
| "grad_norm": 0.00848388671875, | |
| "learning_rate": 1.6701132597246793e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 533375819.0, | |
| "step": 139932 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_entropy": 1.1850621223449707, | |
| "eval_loss": 0.8552234768867493, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 533375819.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1571, | |
| "eval_samples_per_second": 89.113, | |
| "eval_steps_per_second": 31.826, | |
| "step": 139932 | |
| }, | |
| { | |
| "entropy": 1.104157416445107, | |
| "epoch": 24.0, | |
| "grad_norm": 0.2021484375, | |
| "learning_rate": 1.6082575896215867e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 556566072.0, | |
| "step": 146016 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_entropy": 1.186206865310669, | |
| "eval_loss": 0.8547914624214172, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 556566072.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1564, | |
| "eval_samples_per_second": 89.51, | |
| "eval_steps_per_second": 31.968, | |
| "step": 146016 | |
| }, | |
| { | |
| "entropy": 1.1040764231673201, | |
| "epoch": 25.0, | |
| "grad_norm": 0.0257568359375, | |
| "learning_rate": 1.5464019195184938e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 579756325.0, | |
| "step": 152100 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_entropy": 1.1851461410522461, | |
| "eval_loss": 0.8527544736862183, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 579756325.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1568, | |
| "eval_samples_per_second": 89.303, | |
| "eval_steps_per_second": 31.894, | |
| "step": 152100 | |
| }, | |
| { | |
| "entropy": 1.1041492314165473, | |
| "epoch": 26.0, | |
| "grad_norm": 0.06494140625, | |
| "learning_rate": 1.4845462494154008e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 602946578.0, | |
| "step": 158184 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_entropy": 1.1824380159378052, | |
| "eval_loss": 0.8571881055831909, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 602946578.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1542, | |
| "eval_samples_per_second": 90.8, | |
| "eval_steps_per_second": 32.428, | |
| "step": 158184 | |
| }, | |
| { | |
| "entropy": 1.1045205590463953, | |
| "epoch": 27.0, | |
| "grad_norm": 0.05517578125, | |
| "learning_rate": 1.4226905793123081e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 626136831.0, | |
| "step": 164268 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_entropy": 1.1850265502929687, | |
| "eval_loss": 0.8606659173965454, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 626136831.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1562, | |
| "eval_samples_per_second": 89.64, | |
| "eval_steps_per_second": 32.014, | |
| "step": 164268 | |
| }, | |
| { | |
| "entropy": 1.1041291631968848, | |
| "epoch": 28.0, | |
| "grad_norm": 0.130859375, | |
| "learning_rate": 1.3608349092092153e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 649327084.0, | |
| "step": 170352 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_entropy": 1.186257529258728, | |
| "eval_loss": 0.854324996471405, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 649327084.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1578, | |
| "eval_samples_per_second": 88.707, | |
| "eval_steps_per_second": 31.681, | |
| "step": 170352 | |
| }, | |
| { | |
| "entropy": 1.1043956205885634, | |
| "epoch": 29.0, | |
| "grad_norm": 0.004913330078125, | |
| "learning_rate": 1.2989792391061224e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 672517337.0, | |
| "step": 176436 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_entropy": 1.185477328300476, | |
| "eval_loss": 0.8509793877601624, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 672517337.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1565, | |
| "eval_samples_per_second": 89.43, | |
| "eval_steps_per_second": 31.939, | |
| "step": 176436 | |
| }, | |
| { | |
| "entropy": 1.1042519219508695, | |
| "epoch": 30.0, | |
| "grad_norm": 0.12158203125, | |
| "learning_rate": 1.2371235690030298e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 695707590.0, | |
| "step": 182520 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_entropy": 1.1846540451049805, | |
| "eval_loss": 0.8473905920982361, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 695707590.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.155, | |
| "eval_samples_per_second": 90.317, | |
| "eval_steps_per_second": 32.256, | |
| "step": 182520 | |
| }, | |
| { | |
| "entropy": 1.104193890843244, | |
| "epoch": 31.0, | |
| "grad_norm": 0.028564453125, | |
| "learning_rate": 1.175267898899937e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 718897843.0, | |
| "step": 188604 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_entropy": 1.1850831031799316, | |
| "eval_loss": 0.8546524047851562, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 718897843.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.165, | |
| "eval_samples_per_second": 84.858, | |
| "eval_steps_per_second": 30.306, | |
| "step": 188604 | |
| }, | |
| { | |
| "entropy": 1.1044594693799148, | |
| "epoch": 32.0, | |
| "grad_norm": 0.01507568359375, | |
| "learning_rate": 1.1134122287968443e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 742088096.0, | |
| "step": 194688 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_entropy": 1.1852279663085938, | |
| "eval_loss": 0.8612449765205383, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 742088096.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1576, | |
| "eval_samples_per_second": 88.852, | |
| "eval_steps_per_second": 31.733, | |
| "step": 194688 | |
| }, | |
| { | |
| "entropy": 1.1042602037537028, | |
| "epoch": 33.0, | |
| "grad_norm": 0.049072265625, | |
| "learning_rate": 1.0515565586937514e-05, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 765278349.0, | |
| "step": 200772 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_entropy": 1.1850273609161377, | |
| "eval_loss": 0.854258120059967, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 765278349.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1569, | |
| "eval_samples_per_second": 89.235, | |
| "eval_steps_per_second": 31.869, | |
| "step": 200772 | |
| }, | |
| { | |
| "entropy": 1.1045278703633459, | |
| "epoch": 34.0, | |
| "grad_norm": 0.1767578125, | |
| "learning_rate": 9.897008885906586e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 788468602.0, | |
| "step": 206856 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_entropy": 1.1850582599639892, | |
| "eval_loss": 0.850601077079773, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 788468602.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1584, | |
| "eval_samples_per_second": 88.362, | |
| "eval_steps_per_second": 31.558, | |
| "step": 206856 | |
| }, | |
| { | |
| "entropy": 1.104611723027991, | |
| "epoch": 35.0, | |
| "grad_norm": 0.033203125, | |
| "learning_rate": 9.278452184875659e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 811658855.0, | |
| "step": 212940 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_entropy": 1.1844811916351319, | |
| "eval_loss": 0.8560983538627625, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 811658855.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1556, | |
| "eval_samples_per_second": 89.96, | |
| "eval_steps_per_second": 32.129, | |
| "step": 212940 | |
| }, | |
| { | |
| "entropy": 1.104373964606579, | |
| "epoch": 36.0, | |
| "grad_norm": 0.0027313232421875, | |
| "learning_rate": 8.65989548384473e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 834849108.0, | |
| "step": 219024 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_entropy": 1.1842174053192138, | |
| "eval_loss": 0.8440762758255005, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 834849108.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1546, | |
| "eval_samples_per_second": 90.57, | |
| "eval_steps_per_second": 32.346, | |
| "step": 219024 | |
| }, | |
| { | |
| "entropy": 1.1041848019186717, | |
| "epoch": 37.0, | |
| "grad_norm": 0.06689453125, | |
| "learning_rate": 8.041338782813804e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 858039361.0, | |
| "step": 225108 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_entropy": 1.1856298685073852, | |
| "eval_loss": 0.8591736555099487, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 858039361.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1682, | |
| "eval_samples_per_second": 83.241, | |
| "eval_steps_per_second": 29.729, | |
| "step": 225108 | |
| }, | |
| { | |
| "entropy": 1.1047284195467801, | |
| "epoch": 38.0, | |
| "grad_norm": 0.00799560546875, | |
| "learning_rate": 7.4227820817828744e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 881229614.0, | |
| "step": 231192 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_entropy": 1.1842705011367798, | |
| "eval_loss": 0.8513706922531128, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 881229614.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1567, | |
| "eval_samples_per_second": 89.326, | |
| "eval_steps_per_second": 31.902, | |
| "step": 231192 | |
| }, | |
| { | |
| "entropy": 1.1047490643071627, | |
| "epoch": 39.0, | |
| "grad_norm": 0.025634765625, | |
| "learning_rate": 6.804225380751948e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 904419867.0, | |
| "step": 237276 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_entropy": 1.1818478107452393, | |
| "eval_loss": 0.8533371090888977, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 904419867.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1577, | |
| "eval_samples_per_second": 88.773, | |
| "eval_steps_per_second": 31.705, | |
| "step": 237276 | |
| }, | |
| { | |
| "entropy": 1.1046844204051334, | |
| "epoch": 40.0, | |
| "grad_norm": 0.03466796875, | |
| "learning_rate": 6.185668679721019e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 927610120.0, | |
| "step": 243360 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_entropy": 1.1807388305664062, | |
| "eval_loss": 0.8424216508865356, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 927610120.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.157, | |
| "eval_samples_per_second": 89.195, | |
| "eval_steps_per_second": 31.855, | |
| "step": 243360 | |
| }, | |
| { | |
| "entropy": 1.1044822435563995, | |
| "epoch": 41.0, | |
| "grad_norm": 0.0107421875, | |
| "learning_rate": 5.567111978690091e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 950800373.0, | |
| "step": 249444 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_entropy": 1.1833556652069093, | |
| "eval_loss": 0.8545311093330383, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 950800373.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1548, | |
| "eval_samples_per_second": 90.464, | |
| "eval_steps_per_second": 32.309, | |
| "step": 249444 | |
| }, | |
| { | |
| "entropy": 1.1045338336101571, | |
| "epoch": 42.0, | |
| "grad_norm": 0.028564453125, | |
| "learning_rate": 4.948555277659164e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 973990626.0, | |
| "step": 255528 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_entropy": 1.185540795326233, | |
| "eval_loss": 0.848736584186554, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 973990626.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1564, | |
| "eval_samples_per_second": 89.51, | |
| "eval_steps_per_second": 31.968, | |
| "step": 255528 | |
| }, | |
| { | |
| "entropy": 1.104217090569853, | |
| "epoch": 43.0, | |
| "grad_norm": 0.01708984375, | |
| "learning_rate": 4.329998576628236e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 997180879.0, | |
| "step": 261612 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_entropy": 1.1844725608825684, | |
| "eval_loss": 0.8541809320449829, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 997180879.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1566, | |
| "eval_samples_per_second": 89.418, | |
| "eval_steps_per_second": 31.935, | |
| "step": 261612 | |
| }, | |
| { | |
| "entropy": 1.1048875654276697, | |
| "epoch": 44.0, | |
| "grad_norm": 0.068359375, | |
| "learning_rate": 3.711441875597308e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 1020371132.0, | |
| "step": 267696 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_entropy": 1.1849848747253418, | |
| "eval_loss": 0.8501148223876953, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 1020371132.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.156, | |
| "eval_samples_per_second": 89.746, | |
| "eval_steps_per_second": 32.052, | |
| "step": 267696 | |
| }, | |
| { | |
| "entropy": 1.1046531631180834, | |
| "epoch": 45.0, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 3.09288517456638e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 1043561385.0, | |
| "step": 273780 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_entropy": 1.183105206489563, | |
| "eval_loss": 0.8626060485839844, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 1043561385.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1555, | |
| "eval_samples_per_second": 90.048, | |
| "eval_steps_per_second": 32.16, | |
| "step": 273780 | |
| }, | |
| { | |
| "entropy": 1.104986509314342, | |
| "epoch": 46.0, | |
| "grad_norm": 0.0206298828125, | |
| "learning_rate": 2.474328473535452e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 1066751638.0, | |
| "step": 279864 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_entropy": 1.1854262113571168, | |
| "eval_loss": 0.848030686378479, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 1066751638.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1552, | |
| "eval_samples_per_second": 90.181, | |
| "eval_steps_per_second": 32.207, | |
| "step": 279864 | |
| }, | |
| { | |
| "entropy": 1.1049189729509503, | |
| "epoch": 47.0, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 1.8557717725045243e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 1089941891.0, | |
| "step": 285948 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_entropy": 1.1844752788543702, | |
| "eval_loss": 0.8457481265068054, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 1089941891.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1547, | |
| "eval_samples_per_second": 90.504, | |
| "eval_steps_per_second": 32.323, | |
| "step": 285948 | |
| }, | |
| { | |
| "entropy": 1.1046031564865983, | |
| "epoch": 48.0, | |
| "grad_norm": 0.0223388671875, | |
| "learning_rate": 1.2372150714735964e-06, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 1113132144.0, | |
| "step": 292032 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_entropy": 1.1853480339050293, | |
| "eval_loss": 0.8546704649925232, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 1113132144.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1565, | |
| "eval_samples_per_second": 89.473, | |
| "eval_steps_per_second": 31.955, | |
| "step": 292032 | |
| }, | |
| { | |
| "entropy": 1.104954168171102, | |
| "epoch": 49.0, | |
| "grad_norm": 0.0157470703125, | |
| "learning_rate": 6.186583704426686e-07, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 1136322397.0, | |
| "step": 298116 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_entropy": 1.1826888084411622, | |
| "eval_loss": 0.8540498614311218, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 1136322397.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1583, | |
| "eval_samples_per_second": 88.419, | |
| "eval_steps_per_second": 31.578, | |
| "step": 298116 | |
| }, | |
| { | |
| "entropy": 1.104515764203454, | |
| "epoch": 50.0, | |
| "grad_norm": 0.126953125, | |
| "learning_rate": 1.0166941174078366e-10, | |
| "loss": 0.0002, | |
| "mean_token_accuracy": 1.0, | |
| "num_tokens": 1159512650.0, | |
| "step": 304200 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_entropy": 1.1829030990600586, | |
| "eval_loss": 0.8473049998283386, | |
| "eval_mean_token_accuracy": 0.8898225903511048, | |
| "eval_num_gold": 14, | |
| "eval_num_guess": 14, | |
| "eval_num_tokens": 1159512650.0, | |
| "eval_recall": 0.7143, | |
| "eval_runtime": 0.1545, | |
| "eval_samples_per_second": 90.586, | |
| "eval_steps_per_second": 32.352, | |
| "step": 304200 | |
| } | |
| ], | |
| "logging_steps": 0, | |
| "max_steps": 304200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.770251437965312e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |