% Bibliography of arXiv preprints. Every entry is a misc record carrying the
% arXiv identifier (eprint), archive name (archivePrefix), primary subject
% class (primaryClass) and the abstract-page URL.
%
% Conventions:
%   * Citation keys are kept stable because documents cite them directly.
%   * Acronyms and camel-case names inside titles are wrapped in braces so
%     that sentence-casing bibliography styles do not lowercase them.
%   * Middle initials always carry a trailing period.

% gdm: test-time compute scaling (Snell et al.).
@misc{gdm,
  title         = {Scaling {LLM} Test-Time Compute Optimally can be More Effective than Scaling Model Parameters},
  author        = {Charlie Snell and Jaehoon Lee and Kelvin Xu and Aviral Kumar},
  year          = {2024},
  eprint        = {2408.03314},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2408.03314},
}

% prm: process- and outcome-based feedback for math word problems (Uesato et al.).
@misc{prm,
  title         = {Solving math word problems with process- and outcome-based feedback},
  author        = {Jonathan Uesato and Nate Kushman and Ramana Kumar and Francis Song and Noah Siegel and Lisa Wang and Antonia Creswell and Geoffrey Irving and Irina Higgins},
  year          = {2022},
  eprint        = {2211.14275},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2211.14275},
}

% lightman2023letsverifystepstep: Let's Verify Step by Step (Lightman et al.).
@misc{lightman2023letsverifystepstep,
  title         = {Let's Verify Step by Step},
  author        = {Hunter Lightman and Vineet Kosaraju and Yura Burda and Harri Edwards and Bowen Baker and Teddy Lee and Jan Leike and John Schulman and Ilya Sutskever and Karl Cobbe},
  year          = {2023},
  eprint        = {2305.20050},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2305.20050},
}

% dbs: diverse beam search (Vijayakumar et al.).
@misc{dbs,
  title         = {Diverse Beam Search: Decoding Diverse Solutions from Neural Sequence Models},
  author        = {Ashwin K. Vijayakumar and Michael Cogswell and Ramprasath R. Selvaraju and Qing Sun and Stefan Lee and David Crandall and Dhruv Batra},
  year          = {2018},
  eprint        = {1610.02424},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/1610.02424},
}

% repairtrees: self-repair for code generation (Olausson et al.).
@misc{repairtrees,
  title         = {Is Self-Repair a Silver Bullet for Code Generation?},
  author        = {Theo X. Olausson and Jeevana Priya Inala and Chenglong Wang and Jianfeng Gao and Armando Solar-Lezama},
  year          = {2024},
  eprint        = {2306.09896},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2306.09896},
}

% math: the MATH dataset paper (Hendrycks et al.).
@misc{math,
  title         = {Measuring Mathematical Problem Solving With the {MATH} Dataset},
  author        = {Dan Hendrycks and Collin Burns and Saurav Kadavath and Akul Arora and Steven Basart and Eric Tang and Dawn Song and Jacob Steinhardt},
  year          = {2021},
  eprint        = {2103.03874},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2103.03874},
}

% cot: NOTE the key says "cot" but the entry is the self-consistency paper
% (Wang et al.); the key is left unchanged so existing citations keep working.
@misc{cot,
  title         = {Self-Consistency Improves Chain of Thought Reasoning in Language Models},
  author        = {Xuezhi Wang and Jason Wei and Dale Schuurmans and Quoc Le and Ed Chi and Sharan Narang and Aakanksha Chowdhery and Denny Zhou},
  year          = {2023},
  eprint        = {2203.11171},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2203.11171},
}

% minerva: quantitative reasoning with language models (Lewkowycz et al.).
@misc{minerva,
  title         = {Solving Quantitative Reasoning Problems with Language Models},
  author        = {Aitor Lewkowycz and Anders Andreassen and David Dohan and Ethan Dyer and Henryk Michalewski and Vinay Ramasesh and Ambrose Slone and Cem Anil and Imanol Schlag and Theo Gutman-Solo and Yuhuai Wu and Behnam Neyshabur and Guy Gur-Ari and Vedant Misra},
  year          = {2022},
  eprint        = {2206.14858},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2206.14858},
}

% scalinglaws: scaling laws for neural language models (Kaplan et al.).
@misc{scalinglaws,
  title         = {Scaling Laws for Neural Language Models},
  author        = {Jared Kaplan and Sam McCandlish and Tom Henighan and Tom B. Brown and Benjamin Chess and Rewon Child and Scott Gray and Alec Radford and Jeffrey Wu and Dario Amodei},
  year          = {2020},
  eprint        = {2001.08361},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2001.08361},
}

% codex: evaluating language models trained on code (Chen et al.).
@misc{codex,
  title         = {Evaluating Large Language Models Trained on Code},
  author        = {Mark Chen and Jerry Tworek and Heewoo Jun and Qiming Yuan and Henrique Ponde de Oliveira Pinto and Jared Kaplan and Harri Edwards and Yuri Burda and Nicholas Joseph and Greg Brockman and Alex Ray and Raul Puri and Gretchen Krueger and Michael Petrov and Heidy Khlaaf and Girish Sastry and Pamela Mishkin and Brooke Chan and Scott Gray and Nick Ryder and Mikhail Pavlov and Alethea Power and Lukasz Kaiser and Mohammad Bavarian and Clemens Winter and Philippe Tillet and Felipe Petroski Such and Dave Cummings and Matthias Plappert and Fotios Chantzis and Elizabeth Barnes and Ariel Herbert-Voss and William Hebgen Guss and Alex Nichol and Alex Paino and Nikolas Tezak and Jie Tang and Igor Babuschkin and Suchir Balaji and Shantanu Jain and William Saunders and Christopher Hesse and Andrew N. Carr and Jan Leike and Josh Achiam and Vedant Misra and Evan Morikawa and Alec Radford and Matthew Knight and Miles Brundage and Mira Murati and Katie Mayer and Peter Welinder and Bob McGrew and Dario Amodei and Sam McCandlish and Ilya Sutskever and Wojciech Zaremba},
  year          = {2021},
  eprint        = {2107.03374},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2107.03374},
}

% processbench: the ProcessBench benchmark paper (Zheng et al.).
@misc{processbench,
  title         = {{ProcessBench}: Identifying Process Errors in Mathematical Reasoning},
  author        = {Chujie Zheng and Zhenru Zhang and Beichen Zhang and Runji Lin and Keming Lu and Bowen Yu and Dayiheng Liu and Jingren Zhou and Junyang Lin},
  year          = {2024},
  eprint        = {2412.06559},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2412.06559},
}

% score: self-correction via reinforcement learning (Kumar et al.).
@misc{score,
  title         = {Training Language Models to Self-Correct via Reinforcement Learning},
  author        = {Aviral Kumar and Vincent Zhuang and Rishabh Agarwal and Yi Su and John D. Co-Reyes and Avi Singh and Kate Baumli and Shariq Iqbal and Colton Bishop and Rebecca Roelofs and Lei M. Zhang and Kay McKinney and Disha Shrivastava and Cosmin Paduraru and George Tucker and Doina Precup and Feryal Behbahani and Aleksandra Faust},
  year          = {2024},
  eprint        = {2409.12917},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2409.12917},
}

% rest: Reinforced Self-Training (Gulcehre et al.).
@misc{rest,
  title         = {Reinforced Self-Training ({ReST}) for Language Modeling},
  author        = {Caglar Gulcehre and Tom Le Paine and Srivatsan Srinivasan and Ksenia Konyushkova and Lotte Weerts and Abhishek Sharma and Aditya Siddhant and Alex Ahern and Miaosen Wang and Chenjie Gu and Wolfgang Macherey and Arnaud Doucet and Orhan Firat and Nando de Freitas},
  year          = {2023},
  eprint        = {2308.08998},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2308.08998},
}

% vstar: V-STaR verifier training (Hosseini et al.).
@misc{vstar,
  title         = {{V-STaR}: Training Verifiers for Self-Taught Reasoners},
  author        = {Arian Hosseini and Xingdi Yuan and Nikolay Malkin and Aaron Courville and Alessandro Sordoni and Rishabh Agarwal},
  year          = {2024},
  eprint        = {2402.06457},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2402.06457},
}