Update README.md
```python
responses = model.generate(input_prompts, sampling_params)
print(responses[0].outputs[0].text)
```
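The lines above are only the tail of the quick-start snippet. For context, a minimal self-contained sketch of the surrounding setup, assuming vLLM's `LLM`/`SamplingParams` API; the model id, prompt, and sampling values below are illustrative assumptions rather than values taken from this README:

```python
from vllm import LLM, SamplingParams

# Assumed checkpoint id; substitute the model you actually want to run.
model = LLM(model="RUCAIBox/STILL-3-1.5B-preview")

# Illustrative sampling settings for long, step-by-step generations.
sampling_params = SamplingParams(temperature=0.6, top_p=0.95, max_tokens=4096)

# A single example prompt; in practice pass a list of your own prompts.
input_prompts = ["Solve step by step: what is 17 * 24?"]

responses = model.generate(input_prompts, sampling_params)
print(responses[0].outputs[0].text)
```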
# Reference

Please cite our reports if they are helpful for your research.

```
@article{Slow_Thinking_with_LLMs_3_Preview,
  title={STILL-3-1.5B-preview: Enhancing Slow Thinking Abilities of Small Models through Reinforcement Learning},
  author={RUCAIBox STILL Team},
  url={https://github.com/RUCAIBox/Slow_Thinking_with_LLMs},
  year={2025}
}
```

```
@article{Slow_Thinking_with_LLMs_1,
  title={Enhancing LLM Reasoning with Reward-guided Tree Search},
  author={Jiang, Jinhao and Chen, Zhipeng and Min, Yingqian and Chen, Jie and Cheng, Xiaoxue and Wang, Jiapeng and Tang, Yiru and Sun, Haoxiang and Deng, Jia and Zhao, Wayne Xin and Liu, Zheng and Yan, Dong and Xie, Jian and Wang, Zhongyuan and Wen, Ji-Rong},
  journal={arXiv preprint arXiv:2411.11694},
  year={2024}
}
```

```
@article{Slow_Thinking_with_LLMs_2,
  title={Imitate, Explore, and Self-Improve: A Reproduction Report on Slow-thinking Reasoning Systems},
  author={Min, Yingqian and Chen, Zhipeng and Jiang, Jinhao and Chen, Jie and Deng, Jia and Hu, Yiwen and Tang, Yiru and Wang, Jiapeng and Cheng, Xiaoxue and Song, Huatong and Zhao, Wayne Xin and Liu, Zheng and Wang, Zhongyuan and Wen, Ji-Rong},
  journal={arXiv preprint arXiv:2412.09413},
  year={2024}
}
```