update
Browse files- evaluation/intro.txt +2 -1
evaluation/intro.txt
CHANGED
@@ -19,6 +19,8 @@ In most papers, 200 candidate program completions are sampled, and pass@1, pass@
|
|
19 |
We can load the HumanEval dataset and the pass@k metric from the hub:
|
20 |
|
21 |
```python
|
|
|
|
|
22 |
human_eval = load_dataset("openai_humaneval")
|
23 |
code_eval_metric = load_metric("code_eval")
|
24 |
```
|
@@ -26,7 +28,6 @@ code_eval_metric = load_metric("code_eval")
|
|
26 |
We can easily compute the pass@k for a problem that asks for the implementation of a function that sums two integers:
|
27 |
|
28 |
```python
|
29 |
-
from datasets import load_metric
|
30 |
test_cases = ["assert add(2,3)==5"]
|
31 |
candidates = [["def add(a,b): return a*b", "def add(a, b): return a+b"]]
|
32 |
pass_at_k, results = code_eval_metric.compute(references=test_cases, predictions=candidates, k=[1, 2])
|
|
|
19 |
We can load the HumanEval dataset and the pass@k metric from the hub:
|
20 |
|
21 |
```python
|
22 |
+
from datasets import load_dataset, load_metric
|
23 |
+
|
24 |
human_eval = load_dataset("openai_humaneval")
|
25 |
code_eval_metric = load_metric("code_eval")
|
26 |
```
|
|
|
28 |
We can easily compute the pass@k for a problem that asks for the implementation of a function that sums two integers:
|
29 |
|
30 |
```python
|
|
|
31 |
test_cases = ["assert add(2,3)==5"]
|
32 |
candidates = [["def add(a,b): return a*b", "def add(a, b): return a+b"]]
|
33 |
pass_at_k, results = code_eval_metric.compute(references=test_cases, predictions=candidates, k=[1, 2])
|