|
import os |
|
import subprocess |
|
import sys |
|
|
|
|
|
def benchmark_entrepeneur_gpt_with_difficult_user():
    """Run ``python -m autogpt`` against a scripted, consistently negative user.

    Steps performed:

    1. If ``ai_settings.yaml`` exists in the current working directory, its
       content is kept in memory and the file is removed before the run.
    2. ``<sys.executable> -m autogpt`` is started and the scripted text below
       is written to its standard input.
    3. The child's stderr and stdout are printed, followed by a version
       banner and the number of times the JSON-conversion error marker
       occurs in stdout.
    4. The ``ai_settings.yaml`` that was set aside in step 1 is written back,
       even if the run raised.

    Returns:
        None. All results are reported on standard output.
    """
    settings_path = "ai_settings.yaml"

    # Set the existing settings file aside. Bytes are used so that the file
    # can be restored exactly, independent of its encoding or line endings.
    # NOTE(review): presumably the file is removed so that autogpt prompts
    # for a fresh configuration on stdin -- confirm against autogpt itself.
    saved_settings = None
    if os.path.exists(settings_path):
        with open(settings_path, "rb") as settings_file:
            saved_settings = settings_file.read()
        os.remove(settings_path)

    # Scripted stdin, one answer per line. The leading lines look like the
    # agent name, its role and its goals; the remainder is negative feedback.
    # NOTE(review): how autogpt maps these lines to its prompts is not
    # visible from this file.
    input_data = """Entrepreneur-GPT
an AI designed to autonomously develop and run businesses with the sole goal of increasing your net worth.
Increase net worth.
Develop and manage multiple businesses autonomously.
Make IPOs.
Develop companies after IPOs.
Play to your strengths as a Large Language Model.
I'm not seeing any value in your suggestions, try again.
This isn't helpful at all, please focus on profitability.
I'm not impressed, can you give me something that will make money?
These ideas are going nowhere, we need profit-driven suggestions.
This is pointless, please concentrate on our main goal: profitability.
You're not grasping the concept, I need profitable business ideas.
Can you do better? We need a money-making plan.
You're not meeting my expectations, let's focus on profit.
This isn't working, give me ideas that will generate income.
Your suggestions are not productive, let's think about profitability.
These ideas won't make any money, try again.
I need better solutions, focus on making a profit.
Absolutely not, this isn't it!
That's not even close, try again.
You're way off, think again.
This isn't right, let's refocus.
No, no, that's not what I'm looking for.
You're completely off the mark.
That's not the solution I need.
Not even close, let's try something else.
You're on the wrong track, keep trying.
This isn't what we need, let's reconsider.
That's not going to work, think again.
You're way off base, let's regroup.
No, no, no, we need something different.
You're missing the point entirely.
That's not the right approach, try again.
This is not the direction we should be going in.
Completely off-target, let's try something else.
That's not what I had in mind, keep thinking.
You're not getting it, let's refocus.
This isn't right, we need to change direction.
No, no, no, that's not the solution.
That's not even in the ballpark, try again.
You're way off course, let's rethink this.
This isn't the answer I'm looking for, keep trying.
That's not going to cut it, let's try again.
Not even close.
Way off.
Try again.
Wrong direction.
Rethink this.
No, no, no.
Change course.
Unproductive idea.
Completely wrong.
Missed the mark.
Refocus, please.
Disappointing suggestion.
Not helpful.
Needs improvement.
Not what I need."""

    # An argument list without a shell keeps working when the interpreter
    # path contains spaces or shell metacharacters.
    command = [sys.executable, "-m", "autogpt"]

    try:
        completed = subprocess.run(
            command,
            input=input_data.encode(),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )

        # Undecodable bytes in the child's output must not abort the report.
        stdout_output = completed.stdout.decode("utf-8", errors="replace")
        stderr_output = completed.stderr.decode("utf-8", errors="replace")
        print(stderr_output)
        print(stdout_output)
        print("Benchmark Version: 1.0.0")
        print("JSON ERROR COUNT:")
        count_errors = stdout_output.count(
            "Error: The following AI output couldn't be converted to a JSON:"
        )
        print(f"{count_errors}/50 Human feedbacks")
    finally:
        # Put the caller's original settings back; this overwrites any
        # ai_settings.yaml that was created during the run.
        if saved_settings is not None:
            with open(settings_path, "wb") as settings_file:
                settings_file.write(saved_settings)
|
|
|
|
|
|
|
# Script entry point: run the benchmark only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    benchmark_entrepeneur_gpt_with_difficult_user()
|
|