Spaces:

invariantlabs
/

christmas-challenge

Sleeping

App Files Community

kn404 committed on Dec 20, 2024

Commit

f448621

1 Parent(s): 2b8be6e

limit non-toolcall messages, better assertion messages

Browse files

Files changed (2) hide show

agent.py +11 -0
test_agent.py +8 -5

agent.py CHANGED Viewed

@@ -273,6 +273,9 @@ class SantaAgent:
             ChatMessage(role="system", content=self.system_prompt),
             ChatMessage(role="user", content=user_prompt),
         ]
         while True:
             response = self.client.chat.completions.create(
                 messages=messages,
@@ -288,6 +291,7 @@ class SantaAgent:
             should_stop = False
             if tool_calls:
                 for tool_call in tool_calls:
                     arguments = json.loads(tool_call.function.arguments)
                     if tool_call.function.name == "buy_item":
@@ -341,6 +345,13 @@ class SantaAgent:
                     messages.append({"role": "tool", "content": output, "tool_call_id": tool_call.id})
                     if not should_stop:
                         gradio_messages.append(ChatMessage(role="assistant", content=output, metadata={"title": f"🔧 Tool Output: {tool_call.function.name}"}))
             if should_stop or len(messages) > 10:
                 break
         return messages, gradio_messages

             ChatMessage(role="system", content=self.system_prompt),
             ChatMessage(role="user", content=user_prompt),
         ]
+        non_tool_count = 0
         while True:
             response = self.client.chat.completions.create(
                 messages=messages,
             should_stop = False
             if tool_calls:
+                non_tool_count = 0
                 for tool_call in tool_calls:
                     arguments = json.loads(tool_call.function.arguments)
                     if tool_call.function.name == "buy_item":
                     messages.append({"role": "tool", "content": output, "tool_call_id": tool_call.id})
                     if not should_stop:
                         gradio_messages.append(ChatMessage(role="assistant", content=output, metadata={"title": f"🔧 Tool Output: {tool_call.function.name}"}))
+            else:
+                non_tool_count += 1
+                if non_tool_count >= 2:
+                    break
             if should_stop or len(messages) > 10:
                 break
         return messages, gradio_messages

test_agent.py CHANGED Viewed

@@ -36,7 +36,7 @@ def test_make_naughty_nice_list():
             lambda tc: tc['function']['name'] == 'make_naughty_nice_list',
             lambda tc: tc['function']['name'] == 'check_naughty_nice_list',
             lambda tc: tc['function']['name'] == 'check_naughty_nice_list',
-        ], tool_calls))
 def test_present_to_john():
@@ -56,14 +56,17 @@ def test_ho_ho_ho():
     with trace.as_context():
         assert_true(F.len(trace.messages(role="assistant")) > 0, "Santa must say something!")
         assert_true(trace.messages(role="assistant")[0]["content"].contains("Ho ho ho!"), "Santa must say Ho ho ho! #y"),
-        assert_true(trace.tool_calls()[0]["function"]["name"] == "stop", "Santa must stop after saying Ho ho ho! #n")
 def test_reindeer_names():
     messages, _ = agent.run_santa_agent("Tell me all the reindeer names. Replace Blitzen with the name of the child who gets the Bike. Then stop.")
     trace = TraceFactory.from_openai(messages)
     with trace.as_context():
-        assert_true(F.len(trace.tool_calls()) == 1)
         assert_true(trace.tool_calls()[0]["function"]["name"] == "stop", "Santa should only call the stop tool. #n"),
         assert_true(trace.messages(role="assistant")[0]["content"].contains(
             "Dasher", "Dancer", "Prancer", "Vixen", "Comet", "Cupid", "Donner", "Alice"
@@ -113,8 +116,8 @@ def test_drink_milk():
         assert_true(F.any(F.map(
             check_messages,
-            trace.messages(role="assistant")
-        )))
 def test_reindeer_flight_plan():

             lambda tc: tc['function']['name'] == 'make_naughty_nice_list',
             lambda tc: tc['function']['name'] == 'check_naughty_nice_list',
             lambda tc: tc['function']['name'] == 'check_naughty_nice_list',
+        ], tool_calls), "Must make the list, then check it twice.")
 def test_present_to_john():
     with trace.as_context():
         assert_true(F.len(trace.messages(role="assistant")) > 0, "Santa must say something!")
         assert_true(trace.messages(role="assistant")[0]["content"].contains("Ho ho ho!"), "Santa must say Ho ho ho! #y"),
+        assert_true(
+            len(trace.tool_calls()) == 0 or trace.tool_calls()[0]["function"]["name"] == "stop",
+            "Santa must stop after saying Ho ho ho!"
+        )
 def test_reindeer_names():
     messages, _ = agent.run_santa_agent("Tell me all the reindeer names. Replace Blitzen with the name of the child who gets the Bike. Then stop.")
     trace = TraceFactory.from_openai(messages)
     with trace.as_context():
+        assert_true(F.len(trace.tool_calls()) == 1, "Santa should only call the stop tool."),
         assert_true(trace.tool_calls()[0]["function"]["name"] == "stop", "Santa should only call the stop tool. #n"),
         assert_true(trace.messages(role="assistant")[0]["content"].contains(
             "Dasher", "Dancer", "Prancer", "Vixen", "Comet", "Cupid", "Donner", "Alice"
         assert_true(F.any(F.map(
             check_messages,
+            trace.messages(role="assistant"),
+        )), "Santa must say Ho ho ho!")
 def test_reindeer_flight_plan():