# train_agent.py
from twisted.internet import reactor, defer, task
from agent import AutonomousWebAgent
import random
import logging
import sys
import time
import codecs

# Configure logging to a UTF-8 file and to stdout
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("agent_training.log", encoding='utf-8'),
        logging.StreamHandler(codecs.getwriter('utf-8')(sys.stdout.buffer))
    ]
)
logger = logging.getLogger(__name__)

# List of diverse queries
QUERIES = [
    "machine learning", "climate change", "renewable energy", "artificial intelligence",
    "quantum computing", "blockchain technology", "gene editing", "virtual reality",
    "space exploration", "cybersecurity", "autonomous vehicles", "Internet of Things",
    "3D printing", "nanotechnology", "bioinformatics", "augmented reality",
    "robotics", "data science", "neural networks", "cloud computing",
    "edge computing", "5G technology", "cryptocurrency", "natural language processing",
    "computer vision"
]

@defer.inlineCallbacks
def train_agent():
    # state_size is 7 to match the feature extraction in AutonomousWebAgent
    state_size = 7  # word_count, link_count, header_count, semantic_similarity, image_count, script_count, css_count
    action_size = 3  # 0: Click Link, 1: Summarize, 2: RAG Generate
    num_options = 3  # 0: Search, 1: Summarize, 2: RAG Generate

    # Initialize the AutonomousWebAgent with the required arguments
    agent = AutonomousWebAgent(
        state_size=state_size,
        action_size=action_size,
        num_options=num_options,  # Number of high-level options for HRL
        hidden_size=64,
        learning_rate=0.001,
        gamma=0.99,
        epsilon=1.0,
        epsilon_decay=0.995,
        epsilon_min=0.01,
        knowledge_base_path='knowledge_base.json'
    )
    logger.info(f"Initialized AutonomousWebAgent with state_size={state_size}, action_size={action_size}, num_options={num_options}")

    num_episodes = 10  # Adjust as needed
    total_training_reward = 0
    start_time = time.time()

    for episode in range(num_episodes):
        query = random.choice(QUERIES)
        logger.info(f"Starting episode {episode + 1}/{num_episodes} with query: {query}")
        episode_start_time = time.time()

        try:
            # Initiate the search process
            search_deferred = agent.search(query)
            search_deferred.addTimeout(300, reactor)  # 5-minute timeout
            total_reward = yield search_deferred
            total_training_reward += total_reward
            episode_duration = time.time() - episode_start_time
            logger.info(f"Episode {episode + 1}/{num_episodes}, Query: {query}, Total Reward: {total_reward}, Duration: {episode_duration:.2f} seconds")
        except defer.TimeoutError:
            logger.error(f"Episode {episode + 1} timed out")
            total_reward = -1  # Assign a negative reward for timeouts
            total_training_reward += total_reward
        except Exception as e:
            logger.error(f"Error in episode {episode + 1}: {str(e)}", exc_info=True)
            total_reward = -1  # Assign a negative reward for errors
            total_training_reward += total_reward

        # Update target models periodically
        if (episode + 1) % 10 == 0:
            logger.info(f"Updating target models at episode {episode + 1}")
            agent.update_worker_target_model()
            agent.update_manager_target_model()
            agent.manager.update_target_model()

        # Log overall progress
        progress = (episode + 1) / num_episodes
        elapsed_time = time.time() - start_time
        estimated_total_time = elapsed_time / progress if progress > 0 else 0
        remaining_time = estimated_total_time - elapsed_time
        logger.info(f"Overall progress: {progress:.2%}, Elapsed time: {elapsed_time:.2f}s, Estimated remaining time: {remaining_time:.2f}s")

    total_training_time = time.time() - start_time
    average_reward = total_training_reward / num_episodes
    logger.info(f"Training completed. Total reward: {total_training_reward}, Average reward per episode: {average_reward:.2f}")
    logger.info(f"Total training time: {total_training_time:.2f} seconds")

    logger.info("Saving models.")
    # Save both the Worker and Manager models
    agent.save_worker_model("worker_model.pth")
    agent.save_manager_model("manager_model.pth")
    agent.save("web_agent_model.pth")  # Assuming this saves additional components if needed

    if reactor.running:
        logger.info("Stopping reactor")
        reactor.stop()
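# The loop above assumes the following contract for AutonomousWebAgent (a
# sketch of the expected interface, not the actual implementation in
# agent.py): search() must return a twisted Deferred that eventually fires
# with the episode's numeric total reward, e.g.:
#
#     class AutonomousWebAgent:
#         def search(self, query):
#             d = defer.Deferred()
#             # ... crawl, act, and learn asynchronously ...
#             reactor.callLater(0, d.callback, 0.0)  # fire with the episode reward
#             return d
#
# Anything that fires the Deferred with a number (or fails it) works with the
# yield/addTimeout pattern used in train_agent().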
logger.info(f"Training completed. Total reward: {total_training_reward}, Average reward per episode: {average_reward:.2f}") logger.info(f"Total training time: {total_training_time:.2f} seconds") logger.info("Saving models.") # Save both Worker and Manager models agent.save_worker_model("worker_model.pth") agent.save_manager_model("manager_model.pth") agent.save("web_agent_model.pth") # Assuming this saves additional components if needed if reactor.running: logger.info("Stopping reactor") reactor.stop() def main(): logger.info("Starting agent training") d = task.deferLater(reactor, 0, train_agent) d.addErrback(lambda failure: logger.error(f"An error occurred: {failure}", exc_info=True)) d.addBoth(lambda _: reactor.stop()) reactor.run() if __name__ == "__main__": main()