# (extraction artifacts removed: file-size banner, git blame hashes, line-number gutter)
# train_agent.py
import sys
import os
from pathlib import Path

# True when this process is running inside Google Colab.
IS_COLAB = 'google.colab' in sys.modules

# Get the current file's directory
current_dir = Path(__file__).parent.absolute()

# Search for agent.py in the current directory and its parent directories,
# so the script works no matter where it is launched from.
agent_path = None
search_dir = current_dir
while True:
    possible_path = search_dir / 'agent.py'
    if possible_path.exists():
        agent_path = str(search_dir)
        break
    # FIX: the original loop condition stopped *before* examining the
    # filesystem root; check for the root after testing each directory.
    if search_dir == search_dir.parent:
        break
    search_dir = search_dir.parent

if agent_path:
    sys.path.insert(0, agent_path)
    print(f"Added {agent_path} to Python path")
else:
    print("Could not find agent.py")

# Now try to import AutonomousWebAgent.
# FIX: the original used a relative import (`from .agent import ...`), which
# raises "attempted relative import with no known parent package" whenever
# this file is executed as a script — exactly the case the sys.path insertion
# above is meant to support. An absolute import uses that path entry.
try:
    from agent import AutonomousWebAgent
    print("Successfully imported AutonomousWebAgent")
except ImportError as e:
    print(f"Error importing AutonomousWebAgent: {e}")
    sys.exit(1)
# Rest of your imports
from twisted.internet import reactor, defer, task
import random
import logging
import time
import codecs

# Configure logging. In Colab only the default stream handler is installed;
# otherwise log both to a UTF-8 file and to a UTF-8-wrapped stdout stream.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
if IS_COLAB:
    logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
else:
    _handlers = [
        logging.FileHandler("agent_training.log", encoding='utf-8'),
        logging.StreamHandler(codecs.getwriter('utf-8')(sys.stdout.buffer)),
    ]
    logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_handlers)

logger = logging.getLogger(__name__)
# List of diverse topics; one is drawn uniformly at random (random.choice)
# as the search query for each training episode.
QUERIES = [
    "machine learning",
    "climate change",
    "renewable energy",
    "artificial intelligence",
    "quantum computing",
    "blockchain technology",
    "gene editing",
    "virtual reality",
    "space exploration",
    "cybersecurity",
    "autonomous vehicles",
    "Internet of Things",
    "3D printing",
    "nanotechnology",
    "bioinformatics",
    "augmented reality",
    "robotics",
    "data science",
    "neural networks",
    "cloud computing",
    "edge computing",
    "5G technology",
    "cryptocurrency",
    "natural language processing",
    "computer vision",
]
@defer.inlineCallbacks
def train_agent():
    """Train the AutonomousWebAgent over a fixed number of episodes.

    Twisted ``inlineCallbacks`` generator: each episode picks a random query
    from QUERIES, starts an asynchronous ``agent.search(query)`` with a
    5-minute timeout, and yields until the Deferred fires with the episode's
    total reward. Timeouts and errors are absorbed as a -1 reward so training
    continues. After the final episode, the Worker/Manager models are saved
    and the reactor is stopped.
    """
    # Updated state_size to 7 to match the feature extraction in AutonomousWebAgent
    state_size = 7  # word_count, link_count, header_count, semantic_similarity, image_count, script_count, css_count
    action_size = 3  # 0: Click Link, 1: Summarize, 2: RAG Generate
    num_options = 3  # 0: Search, 1: Summarize, 2: RAG Generate
    # Initialize the AutonomousWebAgent with the required arguments
    agent = AutonomousWebAgent(
        state_size=state_size,
        action_size=action_size,
        num_options=num_options,  # Added parameter for HRL (hierarchical RL)
        hidden_size=64,
        learning_rate=0.001,
        gamma=0.99,
        epsilon=1.0,          # start fully exploratory; decays toward epsilon_min
        epsilon_decay=0.995,
        epsilon_min=0.01,
        knowledge_base_path='knowledge_base.json'
    )
    logger.info(f"Initialized AutonomousWebAgent with state_size={state_size}, action_size={action_size}, num_options={num_options}")
    num_episodes = 10  # Adjust as needed
    total_training_reward = 0
    start_time = time.time()
    for episode in range(num_episodes):
        query = random.choice(QUERIES)
        logger.info(f"Starting episode {episode + 1}/{num_episodes} with query: {query}")
        episode_start_time = time.time()
        try:
            # Initiate the asynchronous search; the yield below suspends this
            # generator until the Deferred fires (or the timeout triggers).
            search_deferred = agent.search(query)
            search_deferred.addTimeout(300, reactor)  # 5-minute timeout
            total_reward = yield search_deferred
            total_training_reward += total_reward
            episode_duration = time.time() - episode_start_time
            logger.info(f"Episode {episode + 1}/{num_episodes}, Query: {query}, Total Reward: {total_reward}, Duration: {episode_duration:.2f} seconds")
        except defer.TimeoutError:
            logger.error(f"Episode {episode + 1} timed out")
            total_reward = -1  # Assign a negative reward for timeout
            total_training_reward += total_reward
        except Exception as e:
            logger.error(f"Error in episode {episode + 1}: {str(e)}", exc_info=True)
            total_reward = -1  # Assign a negative reward for errors
            total_training_reward += total_reward
        # Update target models periodically (with num_episodes=10 this fires
        # exactly once, on the final episode)
        if (episode + 1) % 10 == 0:
            logger.info(f"Updating target models at episode {episode + 1}")
            agent.update_worker_target_model()
            agent.update_manager_target_model()
            agent.manager.update_target_model()
        # Log overall progress with a linear ETA extrapolated from elapsed time
        progress = (episode + 1) / num_episodes
        elapsed_time = time.time() - start_time
        estimated_total_time = elapsed_time / progress if progress > 0 else 0
        remaining_time = estimated_total_time - elapsed_time
        logger.info(f"Overall progress: {progress:.2%}, Elapsed time: {elapsed_time:.2f}s, Estimated remaining time: {remaining_time:.2f}s")
    total_training_time = time.time() - start_time
    average_reward = total_training_reward / num_episodes
    logger.info(f"Training completed. Total reward: {total_training_reward}, Average reward per episode: {average_reward:.2f}")
    logger.info(f"Total training time: {total_training_time:.2f} seconds")
    logger.info("Saving models.")
    # Save both Worker and Manager models
    agent.save_worker_model("worker_model.pth")
    agent.save_manager_model("manager_model.pth")
    agent.save("web_agent_model.pth")  # Assuming this saves additional components if needed
    # Stop the reactor so reactor.run() in main() returns once training ends.
    if reactor.running:
        logger.info("Stopping reactor")
        reactor.stop()
def main(is_colab=False):
    """Set the Colab flag, then run the training loop under the Twisted reactor.

    Args:
        is_colab: True when running inside Google Colab; stored in the
            module-level IS_COLAB flag.
    """
    global IS_COLAB
    IS_COLAB = is_colab
    # Diagnostic dump so import failures are easier to debug from the console.
    print(f"Current working directory: {os.getcwd()}")
    print(f"Python path: {sys.path}")
    print("Contents of current directory:")  # FIX: was a placeholder-free f-string
    for item in os.listdir():
        print(f" {item}")
    logger.info("Starting agent training")
    # Schedule train_agent() once the reactor starts.
    d = task.deferLater(reactor, 0, train_agent)
    d.addErrback(lambda failure: logger.error(f"An error occurred: {failure}", exc_info=True))
    # FIX: train_agent() already stops the reactor on successful completion;
    # calling reactor.stop() a second time raises ReactorNotRunning inside
    # this callback. Only stop the reactor if it is still running.
    d.addBoth(lambda _: reactor.stop() if reactor.running else None)
    reactor.run()


if __name__ == "__main__":
    main(IS_COLAB)