RobbiePasquale
/

lightbulb

Model card Files Files and versions Community

lightbulb / train_agent.py

RobbiePasquale

Update train_agent.py

44f95d3 verified about 1 month ago

raw

history blame contribute delete

6.33 kB

	# train_agent.py
	import sys
	import os
	from pathlib import Path

	# True when the google.colab module has already been imported into this
	# interpreter.
	IS_COLAB = 'google.colab' in sys.modules

	# Directory containing this script. ``__file__`` is not defined when the
	# code runs without a backing file (e.g. pasted into a notebook cell), so
	# fall back to the working directory instead of failing with NameError.
	try:
	    current_dir = Path(__file__).parent.absolute()
	except NameError:
	    current_dir = Path.cwd()

	# Walk from the script directory up through every parent (filesystem root
	# included) and remember the first directory that contains agent.py.
	agent_path = None
	for search_dir in (current_dir, *current_dir.parents):
	    if (search_dir / 'agent.py').exists():
	        agent_path = str(search_dir)
	        break

	# Put that directory first on sys.path so ``agent`` can be imported by name.
	if agent_path:
	    sys.path.insert(0, agent_path)
	    print(f"Added {agent_path} to Python path")
	else:
	    print("Could not find agent.py")

	# Import AutonomousWebAgent, exiting with status 1 if that fails.
	# A relative import only works when this module was loaded as part of a
	# package; when the file is run directly as a script there is no parent
	# package and ``from .agent import ...`` always raises ImportError. In that
	# case import ``agent`` by its plain name via sys.path instead.
	try:
	    if __package__:
	        from .agent import AutonomousWebAgent
	    else:
	        from agent import AutonomousWebAgent
	    print("Successfully imported AutonomousWebAgent")
	except ImportError as e:
	    print(f"Error importing AutonomousWebAgent: {e}")
	    sys.exit(1)

	# Rest of your imports
	from twisted.internet import reactor, defer, task
	import random
	import logging
	import time
	import codecs
	# Configure logging.
	# Both branches log at INFO level with the same record format.
	_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

	if IS_COLAB:
	    # No explicit handlers: basicConfig installs its default stream handler.
	    logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
	else:
	    # Write UTF-8 bytes to stdout's underlying binary buffer when there is
	    # one. sys.stdout may have been replaced by a text-only stream that has
	    # no ``buffer`` attribute; use the stream itself then rather than
	    # raising AttributeError at import time.
	    _stdout_buffer = getattr(sys.stdout, 'buffer', None)
	    if _stdout_buffer is not None:
	        _console_stream = codecs.getwriter('utf-8')(_stdout_buffer)
	    else:
	        _console_stream = sys.stdout

	    logging.basicConfig(
	        level=logging.INFO,
	        format=_LOG_FORMAT,
	        handlers=[
	            logging.FileHandler("agent_training.log", encoding='utf-8'),
	            logging.StreamHandler(_console_stream),
	        ],
	    )

	logger = logging.getLogger(__name__)

	# Search topics for training; one is drawn at random for each episode.
	QUERIES = [
	    "machine learning",
	    "climate change",
	    "renewable energy",
	    "artificial intelligence",
	    "quantum computing",
	    "blockchain technology",
	    "gene editing",
	    "virtual reality",
	    "space exploration",
	    "cybersecurity",
	    "autonomous vehicles",
	    "Internet of Things",
	    "3D printing",
	    "nanotechnology",
	    "bioinformatics",
	    "augmented reality",
	    "robotics",
	    "data science",
	    "neural networks",
	    "cloud computing",
	    "edge computing",
	    "5G technology",
	    "cryptocurrency",
	    "natural language processing",
	    "computer vision",
	]

	@defer.inlineCallbacks
	def train_agent():
	    """Train an AutonomousWebAgent over a fixed number of search episodes.

	    Each episode draws a random entry from QUERIES, runs ``agent.search`` on
	    it with a 5-minute timeout and adds the reward it yields to a running
	    total. An episode that times out or raises is logged and counted as a
	    reward of -1. After every 10th episode the agent's target models are
	    updated. When all episodes are done the totals are logged, the models
	    are saved, and the reactor is stopped if it is still running.

	    Because of ``inlineCallbacks``, calling this returns a Deferred that
	    fires once the generator has finished.
	    """
	    # Per the original author's note, the 7 state features are: word_count,
	    # link_count, header_count, semantic_similarity, image_count,
	    # script_count, css_count.
	    state_size = 7
	    action_size = 3  # 0: Click Link, 1: Summarize, 2: RAG Generate
	    num_options = 3  # 0: Search, 1: Summarize, 2: RAG Generate

	    # Constructor arguments gathered in one place; passed as keywords below.
	    agent_settings = dict(
	        state_size=state_size,
	        action_size=action_size,
	        num_options=num_options,  # Added parameter for HRL
	        hidden_size=64,
	        learning_rate=0.001,
	        gamma=0.99,
	        epsilon=1.0,
	        epsilon_decay=0.995,
	        epsilon_min=0.01,
	        knowledge_base_path='knowledge_base.json',
	    )
	    agent = AutonomousWebAgent(**agent_settings)
	    logger.info(f"Initialized AutonomousWebAgent with state_size={state_size}, action_size={action_size}, num_options={num_options}")

	    num_episodes = 10  # Adjust as needed
	    search_timeout = 300  # seconds, i.e. 5 minutes per episode
	    target_update_every = 10  # episodes between target-model updates
	    failure_reward = -1  # charged for a timed-out or failed episode
	    total_training_reward = 0
	    training_started_at = time.time()

	    for episode in range(num_episodes):
	        query = random.choice(QUERIES)
	        logger.info(f"Starting episode {episode + 1}/{num_episodes} with query: {query}")
	        episode_started_at = time.time()

	        try:
	            # Start the search and wait for its reward, giving up after
	            # the timeout.
	            pending_search = agent.search(query)
	            pending_search.addTimeout(search_timeout, reactor)
	            total_reward = yield pending_search
	            total_training_reward += total_reward
	            episode_duration = time.time() - episode_started_at
	            logger.info(f"Episode {episode + 1}/{num_episodes}, Query: {query}, Total Reward: {total_reward}, Duration: {episode_duration:.2f} seconds")
	        except defer.TimeoutError:
	            logger.error(f"Episode {episode + 1} timed out")
	            total_training_reward += failure_reward
	        except Exception as e:
	            logger.error(f"Error in episode {episode + 1}: {str(e)}", exc_info=True)
	            total_training_reward += failure_reward

	        episodes_done = episode + 1

	        # Update target models periodically
	        if episodes_done % target_update_every == 0:
	            logger.info(f"Updating target models at episode {episode + 1}")
	            agent.update_worker_target_model()
	            agent.update_manager_target_model()
	            agent.manager.update_target_model()

	        # Extrapolate the remaining time from the fraction of episodes done.
	        progress = episodes_done / num_episodes
	        elapsed_time = time.time() - training_started_at
	        if progress > 0:
	            projected_total_time = elapsed_time / progress
	        else:
	            projected_total_time = 0
	        remaining_time = projected_total_time - elapsed_time
	        logger.info(f"Overall progress: {progress:.2%}, Elapsed time: {elapsed_time:.2f}s, Estimated remaining time: {remaining_time:.2f}s")

	    total_training_time = time.time() - training_started_at
	    average_reward = total_training_reward / num_episodes
	    logger.info(f"Training completed. Total reward: {total_training_reward}, Average reward per episode: {average_reward:.2f}")
	    logger.info(f"Total training time: {total_training_time:.2f} seconds")
	    logger.info("Saving models.")

	    # Persist the worker model, the manager model, and the agent's own save file.
	    agent.save_worker_model("worker_model.pth")
	    agent.save_manager_model("manager_model.pth")
	    agent.save("web_agent_model.pth")  # Assuming this saves additional components if needed

	    if reactor.running:
	        logger.info("Stopping reactor")
	        reactor.stop()

	def main(is_colab=False):
	    """Print environment diagnostics, then run training inside the reactor.

	    Prints the working directory, ``sys.path`` and the entries of the
	    current directory, schedules ``train_agent`` on the Twisted reactor and
	    blocks in ``reactor.run()`` until the reactor is stopped.

	    Args:
	        is_colab: Value stored in the module-level ``IS_COLAB`` flag.
	            NOTE: logging is configured at import time, so changing the
	            flag here does not reconfigure logging.
	    """
	    global IS_COLAB
	    # Function-scope import: only the shutdown guard below needs it.
	    from twisted.internet.error import ReactorNotRunning

	    IS_COLAB = is_colab
	    print(f"Current working directory: {os.getcwd()}")
	    print(f"Python path: {sys.path}")
	    print("Contents of current directory:")
	    for item in os.listdir():
	        print(f" {item}")

	    def log_failure(failure):
	        # An errback does not run inside an ``except`` block, so
	        # ``exc_info=True`` has no active exception to report. Log the
	        # traceback carried by the Failure instead.
	        logger.error(f"An error occurred: {failure}\n{failure.getTraceback()}")

	    def stop_reactor(_result):
	        # train_agent stops the reactor itself on success; a second
	        # reactor.stop() raises ReactorNotRunning, which is harmless here.
	        try:
	            reactor.stop()
	        except ReactorNotRunning:
	            pass

	    logger.info("Starting agent training")
	    d = task.deferLater(reactor, 0, train_agent)
	    d.addErrback(log_failure)
	    d.addBoth(stop_reactor)
	    reactor.run()

	# Script entry point: start training with the Colab flag detected at import.
	if __name__ == "__main__":
	    main(is_colab=IS_COLAB)