feat: Add TurnOrchestrator for multi-turn LLM simulation (addresses #156)

TurnOrchestrator: Coordinates multi-agent turn-based simulation
- Perspective switching with FOV layer updates
- Screenshot capture per agent per turn
- Pluggable LLM query callback
- SimulationStep/SimulationLog for full context capture
- JSON save/load with replay support

New demos:
- 2_integrated_demo.py: WorldGraph + action execution integration
- 3_multi_turn_demo.py: Complete multi-turn simulation with logging

Updated 1_multi_agent_demo.py with action parser/executor integration.

Tested with Qwen2.5-VL-32B: agents successfully navigate based on
WorldGraph descriptions and VLM visual input.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
John McCardle 2025-12-14 12:53:48 -05:00
parent 2890528e21
commit de739037f0
4 changed files with 1039 additions and 0 deletions

View File

@ -22,6 +22,9 @@ import base64
import os
import random
from action_parser import parse_action
from action_executor import ActionExecutor
# VLLM configuration
VLLM_URL = "http://192.168.1.100:8100/v1/chat/completions"
SCREENSHOT_DIR = "/tmp/vllm_multi_agent"
@ -284,6 +287,9 @@ def run_demo():
# Setup scene
grid, fov_layer, agents, rat = setup_scene()
# Create action executor
executor = ActionExecutor(grid)
# Cycle through each agent's perspective
for i, agent in enumerate(agents):
print(f"\n{'='*70}")
@ -319,6 +325,21 @@ def run_demo():
print(f"\n{agent.name}'s Response:\n{response}")
print()
# Parse and execute action
print(f"--- Action Execution ---")
action = parse_action(response)
print(f"Parsed action: {action.type.value} {action.args}")
result = executor.execute(agent, action)
if result.success:
print(f"SUCCESS: {result.message}")
if result.new_position:
# Update perspective after movement
switch_perspective(grid, fov_layer, agent)
mcrfpy.step(0.016)
else:
print(f"FAILED: {result.message}")
print("\n" + "=" * 70)
print("Multi-Agent Demo Complete")
print("=" * 70)

View File

@ -0,0 +1,399 @@
#!/usr/bin/env python3
"""
Integrated VLLM Demo
====================
Combines:
- WorldGraph for structured room descriptions (#155)
- Action parsing and execution (#156)
- Per-agent perspective rendering
This is the foundation for multi-turn simulation.
"""
import sys
import os
# Add the vllm_demo directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import mcrfpy
from mcrfpy import automation
import requests
import base64
from world_graph import (
WorldGraph, Room, Door, WorldObject, Direction, AgentInfo,
create_two_room_scenario
)
from action_parser import parse_action, ActionType
from action_executor import ActionExecutor
# Configuration
# Chat-completions endpoint that llm_chat_completion() POSTs to.
VLLM_URL = "http://192.168.1.100:8100/v1/chat/completions"
# Directory the per-turn screenshots are written into (created in run_demo()).
SCREENSHOT_DIR = "/tmp/vllm_integrated"
# Sprite constants
# Indices assigned to grid tiles (tilesprite) and entities (sprite_index)
# for the texture loaded in setup_scene_from_world().
FLOOR_TILE = 0
WALL_TILE = 40
WIZARD_SPRITE = 84
KNIGHT_SPRITE = 96
class Agent:
    """Pairs a grid entity with the WorldGraph so LLM context can be built for it."""

    def __init__(self, name: str, display_name: str, entity, world: WorldGraph):
        self.name, self.display_name = name, display_name
        self.entity, self.world = entity, world
        # For speech system (future); get_context() only reads the tail.
        self.message_history = []

    @property
    def pos(self) -> tuple:
        """Entity position converted to an (int, int) pair."""
        raw_x, raw_y = self.entity.pos[0], self.entity.pos[1]
        return (int(raw_x), int(raw_y))

    @property
    def current_room(self) -> str:
        """Name of the room at this agent's position, or None if there is none."""
        col, row = self.pos
        containing = self.world.room_at(col, row)
        if not containing:
            return None
        return containing.name

    def get_context(self, visible_agents: list) -> dict:
        """
        Assemble everything the LLM is told about this agent's situation.

        Args:
            visible_agents: Agent objects to mention in the room description.
                An entry whose name equals this agent's is flagged is_player.

        Returns:
            Dict with "location", "available_actions" and "recent_messages"
            (the last five entries of message_history).
        """
        room_name = self.current_room

        def _as_info(other):
            # WorldGraph works with AgentInfo records, not Agent wrappers.
            return AgentInfo(
                name=other.name,
                display_name=other.display_name,
                position=other.pos,
                is_player=(other.name == self.name),
            )

        agent_infos = list(map(_as_info, visible_agents))
        location = self.world.describe_room(
            room_name,
            visible_agents=agent_infos,
            observer_name=self.name,
        )
        actions = self.world.get_available_actions(room_name)
        return {
            "location": location,
            "available_actions": actions,
            "recent_messages": self.message_history[-5:],
        }
def file_to_base64(file_path):
    """Return the bytes of *file_path* base64-encoded as a UTF-8 string."""
    with open(file_path, 'rb') as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
def llm_chat_completion(messages: list):
    """
    Send chat completion request to local LLM.

    Args:
        messages: Chat messages, sent as the "messages" field of the JSON body.

    Returns:
        The decoded JSON response, or {"error": "<description>"} when the
        request fails or the response body is not valid JSON.
    """
    try:
        response = requests.post(VLLM_URL, json={'messages': messages}, timeout=60)
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # Response.json() signals an undecodable body with a ValueError
        # subclass; depending on the requests version that is not a
        # RequestException, so it is caught explicitly to keep the
        # "always returns a dict" contract callers rely on.
        return {"error": str(e)}
def message_with_image(text, image_path):
    """Build a user chat message carrying *text* plus the image at *image_path*.

    The image is embedded inline as a base64 PNG data URL.
    """
    data_url = "data:image/png;base64," + file_to_base64(image_path)
    text_part = {"type": "text", "text": text}
    image_part = {"type": "image_url", "image_url": {"url": data_url}}
    return {"role": "user", "content": [text_part, image_part]}
def setup_scene_from_world(world: WorldGraph):
    """
    Build the "integrated_demo" McRogueFace scene from a WorldGraph.

    The 25x15 grid starts as solid wall; every room rectangle and every
    door position from the WorldGraph is then turned into floor.

    Returns:
        (grid, fov_layer, texture)
    """
    scene_name = "integrated_demo"
    cols, rows = 25, 15

    mcrfpy.createScene(scene_name)
    mcrfpy.setScene(scene_name)
    ui = mcrfpy.sceneUI(scene_name)
    texture = mcrfpy.Texture("assets/kenney_TD_MR_IP.png", 16, 16)

    # Grid sized for the world (with margin)
    grid = mcrfpy.Grid(
        grid_size=(cols, rows),
        texture=texture,
        pos=(5, 5),
        size=(1014, 700),
    )
    grid.fill_color = mcrfpy.Color(20, 20, 30)
    grid.zoom = 2.0
    ui.append(grid)

    def _inside(col, row):
        # Rooms/doors may extend past the grid; those cells are ignored.
        return 0 <= col < cols and 0 <= row < rows

    def _paint(col, row, sprite, walkable, transparent):
        cell = grid.at(col, row)
        cell.tilesprite = sprite
        cell.walkable = walkable
        cell.transparent = transparent

    # Everything is wall until carved
    for col in range(cols):
        for row in range(rows):
            _paint(col, row, WALL_TILE, False, False)

    # Room interiors become walkable, see-through floor
    for room in world.rooms.values():
        for col in range(room.x, room.x + room.width):
            for row in range(room.y, room.y + room.height):
                if _inside(col, row):
                    _paint(col, row, FLOOR_TILE, True, True)

    # Door tiles are floor too, but a locked door cannot be walked through
    for door in world.doors:
        door_col, door_row = door.position
        if _inside(door_col, door_row):
            _paint(door_col, door_row, FLOOR_TILE, not door.locked, True)

    # Fog-of-war overlay, initially fully opaque black
    fov_layer = grid.add_layer('color', z_index=10)
    fov_layer.fill(mcrfpy.Color(0, 0, 0, 255))
    return grid, fov_layer, texture
def create_agents(grid, world: WorldGraph, texture) -> list:
    """Spawn the wizard and the knight at the centers of their starting rooms.

    Returns the agents in that order; each entity is also appended to
    grid.entities.
    """
    # (name, display name, starting room key, sprite index)
    roster = (
        ("Wizard", "a wizard", "guard_room", WIZARD_SPRITE),
        ("Knight", "a knight", "armory", KNIGHT_SPRITE),
    )
    agents = []
    for name, display_name, room_key, sprite in roster:
        start_room = world.rooms[room_key]
        entity = mcrfpy.Entity(
            grid_pos=start_room.center,
            texture=texture,
            sprite_index=sprite,
        )
        grid.entities.append(entity)
        agents.append(Agent(name, display_name, entity, world))
    return agents
def switch_perspective(grid, fov_layer, agent):
    """Re-render the fog layer for *agent* and center the camera on it."""
    # Start from a fully unknown (opaque black) fog layer
    blackout = mcrfpy.Color(0, 0, 0, 255)
    fov_layer.fill(blackout)

    # Bind the layer to this agent's entity with the three fog colours
    clear = mcrfpy.Color(0, 0, 0, 0)
    remembered = mcrfpy.Color(40, 40, 60, 180)
    never_seen = mcrfpy.Color(0, 0, 0, 255)
    fov_layer.apply_perspective(
        entity=agent.entity,
        visible=clear,
        discovered=remembered,
        unknown=never_seen,
    )

    # Recompute visibility from the agent's current position
    agent.entity.update_visibility()

    # Camera target: (coordinate * 16 + 8) on each axis
    col, row = agent.pos
    grid.center = (col * 16 + 8, row * 16 + 8)
def get_visible_agents(grid, observer, all_agents) -> list:
    """Return the agents other than *observer* whose position is in the grid's FOV.

    Agents are matched to the observer by name; input order is preserved.
    """
    def _in_view(candidate) -> bool:
        col, row = candidate.pos
        return grid.is_in_fov(col, row)

    return [
        other
        for other in all_agents
        if other.name != observer.name and _in_view(other)
    ]
def query_agent_llm(agent, screenshot_path, context) -> str:
    """
    Query VLLM for agent's action using WorldGraph context.

    This uses the structured context from WorldGraph instead of
    ad-hoc grounded prompts.

    Args:
        agent: Agent whose display_name is used in the system prompt.
        screenshot_path: Image file attached to the user message.
        context: Dict providing "location" and "available_actions".

    Returns:
        The model's reply text, "[VLLM Error: ...]" when the request layer
        reported an error, or 'No response' when the reply carries no
        choices or no content.
    """
    system_prompt = f"""You are {agent.display_name} in a roguelike dungeon game.
You see the world through screenshots and receive text descriptions.
Your goal is to explore and interact with your environment.
Always end your response with a clear action declaration: "Action: <ACTION>"
"""
    # Build the user prompt with WorldGraph context
    actions_str = ", ".join(context["available_actions"])
    user_prompt = f"""{context["location"]}
Available actions: {actions_str}
Look at the screenshot showing your current view. The dark areas are outside your field of vision.
What would you like to do? State your reasoning briefly (1-2 sentences), then declare your action.
Example: "I see a key on the ground that might be useful. Action: TAKE brass_key"
"""
    messages = [
        {"role": "system", "content": system_prompt},
        message_with_image(user_prompt, screenshot_path)
    ]
    resp = llm_chat_completion(messages)
    if "error" in resp:
        return f"[VLLM Error: {resp['error']}]"

    # An empty "choices" list or a null "content" must not reach the caller:
    # the former would raise IndexError here, the latter would return None
    # and break the string handling downstream.
    choices = resp.get('choices') or [{}]
    message = choices[0].get('message') or {}
    content = message.get('content')
    return content if content is not None else 'No response'
def run_single_turn(grid, fov_layer, agents, executor, turn_num):
    """
    Play one round in which every agent acts once, in list order.

    For each agent the grid is switched to that agent's perspective, a
    screenshot is captured, WorldGraph context is gathered, the LLM is
    asked for an action, and the parsed action is handed to the executor.

    Returns:
        One dict per agent with the keys "agent", "room", "context",
        "response", "action" and "result". "room" is read after the
        action has been executed.
    """
    banner = "=" * 70
    print(f"\n{banner}")
    print(f"TURN {turn_num}")
    print(banner)

    turn_records = []
    for actor in agents:
        print(f"\n--- {actor.name}'s Turn ---")
        print(f"Position: {actor.pos} | Room: {actor.current_room}")

        # Show the world from this agent's point of view, then step the engine
        switch_perspective(grid, fov_layer, actor)
        mcrfpy.step(0.016)

        # Capture what the agent sees
        shot_name = f"turn{turn_num}_{actor.name.lower()}.png"
        screenshot_path = os.path.join(SCREENSHOT_DIR, shot_name)
        automation.screenshot(screenshot_path)
        print(f"Screenshot: {screenshot_path}")

        # Structured context; the agent itself is included for filtering
        others_in_view = get_visible_agents(grid, actor, agents)
        context = actor.get_context(others_in_view + [actor])
        print("\nContext from WorldGraph:")
        print(f" Location: {context['location']}")
        print(f" Actions: {context['available_actions']}")

        # Ask the model what to do
        print("\nQuerying VLLM...")
        reply = query_agent_llm(actor, screenshot_path, context)
        truncation_mark = '...' if len(reply) > 300 else ''
        print(f"Response: {reply[:300]}{truncation_mark}")

        # Turn the reply into an action and carry it out
        chosen = parse_action(reply)
        print(f"\nParsed: {chosen.type.value} {chosen.args}")
        outcome = executor.execute(actor, chosen)
        if outcome.success:
            status = "SUCCESS"
        else:
            status = "FAILED"
        print(f"Result: {status} - {outcome.message}")

        turn_records.append({
            "agent": actor.name,
            "room": actor.current_room,
            "context": context,
            "response": reply,
            "action": chosen,
            "result": outcome,
        })
    return turn_records
def run_demo():
    """Run the integrated demo: build the world, play one turn, print a summary.

    Returns True once the turn has completed.
    """
    banner = "=" * 70
    print(banner)
    print("Integrated WorldGraph + Action Demo")
    print(banner)
    os.makedirs(SCREENSHOT_DIR, exist_ok=True)

    # World comes from the WorldGraph factory
    print("\nCreating world from WorldGraph...")
    world = create_two_room_scenario()
    print(f" Rooms: {list(world.rooms.keys())}")
    print(f" Doors: {len(world.doors)}")
    print(f" Objects: {list(world.objects.keys())}")

    # Scene mirrors the WorldGraph layout
    print("\nSetting up scene...")
    grid, fov_layer, texture = setup_scene_from_world(world)

    print("\nCreating agents...")
    agents = create_agents(grid, world, texture)
    for member in agents:
        print(f" {member.name} at {member.pos} in {member.current_room}")

    # One executor drives every agent's actions on the shared grid
    executor = ActionExecutor(grid)
    results = run_single_turn(grid, fov_layer, agents, executor, turn_num=1)

    print(f"\n{banner}")
    print("TURN SUMMARY")
    print(banner)
    for record in results:
        outcome = record["result"]
        status = "OK" if outcome.success else "FAIL"
        print(f" {record['agent']}: {record['action'].type.value} -> {status}")
        if outcome.new_position:
            print(f" New position: {outcome.new_position}")

    print(f"\n{banner}")
    print("Demo Complete")
    print(banner)
    return True
if __name__ == "__main__":
    import traceback

    # Exit status 0 on a truthy run_demo() result, 1 on a falsy result or
    # any exception (whose traceback is printed first).
    try:
        success = run_demo()
        if success:
            print("\nPASS")
            sys.exit(0)
        print("\nFAIL")
        sys.exit(1)
    except Exception:
        traceback.print_exc()
        sys.exit(1)

View File

@ -0,0 +1,318 @@
#!/usr/bin/env python3
"""
Multi-Turn Simulation Demo
==========================
Runs multiple turns of agent interaction with full logging.
This is the Phase 1 implementation from issue #154.
Two agents start in separate rooms and can move, observe,
and (in future versions) communicate to solve puzzles.
"""
import sys
import os
# Add the vllm_demo directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import mcrfpy
from mcrfpy import automation
import requests
import base64
from world_graph import (
WorldGraph, Room, Door, WorldObject, Direction, AgentInfo,
create_two_room_scenario, create_button_door_scenario
)
from action_parser import parse_action
from action_executor import ActionExecutor
from turn_orchestrator import TurnOrchestrator, SimulationLog
# Configuration
# Chat-completions endpoint that llm_query() POSTs to.
VLLM_URL = "http://192.168.1.100:8100/v1/chat/completions"
# Directory handed to TurnOrchestrator for screenshots (created in run_demo()).
SCREENSHOT_DIR = "/tmp/vllm_multi_turn"
# Where run_demo() saves the simulation log; also the default for --replay.
LOG_PATH = "/tmp/vllm_multi_turn/simulation_log.json"
# Upper bound on turns passed to TurnOrchestrator.run_simulation().
MAX_TURNS = 5
# Sprites
# Indices assigned to grid tiles (tilesprite) and entities (sprite_index)
# for the texture loaded in setup_scene().
FLOOR_TILE = 0
WALL_TILE = 40
WIZARD_SPRITE = 84
KNIGHT_SPRITE = 96
class Agent:
    """A named grid entity that can describe its surroundings via the WorldGraph."""

    def __init__(self, name: str, display_name: str, entity, world: WorldGraph):
        self.world = world
        self.entity = entity
        self.display_name = display_name
        self.name = name
        self.message_history = []

    @property
    def pos(self) -> tuple:
        """Entity position as a pair of ints."""
        x_int = int(self.entity.pos[0])
        y_int = int(self.entity.pos[1])
        return (x_int, y_int)

    @property
    def current_room(self) -> str:
        """Name of the room found at this agent's position, else None."""
        here = self.world.room_at(*self.pos)
        if here:
            return here.name
        return None

    def get_context(self, visible_agents: list) -> dict:
        """Collect the room description, available actions and recent messages.

        Every entry of *visible_agents* is converted to an AgentInfo; an
        entry whose name equals this agent's is flagged is_player. Only the
        last five entries of message_history are included.
        """
        room_name = self.current_room
        agent_infos = []
        for seen in visible_agents:
            agent_infos.append(
                AgentInfo(
                    name=seen.name,
                    display_name=seen.display_name,
                    position=seen.pos,
                    is_player=(seen.name == self.name),
                )
            )
        context = {}
        context["location"] = self.world.describe_room(room_name, agent_infos, self.name)
        context["available_actions"] = self.world.get_available_actions(room_name)
        context["recent_messages"] = self.message_history[-5:]
        return context
def file_to_base64(path: str) -> str:
    """Read *path* as bytes and return them base64-encoded as text."""
    with open(path, 'rb') as handle:
        payload = handle.read()
    return base64.b64encode(payload).decode('utf-8')
def llm_query(agent, screenshot_path: str, context: dict) -> str:
    """
    Query VLLM for agent action.

    This function is passed to TurnOrchestrator as the LLM query callback.

    Args:
        agent: Agent whose display_name is used in the system prompt.
        screenshot_path: PNG file embedded in the request as a data URL.
        context: Dict providing "location" and "available_actions".

    Returns:
        The model's reply text. Failures are reported as text as well:
        "[VLLM Error: ...]" when the server's JSON carries an "error" key,
        'No response' when the reply has no choices or no content, and
        "[Connection Error: ...]" for anything raised while talking to the
        server.
    """
    system_prompt = f"""You are {agent.display_name} exploring a dungeon.
You receive visual and text information about your surroundings.
Your goal is to explore, find items, and interact with the environment.
Always end your response with: Action: <YOUR_ACTION>"""
    actions_str = ", ".join(context["available_actions"])
    user_prompt = f"""{context["location"]}
Available actions: {actions_str}
[Screenshot attached showing your current view - dark areas are outside your vision]
What do you do? Brief reasoning (1-2 sentences), then Action: <action>"""
    messages = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": user_prompt},
                {"type": "image_url", "image_url": {
                    "url": "data:image/png;base64," + file_to_base64(screenshot_path)
                }}
            ]
        }
    ]
    try:
        resp = requests.post(VLLM_URL, json={'messages': messages}, timeout=60)
        data = resp.json()
        if "error" in data:
            return f"[VLLM Error: {data['error']}]"
        # An empty "choices" list used to surface as a misleading
        # "[Connection Error: list index out of range]", and a null
        # "content" was returned as None despite the str contract.
        choices = data.get('choices') or [{}]
        message = choices[0].get('message') or {}
        content = message.get('content')
        return content if content is not None else 'No response'
    except Exception as e:
        # Deliberate catch-all: the orchestrator expects a string back,
        # so transport and decoding failures are reported as text.
        return f"[Connection Error: {e}]"
def setup_scene(world: WorldGraph):
    """Build the "multi_turn" scene: a 25x15 grid carved from the WorldGraph.

    Returns:
        (grid, fov_layer, texture)
    """
    mcrfpy.createScene("multi_turn")
    mcrfpy.setScene("multi_turn")
    ui = mcrfpy.sceneUI("multi_turn")
    texture = mcrfpy.Texture("assets/kenney_TD_MR_IP.png", 16, 16)

    grid = mcrfpy.Grid(
        grid_size=(25, 15),
        texture=texture,
        pos=(5, 5),
        size=(1014, 700),
    )
    grid.fill_color = mcrfpy.Color(20, 20, 30)
    grid.zoom = 2.0
    ui.append(grid)

    # Every cell starts out as an opaque, non-walkable wall
    for cell in (grid.at(col, row) for col in range(25) for row in range(15)):
        cell.tilesprite = WALL_TILE
        cell.walkable = False
        cell.transparent = False

    # Room rectangles become floor; cells outside the grid are skipped
    for room in world.rooms.values():
        footprint = (
            (col, row)
            for col in range(room.x, room.x + room.width)
            for row in range(room.y, room.y + room.height)
        )
        for col, row in footprint:
            if not (0 <= col < 25 and 0 <= row < 15):
                continue
            cell = grid.at(col, row)
            cell.tilesprite = FLOOR_TILE
            cell.walkable = True
            cell.transparent = True

    # Doors are floor tiles that are only walkable when unlocked
    for door in world.doors:
        door_col, door_row = door.position
        if not (0 <= door_col < 25 and 0 <= door_row < 15):
            continue
        cell = grid.at(door_col, door_row)
        cell.tilesprite = FLOOR_TILE
        cell.walkable = not door.locked
        cell.transparent = True

    # Fog layer starts fully opaque black
    fov_layer = grid.add_layer('color', z_index=10)
    fov_layer.fill(mcrfpy.Color(0, 0, 0, 255))
    return grid, fov_layer, texture
def create_agents(grid, world: WorldGraph, texture) -> list:
    """Place the Wizard (guard_room) and the Knight (armory) and return them in that order."""
    def _spawn(room_key, sprite, name, display_name):
        # Entity starts at the room's center and is registered on the grid.
        home = world.rooms[room_key]
        entity = mcrfpy.Entity(
            grid_pos=home.center,
            texture=texture,
            sprite_index=sprite,
        )
        grid.entities.append(entity)
        return Agent(name, display_name, entity, world)

    wizard_agent = _spawn("guard_room", WIZARD_SPRITE, "Wizard", "a wizard")
    knight_agent = _spawn("armory", KNIGHT_SPRITE, "Knight", "a knight")
    return [wizard_agent, knight_agent]
def run_demo():
    """Run up to MAX_TURNS turns with two agents, save the log and print a summary.

    Returns True once the simulation has finished.
    """
    banner = "=" * 70
    print(banner)
    print("Multi-Turn Simulation Demo")
    print(f"Running up to {MAX_TURNS} turns with 2 agents")
    print(banner)
    os.makedirs(SCREENSHOT_DIR, exist_ok=True)

    print("\nCreating world...")
    world = create_two_room_scenario()
    print(f" Rooms: {list(world.rooms.keys())}")
    print(f" Objects: {list(world.objects.keys())}")

    print("\nSetting up scene...")
    grid, fov_layer, texture = setup_scene(world)

    print("\nCreating agents...")
    agents = create_agents(grid, world, texture)
    for member in agents:
        print(f" {member.name} at {member.pos} in {member.current_room}")

    orchestrator = TurnOrchestrator(
        grid=grid,
        fov_layer=fov_layer,
        world=world,
        agents=agents,
        screenshot_dir=SCREENSHOT_DIR,
        llm_query_fn=llm_query,
    )

    # Example stop condition; not passed below, so the run always uses the
    # full turn budget unless it is swapped in for stop_condition.
    def agents_met(orch):
        """Stop when agents are in the same room."""
        return orch.agents_in_same_room()

    log = orchestrator.run_simulation(
        max_turns=MAX_TURNS,
        stop_condition=None,  # Or use agents_met for early stopping
    )
    log.save(LOG_PATH)

    print(f"\n{banner}")
    print(log.summary())
    print(banner)

    print("\nFinal Agent Positions:")
    for member in agents:
        print(f" {member.name}: {member.pos} in {member.current_room}")
    print(f"\nScreenshots saved to: {SCREENSHOT_DIR}/")
    print(f"Simulation log saved to: {LOG_PATH}")
    return True
def replay_log(log_path: str):
    """
    Print a saved simulation: its summary, then every step grouped by turn.

    Utility for reviewing past simulations; nothing is re-executed.
    """
    print(f"Loading simulation from: {log_path}")
    log = SimulationLog.load(log_path)
    print("\n" + log.summary())
    print("\nTurn-by-Turn Replay:")
    print("-" * 50)

    last_turn_shown = 0
    for entry in log.steps:
        # Emit a header whenever the turn number changes
        if entry.turn != last_turn_shown:
            last_turn_shown = entry.turn
            print(f"\n=== Turn {last_turn_shown} ===")

        if entry.result_success:
            status = "OK"
        else:
            status = "FAIL"
        print(f" {entry.agent_id}: {entry.parsed_action_type} {entry.parsed_action_args}")
        print(f" {status}: {entry.result_message}")
        if entry.new_position:
            print(f" Moved to: {entry.new_position}")
if __name__ == "__main__":
    import traceback

    cli_args = sys.argv[1:]

    # Replay mode: "--replay [log_file]" prints a saved log and exits 0
    if cli_args and cli_args[0] == "--replay":
        log_file = cli_args[1] if len(cli_args) > 1 else LOG_PATH
        replay_log(log_file)
        sys.exit(0)

    # Normal execution: exit 0 on success, 1 on failure or any exception
    try:
        success = run_demo()
        if success:
            print("\nPASS")
            sys.exit(0)
        print("\nFAIL")
        sys.exit(1)
    except Exception:
        traceback.print_exc()
        sys.exit(1)

View File

@ -0,0 +1,301 @@
"""
Turn Orchestrator
=================
Manages multi-turn simulation with logging for replay.
Coordinates perspective switching, LLM queries, and action execution.
"""
import json
import os
from dataclasses import dataclass, asdict, field
from typing import List, Dict, Any, Optional, Callable
from datetime import datetime
from world_graph import WorldGraph, AgentInfo
from action_parser import Action, ActionType, parse_action
from action_executor import ActionExecutor, ActionResult
@dataclass
class SimulationStep:
    """Record of one agent's turn.

    Built by TurnOrchestrator._run_agent_turn after the agent's action has
    been executed, and rebuilt from JSON by SimulationLog.load.
    """
    turn: int  # Orchestrator turn number the step belongs to
    agent_id: str  # The acting agent's name
    agent_position: tuple  # agent.pos as read when the record is built
    room: str  # agent.current_room as read when the record is built
    perception: Dict[str, Any]  # Context shown to LLM
    llm_response: str  # Raw LLM output
    parsed_action_type: str  # Action type as string
    parsed_action_args: tuple  # Action arguments
    result_success: bool  # Copied from the executor's result
    result_message: str  # Copied from the executor's result
    new_position: Optional[tuple] = None  # Copied from result.new_position
    path: Optional[List[tuple]] = None  # For animation replay
    # Creation time of the record, ISO-8601 from datetime.now()
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
@dataclass
class SimulationLog:
    """Complete simulation record for replay and analysis.

    Attributes are documented inline; the log can be written to and read
    back from a JSON file with save() / load().
    """
    # Free-form run information; summary() reads "total_turns" and
    # "agent_names" from it.
    metadata: Dict[str, Any]
    # Step records in the order they were appended.
    steps: List["SimulationStep"] = field(default_factory=list)

    def save(self, path: str):
        """Save log to JSON file.

        Missing parent directories of *path* are created first. Values
        that JSON cannot represent natively are written via str().
        """
        data = {
            "metadata": self.metadata,
            "steps": [asdict(s) for s in self.steps]
        }
        parent = os.path.dirname(path)
        if parent:
            # Without this, saving into a not-yet-existing directory
            # raises FileNotFoundError and the whole run's log is lost.
            os.makedirs(parent, exist_ok=True)
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, default=str)
        print(f"Simulation log saved to: {path}")

    @classmethod
    def load(cls, path: str) -> 'SimulationLog':
        """Load log from JSON file.

        Raises:
            KeyError: if the file lacks the "metadata" or "steps" key.
        """
        with open(path, encoding='utf-8') as f:
            data = json.load(f)
        steps = []
        for s in data["steps"]:
            # JSON has no tuple type: tuples were written as lists, so
            # convert the tuple-typed fields back.
            for key in ("agent_position", "new_position", "parsed_action_args"):
                if isinstance(s.get(key), list):
                    s[key] = tuple(s[key])
            if s.get("path"):
                s["path"] = [tuple(p) for p in s["path"]]
            steps.append(SimulationStep(**s))
        return cls(metadata=data["metadata"], steps=steps)

    def get_agent_steps(self, agent_name: str) -> List["SimulationStep"]:
        """Get all steps for a specific agent."""
        return [s for s in self.steps if s.agent_id == agent_name]

    def get_turn_steps(self, turn: int) -> List["SimulationStep"]:
        """Get all steps from a specific turn."""
        return [s for s in self.steps if s.turn == turn]

    def summary(self) -> str:
        """Generate a summary of the simulation.

        Lists overall counts followed by per-agent statistics for every
        name in metadata["agent_names"].
        """
        agent_names = self.metadata.get('agent_names', [])
        lines = [
            "Simulation Summary",
            "==================",
            f"Total turns: {self.metadata.get('total_turns', 'unknown')}",
            f"Total steps: {len(self.steps)}",
            f"Agents: {', '.join(agent_names)}",
            "",
        ]
        # Per-agent stats
        for agent_name in agent_names:
            agent_steps = self.get_agent_steps(agent_name)
            successes = sum(1 for s in agent_steps if s.result_success)
            lines.append(f"{agent_name}:")
            lines.append(f" Actions: {len(agent_steps)}")
            lines.append(f" Successful: {successes}")
            if agent_steps:
                final = agent_steps[-1]
                # Prefer the position the last action moved to, if any
                final_pos = final.new_position or final.agent_position
                lines.append(f" Final position: {final_pos}")
                lines.append(f" Final room: {final.room}")
            lines.append("")
        return "\n".join(lines)
class TurnOrchestrator:
    """
    Orchestrates multi-turn simulation.

    Handles:
    - Turn sequencing
    - Perspective switching
    - LLM queries
    - Action execution
    - Simulation logging
    """

    def __init__(self, grid, fov_layer, world: "WorldGraph", agents: list,
                 screenshot_dir: str, llm_query_fn: Callable):
        """
        Initialize orchestrator.

        Args:
            grid: mcrfpy.Grid instance
            fov_layer: Color layer for FOV rendering
            world: WorldGraph instance
            agents: List of Agent objects
            screenshot_dir: Directory for screenshots (created if missing)
            llm_query_fn: Function(agent, screenshot_path, context) -> str
        """
        self.grid = grid
        self.fov_layer = fov_layer
        self.world = world
        self.agents = agents
        self.screenshot_dir = screenshot_dir
        self.llm_query_fn = llm_query_fn
        self.executor = ActionExecutor(grid)
        self.turn_number = 0
        # Every step of every turn run so far, in execution order
        self.steps: List["SimulationStep"] = []
        os.makedirs(screenshot_dir, exist_ok=True)

    def run_turn(self) -> List["SimulationStep"]:
        """
        Execute one full turn (all agents act once).

        Increments turn_number, then runs the agents in list order.

        Returns list of SimulationSteps for this turn.
        """
        self.turn_number += 1
        turn_steps = []
        print(f"\n{'='*60}")
        print(f"TURN {self.turn_number}")
        print("=" * 60)
        for agent in self.agents:
            step = self._run_agent_turn(agent)
            turn_steps.append(step)
            self.steps.append(step)
        return turn_steps

    def run_simulation(self, max_turns: int = 10,
                       stop_condition: Optional[Callable] = None) -> "SimulationLog":
        """
        Run complete simulation.

        Args:
            max_turns: Maximum number of turns to run
            stop_condition: Optional callable(orchestrator) -> bool
                Returns True to stop simulation early. Checked after
                each completed turn.

        Returns:
            SimulationLog with all steps recorded by this orchestrator so
            far. The log holds its own copy of the step list, so running
            further turns afterwards does not alter a returned log.
        """
        print(f"\nStarting simulation: max {max_turns} turns")
        print(f"Agents: {[a.name for a in self.agents]}")
        print("=" * 60)
        for _ in range(max_turns):
            self.run_turn()
            # Check stop condition
            if stop_condition and stop_condition(self):
                print(f"\nStop condition met at turn {self.turn_number}")
                break
        # Create log
        log = SimulationLog(
            metadata={
                "total_turns": self.turn_number,
                "num_agents": len(self.agents),
                "agent_names": [a.name for a in self.agents],
                "timestamp": datetime.now().isoformat(),
                "world_rooms": list(self.world.rooms.keys()),
                "screenshot_dir": self.screenshot_dir,
            },
            # Snapshot: sharing self.steps would let later turns grow an
            # already-returned log whose metadata no longer matches.
            steps=list(self.steps)
        )
        return log

    def _run_agent_turn(self, agent) -> "SimulationStep":
        """Execute one agent's turn and return its step record.

        Sequence: switch perspective, step the engine, take a screenshot,
        build context, query the LLM, parse and execute the action.
        """
        # Imported lazily so this module can be imported without the engine
        import mcrfpy
        from mcrfpy import automation
        print(f"\n--- {agent.name}'s Turn ---")
        print(f"Position: {agent.pos} | Room: {agent.current_room}")
        # Switch perspective
        self._switch_perspective(agent)
        mcrfpy.step(0.016)
        # Screenshot
        screenshot_path = os.path.join(
            self.screenshot_dir,
            f"turn{self.turn_number}_{agent.name.lower()}.png"
        )
        automation.screenshot(screenshot_path)
        # Build context; the agent itself is appended to the visible list
        visible_agents = self._get_visible_agents(agent)
        context = agent.get_context(visible_agents + [agent])
        # Query LLM
        llm_response = self.llm_query_fn(agent, screenshot_path, context)
        # Parse and execute
        action = parse_action(llm_response)
        result = self.executor.execute(agent, action)
        # Log output
        status = "SUCCESS" if result.success else "FAILED"
        print(f" Action: {action.type.value} {action.args}")
        print(f" Result: {status} - {result.message}")
        # Build step record. Position and room are read here, i.e. after
        # the action has been executed.
        step = SimulationStep(
            turn=self.turn_number,
            agent_id=agent.name,
            agent_position=agent.pos,
            room=agent.current_room,
            perception={
                "location": context["location"],
                "available_actions": context["available_actions"],
            },
            llm_response=llm_response,
            parsed_action_type=action.type.value,
            parsed_action_args=action.args,
            result_success=result.success,
            result_message=result.message,
            new_position=result.new_position,
            path=result.path
        )
        return step

    def _switch_perspective(self, agent):
        """Switch grid view to agent's perspective."""
        import mcrfpy
        # Reset the fog layer to opaque black before applying the new view
        self.fov_layer.fill(mcrfpy.Color(0, 0, 0, 255))
        self.fov_layer.apply_perspective(
            entity=agent.entity,
            visible=mcrfpy.Color(0, 0, 0, 0),
            discovered=mcrfpy.Color(40, 40, 60, 180),
            unknown=mcrfpy.Color(0, 0, 0, 255)
        )
        agent.entity.update_visibility()
        # Center the camera: (coordinate * 16 + 8) on each axis
        px, py = agent.pos
        self.grid.center = (px * 16 + 8, py * 16 + 8)

    def _get_visible_agents(self, observer) -> list:
        """Get agents visible to observer based on FOV.

        The observer (matched by name) is never included.
        NOTE(review): presumably grid.is_in_fov reflects the perspective
        applied by _switch_perspective for this observer - confirm.
        """
        visible = []
        for agent in self.agents:
            if agent.name == observer.name:
                continue
            ax, ay = agent.pos
            if self.grid.is_in_fov(ax, ay):
                visible.append(agent)
        return visible

    def get_agent_positions(self) -> Dict[str, tuple]:
        """Get current positions of all agents."""
        return {a.name: a.pos for a in self.agents}

    def agents_in_same_room(self) -> bool:
        """Check if all agents are in the same room.

        Returns False when there are no agents, and also when the agents
        are in no room at all: a shared current_room of None does not
        count as being in the same room.
        """
        rooms = {a.current_room for a in self.agents}
        return len(rooms) == 1 and None not in rooms