feat: Add VLLM integration demos for multi-agent research (#156)
- 0_basic_vllm_demo.py: Single agent with FOV, grounded text, VLLM query
- 1_multi_agent_demo.py: Three agents with perspective cycling

Features demonstrated:
- Headless step() + screenshot() for AI-driven gameplay
- ColorLayer.apply_perspective() for per-agent fog of war
- Grounded text generation based on entity visibility
- Sequential VLLM queries with vision model support
- Proper FOV reset between perspective switches

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
f2f8d6422f
commit
4713b62535
|
|
@ -0,0 +1,293 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
VLLM Integration Demo for McRogueFace
|
||||
=====================================
|
||||
|
||||
Demonstrates using a local Vision-Language Model (Gemma 3) with
|
||||
McRogueFace headless rendering to create an AI-driven agent.
|
||||
|
||||
Requirements:
|
||||
- Local VLLM running at http://192.168.1.100:8100
|
||||
- McRogueFace built with headless mode support
|
||||
|
||||
This is a research-grade demo for issue #156.
|
||||
"""
|
||||
|
||||
import mcrfpy
|
||||
from mcrfpy import automation
|
||||
import sys
|
||||
import requests
|
||||
import base64
|
||||
import os
|
||||
import random
|
||||
|
||||
# VLLM configuration
# NOTE(review): hard-coded LAN address — confirm this matches your local VLLM host.
VLLM_URL = "http://192.168.1.100:8100/v1/chat/completions"
SCREENSHOT_PATH = "/tmp/vllm_demo_screenshot.png"

# Sprite constants from Crypt of Sokoban tileset
# (tile indices into the 16x16 texture loaded in setup_scene)
FLOOR_COMMON = 0  # 95% of floors
FLOOR_SPECKLE1 = 12  # 4% of floors
FLOOR_SPECKLE2 = 24  # 1% of floors
WALL_TILE = 40  # Wall sprite
PLAYER_SPRITE = 84  # Player character
RAT_SPRITE = 123  # Enemy/rat creature
|
||||
|
||||
def file_to_base64(file_path):
    """Read an image file and return its contents as a base64-encoded string."""
    with open(file_path, 'rb') as handle:
        raw = handle.read()
    return base64.b64encode(raw).decode('utf-8')
|
||||
|
||||
def llm_chat_completion(messages: list):
    """POST a chat-completion request to the local VLLM server.

    Parameters
    ----------
    messages : list
        OpenAI-style chat messages (dicts with "role" and "content").

    Returns
    -------
    dict
        Decoded JSON response on success, or {"error": <message>} on any
        network, HTTP, or decoding failure so callers can branch on "error".
    """
    try:
        response = requests.post(VLLM_URL, json={'messages': messages}, timeout=60)
        # Surface HTTP 4xx/5xx instead of trying to parse an error page as
        # JSON; HTTPError subclasses RequestException, so it is caught below.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        return {"error": str(e)}
    except ValueError as e:
        # response.json() raises ValueError when the body is not valid JSON.
        return {"error": f"invalid JSON response: {e}"}
|
||||
|
||||
def message_with_image(text, image_path):
    """Build an OpenAI-style user message pairing *text* with an inline PNG."""
    encoded = file_to_base64(image_path)
    data_url = "data:image/png;base64," + encoded
    content = [
        {"type": "text", "text": text},
        {"type": "image_url", "image_url": {"url": data_url}},
    ]
    return {"role": "user", "content": content}
|
||||
|
||||
def get_floor_tile():
    """Pick a floor sprite using the tileset's 95%/4%/1% distribution."""
    roll = random.random()
    if roll >= 0.99:
        return FLOOR_SPECKLE2
    return FLOOR_SPECKLE1 if roll >= 0.95 else FLOOR_COMMON
|
||||
|
||||
def setup_scene():
    """Create a dungeon scene with player agent and NPC rat.

    Builds a 20x15 walled room with a vertical divider and a door, adds a
    fog-of-war color layer bound to the player's perspective, and returns
    the (grid, player, rat) tuple used by the rest of the demo.
    """
    print("Setting up scene...")

    # Create and set scene
    mcrfpy.createScene("vllm_demo")
    mcrfpy.setScene("vllm_demo")
    ui = mcrfpy.sceneUI("vllm_demo")

    # Load the game texture (16x16 tiles from Crypt of Sokoban)
    texture = mcrfpy.Texture("assets/kenney_TD_MR_IP.png", 16, 16)

    # Create grid: 1014px wide at position (5,5)
    # Using 20x15 grid for a reasonable dungeon size
    grid = mcrfpy.Grid(
        grid_size=(20, 15),
        texture=texture,
        pos=(5, 5),
        size=(1014, 700)
    )
    grid.fill_color = mcrfpy.Color(20, 20, 30)

    # Set zoom factor to 2.0 for better visibility
    grid.zoom = 2.0

    ui.append(grid)

    # Set up floor tiles and walls with proper sprite distribution
    for x in range(20):
        for y in range(15):
            point = grid.at(x, y)
            # Create walls around the edges
            if x == 0 or x == 19 or y == 0 or y == 14:
                point.tilesprite = WALL_TILE
                point.walkable = False
                point.transparent = False  # Walls block FOV
            else:
                # Floor inside with varied sprites
                point.tilesprite = get_floor_tile()
                point.walkable = True
                point.transparent = True  # Floors don't block FOV

    # Add some interior walls for interest - a room divider
    for y in range(5, 10):
        point = grid.at(10, y)
        point.tilesprite = WALL_TILE
        point.walkable = False
        point.transparent = False
    # Door opening (punched through the divider at its midpoint)
    door = grid.at(10, 7)
    door.tilesprite = get_floor_tile()
    door.walkable = True
    door.transparent = True

    # Create a ColorLayer for fog of war (z_index=10 to render on top)
    fov_layer = grid.add_layer('color', z_index=10)
    fov_layer.fill(mcrfpy.Color(0, 0, 0, 255))  # Start all black (unknown)

    # Create the player entity ("The Agent")
    player = mcrfpy.Entity(grid_pos=(5, 7), texture=texture, sprite_index=PLAYER_SPRITE)
    grid.entities.append(player)

    # Create an NPC rat entity (closer so it's visible in FOV)
    rat = mcrfpy.Entity(grid_pos=(10, 7), texture=texture, sprite_index=RAT_SPRITE)
    grid.entities.append(rat)

    # Bind the fog layer to player's perspective
    # visible = transparent, discovered = dim, unknown = black
    fov_layer.apply_perspective(
        entity=player,
        visible=mcrfpy.Color(0, 0, 0, 0),  # Transparent when visible
        discovered=mcrfpy.Color(40, 40, 60, 180),  # Dark overlay when discovered but not visible
        unknown=mcrfpy.Color(0, 0, 0, 255)  # Black when never seen
    )

    # Update visibility from player's position
    player.update_visibility()

    # Center the camera on the agent entity
    # NOTE(review): assumes 16px tiles; +8 targets the tile center — confirm
    # how grid.center interacts with grid.zoom in mcrfpy.
    px, py = int(player.pos[0]), int(player.pos[1])
    grid.center = (px * 16 + 8, py * 16 + 8)

    return grid, player, rat
|
||||
|
||||
def check_entity_visible(grid, entity):
    """Return whether *entity*'s tile lies inside the grid's current FOV."""
    tile_x = int(entity.pos[0])
    tile_y = int(entity.pos[1])
    return grid.is_in_fov(tile_x, tile_y)
|
||||
|
||||
def build_grounded_prompt(grid, player, rat):
    """Compose grounded observation text from what the agent can currently see."""
    # Future grounding hooks: blocking walls, ground items, doors/exits.
    rat_seen = check_entity_visible(grid, rat)
    observations = ["You see a rat to the east."] if rat_seen else []
    if not observations:
        return "The area appears clear."
    return " ".join(observations)
|
||||
|
||||
def run_demo():
    """Main demo function.

    Drives the single-agent demo end to end: build the scene, take a
    headless screenshot, then run two sequential VLLM queries (describe
    the view; choose an action). Returns True on success, False if the
    screenshot could not be captured.
    """
    print("=" * 60)
    print("VLLM Integration Demo (Research Mode)")
    print("=" * 60)
    print()

    # Setup the scene
    grid, player, rat = setup_scene()

    # Advance simulation to ensure scene is ready
    mcrfpy.step(0.016)

    # Take screenshot
    print(f"Taking screenshot: {SCREENSHOT_PATH}")
    result = automation.screenshot(SCREENSHOT_PATH)
    if not result:
        print("ERROR: Failed to take screenshot")
        return False

    file_size = os.path.getsize(SCREENSHOT_PATH)
    print(f"Screenshot saved: {file_size} bytes")
    print()

    # Build grounded observations (text derived from actual FOV state)
    grounded_text = build_grounded_prompt(grid, player, rat)
    print(f"Grounded observations: {grounded_text}")
    print()

    # Query 1: Ask VLLM to describe what it sees
    print("-" * 40)
    print("Query 1: Describe what you see")
    print("-" * 40)

    system_prompt = """You are an AI agent in a roguelike dungeon game. You can see the game world through screenshots.
The view shows a top-down grid-based dungeon with tiles, walls, and creatures.
Your character is the humanoid figure. The dark areas are outside your field of vision.
Other creatures may be enemies or NPCs. Describe what you observe concisely."""

    user_prompt = f"""Look at this game screenshot. {grounded_text}

Describe what you see in the dungeon from your character's perspective.
Be specific about:
- Your position in the room
- Any creatures you can see
- The layout of walls and passages
- Areas obscured by fog of war (darkness)"""

    messages = [
        {"role": "system", "content": system_prompt},
        message_with_image(user_prompt, SCREENSHOT_PATH)
    ]

    resp = llm_chat_completion(messages)

    # llm_chat_completion signals failure with an {"error": ...} dict.
    if "error" in resp:
        print(f"VLLM Error: {resp['error']}")
        print("\nNote: The VLLM server may not be running or accessible.")
        print("Screenshot is saved for manual inspection.")
        # Fallback so Query 2 still has an assistant turn to build on.
        description = "I can see a dungeon scene."
    else:
        description = resp.get('choices', [{}])[0].get('message', {}).get('content', 'No response')
        print(f"\nVLLM Response:\n{description}")
    print()

    # Query 2: Ask what action the agent would like to take
    print("-" * 40)
    print("Query 2: What would you like to do?")
    print("-" * 40)

    # Extend the running conversation with the model's own description.
    messages.append({"role": "assistant", "content": description})
    messages.append({
        "role": "user",
        "content": f"""Based on what you see, what action would you like to take?

Available actions:
- GO NORTH / SOUTH / EAST / WEST - move in that direction
- WAIT - stay in place and observe
- LOOK - examine your surroundings more carefully

{grounded_text}

State your reasoning briefly, then declare your action clearly (e.g., "Action: GO EAST")."""
    })

    resp = llm_chat_completion(messages)

    if "error" in resp:
        print(f"VLLM Error: {resp['error']}")
    else:
        action = resp.get('choices', [{}])[0].get('message', {}).get('content', 'No response')
        print(f"\nVLLM Response:\n{action}")
    print()

    print("=" * 60)
    print("Demo Complete")
    print("=" * 60)
    print(f"\nScreenshot preserved at: {SCREENSHOT_PATH}")
    print("Grid settings: zoom=2.0, FOV radius=8, perspective rendering enabled")

    return True
|
||||
|
||||
# Main execution: run the demo and exit 0 on PASS, 1 on FAIL or crash.
if __name__ == "__main__":
    try:
        success = run_demo()
        if success:
            print("\nPASS")
            sys.exit(0)
        else:
            print("\nFAIL")
            sys.exit(1)
    except Exception as e:
        # Broad catch is intentional at the script boundary: report the
        # traceback and exit non-zero rather than dying silently.
        print(f"\nError: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
||||
|
|
@ -0,0 +1,346 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Multi-Agent VLLM Demo for McRogueFace
|
||||
=====================================
|
||||
|
||||
Demonstrates cycling through multiple agent perspectives,
|
||||
each with their own FOV and grounded observations.
|
||||
|
||||
Three agents:
|
||||
- Wizard (left side) - can see the rat but not the other agents
|
||||
- Blacksmith (right side) - can see the knight, rat, and the wall
|
||||
- Knight (right side) - can see the blacksmith, rat, and the wall
|
||||
|
||||
Each agent gets their own screenshot and VLLM query.
|
||||
"""
|
||||
|
||||
import mcrfpy
|
||||
from mcrfpy import automation
|
||||
import sys
|
||||
import requests
|
||||
import base64
|
||||
import os
|
||||
import random
|
||||
|
||||
# VLLM configuration
# NOTE(review): hard-coded LAN address — confirm this matches your local VLLM host.
VLLM_URL = "http://192.168.1.100:8100/v1/chat/completions"
SCREENSHOT_DIR = "/tmp/vllm_multi_agent"

# Sprite constants (tile indices into the 16x16 texture loaded in setup_scene)
FLOOR_COMMON = 0
FLOOR_SPECKLE1 = 12
FLOOR_SPECKLE2 = 24
WALL_TILE = 40

# Agent sprites
WIZARD_SPRITE = 84
BLACKSMITH_SPRITE = 86
KNIGHT_SPRITE = 96
RAT_SPRITE = 123
|
||||
|
||||
|
||||
def file_to_base64(file_path):
    """Return the file at *file_path* as a base64-encoded UTF-8 string."""
    with open(file_path, 'rb') as handle:
        return base64.b64encode(handle.read()).decode('utf-8')
|
||||
|
||||
|
||||
def llm_chat_completion(messages: list):
    """POST a chat-completion request to the local VLLM server.

    Parameters
    ----------
    messages : list
        OpenAI-style chat messages (dicts with "role" and "content").

    Returns
    -------
    dict
        Decoded JSON response on success, or {"error": <message>} on any
        network, HTTP, or decoding failure so callers can branch on "error".
    """
    try:
        response = requests.post(VLLM_URL, json={'messages': messages}, timeout=60)
        # Surface HTTP 4xx/5xx instead of trying to parse an error page as
        # JSON; HTTPError subclasses RequestException, so it is caught below.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        return {"error": str(e)}
    except ValueError as e:
        # response.json() raises ValueError when the body is not valid JSON.
        return {"error": f"invalid JSON response: {e}"}
|
||||
|
||||
|
||||
def message_with_image(text, image_path):
    """Build an OpenAI-style user message pairing *text* with an inline PNG."""
    data_url = "data:image/png;base64," + file_to_base64(image_path)
    return {
        "role": "user",
        "content": [
            {"type": "text", "text": text},
            {"type": "image_url", "image_url": {"url": data_url}},
        ],
    }
|
||||
|
||||
|
||||
def get_floor_tile():
    """Pick a floor sprite using the tileset's 95%/4%/1% distribution."""
    roll = random.random()
    if roll >= 0.99:
        return FLOOR_SPECKLE2
    return FLOOR_SPECKLE1 if roll >= 0.95 else FLOOR_COMMON
|
||||
|
||||
|
||||
class Agent:
    """Wrapper for an agent entity with metadata."""

    def __init__(self, name, entity, description):
        self.name = name
        self.entity = entity
        # Human-readable description, e.g. "a wizard", "a blacksmith".
        self.description = description

    @property
    def pos(self):
        """Current grid position as an (int, int) tuple."""
        x, y = self.entity.pos[0], self.entity.pos[1]
        return (int(x), int(y))
|
||||
|
||||
|
||||
def setup_scene():
    """Create a dungeon scene with multiple agents.

    Builds a 25x15 walled room split by a vertical divider with one door,
    adds a fog-of-war color layer, and places three agents plus a rat.
    Returns (grid, fov_layer, agents, rat_entity).
    """
    print("Setting up multi-agent scene...")

    # Create and set scene
    mcrfpy.createScene("multi_agent_demo")
    mcrfpy.setScene("multi_agent_demo")
    ui = mcrfpy.sceneUI("multi_agent_demo")

    # Load the game texture
    texture = mcrfpy.Texture("assets/kenney_TD_MR_IP.png", 16, 16)

    # Create grid
    grid = mcrfpy.Grid(
        grid_size=(25, 15),
        texture=texture,
        pos=(5, 5),
        size=(1014, 700)
    )
    grid.fill_color = mcrfpy.Color(20, 20, 30)
    grid.zoom = 2.0
    ui.append(grid)

    # Set up floor tiles and walls (solid border, varied floor inside)
    for x in range(25):
        for y in range(15):
            point = grid.at(x, y)
            if x == 0 or x == 24 or y == 0 or y == 14:
                point.tilesprite = WALL_TILE
                point.walkable = False
                point.transparent = False
            else:
                point.tilesprite = get_floor_tile()
                point.walkable = True
                point.transparent = True

    # Add a wall divider in the middle (blocks wizard's view of right side)
    for y in range(3, 12):
        point = grid.at(10, y)
        point.tilesprite = WALL_TILE
        point.walkable = False
        point.transparent = False

    # Door opening in the wall
    door = grid.at(10, 7)
    door.tilesprite = get_floor_tile()
    door.walkable = True
    door.transparent = True

    # Create FOV layer for fog of war (starts fully black = unknown)
    fov_layer = grid.add_layer('color', z_index=10)
    fov_layer.fill(mcrfpy.Color(0, 0, 0, 255))

    # Create agents
    agents = []

    # Wizard on the left side
    wizard_entity = mcrfpy.Entity(grid_pos=(4, 7), texture=texture, sprite_index=WIZARD_SPRITE)
    grid.entities.append(wizard_entity)
    agents.append(Agent("Wizard", wizard_entity, "a wizard"))

    # Blacksmith on the right side (upper)
    blacksmith_entity = mcrfpy.Entity(grid_pos=(18, 5), texture=texture, sprite_index=BLACKSMITH_SPRITE)
    grid.entities.append(blacksmith_entity)
    agents.append(Agent("Blacksmith", blacksmith_entity, "a blacksmith"))

    # Knight on the right side (lower)
    knight_entity = mcrfpy.Entity(grid_pos=(18, 10), texture=texture, sprite_index=KNIGHT_SPRITE)
    grid.entities.append(knight_entity)
    agents.append(Agent("Knight", knight_entity, "a knight"))

    # Rat in the middle-right area (visible to blacksmith and knight, maybe wizard through door)
    rat_entity = mcrfpy.Entity(grid_pos=(14, 7), texture=texture, sprite_index=RAT_SPRITE)
    grid.entities.append(rat_entity)

    return grid, fov_layer, agents, rat_entity
|
||||
|
||||
|
||||
def switch_perspective(grid, fov_layer, agent):
    """Switch the grid view to an agent's perspective.

    Resets the fog layer, re-binds the perspective to *agent*, refreshes
    visibility from the agent's position, and recenters the camera.
    """
    # Reset fog layer to all unknown (black) before switching
    # This prevents discovered tiles from one agent carrying over to another
    fov_layer.fill(mcrfpy.Color(0, 0, 0, 255))

    # Apply this agent's perspective
    # (visible = transparent, discovered = dim overlay, unknown = black)
    fov_layer.apply_perspective(
        entity=agent.entity,
        visible=mcrfpy.Color(0, 0, 0, 0),
        discovered=mcrfpy.Color(40, 40, 60, 180),
        unknown=mcrfpy.Color(0, 0, 0, 255)
    )

    # Update visibility from agent's position
    agent.entity.update_visibility()

    # Center camera on this agent
    # NOTE(review): assumes 16px tiles; +8 targets the tile center.
    px, py = agent.pos
    grid.center = (px * 16 + 8, py * 16 + 8)
|
||||
|
||||
|
||||
def get_visible_entities(grid, observer, all_agents, rat):
    """Return human-readable descriptions of entities the observer can see."""
    ox, oy = observer.pos
    sightings = []

    # The rat first.
    rat_x, rat_y = int(rat.pos[0]), int(rat.pos[1])
    if grid.is_in_fov(rat_x, rat_y):
        sightings.append(f"a rat to the {get_direction(ox, oy, rat_x, rat_y)}")

    # Then every other agent (the observer itself is matched by name and skipped).
    for other in all_agents:
        if other.name == observer.name:
            continue
        ax, ay = other.pos
        if grid.is_in_fov(ax, ay):
            sightings.append(f"{other.description} to the {get_direction(ox, oy, ax, ay)}")

    return sightings
|
||||
|
||||
|
||||
def get_direction(from_x, from_y, to_x, to_y):
    """Return the cardinal (or intercardinal) direction from one tile to another."""
    dx, dy = to_x - from_x, to_y - from_y
    ew = "east" if dx > 0 else "west"
    ns = "south" if dy > 0 else "north"

    # Dominant axis wins; equal magnitudes report the compound diagonal.
    if abs(dx) > abs(dy):
        return ew
    if abs(dy) > abs(dx):
        return ns
    return f"{ns}{ew}"
|
||||
|
||||
|
||||
def build_grounded_prompt(visible_entities):
    """Render a list of sightings as a single grounded sentence."""
    if not visible_entities:
        return "The area appears clear."
    *head, tail = visible_entities
    if not head:
        return f"You see {tail}."
    return f"You see {', '.join(head)} and {tail}."
|
||||
|
||||
|
||||
def query_agent(agent, screenshot_path, grounded_text):
    """Query VLLM for a single agent's perspective.

    Builds a system+user message pair (with the agent's screenshot attached
    via message_with_image) and returns the model's response text, or an
    error string if the request failed.
    """
    system_prompt = f"""You are {agent.description} in a roguelike dungeon game. You can see the game world through screenshots.
The view shows a top-down grid-based dungeon. Your character is centered in the view.
The dark areas are outside your field of vision. Other figures may be allies, enemies, or NPCs.
Describe what you observe concisely and decide on an action."""

    user_prompt = f"""Look at this game screenshot from your perspective as {agent.description}. {grounded_text}

Describe what you see briefly, then choose an action:
- GO NORTH / SOUTH / EAST / WEST
- WAIT
- LOOK

State your reasoning in 1-2 sentences, then declare: "Action: <YOUR_ACTION>" """

    messages = [
        {"role": "system", "content": system_prompt},
        message_with_image(user_prompt, screenshot_path)
    ]

    resp = llm_chat_completion(messages)

    # llm_chat_completion signals failure with an {"error": ...} dict.
    if "error" in resp:
        return f"VLLM Error: {resp['error']}"
    else:
        return resp.get('choices', [{}])[0].get('message', {}).get('content', 'No response')
|
||||
|
||||
|
||||
def run_demo():
    """Main demo function.

    Cycles through each agent's perspective: reset/apply FOV, advance the
    simulation, capture a per-agent screenshot, build grounded observations,
    and run one VLLM query per agent. Returns True when the loop completes
    (agents whose screenshot fails are skipped, not fatal).
    """
    print("=" * 70)
    print("Multi-Agent VLLM Demo")
    print("=" * 70)
    print()

    # Create screenshot directory
    os.makedirs(SCREENSHOT_DIR, exist_ok=True)

    # Setup scene
    grid, fov_layer, agents, rat = setup_scene()

    # Cycle through each agent's perspective
    for i, agent in enumerate(agents):
        print(f"\n{'='*70}")
        print(f"Agent {i+1}/3: {agent.name} ({agent.description})")
        print(f"Position: {agent.pos}")
        print("=" * 70)

        # Switch to this agent's perspective (resets fog between agents)
        switch_perspective(grid, fov_layer, agent)

        # Advance simulation so the perspective change is rendered
        mcrfpy.step(0.016)

        # Take screenshot
        screenshot_path = os.path.join(SCREENSHOT_DIR, f"{i}_{agent.name.lower()}_view.png")
        result = automation.screenshot(screenshot_path)
        if not result:
            print(f"ERROR: Failed to take screenshot for {agent.name}")
            continue

        file_size = os.path.getsize(screenshot_path)
        print(f"Screenshot: {screenshot_path} ({file_size} bytes)")

        # Get visible entities for this agent
        visible = get_visible_entities(grid, agent, agents, rat)
        grounded_text = build_grounded_prompt(visible)
        print(f"Grounded observations: {grounded_text}")

        # Query VLLM
        print(f"\nQuerying VLLM for {agent.name}...")
        print("-" * 50)
        response = query_agent(agent, screenshot_path, grounded_text)
        print(f"\n{agent.name}'s Response:\n{response}")
        print()

    print("\n" + "=" * 70)
    print("Multi-Agent Demo Complete")
    print("=" * 70)
    print(f"\nScreenshots saved to: {SCREENSHOT_DIR}/")
    for i, agent in enumerate(agents):
        print(f" - {i}_{agent.name.lower()}_view.png")

    return True
|
||||
|
||||
|
||||
# Main execution: run the demo and exit 0 on PASS, 1 on FAIL or crash.
if __name__ == "__main__":
    try:
        success = run_demo()
        if success:
            print("\nPASS")
            sys.exit(0)
        else:
            print("\nFAIL")
            sys.exit(1)
    except Exception as e:
        # Broad catch is intentional at the script boundary: report the
        # traceback and exit non-zero rather than dying silently.
        print(f"\nError: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
||||
Loading…
Reference in New Issue