feat: Add VLLM integration demos for multi-agent research (#156)
- 0_basic_vllm_demo.py: Single agent with FOV, grounded text, VLLM query - 1_multi_agent_demo.py: Three agents with perspective cycling Features demonstrated: - Headless step() + screenshot() for AI-driven gameplay - ColorLayer.apply_perspective() for per-agent fog of war - Grounded text generation based on entity visibility - Sequential VLLM queries with vision model support - Proper FOV reset between perspective switches 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
f2f8d6422f
commit
4713b62535
|
|
@ -0,0 +1,293 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
VLLM Integration Demo for McRogueFace
|
||||||
|
=====================================
|
||||||
|
|
||||||
|
Demonstrates using a local Vision-Language Model (Gemma 3) with
|
||||||
|
McRogueFace headless rendering to create an AI-driven agent.
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Local VLLM running at http://192.168.1.100:8100
|
||||||
|
- McRogueFace built with headless mode support
|
||||||
|
|
||||||
|
This is a research-grade demo for issue #156.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import mcrfpy
|
||||||
|
from mcrfpy import automation
|
||||||
|
import sys
|
||||||
|
import requests
|
||||||
|
import base64
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
|
||||||
|
# VLLM configuration
# Both values may be overridden through the environment so the demo can be
# pointed at another server / output location without editing the script.
# The fallbacks are the values the demo was originally written against.
VLLM_URL = os.environ.get("VLLM_URL", "http://192.168.1.100:8100/v1/chat/completions")
SCREENSHOT_PATH = os.environ.get("VLLM_SCREENSHOT_PATH", "/tmp/vllm_demo_screenshot.png")

# Sprite constants from Crypt of Sokoban tileset
FLOOR_COMMON = 0        # 95% of floors
FLOOR_SPECKLE1 = 12     # 4% of floors
FLOOR_SPECKLE2 = 24     # 1% of floors
WALL_TILE = 40          # Wall sprite
PLAYER_SPRITE = 84      # Player character
RAT_SPRITE = 123        # Enemy/rat creature
|
||||||
|
|
||||||
|
def file_to_base64(file_path):
    """Read the file at *file_path* as bytes and return its base64 text form."""
    with open(file_path, 'rb') as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
|
||||||
|
|
||||||
|
def llm_chat_completion(messages: list):
    """Send *messages* to the local chat-completion endpoint.

    Args:
        messages: Chat messages in the OpenAI-style ``role``/``content`` form.

    Returns:
        The decoded JSON response on success. On any failure (connection
        problem, timeout, HTTP error status, or a body that is not valid
        JSON) a dict of the form ``{"error": "<description>"}`` is returned
        instead of raising, so callers only need to test for the "error" key.
    """
    try:
        response = requests.post(VLLM_URL, json={'messages': messages}, timeout=60)
    except requests.exceptions.RequestException as e:
        return {"error": str(e)}

    # A 4xx/5xx reply is a failure even when its body happens to be JSON;
    # surface it through the same "error" channel the callers already check.
    if not response.ok:
        return {"error": f"HTTP {response.status_code}: {response.text[:500]}"}

    try:
        return response.json()
    except ValueError as e:
        # Covers json.JSONDecodeError and the requests-specific subclass.
        return {"error": f"Invalid JSON in VLLM response: {e}"}
|
||||||
|
|
||||||
|
def message_with_image(text, image_path):
    """Build a user chat message carrying *text* plus the image at *image_path*.

    The image is embedded inline as a base64 ``data:image/png`` URL.
    """
    text_part = {"type": "text", "text": text}
    data_url = "data:image/png;base64," + file_to_base64(image_path)
    image_part = {"type": "image_url", "image_url": {"url": data_url}}
    return {"role": "user", "content": [text_part, image_part]}
|
||||||
|
|
||||||
|
def get_floor_tile():
    """Pick a floor sprite at random: 95% common, 4% speckle 1, 1% speckle 2."""
    roll = random.random()
    # Test the rarest band first; whatever is left is the common tile.
    if roll >= 0.99:
        return FLOOR_SPECKLE2
    if roll >= 0.95:
        return FLOOR_SPECKLE1
    return FLOOR_COMMON
|
||||||
|
|
||||||
|
def setup_scene():
    """Build the single-agent dungeon scene.

    Creates and activates the "vllm_demo" scene, fills a 20x15 grid with a
    wall border, floor interior and a divider wall with a door, adds a
    fog-of-war colour layer bound to the player, and places the player and
    a rat.

    Returns:
        Tuple ``(grid, player, rat)``.
    """
    print("Setting up scene...")

    scene_name = "vllm_demo"
    mcrfpy.createScene(scene_name)
    mcrfpy.setScene(scene_name)
    scene_ui = mcrfpy.sceneUI(scene_name)

    # 16x16 tiles from the Crypt of Sokoban sheet
    tiles = mcrfpy.Texture("assets/kenney_TD_MR_IP.png", 16, 16)

    cols, rows = 20, 15
    grid = mcrfpy.Grid(
        grid_size=(cols, rows),
        texture=tiles,
        pos=(5, 5),
        size=(1014, 700),
    )
    grid.fill_color = mcrfpy.Color(20, 20, 30)
    grid.zoom = 2.0  # zoomed in for better visibility
    scene_ui.append(grid)

    def paint(cell, sprite, passable):
        # In this scene a tile is see-through exactly when it is walkable.
        cell.tilesprite = sprite
        cell.walkable = passable
        cell.transparent = passable

    # Border of walls, varied floor everywhere else
    for x in range(cols):
        for y in range(rows):
            cell = grid.at(x, y)
            on_border = x in (0, cols - 1) or y in (0, rows - 1)
            if on_border:
                paint(cell, WALL_TILE, False)
            else:
                paint(cell, get_floor_tile(), True)

    # Room divider along column 10 ...
    for y in range(5, 10):
        paint(grid.at(10, y), WALL_TILE, False)
    # ... with a door punched back through it
    paint(grid.at(10, 7), get_floor_tile(), True)

    # Fog of war: colour layer above the tiles, initially fully black
    fov_layer = grid.add_layer('color', z_index=10)
    fov_layer.fill(mcrfpy.Color(0, 0, 0, 255))

    # "The Agent"
    player = mcrfpy.Entity(grid_pos=(5, 7), texture=tiles, sprite_index=PLAYER_SPRITE)
    grid.entities.append(player)

    # NPC rat, placed on the door tile
    rat = mcrfpy.Entity(grid_pos=(10, 7), texture=tiles, sprite_index=RAT_SPRITE)
    grid.entities.append(rat)

    # Tie the fog colours to what the player sees
    fov_layer.apply_perspective(
        entity=player,
        visible=mcrfpy.Color(0, 0, 0, 0),            # clear where visible
        discovered=mcrfpy.Color(40, 40, 60, 180),    # dim where seen before
        unknown=mcrfpy.Color(0, 0, 0, 255),          # black where never seen
    )
    player.update_visibility()

    # Aim the camera at the middle of the player's 16px tile
    tile_x = int(player.pos[0])
    tile_y = int(player.pos[1])
    grid.center = (tile_x * 16 + 8, tile_y * 16 + 8)

    return grid, player, rat
|
||||||
|
|
||||||
|
def check_entity_visible(grid, entity):
    """Return ``grid.is_in_fov`` for the tile *entity* stands on.

    The entity's position is truncated to integer tile coordinates first.
    """
    tile_x = int(entity.pos[0])
    tile_y = int(entity.pos[1])
    return grid.is_in_fov(tile_x, tile_y)
|
||||||
|
|
||||||
|
def _relative_direction(dx, dy):
    """Describe a tile offset (dx, dy) as a phrase such as "to the east"."""
    if dx == 0 and dy == 0:
        return "right here"
    # Assumes +x is east and +y is south (screen coordinates) -- TODO confirm
    east_west = "east" if dx > 0 else "west"
    north_south = "south" if dy > 0 else "north"
    if abs(dx) > abs(dy):
        return f"to the {east_west}"
    if abs(dy) > abs(dx):
        return f"to the {north_south}"
    return f"to the {north_south}{east_west}"


def build_grounded_prompt(grid, player, rat):
    """Build the grounded-observation sentence(s) for the prompt.

    Args:
        grid: Grid whose current FOV is queried.
        player: The observing entity; its position anchors the direction.
        rat: The entity to report if it lies in the current FOV.

    Returns:
        A sentence naming where the rat is relative to the player when it
        is visible, otherwise "The area appears clear.".
    """
    observations = []

    # Report the rat only if it is in view, with its direction computed from
    # the two positions rather than assumed.
    if check_entity_visible(grid, rat):
        dx = int(rat.pos[0]) - int(player.pos[0])
        dy = int(rat.pos[1]) - int(player.pos[1])
        observations.append(f"You see a rat {_relative_direction(dx, dy)}.")

    # Could add more observations here:
    # - walls blocking path
    # - items on ground
    # - doors/exits

    if not observations:
        observations.append("The area appears clear.")

    return " ".join(observations)
|
||||||
|
|
||||||
|
def _reply_text(resp):
    """Return the assistant text from a chat-completion response dict.

    Yields 'No response' when "choices" is missing or empty, or when the
    first choice carries no message content, instead of raising.
    """
    choices = resp.get('choices') or [{}]
    message = choices[0].get('message') or {}
    content = message.get('content')
    return 'No response' if content is None else content


def run_demo():
    """Run the single-agent demo: scene, screenshot, two VLLM queries.

    Returns:
        False if the screenshot could not be taken, True otherwise. VLLM
        errors are reported on stdout but do not fail the demo.
    """
    print("=" * 60)
    print("VLLM Integration Demo (Research Mode)")
    print("=" * 60)
    print()

    # Setup the scene
    grid, player, rat = setup_scene()

    # Advance simulation to ensure scene is ready
    mcrfpy.step(0.016)

    # Take screenshot
    print(f"Taking screenshot: {SCREENSHOT_PATH}")
    result = automation.screenshot(SCREENSHOT_PATH)
    if not result:
        print("ERROR: Failed to take screenshot")
        return False

    file_size = os.path.getsize(SCREENSHOT_PATH)
    print(f"Screenshot saved: {file_size} bytes")
    print()

    # Build grounded observations
    grounded_text = build_grounded_prompt(grid, player, rat)
    print(f"Grounded observations: {grounded_text}")
    print()

    # Query 1: Ask VLLM to describe what it sees
    print("-" * 40)
    print("Query 1: Describe what you see")
    print("-" * 40)

    system_prompt = """You are an AI agent in a roguelike dungeon game. You can see the game world through screenshots.
The view shows a top-down grid-based dungeon with tiles, walls, and creatures.
Your character is the humanoid figure. The dark areas are outside your field of vision.
Other creatures may be enemies or NPCs. Describe what you observe concisely."""

    user_prompt = f"""Look at this game screenshot. {grounded_text}

Describe what you see in the dungeon from your character's perspective.
Be specific about:
- Your position in the room
- Any creatures you can see
- The layout of walls and passages
- Areas obscured by fog of war (darkness)"""

    messages = [
        {"role": "system", "content": system_prompt},
        message_with_image(user_prompt, SCREENSHOT_PATH)
    ]

    resp = llm_chat_completion(messages)

    if "error" in resp:
        print(f"VLLM Error: {resp['error']}")
        print("\nNote: The VLLM server may not be running or accessible.")
        print("Screenshot is saved for manual inspection.")
        # Placeholder so the follow-up query still has an assistant turn
        description = "I can see a dungeon scene."
    else:
        description = _reply_text(resp)
        print(f"\nVLLM Response:\n{description}")
    print()

    # Query 2: Ask what action the agent would like to take
    print("-" * 40)
    print("Query 2: What would you like to do?")
    print("-" * 40)

    messages.append({"role": "assistant", "content": description})
    messages.append({
        "role": "user",
        "content": f"""Based on what you see, what action would you like to take?

Available actions:
- GO NORTH / SOUTH / EAST / WEST - move in that direction
- WAIT - stay in place and observe
- LOOK - examine your surroundings more carefully

{grounded_text}

State your reasoning briefly, then declare your action clearly (e.g., "Action: GO EAST")."""
    })

    resp = llm_chat_completion(messages)

    if "error" in resp:
        print(f"VLLM Error: {resp['error']}")
    else:
        action = _reply_text(resp)
        print(f"\nVLLM Response:\n{action}")
    print()

    print("=" * 60)
    print("Demo Complete")
    print("=" * 60)
    print(f"\nScreenshot preserved at: {SCREENSHOT_PATH}")
    print("Grid settings: zoom=2.0, FOV radius=8, perspective rendering enabled")

    return True
|
||||||
|
|
||||||
|
# Script entry point: exit status 0 on PASS, 1 on FAIL or unexpected error
if __name__ == "__main__":
    try:
        verdict, exit_code = ("\nPASS", 0) if run_demo() else ("\nFAIL", 1)
        print(verdict)
        # SystemExit is not an Exception subclass, so it passes the handler below.
        sys.exit(exit_code)
    except Exception as e:
        print(f"\nError: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
||||||
|
|
@ -0,0 +1,346 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Multi-Agent VLLM Demo for McRogueFace
|
||||||
|
=====================================
|
||||||
|
|
||||||
|
Demonstrates cycling through multiple agent perspectives,
|
||||||
|
each with their own FOV and grounded observations.
|
||||||
|
|
||||||
|
Three agents:
|
||||||
|
- Wizard (left side) - can see the rat but not the other agents
|
||||||
|
- Blacksmith (right side) - can see the knight, rat, and the wall
|
||||||
|
- Knight (right side) - can see the blacksmith, rat, and the wall
|
||||||
|
|
||||||
|
Each agent gets their own screenshot and VLLM query.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import mcrfpy
|
||||||
|
from mcrfpy import automation
|
||||||
|
import sys
|
||||||
|
import requests
|
||||||
|
import base64
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
|
||||||
|
# VLLM configuration
# Both values may be overridden through the environment so the demo can be
# pointed at another server / output directory without editing the script.
# The fallbacks are the values the demo was originally written against.
VLLM_URL = os.environ.get("VLLM_URL", "http://192.168.1.100:8100/v1/chat/completions")
SCREENSHOT_DIR = os.environ.get("VLLM_SCREENSHOT_DIR", "/tmp/vllm_multi_agent")

# Sprite constants
FLOOR_COMMON = 0
FLOOR_SPECKLE1 = 12
FLOOR_SPECKLE2 = 24
WALL_TILE = 40

# Agent sprites
WIZARD_SPRITE = 84
BLACKSMITH_SPRITE = 86
KNIGHT_SPRITE = 96
RAT_SPRITE = 123
|
||||||
|
|
||||||
|
|
||||||
|
def file_to_base64(file_path):
    """Read the file at *file_path* as bytes and return its base64 text form."""
    with open(file_path, 'rb') as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
def llm_chat_completion(messages: list):
    """Send *messages* to the local chat-completion endpoint.

    Args:
        messages: Chat messages in the OpenAI-style ``role``/``content`` form.

    Returns:
        The decoded JSON response on success. On any failure (connection
        problem, timeout, HTTP error status, or a body that is not valid
        JSON) a dict of the form ``{"error": "<description>"}`` is returned
        instead of raising, so callers only need to test for the "error" key.
    """
    try:
        response = requests.post(VLLM_URL, json={'messages': messages}, timeout=60)
    except requests.exceptions.RequestException as e:
        return {"error": str(e)}

    # A 4xx/5xx reply is a failure even when its body happens to be JSON;
    # surface it through the same "error" channel the callers already check.
    if not response.ok:
        return {"error": f"HTTP {response.status_code}: {response.text[:500]}"}

    try:
        return response.json()
    except ValueError as e:
        # Covers json.JSONDecodeError and the requests-specific subclass.
        return {"error": f"Invalid JSON in VLLM response: {e}"}
|
||||||
|
|
||||||
|
|
||||||
|
def message_with_image(text, image_path):
    """Build a user chat message carrying *text* plus the image at *image_path*.

    The image is embedded inline as a base64 ``data:image/png`` URL.
    """
    text_part = {"type": "text", "text": text}
    data_url = "data:image/png;base64," + file_to_base64(image_path)
    image_part = {"type": "image_url", "image_url": {"url": data_url}}
    return {"role": "user", "content": [text_part, image_part]}
|
||||||
|
|
||||||
|
|
||||||
|
def get_floor_tile():
    """Pick a floor sprite at random: 95% common, 4% speckle 1, 1% speckle 2."""
    roll = random.random()
    # Test the rarest band first; whatever is left is the common tile.
    if roll >= 0.99:
        return FLOOR_SPECKLE2
    if roll >= 0.95:
        return FLOOR_SPECKLE1
    return FLOOR_COMMON
|
||||||
|
|
||||||
|
|
||||||
|
class Agent:
    """Pairs a grid entity with a display name and a prose description."""

    def __init__(self, name, entity, description):
        # description is the phrase used in prompts, e.g. "a wizard"
        self.name, self.entity, self.description = name, entity, description

    @property
    def pos(self):
        """The entity's position truncated to an integer ``(x, y)`` tile."""
        raw = self.entity.pos
        tile_x = int(raw[0])
        tile_y = int(raw[1])
        return (tile_x, tile_y)
|
||||||
|
|
||||||
|
|
||||||
|
def setup_scene():
    """Build the multi-agent dungeon scene.

    Creates and activates the "multi_agent_demo" scene with a 25x15 grid
    (wall border, floor interior, divider wall with a door at column 10),
    a black fog-of-war colour layer, three agents and a rat.

    Returns:
        Tuple ``(grid, fov_layer, agents, rat_entity)`` where *agents* is a
        list of Agent wrappers in the order Wizard, Blacksmith, Knight.
    """
    print("Setting up multi-agent scene...")

    scene_name = "multi_agent_demo"
    mcrfpy.createScene(scene_name)
    mcrfpy.setScene(scene_name)
    scene_ui = mcrfpy.sceneUI(scene_name)

    tiles = mcrfpy.Texture("assets/kenney_TD_MR_IP.png", 16, 16)

    cols, rows = 25, 15
    grid = mcrfpy.Grid(
        grid_size=(cols, rows),
        texture=tiles,
        pos=(5, 5),
        size=(1014, 700),
    )
    grid.fill_color = mcrfpy.Color(20, 20, 30)
    grid.zoom = 2.0
    scene_ui.append(grid)

    def paint(cell, sprite, passable):
        # In this scene a tile is see-through exactly when it is walkable.
        cell.tilesprite = sprite
        cell.walkable = passable
        cell.transparent = passable

    # Border of walls, varied floor everywhere else
    for x in range(cols):
        for y in range(rows):
            cell = grid.at(x, y)
            on_border = x in (0, cols - 1) or y in (0, rows - 1)
            if on_border:
                paint(cell, WALL_TILE, False)
            else:
                paint(cell, get_floor_tile(), True)

    # Divider wall down column 10 (meant to block the wizard's view of the
    # right side) ...
    for y in range(3, 12):
        paint(grid.at(10, y), WALL_TILE, False)
    # ... with a door opening at row 7
    paint(grid.at(10, 7), get_floor_tile(), True)

    # Fog of war starts fully black
    fov_layer = grid.add_layer('color', z_index=10)
    fov_layer.fill(mcrfpy.Color(0, 0, 0, 255))

    # name, tile, sprite, prompt description -- wizard on the left,
    # blacksmith upper right, knight lower right
    roster = (
        ("Wizard", (4, 7), WIZARD_SPRITE, "a wizard"),
        ("Blacksmith", (18, 5), BLACKSMITH_SPRITE, "a blacksmith"),
        ("Knight", (18, 10), KNIGHT_SPRITE, "a knight"),
    )
    agents = []
    for name, tile, sprite, blurb in roster:
        body = mcrfpy.Entity(grid_pos=tile, texture=tiles, sprite_index=sprite)
        grid.entities.append(body)
        agents.append(Agent(name, body, blurb))

    # Rat in the middle-right area, on the same row as the door
    rat_entity = mcrfpy.Entity(grid_pos=(14, 7), texture=tiles, sprite_index=RAT_SPRITE)
    grid.entities.append(rat_entity)

    return grid, fov_layer, agents, rat_entity
|
||||||
|
|
||||||
|
|
||||||
|
def switch_perspective(grid, fov_layer, agent):
    """Re-point the fog layer and the camera at *agent*.

    The layer is first blanked to opaque black; the intent is that tiles
    discovered by the previously viewed agent do not carry over.
    """
    fov_layer.fill(mcrfpy.Color(0, 0, 0, 255))

    # visible = clear, discovered = dim overlay, unknown = black
    fog_colors = {
        "visible": mcrfpy.Color(0, 0, 0, 0),
        "discovered": mcrfpy.Color(40, 40, 60, 180),
        "unknown": mcrfpy.Color(0, 0, 0, 255),
    }
    fov_layer.apply_perspective(entity=agent.entity, **fog_colors)

    # Recompute what this agent can see from where it stands
    agent.entity.update_visibility()

    # Camera goes to the middle of the agent's 16px tile
    tile_x, tile_y = agent.pos
    grid.center = (tile_x * 16 + 8, tile_y * 16 + 8)
|
||||||
|
|
||||||
|
|
||||||
|
def get_visible_entities(grid, observer, all_agents, rat):
    """List what *observer* can currently see, as prompt-ready phrases.

    Args:
        grid: Grid whose ``is_in_fov`` is queried. NOTE(review): this
            presumably reflects the observer only after the perspective has
            been switched to it -- confirm against the caller.
        observer: The Agent doing the looking; skipped in *all_agents* by
            name.
        all_agents: Every Agent in the scene.
        rat: The rat entity (has ``pos``).

    Returns:
        Phrases such as "a rat to the east", rat first, then agents in
        *all_agents* order. An entity standing on the observer's own tile is
        reported as "... right here" rather than being given a direction.
    """
    ox, oy = observer.pos

    # (label, x, y) for everything that might be seen; rat first to keep
    # the established ordering of the output.
    candidates = [("a rat", int(rat.pos[0]), int(rat.pos[1]))]
    for agent in all_agents:
        if agent.name == observer.name:
            continue
        ax, ay = agent.pos
        candidates.append((agent.description, ax, ay))

    visible = []
    for label, x, y in candidates:
        if not grid.is_in_fov(x, y):
            continue
        if (x, y) == (ox, oy):
            # Zero displacement has no direction; get_direction would
            # otherwise report it as "northwest".
            visible.append(f"{label} right here")
        else:
            direction = get_direction(ox, oy, x, y)
            visible.append(f"{label} to the {direction}")

    return visible
|
||||||
|
|
||||||
|
|
||||||
|
def get_direction(from_x, from_y, to_x, to_y):
    """Name the compass direction from one point to another.

    The dominant axis wins (+x is "east", +y is "south"). When both axes are
    equally displaced the two names are joined, e.g. "southeast".
    NOTE: identical points also take that branch and yield "northwest".
    """
    dx, dy = to_x - from_x, to_y - from_y
    horizontal = "east" if dx > 0 else "west"
    vertical = "south" if dy > 0 else "north"

    span_x, span_y = abs(dx), abs(dy)
    if span_x > span_y:
        return horizontal
    if span_y > span_x:
        return vertical
    # Exact diagonal (or no displacement at all)
    return vertical + horizontal
|
||||||
|
|
||||||
|
|
||||||
|
def build_grounded_prompt(visible_entities):
    """Turn a list of entity phrases into one "You see ..." sentence.

    An empty list gives "The area appears clear."; several phrases are
    joined with commas and a final "and".
    """
    if not visible_entities:
        return "The area appears clear."

    *leading, final = visible_entities
    if not leading:
        return f"You see {final}."

    listing = ", ".join(leading)
    return f"You see {listing} and {final}."
|
||||||
|
|
||||||
|
|
||||||
|
def query_agent(agent, screenshot_path, grounded_text):
    """Ask the VLLM what *agent* observes and which action it chooses.

    Args:
        agent: Agent whose ``description`` is woven into the prompts.
        screenshot_path: PNG rendered from this agent's perspective.
        grounded_text: Sentence listing what the agent can see.

    Returns:
        The model's reply text; "VLLM Error: ..." if the request failed; or
        "No response" when the reply contains no usable message content
        (including an empty "choices" list).
    """
    system_prompt = f"""You are {agent.description} in a roguelike dungeon game. You can see the game world through screenshots.
The view shows a top-down grid-based dungeon. Your character is centered in the view.
The dark areas are outside your field of vision. Other figures may be allies, enemies, or NPCs.
Describe what you observe concisely and decide on an action."""

    user_prompt = f"""Look at this game screenshot from your perspective as {agent.description}. {grounded_text}

Describe what you see briefly, then choose an action:
- GO NORTH / SOUTH / EAST / WEST
- WAIT
- LOOK

State your reasoning in 1-2 sentences, then declare: "Action: <YOUR_ACTION>" """

    messages = [
        {"role": "system", "content": system_prompt},
        message_with_image(user_prompt, screenshot_path)
    ]

    resp = llm_chat_completion(messages)

    if "error" in resp:
        return f"VLLM Error: {resp['error']}"

    # `or [{}]` also covers an empty list, which would make [0] raise.
    choices = resp.get('choices') or [{}]
    message = choices[0].get('message') or {}
    content = message.get('content')
    return 'No response' if content is None else content
|
||||||
|
|
||||||
|
|
||||||
|
def run_demo():
    """Run the multi-agent demo: one screenshot and VLLM query per agent.

    Returns:
        True when a screenshot was captured for every agent, False if any
        capture failed. VLLM errors are printed as the agent's response and
        do not affect the result.
    """
    print("=" * 70)
    print("Multi-Agent VLLM Demo")
    print("=" * 70)
    print()

    # Create screenshot directory
    os.makedirs(SCREENSHOT_DIR, exist_ok=True)

    # Setup scene
    grid, fov_layer, agents, rat = setup_scene()

    total = len(agents)
    saved = []  # file names of the screenshots that were actually written

    # Cycle through each agent's perspective
    for i, agent in enumerate(agents):
        print(f"\n{'='*70}")
        print(f"Agent {i+1}/{total}: {agent.name} ({agent.description})")
        print(f"Position: {agent.pos}")
        print("=" * 70)

        # Switch to this agent's perspective
        switch_perspective(grid, fov_layer, agent)

        # Advance simulation
        mcrfpy.step(0.016)

        # Take screenshot
        file_name = f"{i}_{agent.name.lower()}_view.png"
        screenshot_path = os.path.join(SCREENSHOT_DIR, file_name)
        result = automation.screenshot(screenshot_path)
        if not result:
            # Keep going with the remaining agents; the failure is reflected
            # in the return value.
            print(f"ERROR: Failed to take screenshot for {agent.name}")
            continue
        saved.append(file_name)

        file_size = os.path.getsize(screenshot_path)
        print(f"Screenshot: {screenshot_path} ({file_size} bytes)")

        # Get visible entities for this agent
        visible = get_visible_entities(grid, agent, agents, rat)
        grounded_text = build_grounded_prompt(visible)
        print(f"Grounded observations: {grounded_text}")

        # Query VLLM
        print(f"\nQuerying VLLM for {agent.name}...")
        print("-" * 50)
        response = query_agent(agent, screenshot_path, grounded_text)
        print(f"\n{agent.name}'s Response:\n{response}")
        print()

    print("\n" + "=" * 70)
    print("Multi-Agent Demo Complete")
    print("=" * 70)
    print(f"\nScreenshots saved to: {SCREENSHOT_DIR}/")
    # Only list files that exist, not ones whose capture failed
    for file_name in saved:
        print(f" - {file_name}")

    return len(saved) == total
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: exit status 0 on PASS, 1 on FAIL or unexpected error
if __name__ == "__main__":
    try:
        verdict, exit_code = ("\nPASS", 0) if run_demo() else ("\nFAIL", 1)
        print(verdict)
        # SystemExit is not an Exception subclass, so it passes the handler below.
        sys.exit(exit_code)
    except Exception as e:
        print(f"\nError: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
||||||
Loading…
Reference in New Issue