#!/usr/bin/env python3
"""
Multi-Agent VLLM Demo for McRogueFace
=====================================

Demonstrates cycling through multiple agent perspectives,
each with their own FOV and grounded observations.

Three agents:
- Wizard (left side) - can see the rat but not the other agents
- Blacksmith (right side) - can see the knight, rat, and the wall
- Knight (right side) - can see the blacksmith, rat, and the wall

Each agent gets their own screenshot and VLLM query.
"""

import base64
import os
import random
import sys
import traceback

import requests

import mcrfpy
from mcrfpy import automation

# VLLM configuration
VLLM_URL = "http://192.168.1.100:8100/v1/chat/completions"
SCREENSHOT_DIR = "/tmp/vllm_multi_agent"
VLLM_TIMEOUT_SECONDS = 60

# Scene geometry (in grid cells unless noted otherwise)
GRID_WIDTH = 25
GRID_HEIGHT = 15
TILE_SIZE = 16          # pixel size of one sprite in the texture atlas
DIVIDER_X = 10          # column of the wall that splits the room
DIVIDER_Y_RANGE = (3, 12)  # half-open row range covered by the divider
DOOR_Y = 7              # row of the opening in the divider

# Sprite constants
FLOOR_COMMON = 0
FLOOR_SPECKLE1 = 12
FLOOR_SPECKLE2 = 24
WALL_TILE = 40

# Agent sprites
WIZARD_SPRITE = 84
BLACKSMITH_SPRITE = 86
KNIGHT_SPRITE = 96
RAT_SPRITE = 123

# Value returned by get_direction() when both points coincide.
DIRECTION_HERE = "here"

# Fallback text when the model reply carries no usable content.
NO_RESPONSE = 'No response'


def file_to_base64(file_path):
    """Return the contents of ``file_path`` as a base64-encoded ASCII string."""
    with open(file_path, 'rb') as f:
        return base64.b64encode(f.read()).decode('utf-8')


def llm_chat_completion(messages: list):
    """POST ``messages`` to the chat-completion endpoint at ``VLLM_URL``.

    Returns:
        The decoded JSON body on success, or ``{"error": <text>}`` when the
        request fails, the server answers with an HTTP error status, or the
        body is not valid JSON. This function never raises for those cases,
        so callers only need to look for the ``"error"`` key.
    """
    try:
        response = requests.post(
            VLLM_URL,
            json={'messages': messages},
            timeout=VLLM_TIMEOUT_SECONDS,
        )
        # Surface 4xx/5xx as an error instead of parsing them as a reply.
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on requests versions whose
        # JSONDecodeError does not derive from RequestException.
        return {"error": str(e)}


def message_with_image(text, image_path):
    """Build a ``user`` chat message holding ``text`` plus an inline PNG image.

    The image at ``image_path`` is embedded as a base64 ``data:`` URL.
    """
    image_data = file_to_base64(image_path)
    return {
        "role": "user",
        "content": [
            {"type": "text", "text": text},
            {"type": "image_url",
             "image_url": {"url": "data:image/png;base64," + image_data}},
        ],
    }


def get_floor_tile():
    """Return a random floor sprite: 95% common, 4% speckle 1, 1% speckle 2."""
    roll = random.random()
    if roll < 0.95:
        return FLOOR_COMMON
    elif roll < 0.99:
        return FLOOR_SPECKLE1
    else:
        return FLOOR_SPECKLE2


class Agent:
    """Wrapper for an agent entity with metadata."""

    def __init__(self, name, entity, description):
        self.name = name
        self.entity = entity
        self.description = description  # e.g., "a wizard", "a blacksmith"

    def __repr__(self):
        return f"Agent(name={self.name!r}, description={self.description!r})"

    @property
    def pos(self):
        """The entity's position as an ``(x, y)`` tuple of ints."""
        return (int(self.entity.pos[0]), int(self.entity.pos[1]))


def _spawn_entity(grid, texture, grid_pos, sprite_index):
    """Create an entity at ``grid_pos``, add it to ``grid`` and return it."""
    entity = mcrfpy.Entity(grid_pos=grid_pos, texture=texture,
                           sprite_index=sprite_index)
    grid.entities.append(entity)
    return entity


def _set_wall(point):
    """Turn a grid point into an opaque, non-walkable wall tile."""
    point.tilesprite = WALL_TILE
    point.walkable = False
    point.transparent = False


def _set_floor(point):
    """Turn a grid point into a transparent, walkable floor tile."""
    point.tilesprite = get_floor_tile()
    point.walkable = True
    point.transparent = True


def setup_scene():
    """Create a dungeon scene with multiple agents.

    Builds a walled room split by a vertical divider with a single door,
    adds a colour layer used as fog of war, and spawns three agents and a rat.

    Returns:
        ``(grid, fov_layer, agents, rat_entity)`` where ``agents`` is a list
        of :class:`Agent` in the order Wizard, Blacksmith, Knight.
    """
    print("Setting up multi-agent scene...")

    # Create and set scene
    mcrfpy.createScene("multi_agent_demo")
    mcrfpy.setScene("multi_agent_demo")
    ui = mcrfpy.sceneUI("multi_agent_demo")

    # Load the game texture
    texture = mcrfpy.Texture("assets/kenney_TD_MR_IP.png", TILE_SIZE, TILE_SIZE)

    # Create grid
    grid = mcrfpy.Grid(
        grid_size=(GRID_WIDTH, GRID_HEIGHT),
        texture=texture,
        pos=(5, 5),
        size=(1014, 700)
    )
    grid.fill_color = mcrfpy.Color(20, 20, 30)
    grid.zoom = 2.0
    ui.append(grid)

    # Outer ring is wall, everything inside starts as floor
    last_x = GRID_WIDTH - 1
    last_y = GRID_HEIGHT - 1
    for x in range(GRID_WIDTH):
        for y in range(GRID_HEIGHT):
            point = grid.at(x, y)
            if x == 0 or x == last_x or y == 0 or y == last_y:
                _set_wall(point)
            else:
                _set_floor(point)

    # Add a wall divider in the middle (blocks wizard's view of right side)
    for y in range(*DIVIDER_Y_RANGE):
        _set_wall(grid.at(DIVIDER_X, y))

    # Door opening in the wall
    _set_floor(grid.at(DIVIDER_X, DOOR_Y))

    # Create FOV layer for fog of war, initially fully opaque black
    fov_layer = grid.add_layer('color', z_index=10)
    fov_layer.fill(mcrfpy.Color(0, 0, 0, 255))

    # Create agents
    agents = [
        # Wizard on the left side
        Agent("Wizard",
              _spawn_entity(grid, texture, (4, 7), WIZARD_SPRITE),
              "a wizard"),
        # Blacksmith on the right side (upper)
        Agent("Blacksmith",
              _spawn_entity(grid, texture, (18, 5), BLACKSMITH_SPRITE),
              "a blacksmith"),
        # Knight on the right side (lower)
        Agent("Knight",
              _spawn_entity(grid, texture, (18, 10), KNIGHT_SPRITE),
              "a knight"),
    ]

    # Rat in the middle-right area (visible to blacksmith and knight,
    # maybe wizard through door)
    rat_entity = _spawn_entity(grid, texture, (14, 7), RAT_SPRITE)

    return grid, fov_layer, agents, rat_entity


def switch_perspective(grid, fov_layer, agent):
    """Switch the grid view to an agent's perspective.

    Resets the fog layer, binds it to ``agent.entity``, refreshes that
    entity's visibility and centres the camera on the agent.
    """
    # Reset fog layer to all unknown (black) before switching
    # This prevents discovered tiles from one agent carrying over to another
    fov_layer.fill(mcrfpy.Color(0, 0, 0, 255))

    # Apply this agent's perspective
    fov_layer.apply_perspective(
        entity=agent.entity,
        visible=mcrfpy.Color(0, 0, 0, 0),
        discovered=mcrfpy.Color(40, 40, 60, 180),
        unknown=mcrfpy.Color(0, 0, 0, 255)
    )

    # Update visibility from agent's position
    agent.entity.update_visibility()

    # Center camera on the middle of this agent's tile (pixel coordinates)
    px, py = agent.pos
    half_tile = TILE_SIZE // 2
    grid.center = (px * TILE_SIZE + half_tile, py * TILE_SIZE + half_tile)


def _describe_sighting(label, from_x, from_y, to_x, to_y):
    """Phrase one sighting, e.g. ``"a rat to the east"``."""
    direction = get_direction(from_x, from_y, to_x, to_y)
    if direction == DIRECTION_HERE:
        # Avoid the ungrammatical "to the here" when tiles coincide.
        return f"{label} at your position"
    return f"{label} to the {direction}"


def get_visible_entities(grid, observer, all_agents, rat):
    """Get list of entities visible to the observer.

    Visibility is whatever ``grid.is_in_fov`` reports at call time.
    NOTE(review): presumably that reflects the most recent
    ``update_visibility()`` call made in ``switch_perspective`` - confirm
    against the mcrfpy documentation.

    Returns:
        A list of phrases such as ``"a rat to the east"``, rat first, then
        the other agents in ``all_agents`` order. The observer is skipped
        by name.
    """
    visible = []
    ox, oy = observer.pos

    # Check rat visibility
    rx, ry = int(rat.pos[0]), int(rat.pos[1])
    if grid.is_in_fov(rx, ry):
        visible.append(_describe_sighting("a rat", ox, oy, rx, ry))

    # Check other agents
    for agent in all_agents:
        if agent.name == observer.name:
            continue
        ax, ay = agent.pos
        if grid.is_in_fov(ax, ay):
            visible.append(_describe_sighting(agent.description, ox, oy, ax, ay))

    return visible


def get_direction(from_x, from_y, to_x, to_y):
    """Get the compass direction from one point to another.

    Uses screen coordinates (y grows southwards). The dominant axis wins;
    an exact diagonal yields an intercardinal name such as ``"southeast"``.
    Identical points yield ``DIRECTION_HERE`` (``"here"``).
    """
    dx = to_x - from_x
    dy = to_y - from_y

    if dx == 0 and dy == 0:
        # Previously this fell through to the diagonal case and reported
        # "northwest" for a target on the observer's own tile.
        return DIRECTION_HERE

    # Primary direction
    if abs(dx) > abs(dy):
        return "east" if dx > 0 else "west"
    elif abs(dy) > abs(dx):
        return "south" if dy > 0 else "north"
    else:
        # Exact diagonal - combine both axes
        ns = "south" if dy > 0 else "north"
        ew = "east" if dx > 0 else "west"
        return f"{ns}{ew}"


def build_grounded_prompt(visible_entities):
    """Build grounded text from visible entities.

    Returns ``"The area appears clear."`` for an empty list, otherwise a
    sentence of the form ``"You see a, b and c."``.
    """
    if not visible_entities:
        return "The area appears clear."

    if len(visible_entities) == 1:
        return f"You see {visible_entities[0]}."
    else:
        items = ", ".join(visible_entities[:-1]) + f" and {visible_entities[-1]}"
        return f"You see {items}."


def _extract_reply(resp):
    """Pull ``choices[0].message.content`` out of a chat-completion reply.

    Any missing, empty or wrongly-typed level yields ``NO_RESPONSE`` instead
    of raising, so a malformed server reply cannot abort the demo.
    """
    choices = resp.get('choices')
    if not isinstance(choices, list) or not choices:
        return NO_RESPONSE
    first = choices[0]
    message = first.get('message') if isinstance(first, dict) else None
    content = message.get('content') if isinstance(message, dict) else None
    return content if content else NO_RESPONSE


def query_agent(agent, screenshot_path, grounded_text):
    """Query VLLM for a single agent's perspective.

    Args:
        agent: The :class:`Agent` whose point of view is being queried.
        screenshot_path: Path of the PNG screenshot to attach.
        grounded_text: Sentence describing what the agent can see.

    Returns:
        The model's reply text, ``"VLLM Error: ..."`` when the request
        failed, or ``"No response"`` when the reply had no content.
    """
    system_prompt = f"""You are {agent.description} in a roguelike dungeon game. You can see the game world through screenshots.

The view shows a top-down grid-based dungeon. Your character is centered in the view.
The dark areas are outside your field of vision.
Other figures may be allies, enemies, or NPCs.

Describe what you observe concisely and decide on an action."""

    user_prompt = f"""Look at this game screenshot from your perspective as {agent.description}.

{grounded_text}

Describe what you see briefly, then choose an action:
- GO NORTH / SOUTH / EAST / WEST
- WAIT
- LOOK

State your reasoning in 1-2 sentences, then declare: "Action: "
"""

    messages = [
        {"role": "system", "content": system_prompt},
        message_with_image(user_prompt, screenshot_path)
    ]

    resp = llm_chat_completion(messages)

    if not isinstance(resp, dict):
        # A JSON array/scalar is valid JSON but not a chat-completion reply.
        return f"VLLM Error: unexpected response type {type(resp).__name__}"
    if "error" in resp:
        return f"VLLM Error: {resp['error']}"
    return _extract_reply(resp)


def run_demo():
    """Main demo function.

    Cycles through every agent: switches perspective, steps the simulation,
    takes a screenshot, builds the grounded observation and queries the VLLM.

    Returns:
        True when a screenshot was captured for every agent, False otherwise.
        VLLM errors are printed but do not affect the result.
    """
    print("=" * 70)
    print("Multi-Agent VLLM Demo")
    print("=" * 70)
    print()

    # Create screenshot directory
    os.makedirs(SCREENSHOT_DIR, exist_ok=True)

    # Setup scene
    grid, fov_layer, agents, rat = setup_scene()

    total = len(agents)
    saved_paths = []
    failed_names = []

    # Cycle through each agent's perspective
    for i, agent in enumerate(agents):
        print(f"\n{'='*70}")
        print(f"Agent {i+1}/{total}: {agent.name} ({agent.description})")
        print(f"Position: {agent.pos}")
        print("=" * 70)

        # Switch to this agent's perspective
        switch_perspective(grid, fov_layer, agent)

        # Advance simulation
        mcrfpy.step(0.016)

        # Take screenshot
        screenshot_path = os.path.join(
            SCREENSHOT_DIR, f"{i}_{agent.name.lower()}_view.png")
        result = automation.screenshot(screenshot_path)

        if not result:
            print(f"ERROR: Failed to take screenshot for {agent.name}")
            failed_names.append(agent.name)
            continue

        saved_paths.append(screenshot_path)
        file_size = os.path.getsize(screenshot_path)
        print(f"Screenshot: {screenshot_path} ({file_size} bytes)")

        # Get visible entities for this agent
        visible = get_visible_entities(grid, agent, agents, rat)
        grounded_text = build_grounded_prompt(visible)
        print(f"Grounded observations: {grounded_text}")

        # Query VLLM
        print(f"\nQuerying VLLM for {agent.name}...")
        print("-" * 50)

        response = query_agent(agent, screenshot_path, grounded_text)
        print(f"\n{agent.name}'s Response:\n{response}")
        print()

    print("\n" + "=" * 70)
    print("Multi-Agent Demo Complete")
    print("=" * 70)
    print(f"\nScreenshots saved to: {SCREENSHOT_DIR}/")
    # Only list files that were actually written.
    for path in saved_paths:
        print(f"  - {os.path.basename(path)}")
    if failed_names:
        print(f"\nScreenshots failed for: {', '.join(failed_names)}")

    return not failed_names


# Main execution
if __name__ == "__main__":
    try:
        success = run_demo()
        if success:
            print("\nPASS")
            sys.exit(0)
        else:
            print("\nFAIL")
            sys.exit(1)
    except Exception as e:
        print(f"\nError: {e}")
        traceback.print_exc()
        sys.exit(1)