def llm_chat_completion(messages: list):
    """Send a chat-completion request to the local VLLM server.

    Posts ``{'messages': messages}`` as JSON to ``VLLM_URL`` with a
    60-second timeout.

    Args:
        messages: List of chat message dicts (``role`` / ``content``), as
            built elsewhere in this script.

    Returns:
        The decoded JSON object from the server on success. On failure --
        a transport error or timeout, a non-2xx HTTP status, a body that is
        not valid JSON, or a JSON body that is not an object -- a dict of
        the form ``{"error": "<description>"}``, so callers can keep using
        their ``"error" in resp`` check.
    """
    try:
        response = requests.post(VLLM_URL, json={'messages': messages}, timeout=60)
        if not response.ok:
            # Report 4xx/5xx replies as errors (keeping a slice of the body
            # for diagnosis) instead of handing callers a payload that has
            # no 'choices' in it.
            return {"error": f"HTTP {response.status_code}: {response.text[:500]}"}
        payload = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on requests releases whose
        # decode error is not a RequestException subclass.
        return {"error": str(e)}

    if not isinstance(payload, dict):
        # Callers use dict methods on the result; a bare list/string/number
        # would crash them, so report it as an error instead.
        return {"error": f"Unexpected response payload: {payload!r}"}
    return payload
roll = random.random() + if roll < 0.95: + return FLOOR_COMMON + elif roll < 0.99: + return FLOOR_SPECKLE1 + else: + return FLOOR_SPECKLE2 + +def setup_scene(): + """Create a dungeon scene with player agent and NPC rat.""" + print("Setting up scene...") + + # Create and set scene + mcrfpy.createScene("vllm_demo") + mcrfpy.setScene("vllm_demo") + ui = mcrfpy.sceneUI("vllm_demo") + + # Load the game texture (16x16 tiles from Crypt of Sokoban) + texture = mcrfpy.Texture("assets/kenney_TD_MR_IP.png", 16, 16) + + # Create grid: 1014px wide at position (5,5) + # Using 20x15 grid for a reasonable dungeon size + grid = mcrfpy.Grid( + grid_size=(20, 15), + texture=texture, + pos=(5, 5), + size=(1014, 700) + ) + grid.fill_color = mcrfpy.Color(20, 20, 30) + + # Set zoom factor to 2.0 for better visibility + grid.zoom = 2.0 + + ui.append(grid) + + # Set up floor tiles and walls with proper sprite distribution + for x in range(20): + for y in range(15): + point = grid.at(x, y) + # Create walls around the edges + if x == 0 or x == 19 or y == 0 or y == 14: + point.tilesprite = WALL_TILE + point.walkable = False + point.transparent = False # Walls block FOV + else: + # Floor inside with varied sprites + point.tilesprite = get_floor_tile() + point.walkable = True + point.transparent = True # Floors don't block FOV + + # Add some interior walls for interest - a room divider + for y in range(5, 10): + point = grid.at(10, y) + point.tilesprite = WALL_TILE + point.walkable = False + point.transparent = False + # Door opening + door = grid.at(10, 7) + door.tilesprite = get_floor_tile() + door.walkable = True + door.transparent = True + + # Create a ColorLayer for fog of war (z_index=10 to render on top) + fov_layer = grid.add_layer('color', z_index=10) + fov_layer.fill(mcrfpy.Color(0, 0, 0, 255)) # Start all black (unknown) + + # Create the player entity ("The Agent") + player = mcrfpy.Entity(grid_pos=(5, 7), texture=texture, sprite_index=PLAYER_SPRITE) + grid.entities.append(player) + 
def _relative_direction(dx, dy):
    """Name the compass direction for an offset, or "" for a zero offset.

    The axis with the larger magnitude wins; equal magnitudes give an
    intercardinal name such as "southeast".
    NOTE(review): assumes +x is east and +y is south (screen-down), the
    same convention the multi-agent demo uses -- confirm for this grid.
    """
    if dx == 0 and dy == 0:
        return ""
    ew = "east" if dx > 0 else "west"
    ns = "south" if dy > 0 else "north"
    if abs(dx) > abs(dy):
        return ew
    if abs(dy) > abs(dx):
        return ns
    return f"{ns}{ew}"


def build_grounded_prompt(grid, player, rat):
    """Build grounded observation text for the agent's current view.

    The rat is mentioned only when its tile is inside the grid's current
    field of view, and its direction is computed from the two entities'
    positions rather than assumed.

    Args:
        grid: Grid object; only ``grid.is_in_fov(x, y)`` is called.
        player: The agent entity; ``player.pos`` is the reference point.
        rat: The rat entity; ``rat.pos`` is the tile that is tested.

    Returns:
        A string of space-joined observation sentences, or
        "The area appears clear." when nothing was observed.
    """
    # Positions are truncated to integer tile coordinates.
    px, py = int(player.pos[0]), int(player.pos[1])
    rx, ry = int(rat.pos[0]), int(rat.pos[1])

    observations = []

    # Check what the agent can see
    if grid.is_in_fov(rx, ry):
        direction = _relative_direction(rx - px, ry - py)
        if direction:
            observations.append(f"You see a rat to the {direction}.")
        else:
            # Same tile: there is no direction to report.
            observations.append("You see a rat at your position.")

    # Could add more observations here:
    # - walls blocking path
    # - items on ground
    # - doors/exits

    if not observations:
        observations.append("The area appears clear.")

    return " ".join(observations)
bytes") + print() + + # Build grounded observations + grounded_text = build_grounded_prompt(grid, player, rat) + print(f"Grounded observations: {grounded_text}") + print() + + # Query 1: Ask VLLM to describe what it sees + print("-" * 40) + print("Query 1: Describe what you see") + print("-" * 40) + + system_prompt = """You are an AI agent in a roguelike dungeon game. You can see the game world through screenshots. +The view shows a top-down grid-based dungeon with tiles, walls, and creatures. +Your character is the humanoid figure. The dark areas are outside your field of vision. +Other creatures may be enemies or NPCs. Describe what you observe concisely.""" + + user_prompt = f"""Look at this game screenshot. {grounded_text} + +Describe what you see in the dungeon from your character's perspective. +Be specific about: +- Your position in the room +- Any creatures you can see +- The layout of walls and passages +- Areas obscured by fog of war (darkness)""" + + messages = [ + {"role": "system", "content": system_prompt}, + message_with_image(user_prompt, SCREENSHOT_PATH) + ] + + resp = llm_chat_completion(messages) + + if "error" in resp: + print(f"VLLM Error: {resp['error']}") + print("\nNote: The VLLM server may not be running or accessible.") + print("Screenshot is saved for manual inspection.") + description = "I can see a dungeon scene." + else: + description = resp.get('choices', [{}])[0].get('message', {}).get('content', 'No response') + print(f"\nVLLM Response:\n{description}") + print() + + # Query 2: Ask what action the agent would like to take + print("-" * 40) + print("Query 2: What would you like to do?") + print("-" * 40) + + messages.append({"role": "assistant", "content": description}) + messages.append({ + "role": "user", + "content": f"""Based on what you see, what action would you like to take? 
+ +Available actions: +- GO NORTH / SOUTH / EAST / WEST - move in that direction +- WAIT - stay in place and observe +- LOOK - examine your surroundings more carefully + +{grounded_text} + +State your reasoning briefly, then declare your action clearly (e.g., "Action: GO EAST").""" + }) + + resp = llm_chat_completion(messages) + + if "error" in resp: + print(f"VLLM Error: {resp['error']}") + else: + action = resp.get('choices', [{}])[0].get('message', {}).get('content', 'No response') + print(f"\nVLLM Response:\n{action}") + print() + + print("=" * 60) + print("Demo Complete") + print("=" * 60) + print(f"\nScreenshot preserved at: {SCREENSHOT_PATH}") + print("Grid settings: zoom=2.0, FOV radius=8, perspective rendering enabled") + + return True + +# Main execution +if __name__ == "__main__": + try: + success = run_demo() + if success: + print("\nPASS") + sys.exit(0) + else: + print("\nFAIL") + sys.exit(1) + except Exception as e: + print(f"\nError: {e}") + import traceback + traceback.print_exc() + sys.exit(1) diff --git a/tests/vllm_demo/1_multi_agent_demo.py b/tests/vllm_demo/1_multi_agent_demo.py new file mode 100644 index 0000000..50e06fb --- /dev/null +++ b/tests/vllm_demo/1_multi_agent_demo.py @@ -0,0 +1,346 @@ +#!/usr/bin/env python3 +""" +Multi-Agent VLLM Demo for McRogueFace +===================================== + +Demonstrates cycling through multiple agent perspectives, +each with their own FOV and grounded observations. + +Three agents: +- Wizard (left side) - can see the rat but not the other agents +- Blacksmith (right side) - can see the knight, rat, and the wall +- Knight (right side) - can see the blacksmith, rat, and the wall + +Each agent gets their own screenshot and VLLM query. 
class Agent:
    """Pairs a grid entity with the name and description used in prompts.

    Attributes:
        name: Short label for the agent, e.g. "Wizard".
        entity: The wrapped entity object; it must expose ``pos``.
        description: Noun phrase for prompts, e.g. "a wizard".
    """

    def __init__(self, name, entity, description):
        self.name, self.entity, self.description = name, entity, description

    @property
    def pos(self):
        """Entity position as an ``(x, y)`` tuple of ints (via ``int()``)."""
        raw = self.entity.pos
        col = int(raw[0])
        row = int(raw[1])
        return (col, row)
mcrfpy.setScene("multi_agent_demo") + ui = mcrfpy.sceneUI("multi_agent_demo") + + # Load the game texture + texture = mcrfpy.Texture("assets/kenney_TD_MR_IP.png", 16, 16) + + # Create grid + grid = mcrfpy.Grid( + grid_size=(25, 15), + texture=texture, + pos=(5, 5), + size=(1014, 700) + ) + grid.fill_color = mcrfpy.Color(20, 20, 30) + grid.zoom = 2.0 + ui.append(grid) + + # Set up floor tiles and walls + for x in range(25): + for y in range(15): + point = grid.at(x, y) + if x == 0 or x == 24 or y == 0 or y == 14: + point.tilesprite = WALL_TILE + point.walkable = False + point.transparent = False + else: + point.tilesprite = get_floor_tile() + point.walkable = True + point.transparent = True + + # Add a wall divider in the middle (blocks wizard's view of right side) + for y in range(3, 12): + point = grid.at(10, y) + point.tilesprite = WALL_TILE + point.walkable = False + point.transparent = False + + # Door opening in the wall + door = grid.at(10, 7) + door.tilesprite = get_floor_tile() + door.walkable = True + door.transparent = True + + # Create FOV layer for fog of war + fov_layer = grid.add_layer('color', z_index=10) + fov_layer.fill(mcrfpy.Color(0, 0, 0, 255)) + + # Create agents + agents = [] + + # Wizard on the left side + wizard_entity = mcrfpy.Entity(grid_pos=(4, 7), texture=texture, sprite_index=WIZARD_SPRITE) + grid.entities.append(wizard_entity) + agents.append(Agent("Wizard", wizard_entity, "a wizard")) + + # Blacksmith on the right side (upper) + blacksmith_entity = mcrfpy.Entity(grid_pos=(18, 5), texture=texture, sprite_index=BLACKSMITH_SPRITE) + grid.entities.append(blacksmith_entity) + agents.append(Agent("Blacksmith", blacksmith_entity, "a blacksmith")) + + # Knight on the right side (lower) + knight_entity = mcrfpy.Entity(grid_pos=(18, 10), texture=texture, sprite_index=KNIGHT_SPRITE) + grid.entities.append(knight_entity) + agents.append(Agent("Knight", knight_entity, "a knight")) + + # Rat in the middle-right area (visible to blacksmith and 
def get_visible_entities(grid, observer, all_agents, rat):
    """Describe the rat and the other agents that lie inside the current FOV.

    Args:
        grid: Grid object; only ``grid.is_in_fov(x, y)`` is called.
            NOTE(review): presumably reflects the FOV most recently
            computed for ``observer`` -- confirm against the caller.
        observer: Agent whose ``pos`` is the reference point and whose
            ``name`` is used to skip itself in ``all_agents``.
        all_agents: Iterable of agents exposing ``name``, ``pos`` and
            ``description``.
        rat: Entity exposing ``pos``.

    Returns:
        List of phrases such as ``"a rat to the east"``: the rat first (if
        visible), then visible agents in ``all_agents`` order. Something
        on the observer's own tile is phrased ``"<description> here"``.
    """
    ox, oy = observer.pos
    rx, ry = int(rat.pos[0]), int(rat.pos[1])

    # (description, x, y) for everything other than the observer itself
    candidates = [("a rat", rx, ry)]
    for agent in all_agents:
        if agent.name == observer.name:
            continue
        ax, ay = agent.pos
        candidates.append((agent.description, ax, ay))

    visible = []
    for description, x, y in candidates:
        if not grid.is_in_fov(x, y):
            continue
        direction = get_direction(ox, oy, x, y)
        if direction == "here":
            # Same tile as the observer: "to the here" would read wrong.
            visible.append(f"{description} here")
        else:
            visible.append(f"{description} to the {direction}")

    return visible


def get_direction(from_x, from_y, to_x, to_y):
    """Name the compass direction from one grid point to another.

    Positive x offsets are "east" and positive y offsets are "south".
    The axis with the larger absolute offset wins; when both offsets have
    the same non-zero magnitude the result is intercardinal, e.g.
    "southeast".

    Returns:
        "north", "south", "east", "west", one of the four intercardinal
        names, or "here" when both points are the same (previously this
        case was reported as "northwest").
    """
    dx = to_x - from_x
    dy = to_y - from_y

    if dx == 0 and dy == 0:
        return "here"

    ew = "east" if dx > 0 else "west"
    ns = "south" if dy > 0 else "north"

    if abs(dx) > abs(dy):
        return ew
    if abs(dy) > abs(dx):
        return ns
    # Exact diagonal
    return f"{ns}{ew}"
def query_agent(agent, screenshot_path, grounded_text):
    """Query VLLM for a single agent's perspective.

    Builds a system prompt and an image-bearing user prompt for ``agent``,
    sends them through ``llm_chat_completion`` and returns the reply text.

    Args:
        agent: Object exposing ``description`` (e.g. "a wizard"), which is
            interpolated into both prompts.
        screenshot_path: Path of the screenshot to embed in the user
            message via ``message_with_image``.
        grounded_text: Observation sentence(s) appended to the first line
            of the user prompt.

    Returns:
        The model's reply text; ``"VLLM Error: ..."`` when the response
        carries an ``"error"`` key; or ``"No response"`` when the reply
        has no usable ``choices[0].message.content``.
    """
    system_prompt = f"""You are {agent.description} in a roguelike dungeon game. You can see the game world through screenshots.
The view shows a top-down grid-based dungeon. Your character is centered in the view.
The dark areas are outside your field of vision. Other figures may be allies, enemies, or NPCs.
Describe what you observe concisely and decide on an action."""

    user_prompt = f"""Look at this game screenshot from your perspective as {agent.description}. {grounded_text}

Describe what you see briefly, then choose an action:
- GO NORTH / SOUTH / EAST / WEST
- WAIT
- LOOK

State your reasoning in 1-2 sentences, then declare: "Action: " """

    messages = [
        {"role": "system", "content": system_prompt},
        message_with_image(user_prompt, screenshot_path)
    ]

    resp = llm_chat_completion(messages)

    if "error" in resp:
        return f"VLLM Error: {resp['error']}"

    # Walk choices[0].message.content defensively: an empty 'choices' list
    # or a null 'message'/'content' must yield the fallback text rather
    # than raising IndexError/AttributeError or returning None.
    choices = resp.get('choices')
    first = choices[0] if isinstance(choices, list) and choices else {}
    message = first.get('message') if isinstance(first, dict) else None
    content = message.get('content') if isinstance(message, dict) else None
    return 'No response' if content is None else content
switch_perspective(grid, fov_layer, agent) + + # Advance simulation + mcrfpy.step(0.016) + + # Take screenshot + screenshot_path = os.path.join(SCREENSHOT_DIR, f"{i}_{agent.name.lower()}_view.png") + result = automation.screenshot(screenshot_path) + if not result: + print(f"ERROR: Failed to take screenshot for {agent.name}") + continue + + file_size = os.path.getsize(screenshot_path) + print(f"Screenshot: {screenshot_path} ({file_size} bytes)") + + # Get visible entities for this agent + visible = get_visible_entities(grid, agent, agents, rat) + grounded_text = build_grounded_prompt(visible) + print(f"Grounded observations: {grounded_text}") + + # Query VLLM + print(f"\nQuerying VLLM for {agent.name}...") + print("-" * 50) + response = query_agent(agent, screenshot_path, grounded_text) + print(f"\n{agent.name}'s Response:\n{response}") + print() + + print("\n" + "=" * 70) + print("Multi-Agent Demo Complete") + print("=" * 70) + print(f"\nScreenshots saved to: {SCREENSHOT_DIR}/") + for i, agent in enumerate(agents): + print(f" - {i}_{agent.name.lower()}_view.png") + + return True + + +# Main execution +if __name__ == "__main__": + try: + success = run_demo() + if success: + print("\nPASS") + sys.exit(0) + else: + print("\nFAIL") + sys.exit(1) + except Exception as e: + print(f"\nError: {e}") + import traceback + traceback.print_exc() + sys.exit(1)