Deterministic Text Descriptions From Room Graph #155

New Issue

john · 2025-12-01T16:00:33Z

john commented

2025-12-01 16:00:33 +00:00

Deterministic Text Descriptions From Room Graph

Python-side implementation of interactive fiction concepts for LLM agent environments.

Overview

A data structure representing room connectivity and available objects serves dual purposes:

Generate 2D tilemaps - Rooms define bounds, doors define connections, fill grid with appropriate tiles
Provide text descriptions - Deterministic prose for LLM context, built from each room's display name, its visible entities, and its exits: "You are in the kitchen. You see a cat. Exits: north (the hallway), east (the pantry, locked)."

This bridges the visual game state and the textual context that LLM agents consume.

Core Data Structures

@dataclass
class Room:
    """A named rectangular area of the map.

    Attributes:
        name: Internal identifier, e.g. "kitchen" or "guard_room".
        display_name: Phrase used in generated prose, e.g. "the kitchen"
            or "a dimly lit guard room".
        bounds: ``(x, y, width, height)`` in tile coordinates.
        properties: Free-form attributes, e.g.
            ``{"lit": True, "temperature": "warm"}``.
        description_template: Format string such as
            "A {temperature} room with {features}.".
    """

    name: str
    display_name: str
    bounds: Tuple[int, int, int, int]
    properties: Dict[str, Any]
    description_template: str
    
@dataclass
class Door:
    """A connection between two rooms, placed on a single tile.

    The direction is stored from ``room_a``'s point of view only.

    Attributes:
        room_a: Name of the first room.
        room_b: Name of the second room.
        position: Tile position of the door.
        direction_from_a: Direction of the door as seen from ``room_a``
            ("north", "east", etc.).
        locked: Whether the door is locked; defaults to ``False``.
        key_id: Optional key identifier; defaults to ``None``.
            NOTE(review): presumably the key that unlocks this door - confirm.
    """

    room_a: str
    room_b: str
    position: Tuple[int, int]
    direction_from_a: str
    locked: bool = False
    key_id: Optional[str] = None
    
@dataclass
class WorldObject:
    """An item that exists in the world, either placed on a tile or carried.

    Attributes:
        name: Internal identifier, e.g. "brass_key".
        display_name: Phrase used in generated prose, e.g. "a brass key".
        room: Name of the room that contains the object.
        position: Tile position, or ``None`` when the object is carried.
        affordances: Tags describing what can be done with the object,
            e.g. ``["takeable", "unlocks:pantry_door"]``.
        description: Longer descriptive text, e.g.
            "A tarnished brass key with ornate handle.".
    """

    name: str
    display_name: str
    room: str
    # Optional because a carried object has no tile position.
    position: Optional[Tuple[int, int]]
    affordances: List[str]
    description: str

class WorldGraph:
    """Container for the rooms, doors and objects that make up a world.

    Attributes:
        rooms: Rooms keyed by name.
        doors: All doors in the world.
        objects: World objects keyed by name.
    """

    rooms: Dict[str, Room]
    doors: List[Door]
    objects: Dict[str, WorldObject]

    def __init__(
        self,
        rooms: Optional[Dict[str, Room]] = None,
        doors: Optional[List[Door]] = None,
        objects: Optional[Dict[str, WorldObject]] = None,
    ) -> None:
        """Create a world graph, empty unless initial contents are given.

        Bare class-level annotations do not create attributes, so without
        this initialiser any access to ``self.rooms`` raises AttributeError.

        Args:
            rooms: Initial rooms keyed by name; a new empty dict if omitted.
            doors: Initial doors; a new empty list if omitted.
            objects: Initial objects keyed by name; a new empty dict if
                omitted.
        """
        # Fresh containers per instance: never share mutable defaults.
        self.rooms = {} if rooms is None else rooms
        self.doors = [] if doors is None else doors
        self.objects = {} if objects is None else objects

Text Generation

Room Description

def describe_room(self, room_name: str, observer_entity) -> str:
    """Build the prose description of one room for an observer.

    The text is made of up to three parts in a fixed order: the location
    sentence, the visible entities (if any), and the exits.

    Args:
        room_name: Key into ``self.rooms``.
        observer_entity: Passed to ``self.get_visible_entities`` to decide
            which entities are listed (the original notes this is
            filtered by FOV).

    Returns:
        The parts joined by single spaces.

    Raises:
        KeyError: If ``room_name`` is not in ``self.rooms``.
    """
    room = self.rooms[room_name]
    sentences = [f"You are in {room.display_name}."]

    visible = self.get_visible_entities(room, observer_entity)
    if visible:
        entity_text = self.describe_entities(visible)
        # describe_entities returns "" when none of the entities fall into
        # a group it reports; skip it so the output has no double space.
        if entity_text:
            sentences.append(entity_text)

    sentences.append(self.describe_exits(self.get_exits(room_name)))
    return " ".join(sentences)

Exit Description

# Reverse of each known exit direction. A Door only stores its direction as
# seen from room_a, so the room_b side has to be derived from this table.
_OPPOSITE_DIRECTION = {
    "north": "south",
    "south": "north",
    "east": "west",
    "west": "east",
    "northeast": "southwest",
    "southwest": "northeast",
    "northwest": "southeast",
    "southeast": "northwest",
    "up": "down",
    "down": "up",
    "in": "out",
    "out": "in",
}


def describe_exits(self, exits: List[Door], from_room: Optional[str] = None) -> str:
    """Render a list of doors as a single "Exits: ..." sentence.

    Args:
        exits: Doors to list, in the order they should appear.
        from_room: Name of the room the observer is standing in. When it
            matches a door's ``room_b``, the door is described from that
            side: the direction is reversed and the destination is
            ``room_a``. When omitted, every door is described from its
            ``room_a`` side, which is wrong for doors entered from
            ``room_b``, so callers that know the current room should pass it.

    Returns:
        "There are no visible exits." for an empty list, otherwise
        "Exits: <direction> (<destination>[, locked]), ...".

    Raises:
        KeyError: If a destination room name is not in ``self.rooms``.
    """
    if not exits:
        return "There are no visible exits."

    parts = []
    for door in exits:
        seen_from_b = (
            from_room is not None
            and from_room == door.room_b
            and door.room_a != door.room_b
        )
        if seen_from_b:
            # Unknown direction names have no defined reverse; they are
            # passed through unchanged rather than raising.
            direction = _OPPOSITE_DIRECTION.get(
                door.direction_from_a, door.direction_from_a
            )
            dest_name = door.room_a
        else:
            direction = door.direction_from_a
            dest_name = door.room_b

        dest = self.rooms[dest_name].display_name
        suffix = ", locked" if door.locked else ""
        parts.append(f"{direction} ({dest}{suffix})")

    return "Exits: " + ", ".join(parts) + "."

Entity Description

def describe_entities(self, entities: List) -> str:
    """Summarise entities as up to three sentences, grouped by kind.

    Groups are reported in a fixed order: LLM agents, then NPCs, then
    objects. An entity is placed in every group whose flag it sets.

    Args:
        entities: Entities exposing ``is_llm_agent``, ``is_npc`` and
            ``is_object`` flags.

    Returns:
        The group sentences joined by ". " with a trailing ".", or ""
        when no group has any members.
    """
    def members(flag):
        return [ent for ent in entities if getattr(ent, flag)]

    agents = members("is_llm_agent")
    npcs = members("is_npc")
    objects = members("is_object")

    sentences = []
    if agents:
        verb = "is" if len(agents) == 1 else "are"
        sentences.append(f"{self.list_names(agents)} {verb} here")
    if npcs:
        sentences.append(f"You see {self.list_names(npcs)}")
    if objects:
        sentences.append(f"On the ground: {self.list_names(objects)}")

    if not sentences:
        return ""
    return ". ".join(sentences) + "."

Tilemap Generation

def generate_tilemap(self, grid: mcrfpy.Grid):
    """Fill grid tiles based on room definitions.

    Tiles are painted in layers, each overwriting the previous one: walls
    everywhere, then floor inside every room's bounds, then doors.

    Args:
        grid: Grid to paint. Its ``grid_size`` is read as
            ``(width, height)`` and cells are fetched with ``grid.at``.
            NOTE(review): assumes ``grid.at`` hands back a live reference
            to the cell, so one lookup can set both attributes - confirm.
    """
    def paint(pos, walkable, sprite):
        # Single lookup per tile instead of fetching the same cell twice.
        cell = grid.at(pos)
        cell.walkable = walkable
        cell.sprite_index = sprite

    # Read the size once rather than on every row.
    width, height = grid.grid_size[0], grid.grid_size[1]

    # 1. Fill all with wall
    for y in range(height):
        for x in range(width):
            paint((x, y), False, WALL_TILE)

    # 2. Carve out rooms
    for room in self.rooms.values():
        left, top, w, h = room.bounds
        for ry in range(top, top + h):
            for rx in range(left, left + w):
                paint((rx, ry), True, FLOOR_TILE)

    # 3. Place doors (a locked door blocks movement)
    for door in self.doors:
        dx, dy = door.position
        paint((dx, dy), not door.locked, DOOR_LOCKED if door.locked else DOOR_OPEN)

    # 4. Place objects
    for obj in self.objects.values():
        if obj.position:
            # TODO: add entity or modify tile; not implemented yet.
            pass

Affordance System

Objects and rooms expose affordances that LLMs can query or act upon:

# Catalogue of the standard affordance tags, mapping each tag to a short
# human-readable description of what it permits.
STANDARD_AFFORDANCES = {
    "takeable": "Can be picked up and carried",
    "pushable": "Can be pushed to adjacent tile",
    "openable": "Can be opened (containers, doors)",
    "lockable": "Can be locked/unlocked with appropriate key",
    "readable": "Has text content that can be read",
    "edible": "Can be consumed",
    "speakable": "Can engage in dialogue (NPCs)",
}

def get_available_actions(self, entity, room) -> List[str]:
    """What can this entity do right now?

    Args:
        entity: The acting entity; only its ``can_speak`` flag is read.
        room: The entity's current room, given either as a room name or
            as an object with a ``name`` attribute.

    Returns:
        Action strings in a fixed order: "LOOK" and "WAIT", one
        "GO <DIRECTION>" per unlocked exit, "TAKE"/"PUSH" entries per
        object, then the speech actions if the entity can speak.
    """
    actions = ["LOOK", "WAIT"]

    # get_exits is called with a room *name* elsewhere (describe_room),
    # so normalise here instead of forwarding a Room object.
    room_name = room if isinstance(room, str) else room.name

    # Movement
    # NOTE(review): direction_from_a is relative to the door's room_a; an
    # entity standing in room_b would need the reverse direction - confirm
    # how get_exits orients the doors it returns.
    for door in self.get_exits(room_name):
        if not door.locked:
            actions.append(f"GO {door.direction_from_a.upper()}")

    # Object interactions
    # NOTE(review): get_objects_in_room receives the argument exactly as
    # passed in; whether it expects a name or a Room is not visible here.
    for obj in self.get_objects_in_room(room):
        if "takeable" in obj.affordances:
            actions.append(f"TAKE {obj.name}")
        if "pushable" in obj.affordances:
            actions.append(f"PUSH {obj.name} <direction>")

    # Speech (if LLM agent)
    if entity.can_speak:
        actions.append("ANNOUNCE <message>")
        actions.append("SPEAK <message>")

    return actions

Integration with VLM Pipeline

When preparing input for an LLM agent:

def prepare_agent_context(self, agent, grid) -> dict:
    """Assemble the inputs handed to an LLM agent for its next turn.

    Args:
        agent: The agent; its ``x``/``y`` locate the current room and its
            ``message_history`` supplies the recent messages.
        grid: Grid passed to ``render_agent_perspective`` for the image.

    Returns:
        A dict with the keys "image", "location", "inventory",
        "available_actions" and "recent_messages" (the last five entries
        of the agent's message history).
    """
    current_room = self.room_at(agent.x, agent.y)

    context = {}
    context["image"] = render_agent_perspective(agent, grid)  # screenshot
    context["location"] = self.describe_room(current_room.name, agent)
    context["inventory"] = self.describe_inventory(agent)
    context["available_actions"] = self.get_available_actions(agent, current_room)
    context["recent_messages"] = agent.message_history[-5:]
    return context

Consistency Guarantees

The text description must be deterministic given the same world state:

Same room + same entities + same observer = same description
No randomized adjectives or varying prose
Facilitates replay and debugging

Parent Issue

Part of #154 - Grounded Multi-Agent Testbed

# Deterministic Text Descriptions From Room Graph **Python-side implementation of interactive fiction concepts for LLM agent environments.** ## Overview A data structure representing room connectivity and available objects serves dual purposes: 1. **Generate 2D tilemaps** - Rooms define bounds, doors define connections, fill grid with appropriate tiles 2. **Provide text descriptions** - Deterministic prose for LLM context: "You are in the kitchen. There is a cat here. Exits: north (hallway), east (pantry, locked)." This bridges the visual game state and the textual context that LLM agents consume. ## Core Data Structures ```python @dataclass class Room: name: str # "kitchen", "guard_room" display_name: str # "the kitchen", "a dimly lit guard room" bounds: Tuple[int,int,int,int] # (x, y, width, height) in tile coords properties: Dict[str, Any] # {"lit": True, "temperature": "warm"} description_template: str # "A {temperature} room with {features}." @dataclass class Door: room_a: str # room name room_b: str # room name position: Tuple[int, int] # tile position direction_from_a: str # "north", "east", etc. locked: bool = False key_id: Optional[str] = None @dataclass class WorldObject: name: str # "brass_key" display_name: str # "a brass key" room: str # which room contains it position: Tuple[int, int] # tile position (or None if carried) affordances: List[str] # ["takeable", "unlocks:pantry_door"] description: str # "A tarnished brass key with ornate handle." class WorldGraph: rooms: Dict[str, Room] doors: List[Door] objects: Dict[str, WorldObject] ``` ## Text Generation ### Room Description ```python def describe_room(self, room_name: str, observer_entity) -> str: room = self.rooms[room_name] # Base description text = f"You are in {room.display_name}. 
" # Visible entities (filtered by FOV) visible = self.get_visible_entities(room, observer_entity) if visible: text += self.describe_entities(visible) + " " # Exits exits = self.get_exits(room_name) text += self.describe_exits(exits) return text ``` ### Exit Description ```python def describe_exits(self, exits: List[Door]) -> str: if not exits: return "There are no visible exits." parts = [] for door in exits: direction = door.direction_from_a dest = self.rooms[door.room_b].display_name if door.locked: parts.append(f"{direction} ({dest}, locked)") else: parts.append(f"{direction} ({dest})") return "Exits: " + ", ".join(parts) + "." ``` ### Entity Description ```python def describe_entities(self, entities: List) -> str: # Group by type agents = [e for e in entities if e.is_llm_agent] npcs = [e for e in entities if e.is_npc] objects = [e for e in entities if e.is_object] parts = [] if agents: parts.append(f"{self.list_names(agents)} {'is' if len(agents)==1 else 'are'} here") if npcs: parts.append(f"You see {self.list_names(npcs)}") if objects: parts.append(f"On the ground: {self.list_names(objects)}") return ". ".join(parts) + "." if parts else "" ``` ## Tilemap Generation ```python def generate_tilemap(self, grid: mcrfpy.Grid): """Fill grid tiles based on room definitions.""" # 1. Fill all with wall for y in range(grid.grid_size[1]): for x in range(grid.grid_size[0]): grid.at((x, y)).walkable = False grid.at((x, y)).sprite_index = WALL_TILE # 2. Carve out rooms for room in self.rooms.values(): x, y, w, h = room.bounds for ry in range(y, y + h): for rx in range(x, x + w): grid.at((rx, ry)).walkable = True grid.at((rx, ry)).sprite_index = FLOOR_TILE # 3. Place doors for door in self.doors: dx, dy = door.position grid.at((dx, dy)).walkable = not door.locked grid.at((dx, dy)).sprite_index = DOOR_LOCKED if door.locked else DOOR_OPEN # 4. 
Place objects for obj in self.objects.values(): if obj.position: # Add entity or modify tile pass ``` ## Affordance System Objects and rooms expose affordances that LLMs can query or act upon: ```python STANDARD_AFFORDANCES = { "takeable": "Can be picked up and carried", "pushable": "Can be pushed to adjacent tile", "openable": "Can be opened (containers, doors)", "lockable": "Can be locked/unlocked with appropriate key", "readable": "Has text content that can be read", "edible": "Can be consumed", "speakable": "Can engage in dialogue (NPCs)", } def get_available_actions(self, entity, room) -> List[str]: """What can this entity do right now?""" actions = ["LOOK", "WAIT"] # Movement for door in self.get_exits(room): if not door.locked: actions.append(f"GO {door.direction_from_a.upper()}") # Object interactions for obj in self.get_objects_in_room(room): if "takeable" in obj.affordances: actions.append(f"TAKE {obj.name}") if "pushable" in obj.affordances: actions.append(f"PUSH {obj.name} <direction>") # Speech (if LLM agent) if entity.can_speak: actions.append("ANNOUNCE <message>") actions.append("SPEAK <message>") return actions ``` ## Integration with VLM Pipeline When preparing input for an LLM agent: ```python def prepare_agent_context(self, agent, grid) -> dict: room = self.room_at(agent.x, agent.y) return { "image": render_agent_perspective(agent, grid), # screenshot "location": self.describe_room(room.name, agent), "inventory": self.describe_inventory(agent), "available_actions": self.get_available_actions(agent, room), "recent_messages": agent.message_history[-5:], } ``` ## Consistency Guarantees The text description must be **deterministic** given the same world state: - Same room + same entities + same observer = same description - No randomized adjectives or varying prose - Facilitates replay and debugging ## Parent Issue Part of #154 - Grounded Multi-Agent Testbed

john referenced this issue

2025-12-01 16:01:09 +00:00

Grounded Multi-Agent Testbed: LLM Agents in Discrete Simulated Environments #154

john added the

Minor Feature

system:python-binding

priority:tier1-active

labels 2025-12-01 16:15:28 +00:00

Sign in to join this conversation.