feat: Add action parser and executor for LLM agent actions

ActionParser: Extracts structured actions from LLM text responses - Regex patterns for GO, WAIT, LOOK, TAKE, DROP, PUSH, USE, etc. - Direction normalization (N→NORTH, UP→NORTH) - Handles "Action: GO EAST" and fallback patterns - 12 unit tests covering edge cases ActionExecutor: Executes parsed actions in the game world - Movement with collision detection (walls, entities) - Boundary checking - ActionResult with path data for animation replay 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-14 12:53:39 -05:00 · 2025-12-14 12:53:39 -05:00 · 2890528e21
parent e45760c2ac
commit 2890528e21
3 changed files with 468 additions and 0 deletions
--- a/tests/vllm_demo/action_executor.py
+++ b/tests/vllm_demo/action_executor.py
@ -0,0 +1,136 @@
 """
 Action Executor for McRogueFace
 ===============================
 Executes parsed actions in the game world.
 Handles movement, collision detection, and action results.
 """
 from dataclasses import dataclass
 from typing import Optional, List, Tuple
 from action_parser import Action, ActionType
@dataclass
 class ActionResult:
    """Outcome of executing one action, as returned by ActionExecutor.execute."""
    # True when the action was carried out, False when it was rejected.
    success: bool
    # Human-readable description of what happened or why the action failed.
    message: str
    # (x, y) cell the agent occupies after a successful move. Only the GO
    # handler sets this; every other handler leaves it as None.
    new_position: Optional[Tuple[int, int]] = None
    # Cells visited by the move, starting cell first. Only the GO handler sets this.
    path: Optional[List[Tuple[int, int]]] = None  # For animation replay
 class ActionExecutor:
    """Execute actions in the McRogueFace game world.
    Only GO changes game state (it moves the agent's entity). WAIT and LOOK
    always succeed with a descriptive message, TAKE and DROP are placeholders
    that always fail, and any action type without a handler is reported as
    not yet implemented.
    """
    # Unit (dx, dy) step for each canonical direction name; NORTH decreases y.
    DIRECTION_VECTORS = {
        'NORTH': (0, -1),
        'SOUTH': (0, 1),
        'EAST': (1, 0),
        'WEST': (-1, 0),
    }
    def __init__(self, grid):
        """
        Initialize executor with a grid reference.
        Args:
            grid: mcrfpy.Grid instance. This class reads its ``grid_size``
                and ``entities`` attributes and calls ``at(x, y)`` on it.
        """
        self.grid = grid
    def execute(self, agent, action: Action) -> ActionResult:
        """
        Execute an action for an agent.
        Args:
            agent: Agent wrapper with .entity attribute
            action: Parsed Action to execute
        Returns:
            ActionResult with success status and message
        """
        handlers = {
            ActionType.GO: self._execute_go,
            ActionType.WAIT: self._execute_wait,
            ActionType.LOOK: self._execute_look,
            ActionType.TAKE: self._execute_take,
            ActionType.DROP: self._execute_drop,
            ActionType.INVALID: self._execute_invalid,
        }
        # Types with no dedicated handler (PUSH, USE, OPEN, ...) fall back to
        # a "not yet implemented" failure result instead of raising.
        handler = handlers.get(action.type, self._execute_unimplemented)
        return handler(agent, action)
    def _execute_go(self, agent, action: Action) -> ActionResult:
        """
        Move the agent's entity one cell in the requested direction.
        The move is rejected, in this order, when no direction is given, the
        direction is not a key of DIRECTION_VECTORS, the target cell is
        outside the grid, the target cell is not walkable, or another entity
        already occupies the target cell.
        Returns:
            On success, an ActionResult carrying the new position and a
            two-cell path (start, end); otherwise a failed ActionResult.
        """
        if not action.args or not action.args[0]:
            return ActionResult(False, "No direction specified")
        direction = action.args[0]
        if direction not in self.DIRECTION_VECTORS:
            return ActionResult(False, f"Invalid direction: {direction}")
        dx, dy = self.DIRECTION_VECTORS[direction]
        # Get current position (truncated to integer cell coordinates)
        current_x, current_y = int(agent.entity.pos[0]), int(agent.entity.pos[1])
        new_x, new_y = current_x + dx, current_y + dy
        # Check bounds
        grid_w, grid_h = self.grid.grid_size
        if not (0 <= new_x < grid_w and 0 <= new_y < grid_h):
            return ActionResult(False, f"Cannot go {direction} - edge of map")
        # Check walkability
        target_cell = self.grid.at(new_x, new_y)
        if not target_cell.walkable:
            return ActionResult(False, f"Cannot go {direction} - path blocked")
        # Check for entity collision (optional - depends on game rules)
        for entity in self.grid.entities:
            if entity is agent.entity:
                continue
            ex, ey = int(entity.pos[0]), int(entity.pos[1])
            if ex == new_x and ey == new_y:
                return ActionResult(False, f"Cannot go {direction} - someone is there")
        # Execute movement
        agent.entity.pos = (new_x, new_y)
        return ActionResult(
            success=True,
            message=f"Moved {direction.lower()} to ({new_x}, {new_y})",
            new_position=(new_x, new_y),
            path=[(current_x, current_y), (new_x, new_y)]
        )
    def _execute_wait(self, agent, action: Action) -> ActionResult:
        """Execute wait action (no-op that always succeeds)."""
        return ActionResult(True, "Waited and observed surroundings")
    def _execute_look(self, agent, action: Action) -> ActionResult:
        """Execute look action; names the target in the message when one was parsed."""
        target = action.args[0] if action.args else None
        if target:
            return ActionResult(True, f"Examined {target} closely")
        return ActionResult(True, "Looked around carefully")
    def _execute_take(self, agent, action: Action) -> ActionResult:
        """Execute take action (placeholder that always fails)."""
        item = action.args[0] if action.args else "unknown"
        # TODO: Implement inventory system
        return ActionResult(False, f"Cannot take {item} - not implemented yet")
    def _execute_drop(self, agent, action: Action) -> ActionResult:
        """Execute drop action (placeholder that always fails)."""
        item = action.args[0] if action.args else "unknown"
        return ActionResult(False, f"Cannot drop {item} - not implemented yet")
    def _execute_invalid(self, agent, action: Action) -> ActionResult:
        """Handle invalid/unparseable action."""
        # An INVALID action built without args (Action.args defaults to an
        # empty tuple) must yield a failure result, not an IndexError.
        detail = action.args[0] if action.args else "unknown"
        return ActionResult(False, f"Could not understand action: {detail}")
    def _execute_unimplemented(self, agent, action: Action) -> ActionResult:
        """Handle action types that have no handler in execute()."""
        return ActionResult(False, f"Action {action.type.value} not yet implemented")
--- a/tests/vllm_demo/action_parser.py
+++ b/tests/vllm_demo/action_parser.py
@ -0,0 +1,118 @@
 """
 Action Parser for LLM Agent Responses
 =====================================
 Extracts structured actions from free-form LLM text responses.
 Handles variations like "Action: GO EAST", "I'll go east", "GO E", etc.
 """
 import re
 from dataclasses import dataclass
 from typing import Optional, Tuple, Any
 from enum import Enum
 class ActionType(Enum):
    """Kinds of action a parsed LLM response can map to.
    Each member's value is its own name as an upper-case string.
    """
    GO = "GO"
    WAIT = "WAIT"
    LOOK = "LOOK"
    TAKE = "TAKE"
    DROP = "DROP"
    PUSH = "PUSH"
    USE = "USE"
    OPEN = "OPEN"
    CLOSE = "CLOSE"
    ANNOUNCE = "ANNOUNCE"
    SPEAK = "SPEAK"
    # Produced by ActionParser.parse when no pattern matches the response.
    INVALID = "INVALID"
@dataclass
 class Action:
    """A parsed action: its type plus any arguments extracted from the text."""
    type: ActionType
    # Captured arguments in pattern-group order; an optional group that did
    # not match is stored as None. For INVALID actions the parser stores the
    # first 100 characters of the raw response here.
    args: Tuple[Any, ...] = ()
    # Text consumed by the matching pattern (taken from the upper-cased
    # response); empty when nothing matched.
    raw_match: str = ""
 class ActionParser:
    """Parse LLM responses into structured actions.
    The response is upper-cased before matching, so every captured argument
    (item names, quoted speech) comes back in upper case.
    """
    # Direction normalization
    DIRECTIONS = {
        'N': 'NORTH', 'S': 'SOUTH', 'E': 'EAST', 'W': 'WEST',
        'NORTH': 'NORTH', 'SOUTH': 'SOUTH', 'EAST': 'EAST', 'WEST': 'WEST',
        'UP': 'NORTH', 'DOWN': 'SOUTH', 'LEFT': 'WEST', 'RIGHT': 'EAST',
    }
    # Patterns ordered by specificity (most specific first).
    # Each entry is (action type, regex, number of capture groups). Patterns
    # are tried in list order and the first one that matches anywhere in the
    # text wins, regardless of where in the text it matched.
    PATTERNS = [
        # Explicit "Action: X" format (preferred)
        (ActionType.GO, r'Action:\s*GO\s+(NORTH|SOUTH|EAST|WEST|N|S|E|W)\b', 1),
        (ActionType.WAIT, r'Action:\s*WAIT\b', 0),
        (ActionType.LOOK, r'Action:\s*LOOK(?:\s+AT\s+(\w+))?\b', 1),
        (ActionType.TAKE, r'Action:\s*TAKE\s+(\w+)', 1),
        (ActionType.DROP, r'Action:\s*DROP\s+(\w+)', 1),
        # The trailing \b stops a longer word from being read as a one-letter
        # direction (e.g. "PUSH box sideways" is not a push SOUTH).
        (ActionType.PUSH, r'Action:\s*PUSH\s+(\w+)\s+(NORTH|SOUTH|EAST|WEST|N|S|E|W)\b', 2),
        (ActionType.USE, r'Action:\s*USE\s+(\w+)(?:\s+ON\s+(\w+))?', 2),
        (ActionType.OPEN, r'Action:\s*OPEN\s+(\w+)', 1),
        (ActionType.CLOSE, r'Action:\s*CLOSE\s+(\w+)', 1),
        (ActionType.ANNOUNCE, r'Action:\s*ANNOUNCE\s+["\'](.+?)["\']', 1),
        (ActionType.SPEAK, r'Action:\s*SPEAK\s+["\'](.+?)["\']', 1),
        # Fallback patterns (less strict)
        (ActionType.GO, r'\bGO\s+(NORTH|SOUTH|EAST|WEST|N|S|E|W)\b', 1),
        (ActionType.GO, r'\bmove\s+(NORTH|SOUTH|EAST|WEST|N|S|E|W)\b', 1),
        (ActionType.GO, r'\bhead\s+(NORTH|SOUTH|EAST|WEST|N|S|E|W)\b', 1),
        (ActionType.WAIT, r'\bWAIT\b', 0),
        (ActionType.LOOK, r'\bLOOK\b', 0),
    ]
    def parse(self, llm_response: str) -> Action:
        """
        Parse an LLM response and extract the action.
        Args:
            llm_response: Raw model output. None is treated as an empty
                response.
        Returns:
            Action for the first matching pattern, or an Action with
            type=INVALID (carrying the first 100 characters of the response)
            if no valid action is found.
        """
        if llm_response is None:
            # A missing completion is reported as INVALID instead of raising
            # AttributeError on .upper().
            llm_response = ""
        # Normalize to uppercase for matching. IGNORECASE is still required
        # because some fallback patterns are written in lower case.
        text = llm_response.upper()
        for action_type, pattern, num_groups in self.PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                args = self._extract_args(match, num_groups, action_type)
                return Action(
                    type=action_type,
                    args=args,
                    raw_match=match.group(0)
                )
        # No valid action found
        return Action(
            type=ActionType.INVALID,
            args=(llm_response[:100],),  # First 100 chars for debugging
            raw_match=""
        )
    def _extract_args(self, match, num_groups: int, action_type: ActionType) -> tuple:
        """
        Extract and normalize arguments from regex match.
        Args:
            match: Match object produced by one of PATTERNS.
            num_groups: How many capture groups to read, starting at group 1.
            action_type: Type of the matched pattern; selects which groups
                are normalized as directions.
        Returns:
            Tuple with one entry per group; a group that did not participate
            in the match (or matched an empty string) becomes None.
        """
        if num_groups == 0:
            return ()
        args = []
        for i in range(1, num_groups + 1):
            group = match.group(i)
            if group:
                # Normalize directions: every GO group, and the second
                # (direction) group of PUSH.
                if action_type == ActionType.GO or (action_type == ActionType.PUSH and i == 2):
                    group = self.DIRECTIONS.get(group.upper(), group.upper())
                args.append(group)
            else:
                args.append(None)
        return tuple(args)
 # Convenience function
 def parse_action(llm_response: str) -> Action:
    """Build a throwaway ActionParser and return its parse of the response."""
    parser = ActionParser()
    return parser.parse(llm_response)
--- a/tests/vllm_demo/test_action_parser.py
+++ b/tests/vllm_demo/test_action_parser.py
@ -0,0 +1,214 @@
 #!/usr/bin/env python3
 """
 Unit tests for action_parser.py
 ===============================
 Tests the ActionParser's ability to extract structured actions
 from various LLM response formats.
 """
 import sys
 from action_parser import parse_action, ActionType
 def test_explicit_go_directions():
    """Each 'Action: GO <cardinal>' response yields GO with that cardinal name."""
    # Cardinal directions, checked in the same order: type first, then args.
    for cardinal in ("NORTH", "SOUTH", "EAST", "WEST"):
        response = f"Action: GO {cardinal}"
        assert parse_action(response).type == ActionType.GO
        assert parse_action(response).args == (cardinal,)
    print("  [PASS] Explicit GO directions")
 def test_short_directions():
    """One-letter directions (N, S, E, W) expand to the full direction name."""
    cases = [
        ("Action: GO N", ("NORTH",)),
        ("Action: GO S", ("SOUTH",)),
        ("Action: GO E", ("EAST",)),
        ("Action: GO W", ("WEST",)),
    ]
    for response, expected_args in cases:
        assert parse_action(response).args == expected_args
    print("  [PASS] Short direction abbreviations")
 def test_case_insensitivity():
    """The same action is recognized however the response is capitalized."""
    variants = (
        "action: go south",
        "ACTION: GO SOUTH",
        "Action: Go South",
        "action: GO south",
    )
    for response in variants:
        assert parse_action(response).type == ActionType.GO
    print("  [PASS] Case insensitivity")
 def test_fallback_patterns():
    """Movement phrased without an 'Action:' prefix still parses as GO."""
    responses = [
        # Natural language variations
        "I think I'll GO WEST to explore",
        "I'll GO NORTH",
        "Let me GO EAST",
        # Move variations
        "I should move NORTH",
        "Let me head SOUTH",
    ]
    for response in responses:
        assert parse_action(response).type == ActionType.GO
    print("  [PASS] Fallback patterns")
 def test_wait_action():
    """WAIT is recognized both with the explicit prefix and inside a sentence."""
    for response in ("Action: WAIT", "I'll WAIT here", "Let me WAIT and see"):
        assert parse_action(response).type == ActionType.WAIT
    print("  [PASS] WAIT action")
 def test_look_action():
    """LOOK parses with or without a target; the target comes back upper-cased."""
    bare = parse_action("Action: LOOK")
    assert bare.type == ActionType.LOOK
    targeted = parse_action("Action: LOOK AT door")
    assert targeted.type == ActionType.LOOK
    assert targeted.args == ("DOOR",)
    print("  [PASS] LOOK action")
 def test_invalid_actions():
    """Responses containing no recognizable action are flagged INVALID."""
    non_actions = (
        "I'm not sure what to do",
        "Let me think about this...",
        "The weather is nice today",
    )
    for response in non_actions:
        parsed = parse_action(response)
        assert parsed.type == ActionType.INVALID
    print("  [PASS] Invalid action detection")
 def test_raw_match_capture():
    """raw_match holds the portion of the response that the pattern matched."""
    matched_text = parse_action("After thinking, Action: GO NORTH is best").raw_match
    assert "GO NORTH" in matched_text
    print("  [PASS] Raw match capture")
 def test_embedded_actions():
    """An action line buried at the end of multi-line reasoning is still found."""
    reasoning = """
    Looking at the screenshot, I can see I'm in a dungeon corridor.
    There's a rat to the east and a wall to the north.
    The path south appears clear.
    I think the best course of action is to investigate the rat.
    Action: GO EAST
    """
    parsed = parse_action(reasoning)
    assert parsed.type == ActionType.GO
    assert parsed.args == ("EAST",)
    print("  [PASS] Embedded action extraction")
 def test_complex_actions():
    """TAKE, DROP, USE, OPEN and CLOSE responses map to their action types."""
    # TAKE action: also check the captured item name.
    take = parse_action("Action: TAKE sword")
    assert take.type == ActionType.TAKE
    assert take.args == ("SWORD",)
    # Remaining types are only checked for the action type.
    type_only_cases = [
        ("Action: DROP shield", ActionType.DROP),
        ("Action: USE key", ActionType.USE),
        ("Action: USE key ON door", ActionType.USE),
        ("Action: OPEN chest", ActionType.OPEN),
        ("Action: CLOSE door", ActionType.CLOSE),
    ]
    for response, expected_type in type_only_cases:
        assert parse_action(response).type == expected_type
    print("  [PASS] Complex action types")
 def test_push_action():
    """PUSH captures the object and a direction, expanding short directions."""
    cases = [
        ("Action: PUSH boulder NORTH", ("BOULDER", "NORTH")),
        ("Action: PUSH box E", ("BOX", "EAST")),
    ]
    for response, expected_args in cases:
        parsed = parse_action(response)
        assert parsed.type == ActionType.PUSH
        assert parsed.args == expected_args
    print("  [PASS] PUSH action")
 def test_speak_announce_actions():
    """SPEAK and ANNOUNCE accept a double- or single-quoted message."""
    spoken = parse_action('Action: SPEAK "Hello there!"')
    assert spoken.type == ActionType.SPEAK
    # The parser upper-cases the whole response, so the message is upper case too.
    assert spoken.args[0] == "HELLO THERE!"
    announced = parse_action("Action: ANNOUNCE 'Watch out!'")
    assert announced.type == ActionType.ANNOUNCE
    print("  [PASS] SPEAK/ANNOUNCE actions")
 def run_all_tests():
    """Run every parser test, print a summary, and return True if none failed."""
    banner = "=" * 60
    print(banner)
    print("Action Parser Tests")
    print(banner)
    suite = (
        test_explicit_go_directions,
        test_short_directions,
        test_case_insensitivity,
        test_fallback_patterns,
        test_wait_action,
        test_look_action,
        test_invalid_actions,
        test_raw_match_capture,
        test_embedded_actions,
        test_complex_actions,
        test_push_action,
        test_speak_announce_actions,
    )
    failures = 0
    for case in suite:
        try:
            case()
        except AssertionError as exc:
            # A failed assertion is reported separately from an unexpected error.
            print(f"  [FAIL] {case.__name__}: {exc}")
            failures += 1
        except Exception as exc:
            print(f"  [ERROR] {case.__name__}: {exc}")
            failures += 1
    # Every test that did not raise counts as passed.
    successes = len(suite) - failures
    print(banner)
    print(f"Results: {successes} passed, {failures} failed")
    print(banner)
    return not failures
 if __name__ == "__main__":
    # Exit status is 0 when every test passed and 1 otherwise.
    sys.exit(0 if run_all_tests() else 1)