From 2890528e2191536ea41c7b83e638c9c2a0561e04 Mon Sep 17 00:00:00 2001 From: John McCardle Date: Sun, 14 Dec 2025 12:53:39 -0500 Subject: [PATCH] feat: Add action parser and executor for LLM agent actions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ActionParser: Extracts structured actions from LLM text responses - Regex patterns for GO, WAIT, LOOK, TAKE, DROP, PUSH, USE, etc. - Direction normalization (N→NORTH, UP→NORTH) - Handles "Action: GO EAST" and fallback patterns - 12 unit tests covering edge cases ActionExecutor: Executes parsed actions in the game world - Movement with collision detection (walls, entities) - Boundary checking - ActionResult with path data for animation replay 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- tests/vllm_demo/action_executor.py | 136 ++++++++++++++++ tests/vllm_demo/action_parser.py | 118 ++++++++++++++ tests/vllm_demo/test_action_parser.py | 214 ++++++++++++++++++++++++++ 3 files changed, 468 insertions(+) create mode 100644 tests/vllm_demo/action_executor.py create mode 100644 tests/vllm_demo/action_parser.py create mode 100644 tests/vllm_demo/test_action_parser.py diff --git a/tests/vllm_demo/action_executor.py b/tests/vllm_demo/action_executor.py new file mode 100644 index 0000000..d95caf2 --- /dev/null +++ b/tests/vllm_demo/action_executor.py @@ -0,0 +1,136 @@ +""" +Action Executor for McRogueFace +=============================== + +Executes parsed actions in the game world. +Handles movement, collision detection, and action results. 
# File: tests/vllm_demo/action_executor.py
"""
Action Executor for McRogueFace
===============================

Executes parsed actions in the game world.
Handles movement, collision detection, and action results.
"""

from dataclasses import dataclass
from typing import Optional, List, Tuple
from action_parser import Action, ActionType


@dataclass
class ActionResult:
    """Outcome of executing one action for one agent."""

    success: bool  # True when the action took effect
    message: str  # Human-readable description of what happened
    new_position: Optional[Tuple[int, int]] = None  # Set only by a successful move
    path: Optional[List[Tuple[int, int]]] = None  # For animation replay


class ActionExecutor:
    """Execute actions in the McRogueFace game world."""

    # Unit step (dx, dy) for each direction; NORTH decreases y.
    DIRECTION_VECTORS = {
        'NORTH': (0, -1),
        'SOUTH': (0, 1),
        'EAST': (1, 0),
        'WEST': (-1, 0),
    }

    def __init__(self, grid):
        """
        Initialize executor with a grid reference.

        Args:
            grid: mcrfpy.Grid instance
        """
        self.grid = grid

    def execute(self, agent, action: Action) -> ActionResult:
        """
        Execute an action for an agent.

        Args:
            agent: Agent wrapper with .entity attribute
            action: Parsed Action to execute

        Returns:
            ActionResult with success status and message
        """
        handlers = {
            ActionType.GO: self._execute_go,
            ActionType.WAIT: self._execute_wait,
            ActionType.LOOK: self._execute_look,
            ActionType.TAKE: self._execute_take,
            ActionType.DROP: self._execute_drop,
            ActionType.INVALID: self._execute_invalid,
        }

        # Action types without a dedicated handler (PUSH, USE, ...) get a
        # uniform "not yet implemented" failure instead of raising.
        handler = handlers.get(action.type, self._execute_unimplemented)
        return handler(agent, action)

    def _execute_go(self, agent, action: Action) -> ActionResult:
        """
        Move the agent one cell in the requested direction.

        The move is refused (success=False) when no direction is given, the
        direction is unknown, the target is outside the grid, the target cell
        is not walkable, or another entity already stands on it.
        """
        if not action.args or not action.args[0]:
            return ActionResult(False, "No direction specified")

        requested = action.args[0]
        # The parser emits upper-case names, but hand-built actions may not.
        direction = requested.upper() if isinstance(requested, str) else requested
        if direction not in self.DIRECTION_VECTORS:
            return ActionResult(False, f"Invalid direction: {requested}")

        dx, dy = self.DIRECTION_VECTORS[direction]

        # Entity positions are truncated to integer cell coordinates.
        current_x, current_y = int(agent.entity.pos[0]), int(agent.entity.pos[1])
        new_x, new_y = current_x + dx, current_y + dy

        # Bounds are checked first so grid.at() is never asked for an
        # out-of-range cell.
        grid_w, grid_h = self.grid.grid_size
        if not (0 <= new_x < grid_w and 0 <= new_y < grid_h):
            return ActionResult(False, f"Cannot go {direction} - edge of map")

        target_cell = self.grid.at(new_x, new_y)
        if not target_cell.walkable:
            return ActionResult(False, f"Cannot go {direction} - path blocked")

        # Entity collision (optional - depends on game rules)
        for entity in self.grid.entities:
            if entity is agent.entity:
                continue
            ex, ey = int(entity.pos[0]), int(entity.pos[1])
            if ex == new_x and ey == new_y:
                return ActionResult(False, f"Cannot go {direction} - someone is there")

        agent.entity.pos = (new_x, new_y)

        return ActionResult(
            success=True,
            message=f"Moved {direction.lower()} to ({new_x}, {new_y})",
            new_position=(new_x, new_y),
            path=[(current_x, current_y), (new_x, new_y)],
        )

    def _execute_wait(self, agent, action: Action) -> ActionResult:
        """Execute wait action (no-op)."""
        return ActionResult(True, "Waited and observed surroundings")

    def _execute_look(self, agent, action: Action) -> ActionResult:
        """Execute look action; the message names the target when one is given."""
        target = action.args[0] if action.args else None
        if target:
            return ActionResult(True, f"Examined {target} closely")
        return ActionResult(True, "Looked around carefully")

    def _execute_take(self, agent, action: Action) -> ActionResult:
        """Execute take action (placeholder, always fails)."""
        item = action.args[0] if action.args else "unknown"
        # TODO: Implement inventory system
        return ActionResult(False, f"Cannot take {item} - not implemented yet")

    def _execute_drop(self, agent, action: Action) -> ActionResult:
        """Execute drop action (placeholder, always fails)."""
        item = action.args[0] if action.args else "unknown"
        return ActionResult(False, f"Cannot drop {item} - not implemented yet")

    def _execute_invalid(self, agent, action: Action) -> ActionResult:
        """Handle invalid/unparseable action."""
        # An INVALID action built without args must still yield a result
        # rather than raising IndexError.
        snippet = action.args[0] if action.args else ""
        return ActionResult(False, f"Could not understand action: {snippet}")

    def _execute_unimplemented(self, agent, action: Action) -> ActionResult:
        """Handle action types that have no handler yet."""
        return ActionResult(False, f"Action {action.type.value} not yet implemented")
# File: tests/vllm_demo/action_parser.py
"""
Action Parser for LLM Agent Responses
=====================================

Extracts structured actions from free-form LLM text responses.
Handles variations like "Action: GO EAST", "I'll go east", "GO E", etc.
"""

import re
from dataclasses import dataclass
from typing import Optional, Tuple, Any
from enum import Enum


class ActionType(Enum):
    """Closed set of action verbs the parser can emit."""

    GO = "GO"
    WAIT = "WAIT"
    LOOK = "LOOK"
    TAKE = "TAKE"
    DROP = "DROP"
    PUSH = "PUSH"
    USE = "USE"
    OPEN = "OPEN"
    CLOSE = "CLOSE"
    ANNOUNCE = "ANNOUNCE"
    SPEAK = "SPEAK"
    INVALID = "INVALID"


@dataclass
class Action:
    """A parsed action: its verb, positional arguments, and the matched text."""

    type: ActionType
    args: Tuple[Any, ...] = ()  # Upper-cased captures; None for an absent optional group
    raw_match: str = ""  # Upper-cased text the pattern matched; "" for INVALID


class ActionParser:
    """Parse LLM responses into structured actions."""

    # Direction normalization: every accepted spelling -> canonical name
    DIRECTIONS = {
        'N': 'NORTH', 'S': 'SOUTH', 'E': 'EAST', 'W': 'WEST',
        'NORTH': 'NORTH', 'SOUTH': 'SOUTH', 'EAST': 'EAST', 'WEST': 'WEST',
        'UP': 'NORTH', 'DOWN': 'SOUTH', 'LEFT': 'WEST', 'RIGHT': 'EAST',
    }

    # Regex alternation built from DIRECTIONS so every alias that can be
    # normalized can also be matched (UP/DOWN/LEFT/RIGHT included).
    _DIRECTION_RE = '|'.join(sorted(map(re.escape, DIRECTIONS), key=len, reverse=True))

    # (action type, regex, number of capture groups to extract).
    # Patterns ordered by specificity (most specific first).
    PATTERNS = [
        # Explicit "Action: X" format (preferred)
        (ActionType.GO, rf'Action:\s*GO\s+({_DIRECTION_RE})\b', 1),
        (ActionType.WAIT, r'Action:\s*WAIT\b', 0),
        (ActionType.LOOK, r'Action:\s*LOOK(?:\s+AT\s+(\w+))?\b', 1),
        (ActionType.TAKE, r'Action:\s*TAKE\s+(\w+)', 1),
        (ActionType.DROP, r'Action:\s*DROP\s+(\w+)', 1),
        # Trailing \b keeps words such as "sideways" from matching as "S".
        (ActionType.PUSH, rf'Action:\s*PUSH\s+(\w+)\s+({_DIRECTION_RE})\b', 2),
        (ActionType.USE, r'Action:\s*USE\s+(\w+)(?:\s+ON\s+(\w+))?', 2),
        (ActionType.OPEN, r'Action:\s*OPEN\s+(\w+)', 1),
        (ActionType.CLOSE, r'Action:\s*CLOSE\s+(\w+)', 1),
        # Quoted messages: the closing quote must be the same kind as the
        # opening one, so "I'm here" is not cut at the apostrophe. For
        # single quotes, a quote followed by a word character is treated as
        # an apostrophe inside the message rather than the terminator.
        (ActionType.ANNOUNCE, r'Action:\s*ANNOUNCE\s+"(.+?)"', 1),
        (ActionType.ANNOUNCE, r"Action:\s*ANNOUNCE\s+'(.+?)'(?!\w)", 1),
        (ActionType.SPEAK, r'Action:\s*SPEAK\s+"(.+?)"', 1),
        (ActionType.SPEAK, r"Action:\s*SPEAK\s+'(.+?)'(?!\w)", 1),

        # Fallback patterns (less strict)
        (ActionType.GO, rf'\bGO\s+({_DIRECTION_RE})\b', 1),
        (ActionType.GO, rf'\bmove\s+({_DIRECTION_RE})\b', 1),
        (ActionType.GO, rf'\bhead\s+({_DIRECTION_RE})\b', 1),
        (ActionType.WAIT, r'\bWAIT\b', 0),
        (ActionType.LOOK, r'\bLOOK\b', 0),
    ]

    def parse(self, llm_response: str) -> Action:
        """
        Parse an LLM response and extract the action.

        Patterns are tried in PATTERNS order and the first pattern that
        matches anywhere in the text wins.

        Args:
            llm_response: Raw text produced by the model. None or an empty
                string is treated as "no action".

        Returns:
            The parsed Action. When nothing matches, an Action with
            type=INVALID whose single argument is the first 100 characters
            of the response (for debugging).
        """
        if not llm_response:
            return Action(type=ActionType.INVALID, args=("",), raw_match="")

        # Matching runs on the upper-cased text, so captured arguments and
        # raw_match are upper-case. IGNORECASE is still required because
        # the patterns themselves contain mixed-case literals.
        text = llm_response.upper()

        for action_type, pattern, num_groups in self.PATTERNS:
            match = re.search(pattern, text, re.IGNORECASE)
            if match is None:
                continue
            return Action(
                type=action_type,
                args=self._extract_args(match, num_groups, action_type),
                raw_match=match.group(0),
            )

        # No valid action found
        return Action(
            type=ActionType.INVALID,
            args=(llm_response[:100],),  # First 100 chars for debugging
            raw_match="",
        )

    def _extract_args(self, match, num_groups: int, action_type: ActionType) -> tuple:
        """
        Extract and normalize arguments from a regex match.

        Returns a tuple with one entry per requested group; a group that
        did not participate in the match (or matched empty) becomes None.
        GO arguments and the second PUSH argument are direction names and
        are mapped to their canonical form through DIRECTIONS.
        """
        if num_groups == 0:
            return ()

        args = []
        for index in range(1, num_groups + 1):
            group = match.group(index)
            if not group:
                args.append(None)
                continue
            is_direction = action_type == ActionType.GO or (
                action_type == ActionType.PUSH and index == 2
            )
            if is_direction:
                group = self.DIRECTIONS.get(group.upper(), group.upper())
            args.append(group)

        return tuple(args)


# Convenience function
def parse_action(llm_response: str) -> Action:
    """Parse an LLM response into an Action using a fresh ActionParser."""
    return ActionParser().parse(llm_response)
# File: tests/vllm_demo/test_action_parser.py
"""
Unit tests for action_parser.py
===============================

Tests the ActionParser's ability to extract structured actions
from various LLM response formats.
"""

import sys
from action_parser import parse_action, ActionType


def test_explicit_go_directions():
    """'Action: GO <full name>' yields GO with that same direction."""
    for name in ("NORTH", "SOUTH", "EAST", "WEST"):
        response = f"Action: GO {name}"
        assert parse_action(response).type == ActionType.GO
        assert parse_action(response).args == (name,)

    print(" [PASS] Explicit GO directions")


def test_short_directions():
    """Single-letter directions expand to their full names."""
    expansions = {"N": "NORTH", "S": "SOUTH", "E": "EAST", "W": "WEST"}
    for letter, full_name in expansions.items():
        assert parse_action(f"Action: GO {letter}").args == (full_name,)

    print(" [PASS] Short direction abbreviations")


def test_case_insensitivity():
    """Letter case of the response does not affect recognition."""
    variants = (
        "action: go south",
        "ACTION: GO SOUTH",
        "Action: Go South",
        "action: GO south",
    )
    for variant in variants:
        assert parse_action(variant).type == ActionType.GO

    print(" [PASS] Case insensitivity")


def test_fallback_patterns():
    """Responses without the 'Action:' prefix are still recognized as GO."""
    responses = (
        # Natural language variations
        "I think I'll GO WEST to explore",
        "I'll GO NORTH",
        "Let me GO EAST",
        # Move variations
        "I should move NORTH",
        "Let me head SOUTH",
    )
    for response in responses:
        assert parse_action(response).type == ActionType.GO

    print(" [PASS] Fallback patterns")


def test_wait_action():
    """WAIT is recognized both explicitly and inside a sentence."""
    for response in ("Action: WAIT", "I'll WAIT here", "Let me WAIT and see"):
        assert parse_action(response).type == ActionType.WAIT

    print(" [PASS] WAIT action")


def test_look_action():
    """LOOK is recognized with and without an 'AT <target>' clause."""
    bare = parse_action("Action: LOOK")
    assert bare.type == ActionType.LOOK

    targeted = parse_action("Action: LOOK AT door")
    assert targeted.type == ActionType.LOOK
    assert targeted.args == ("DOOR",)

    print(" [PASS] LOOK action")


def test_invalid_actions():
    """Text containing no action is flagged INVALID."""
    chatter = (
        "I'm not sure what to do",
        "Let me think about this...",
        "The weather is nice today",
    )
    for response in chatter:
        result = parse_action(response)
        assert result.type == ActionType.INVALID

    print(" [PASS] Invalid action detection")


def test_raw_match_capture():
    """raw_match holds the text the pattern matched."""
    result = parse_action("After thinking, Action: GO NORTH is best")
    assert "GO NORTH" in result.raw_match

    print(" [PASS] Raw match capture")


def test_embedded_actions():
    """An action at the end of a long multi-line response is extracted."""
    long_response = """
    Looking at the screenshot, I can see I'm in a dungeon corridor.
    There's a rat to the east and a wall to the north.
    The path south appears clear.

    I think the best course of action is to investigate the rat.

    Action: GO EAST
    """

    result = parse_action(long_response)
    assert result.type == ActionType.GO
    assert result.args == ("EAST",)

    print(" [PASS] Embedded action extraction")


def test_complex_actions():
    """Object-taking verbs map to their own action types."""
    # TAKE action
    take = parse_action("Action: TAKE sword")
    assert take.type == ActionType.TAKE
    assert take.args == ("SWORD",)

    expected_types = (
        # DROP action
        ("Action: DROP shield", ActionType.DROP),
        # USE action
        ("Action: USE key", ActionType.USE),
        ("Action: USE key ON door", ActionType.USE),
        # OPEN/CLOSE
        ("Action: OPEN chest", ActionType.OPEN),
        ("Action: CLOSE door", ActionType.CLOSE),
    )
    for response, expected in expected_types:
        assert parse_action(response).type == expected

    print(" [PASS] Complex action types")


def test_push_action():
    """PUSH captures the object and a normalized direction."""
    cases = (
        ("Action: PUSH boulder NORTH", ("BOULDER", "NORTH")),
        ("Action: PUSH box E", ("BOX", "EAST")),
    )
    for response, expected_args in cases:
        result = parse_action(response)
        assert result.type == ActionType.PUSH
        assert result.args == expected_args

    print(" [PASS] PUSH action")


def test_speak_announce_actions():
    """SPEAK and ANNOUNCE capture a quoted message."""
    spoken = parse_action('Action: SPEAK "Hello there!"')
    assert spoken.type == ActionType.SPEAK
    # Uppercase due to text normalization
    assert spoken.args[0] == "HELLO THERE!"

    announced = parse_action("Action: ANNOUNCE 'Watch out!'")
    assert announced.type == ActionType.ANNOUNCE

    print(" [PASS] SPEAK/ANNOUNCE actions")


def run_all_tests():
    """Run every parser test, print a summary, and return True if none failed."""
    rule = "=" * 60
    print(rule)
    print("Action Parser Tests")
    print(rule)

    suite = (
        test_explicit_go_directions,
        test_short_directions,
        test_case_insensitivity,
        test_fallback_patterns,
        test_wait_action,
        test_look_action,
        test_invalid_actions,
        test_raw_match_capture,
        test_embedded_actions,
        test_complex_actions,
        test_push_action,
        test_speak_announce_actions,
    )

    passed = 0
    failed = 0

    for case in suite:
        try:
            case()
        except AssertionError as err:
            print(f" [FAIL] {case.__name__}: {err}")
            failed += 1
        except Exception as err:
            # Any other exception also counts as a failure so one broken
            # test cannot abort the rest of the run.
            print(f" [ERROR] {case.__name__}: {err}")
            failed += 1
        else:
            passed += 1

    print(rule)
    print(f"Results: {passed} passed, {failed} failed")
    print(rule)

    return failed == 0


if __name__ == "__main__":
    success = run_all_tests()
    sys.exit(0 if success else 1)