From 7f253da58127967b5de3935f1517d0877b44d446 Mon Sep 17 00:00:00 2001 From: John McCardle Date: Thu, 30 Oct 2025 11:48:09 -0400 Subject: [PATCH] fix: escape HTML in descriptions before link transformation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes HTML injection vulnerability in generate_dynamic_docs.py where description text was not HTML-escaped before being inserted into HTML output. Special characters like <, >, & could be interpreted as HTML. Changes: - Modified transform_doc_links() to escape all non-link text when format='html' or format='web' - Link text and hrefs are also properly escaped - Non-HTML formats (markdown, python) remain unchanged - Added proper handling for descriptions with mixed plain text and links The fix splits docstrings into link and non-link segments, escapes non-link segments, and properly escapes content within link patterns. Tested with comprehensive test suite covering: - Basic HTML special characters - Special chars with links - Special chars in link text - Multiple links with special chars 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- docs/api_reference_dynamic.html | 56 +++++++++++++++++++++++---------- tools/generate_dynamic_docs.py | 44 +++++++++++++++++++++----- 2 files changed, 75 insertions(+), 25 deletions(-) diff --git a/docs/api_reference_dynamic.html b/docs/api_reference_dynamic.html index 82c247d..c7656ba 100644 --- a/docs/api_reference_dynamic.html +++ b/docs/api_reference_dynamic.html @@ -108,7 +108,7 @@

McRogueFace API Reference

-

Generated on 2025-07-15 21:28:24

+

Generated on 2025-10-30 11:47:05

This documentation was dynamically generated from the compiled module.

@@ -717,8 +717,8 @@ Attributes:
-
compute_fovcompute_fov(x: int, y: int, radius: int = 0, light_walls: bool = True, algorithm: int = FOV_BASIC) -> None
-

Compute field of view from a position.

+
compute_fovcompute_fov(x: int, y: int, radius: int = 0, light_walls: bool = True, algorithm: int = FOV_BASIC) -> List[Tuple[int, int, bool, bool]]
+

Compute field of view from a position and return visible cells.

x: X coordinate of the viewer
y: Y coordinate of the viewer
@@ -726,6 +726,7 @@ Attributes:
light_walls: Whether walls are lit when visible
algorithm: FOV algorithm to use (FOV_BASIC, FOV_DIAMOND, FOV_SHADOW, FOV_PERMISSIVE_0-8)
+

Returns: List of tuples (x, y, visible, discovered) for all visible cells: - x, y: Grid coordinates - visible: True (all returned cells are visible) - discovered: True (FOV implies discovery)

@@ -981,38 +982,59 @@ Has no effect if the timer is not paused.

Methods:

-
angle(...)
-

Return the angle in radians from the positive X axis

+
angleangle() -> float
+

Get the angle of this vector in radians.

+

Returns: float: Angle in radians from positive x-axis

-
copy(...)
-

Return a copy of this vector

+
copycopy() -> Vector
+

Create a copy of this vector.

+

Returns: Vector: New Vector object with same x and y values

-
distance_to(...)
-

Return the distance to another vector

+
distance_todistance_to(other: Vector) -> float
+

Calculate the distance to another vector.

+
+
other: The other vector
+
+

Returns: float: Distance between the two vectors

-
dot(...)
-

Return the dot product with another vector

+
dotdot(other: Vector) -> float
+

Calculate the dot product with another vector.

+
+
other: The other vector
+
+

Returns: float: Dot product of the two vectors

-
magnitude(...)
-

Return the length of the vector

+
magnitudemagnitude() -> float
+

Calculate the length/magnitude of this vector.

+

Returns: float: The magnitude of the vector

-
magnitude_squared(...)
-

Return the squared length of the vector

+
magnitude_squaredmagnitude_squared() -> float
+

Calculate the squared magnitude of this vector. + + + +Note:

+

Returns: float: The squared magnitude (faster than magnitude()) Use this for comparisons to avoid expensive square root calculation.

-
normalize(...)
-

Return a unit vector in the same direction

+
normalizenormalize() -> Vector
+

Return a unit vector in the same direction. + + + +Note:

+

Returns: Vector: New normalized vector with magnitude 1.0 For zero vectors (magnitude 0.0), returns a zero vector rather than raising an exception

diff --git a/tools/generate_dynamic_docs.py b/tools/generate_dynamic_docs.py index 4b79315..426fdcd 100644 --- a/tools/generate_dynamic_docs.py +++ b/tools/generate_dynamic_docs.py @@ -17,6 +17,8 @@ def transform_doc_links(docstring, format='html', base_url=''): Detects pattern: "See also: TEXT (docs/path.md)" Transforms to appropriate format for output type. + + For HTML/web formats, properly escapes content before inserting HTML tags. """ if not docstring: return docstring @@ -27,14 +29,17 @@ def transform_doc_links(docstring, format='html', base_url=''): text, ref = match.group(1).strip(), match.group(2).strip() if format == 'html': - # Convert docs/foo.md → foo.html - href = ref.replace('docs/', '').replace('.md', '.html') - return f'

See also: {text}

' + # Convert docs/foo.md → foo.html and escape for safe HTML + href = html.escape(ref.replace('docs/', '').replace('.md', '.html'), quote=True) + text_escaped = html.escape(text) + return f'

See also: {text_escaped}

' elif format == 'web': - # Link to hosted docs + # Link to hosted docs and escape for safe HTML web_path = ref.replace('docs/', '').replace('.md', '') - return f'

See also: {text}

' + href = html.escape(f"{base_url}/{web_path}", quote=True) + text_escaped = html.escape(text) + return f'

See also: {text_escaped}

' elif format == 'markdown': # Markdown link @@ -44,7 +49,29 @@ def transform_doc_links(docstring, format='html', base_url=''): # Keep as plain text for Python docstrings return match.group(0) - return re.sub(link_pattern, replace_link, docstring) + # For HTML formats, escape only the text around each link; replace_link escapes the link text and href itself + if format in ('html', 'web'): + # Split by the link pattern, escape non-link parts, then reassemble + parts = [] + last_end = 0 + + for match in re.finditer(link_pattern, docstring): + # Escape the text before this match + if match.start() > last_end: + parts.append(html.escape(docstring[last_end:match.start()])) + + # Process the link (replace_link handles escaping internally) + parts.append(replace_link(match)) + last_end = match.end() + + # Escape any remaining text after the last match + if last_end < len(docstring): + parts.append(html.escape(docstring[last_end:])) + + return ''.join(parts) + else: + # For non-HTML formats, just do simple replacement + return re.sub(link_pattern, replace_link, docstring) # Must be run with McRogueFace as interpreter try: @@ -339,8 +366,9 @@ def generate_html_docs():

{func_name}{parsed['signature'] if parsed['signature'] else '(...)'}

""" - description = transform_doc_links(parsed['description'], format='html') - html_content += f"

{description}

\n" + if parsed['description']: + description = transform_doc_links(parsed['description'], format='html') + html_content += f"

{description}

\n" if parsed['args']: html_content += "

Arguments:

\n
    \n"