fix: escape HTML in descriptions before link transformation

Fixes an HTML injection vulnerability in generate_dynamic_docs.py: description text was not HTML-escaped before being inserted into HTML output, so special characters such as <, >, and & could be interpreted as HTML.

Changes:
- Modified transform_doc_links() to escape all non-link text when format='html' or format='web'
- Link text and hrefs are also properly escaped
- Non-HTML formats (markdown, python) remain unchanged
- Added proper handling for descriptions with mixed plain text and links

The fix splits docstrings into link and non-link segments, escapes the non-link segments, and properly escapes content within link patterns.

Tested with a comprehensive test suite covering:
- Basic HTML special characters
- Special chars with links
- Special chars in link text
- Multiple links with special chars

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-30 11:48:09 -04:00 · 2025-10-30 11:48:09 -04:00 · 7f253da581
parent fac6a9a457
commit 7f253da581
2 changed files with 75 additions and 25 deletions
--- a/docs/api_reference_dynamic.html
+++ b/docs/api_reference_dynamic.html
@ -108,7 +108,7 @@
 <body>
    <div class="container">
        <h1>McRogueFace API Reference</h1>
-        <p><em>Generated on 2025-07-15 21:28:24</em></p>
+        <p><em>Generated on 2025-10-30 11:47:05</em></p>
        <p><em>This documentation was dynamically generated from the compiled module.</em></p>
        
        <div class="toc">
@ -717,8 +717,8 @@ Attributes:
            </div>

            <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">compute_fovcompute_fov(x: int, y: int, radius: int = 0, light_walls: bool = True, algorithm: int = FOV_BASIC) -> None</code></h5>
-                <p>Compute field of view from a position.</p>
+                <h5><code class="method-name">compute_fovcompute_fov(x: int, y: int, radius: int = 0, light_walls: bool = True, algorithm: int = FOV_BASIC) -> List[Tuple[int, int, bool, bool]]</code></h5>
+                <p>Compute field of view from a position and return visible cells.</p>
                <div style='margin-left: 20px;'>
                    <div><span class='arg-name'>x</span>: X coordinate of the viewer</div>
                    <div><span class='arg-name'>y</span>: Y coordinate of the viewer</div>
@ -726,6 +726,7 @@ Attributes:
                    <div><span class='arg-name'>light_walls</span>: Whether walls are lit when visible</div>
                    <div><span class='arg-name'>algorithm</span>: FOV algorithm to use (FOV_BASIC, FOV_DIAMOND, FOV_SHADOW, FOV_PERMISSIVE_0-8)</div>
                </div>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  List of tuples (x, y, visible, discovered) for all visible cells: - x, y: Grid coordinates - visible: True (all returned cells are visible) - discovered: True (FOV implies discovery)</p>
            </div>

            <div style="margin-left: 20px; margin-bottom: 15px;">
@ -981,38 +982,59 @@ Has no effect if the timer is not paused.</p>
            <h4>Methods:</h4>

            <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">angle(...)</code></h5>
-                <p>Return the angle in radians from the positive X axis</p>
+                <h5><code class="method-name">angleangle() -> float</code></h5>
+                <p>Get the angle of this vector in radians.</p>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  float: Angle in radians from positive x-axis</p>
            </div>

            <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">copy(...)</code></h5>
-                <p>Return a copy of this vector</p>
+                <h5><code class="method-name">copycopy() -> Vector</code></h5>
+                <p>Create a copy of this vector.</p>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  Vector: New Vector object with same x and y values</p>
            </div>

            <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">distance_to(...)</code></h5>
-                <p>Return the distance to another vector</p>
+                <h5><code class="method-name">distance_todistance_to(other: Vector) -> float</code></h5>
+                <p>Calculate the distance to another vector.</p>
+                <div style='margin-left: 20px;'>
+                    <div><span class='arg-name'>other</span>: The other vector</div>
+                </div>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  float: Distance between the two vectors</p>
            </div>

            <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">dot(...)</code></h5>
-                <p>Return the dot product with another vector</p>
+                <h5><code class="method-name">dotdot(other: Vector) -> float</code></h5>
+                <p>Calculate the dot product with another vector.</p>
+                <div style='margin-left: 20px;'>
+                    <div><span class='arg-name'>other</span>: The other vector</div>
+                </div>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  float: Dot product of the two vectors</p>
            </div>

            <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">magnitude(...)</code></h5>
-                <p>Return the length of the vector</p>
+                <h5><code class="method-name">magnitudemagnitude() -> float</code></h5>
+                <p>Calculate the length/magnitude of this vector.</p>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  float: The magnitude of the vector</p>
            </div>

            <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">magnitude_squared(...)</code></h5>
-                <p>Return the squared length of the vector</p>
+                <h5><code class="method-name">magnitude_squaredmagnitude_squared() -> float</code></h5>
+                <p>Calculate the squared magnitude of this vector.
+
+
+
+Note:</p>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  float: The squared magnitude (faster than magnitude()) Use this for comparisons to avoid expensive square root calculation.</p>
            </div>

            <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">normalize(...)</code></h5>
-                <p>Return a unit vector in the same direction</p>
+                <h5><code class="method-name">normalizenormalize() -> Vector</code></h5>
+                <p>Return a unit vector in the same direction.
+
+
+
+Note:</p>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  Vector: New normalized vector with magnitude 1.0 For zero vectors (magnitude 0.0), returns a zero vector rather than raising an exception</p>
            </div>
        </div>

--- a/tools/generate_dynamic_docs.py
+++ b/tools/generate_dynamic_docs.py
@ -17,6 +17,8 @@ def transform_doc_links(docstring, format='html', base_url=''):

    Detects pattern: "See also: TEXT (docs/path.md)"
    Transforms to appropriate format for output type.
+
+    For HTML/web formats, properly escapes content before inserting HTML tags.
    """
    if not docstring:
        return docstring
@ -27,14 +29,17 @@ def transform_doc_links(docstring, format='html', base_url=''):
        text, ref = match.group(1).strip(), match.group(2).strip()

        if format == 'html':
-            # Convert docs/foo.md → foo.html
-            href = ref.replace('docs/', '').replace('.md', '.html')
-            return f'<p class="see-also">See also: <a href="{href}">{text}</a></p>'
+            # Convert docs/foo.md → foo.html and escape for safe HTML
+            href = html.escape(ref.replace('docs/', '').replace('.md', '.html'), quote=True)
+            text_escaped = html.escape(text)
+            return f'<p class="see-also">See also: <a href="{href}">{text_escaped}</a></p>'

        elif format == 'web':
-            # Link to hosted docs
+            # Link to hosted docs and escape for safe HTML
            web_path = ref.replace('docs/', '').replace('.md', '')
-            return f'<p class="see-also">See also: <a href="{base_url}/{web_path}">{text}</a></p>'
+            href = html.escape(f"{base_url}/{web_path}", quote=True)
+            text_escaped = html.escape(text)
+            return f'<p class="see-also">See also: <a href="{href}">{text_escaped}</a></p>'

        elif format == 'markdown':
            # Markdown link
@ -44,7 +49,29 @@ def transform_doc_links(docstring, format='html', base_url=''):
            # Keep as plain text for Python docstrings
            return match.group(0)

-    return re.sub(link_pattern, replace_link, docstring)
+    # For HTML formats, escape only the non-link text; replace_link escapes link text and hrefs itself
+    if format in ('html', 'web'):
+        # Split by the link pattern, escape non-link parts, then reassemble
+        parts = []
+        last_end = 0
+
+        for match in re.finditer(link_pattern, docstring):
+            # Escape the text before this match
+            if match.start() > last_end:
+                parts.append(html.escape(docstring[last_end:match.start()]))
+
+            # Process the link (replace_link handles escaping internally)
+            parts.append(replace_link(match))
+            last_end = match.end()
+
+        # Escape any remaining text after the last match
+        if last_end < len(docstring):
+            parts.append(html.escape(docstring[last_end:]))
+
+        return ''.join(parts)
+    else:
+        # For non-HTML formats, just do simple replacement
+        return re.sub(link_pattern, replace_link, docstring)

 # Must be run with McRogueFace as interpreter
 try:
@ -339,8 +366,9 @@ def generate_html_docs():
        <div class="method-section">
            <h3><code class="function-signature">{func_name}{parsed['signature'] if parsed['signature'] else '(...)'}</code></h3>
 """
-        description = transform_doc_links(parsed['description'], format='html')
-        html_content += f"            <p>{description}</p>\n"
+        if parsed['description']:
+            description = transform_doc_links(parsed['description'], format='html')
+            html_content += f"            <p>{description}</p>\n"
        
        if parsed['args']:
            html_content += "            <h4>Arguments:</h4>\n            <ul>\n"