From 7f253da58127967b5de3935f1517d0877b44d446 Mon Sep 17 00:00:00 2001
From: John McCardle <mccardle.john@gmail.com>
Date: Thu, 30 Oct 2025 11:48:09 -0400
Subject: [PATCH] fix: escape HTML in descriptions before link transformation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes an HTML injection vulnerability in generate_dynamic_docs.py where
description text was not HTML-escaped before being inserted into HTML
output. Special characters such as <, >, and & could be interpreted as
markup.

Changes:
- Modified transform_doc_links() to escape all non-link text when
  format='html' or format='web'
- Link text and hrefs are also properly escaped
- Non-HTML formats (markdown, python) remain unchanged
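- Added proper handling for descriptions with mixed plain text and links
- generate_html_docs() no longer emits an empty <p> when a function has
  no description
- Regenerated docs/api_reference_dynamic.html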

The fix splits docstrings into link and non-link segments, escapes
non-link segments, and properly escapes content within link patterns.

Tested with a comprehensive test suite covering:
- Basic HTML special characters
- Special chars with links
- Special chars in link text
- Multiple links with special chars

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 docs/api_reference_dynamic.html | 56 +++++++++++++++++++++++----------
 tools/generate_dynamic_docs.py  | 44 +++++++++++++++++++++-----
 2 files changed, 75 insertions(+), 25 deletions(-)
diff --git a/docs/api_reference_dynamic.html b/docs/api_reference_dynamic.html
index 82c247d..c7656ba 100644
--- a/docs/api_reference_dynamic.html
+++ b/docs/api_reference_dynamic.html
@@ -108,7 +108,7 @@
 <body>
     <div class="container">
         <h1>McRogueFace API Reference</h1>
-        <p><em>Generated on 2025-07-15 21:28:24</em></p>
+        <p><em>Generated on 2025-10-30 11:47:05</em></p>
         <p><em>This documentation was dynamically generated from the compiled module.</em></p>
         
         <div class="toc">
@@ -717,8 +717,8 @@ Attributes:
             </div>
 
             <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">compute_fovcompute_fov(x: int, y: int, radius: int = 0, light_walls: bool = True, algorithm: int = FOV_BASIC) -> None</code></h5>
-                <p>Compute field of view from a position.</p>
+                <h5><code class="method-name">compute_fovcompute_fov(x: int, y: int, radius: int = 0, light_walls: bool = True, algorithm: int = FOV_BASIC) -> List[Tuple[int, int, bool, bool]]</code></h5>
+                <p>Compute field of view from a position and return visible cells.</p>
                 <div style='margin-left: 20px;'>
                     <div><span class='arg-name'>x</span>: X coordinate of the viewer</div>
                     <div><span class='arg-name'>y</span>: Y coordinate of the viewer</div>
@@ -726,6 +726,7 @@ Attributes:
                     <div><span class='arg-name'>light_walls</span>: Whether walls are lit when visible</div>
                     <div><span class='arg-name'>algorithm</span>: FOV algorithm to use (FOV_BASIC, FOV_DIAMOND, FOV_SHADOW, FOV_PERMISSIVE_0-8)</div>
                 </div>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  List of tuples (x, y, visible, discovered) for all visible cells: - x, y: Grid coordinates - visible: True (all returned cells are visible) - discovered: True (FOV implies discovery)</p>
             </div>
 
             <div style="margin-left: 20px; margin-bottom: 15px;">
@@ -981,38 +982,59 @@ Has no effect if the timer is not paused.</p>
             <h4>Methods:</h4>
 
             <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">angle(...)</code></h5>
-                <p>Return the angle in radians from the positive X axis</p>
+                <h5><code class="method-name">angleangle() -> float</code></h5>
+                <p>Get the angle of this vector in radians.</p>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  float: Angle in radians from positive x-axis</p>
             </div>
 
             <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">copy(...)</code></h5>
-                <p>Return a copy of this vector</p>
+                <h5><code class="method-name">copycopy() -> Vector</code></h5>
+                <p>Create a copy of this vector.</p>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  Vector: New Vector object with same x and y values</p>
             </div>
 
             <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">distance_to(...)</code></h5>
-                <p>Return the distance to another vector</p>
+                <h5><code class="method-name">distance_todistance_to(other: Vector) -> float</code></h5>
+                <p>Calculate the distance to another vector.</p>
+                <div style='margin-left: 20px;'>
+                    <div><span class='arg-name'>other</span>: The other vector</div>
+                </div>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  float: Distance between the two vectors</p>
             </div>
 
             <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">dot(...)</code></h5>
-                <p>Return the dot product with another vector</p>
+                <h5><code class="method-name">dotdot(other: Vector) -> float</code></h5>
+                <p>Calculate the dot product with another vector.</p>
+                <div style='margin-left: 20px;'>
+                    <div><span class='arg-name'>other</span>: The other vector</div>
+                </div>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  float: Dot product of the two vectors</p>
             </div>
 
             <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">magnitude(...)</code></h5>
-                <p>Return the length of the vector</p>
+                <h5><code class="method-name">magnitudemagnitude() -> float</code></h5>
+                <p>Calculate the length/magnitude of this vector.</p>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  float: The magnitude of the vector</p>
             </div>
 
             <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">magnitude_squared(...)</code></h5>
-                <p>Return the squared length of the vector</p>
+                <h5><code class="method-name">magnitude_squaredmagnitude_squared() -> float</code></h5>
+                <p>Calculate the squared magnitude of this vector.
+
+
+
+Note:</p>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  float: The squared magnitude (faster than magnitude()) Use this for comparisons to avoid expensive square root calculation.</p>
             </div>
 
             <div style="margin-left: 20px; margin-bottom: 15px;">
-                <h5><code class="method-name">normalize(...)</code></h5>
-                <p>Return a unit vector in the same direction</p>
+                <h5><code class="method-name">normalizenormalize() -> Vector</code></h5>
+                <p>Return a unit vector in the same direction.
+
+
+
+Note:</p>
+                <p style='margin-left: 20px;'><span class='returns'>Returns:</span>  Vector: New normalized vector with magnitude 1.0 For zero vectors (magnitude 0.0), returns a zero vector rather than raising an exception</p>
             </div>
         </div>
 
diff --git a/tools/generate_dynamic_docs.py b/tools/generate_dynamic_docs.py
index 4b79315..426fdcd 100644
--- a/tools/generate_dynamic_docs.py
+++ b/tools/generate_dynamic_docs.py
@@ -17,6 +17,8 @@ def transform_doc_links(docstring, format='html', base_url=''):
 
     Detects pattern: "See also: TEXT (docs/path.md)"
     Transforms to appropriate format for output type.
+
+    For HTML/web formats, properly escapes content before inserting HTML tags.
     """
     if not docstring:
         return docstring
@@ -27,14 +29,17 @@ def transform_doc_links(docstring, format='html', base_url=''):
         text, ref = match.group(1).strip(), match.group(2).strip()
 
         if format == 'html':
-            # Convert docs/foo.md → foo.html
-            href = ref.replace('docs/', '').replace('.md', '.html')
-            return f'<p class="see-also">See also: <a href="{href}">{text}</a></p>'
+            # Convert docs/foo.md → foo.html and escape for safe HTML
+            href = html.escape(ref.replace('docs/', '').replace('.md', '.html'), quote=True)
+            text_escaped = html.escape(text)
+            return f'<p class="see-also">See also: <a href="{href}">{text_escaped}</a></p>'
 
         elif format == 'web':
-            # Link to hosted docs
+            # Link to hosted docs and escape for safe HTML
             web_path = ref.replace('docs/', '').replace('.md', '')
-            return f'<p class="see-also">See also: <a href="{base_url}/{web_path}">{text}</a></p>'
+            href = html.escape(f"{base_url}/{web_path}", quote=True)
+            text_escaped = html.escape(text)
+            return f'<p class="see-also">See also: <a href="{href}">{text_escaped}</a></p>'
 
         elif format == 'markdown':
             # Markdown link
@@ -44,7 +49,29 @@ def transform_doc_links(docstring, format='html', base_url=''):
             # Keep as plain text for Python docstrings
             return match.group(0)
 
-    return re.sub(link_pattern, replace_link, docstring)
+    # For HTML formats, escape the text outside link matches; links are escaped in replace_link
+    if format in ('html', 'web'):
+        # Split by the link pattern, escape non-link parts, then reassemble
+        parts = []
+        last_end = 0
+
+        for match in re.finditer(link_pattern, docstring):
+            # Escape the text before this match
+            if match.start() > last_end:
+                parts.append(html.escape(docstring[last_end:match.start()]))
+
+            # Process the link (replace_link handles escaping internally)
+            parts.append(replace_link(match))
+            last_end = match.end()
+
+        # Escape any remaining text after the last match
+        if last_end < len(docstring):
+            parts.append(html.escape(docstring[last_end:]))
+
+        return ''.join(parts)
+    else:
+        # For non-HTML formats, just do simple replacement
+        return re.sub(link_pattern, replace_link, docstring)
 
 # Must be run with McRogueFace as interpreter
 try:
@@ -339,8 +366,9 @@ def generate_html_docs():
         <div class="method-section">
             <h3><code class="function-signature">{func_name}{parsed['signature'] if parsed['signature'] else '(...)'}</code></h3>
 """
-        description = transform_doc_links(parsed['description'], format='html')
-        html_content += f"            <p>{description}</p>\n"
+        if parsed['description']:
+            description = transform_doc_links(parsed['description'], format='html')
+            html_content += f"            <p>{description}</p>\n"
         
         if parsed['args']:
             html_content += "            <h4>Arguments:</h4>\n            <ul>\n"