Appendix D: Exercise Solutions
This appendix provides detailed solutions to exercises throughout the guide, along with alternative approaches and performance considerations.
Chapter 2: The Universal Traversal Pattern
Exercise: Identifying the Pattern in Familiar Code
Task: Analyze this code and identify the four questions being answered:
# The four traversal questions, answered by os.walk():
#   WHERE AM I? -> root   WHAT'S HERE? -> files
#   WHERE CAN I GO? -> dirs   WHAT AM I LOOKING FOR? -> the '.py' filter
for root, dirs, files in os.walk('/projects'):
    for file in files:
        if file.endswith('.py'):
            print(os.path.join(root, file))
Solution:
- WHERE AM I? → `root` (the current directory path)
- WHAT'S HERE? → `files` (the list of files in the current directory)
- WHERE CAN I GO? → `dirs` (the subdirectories to visit next)
- WHAT AM I LOOKING FOR? → files ending with '.py' (`file.endswith('.py')`)
Key insight: Even though os.walk() handles the traversal mechanics, these four questions are still being answered—just by the library instead of your code.
Chapter 4: Mastering os.walk()
Exercise: Building a Smart File Finder
Task: Build a file finder that:
- Searches for files by multiple extensions
- Skips hidden directories and `__pycache__`
- Shows relative paths from the start location
- Counts total files found
Solution:
import os
def smart_find(start_path, extensions, skip_dirs=None):
    """
    Find files by extension with smart filtering.

    Args:
        start_path: Where to start searching
        extensions: Tuple of extensions like ('.py', '.txt')
        skip_dirs: Set of directory names to skip (defaults to common
            noise directories such as .git and __pycache__)

    Returns:
        List of (relative_path, size) tuples
    """
    if skip_dirs is None:
        skip_dirs = {'.git', '__pycache__', '.venv', 'node_modules'}
    results = []
    for root, dirs, files in os.walk(start_path):
        # Prune in place so os.walk never descends into skipped directories
        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in skip_dirs]
        for file in files:
            # str.endswith accepts a tuple of suffixes
            if file.endswith(extensions):
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, start_path)
                size = os.path.getsize(full_path)
                results.append((rel_path, size))
    return results
# Usage: search a project tree for Python and JavaScript sources
files = smart_find('/projects/myapp', ('.py', '.js'))
print(f"Found {len(files)} files")
for path, size in files[:10]:
    print(f"  {path} ({size} bytes)")
Alternative approach using pathlib:
from pathlib import Path
def smart_find_pathlib(start_path, extensions, skip_dirs=None):
    """
    pathlib-based variant of smart_find.

    Args:
        start_path: Directory to search from.
        extensions: Tuple of suffixes like ('.py', '.txt').
        skip_dirs: Set of directory names to exclude.

    Returns:
        List of (relative_path_str, size) tuples.
    """
    if skip_dirs is None:
        skip_dirs = {'.git', '__pycache__', '.venv'}
    results = []
    start = Path(start_path)
    for path in start.rglob('*'):
        # Compare only the parts *below* start: checking path.parts would
        # also match skip names appearing in start_path's own ancestors,
        # which would wrongly exclude everything.
        rel = path.relative_to(start)
        if any(part in skip_dirs for part in rel.parts):
            continue
        if path.is_file() and path.suffix in extensions:
            results.append((str(rel), path.stat().st_size))
    return results
Performance comparison:
The `os.walk()` version is faster because it skips excluded directories early; the `pathlib` version has cleaner code but must check every path.
When to use each:
- Use `os.walk()` for large directory trees where skipping directories matters
- Use `pathlib` for simpler cases or when you want the Path API
Chapter 7: Searching Nested JSON
Exercise: Building a JSON Query Tool
Task: Build a tool that can:
- Find all occurrences of a key
- Find all values matching a condition
- Return results with paths
Solution:
class JSONQuery:
    """Simple JSON query tool over nested dict/list structures."""

    def __init__(self, data):
        # data: a parsed JSON document (nested dicts/lists/scalars)
        self.data = data

    def find_key(self, target_key):
        """Find all values stored under target_key.

        Returns a list of {'path': [...], 'value': ...} dicts; paths mix
        dict keys (str) and list indices (int).
        """
        results = []

        def _search(obj, path):
            if isinstance(obj, dict):
                for key, value in obj.items():
                    current_path = path + [key]
                    if key == target_key:
                        results.append({
                            'path': current_path,
                            'value': value
                        })
                    # Recurse even on a match: nested objects may repeat the key
                    _search(value, current_path)
            elif isinstance(obj, list):
                for idx, item in enumerate(obj):
                    _search(item, path + [idx])

        _search(self.data, [])
        return results

    def find_where(self, condition):
        """Find all values for which condition(value) is truthy."""
        results = []

        def _search(obj, path):
            # Check the current object first; a predicate that raises
            # (e.g. wrong type) simply counts as "no match".
            try:
                if condition(obj):
                    results.append({
                        'path': path,
                        'value': obj
                    })
            except Exception:
                pass
            # Recurse into containers
            if isinstance(obj, dict):
                for key, value in obj.items():
                    _search(value, path + [key])
            elif isinstance(obj, list):
                for idx, item in enumerate(obj):
                    _search(item, path + [idx])

        _search(self.data, [])
        return results

    def get_path(self, path):
        """Get the value at a specific path, or None if unreachable."""
        current = self.data
        for key in path:
            if isinstance(current, dict):
                current = current.get(key)
            elif isinstance(current, list):
                # Guard against non-int or out-of-range indices instead of
                # raising IndexError/TypeError on malformed paths
                if not isinstance(key, int) or not -len(current) <= key < len(current):
                    return None
                current = current[key]
            else:
                return None
            if current is None:
                return None
        return current
# Usage example
data = {
    'users': [
        {'name': 'Alice', 'email': 'alice@example.com', 'age': 30},
        {'name': 'Bob', 'email': 'bob@example.com', 'age': 25}
    ],
    'admin': {
        'email': 'admin@example.com',
        'permissions': ['read', 'write']
    }
}

query = JSONQuery(data)

# Find all emails
emails = query.find_key('email')
print("All emails:")
for result in emails:
    print(f"  {' -> '.join(map(str, result['path']))}: {result['value']}")

# Find users over 25
adults = query.find_where(lambda x: isinstance(x, dict) and x.get('age', 0) > 25)
print("\nUsers over 25:")
for result in adults:
    print(f"  {result['value']['name']}")
Alternative using jsonpath-ng:
# Third-party alternative (pip install jsonpath-ng)
from jsonpath_ng import parse
# Find all emails ('$..' is recursive descent from the root)
email_path = parse('$..email')
emails = [match.value for match in email_path.find(data)]
# More complex: find names of users over 25
# (Note: jsonpath-ng has limited support for complex conditions)
When to use custom vs library:
- Custom: More control, can implement complex conditions
- jsonpath-ng: Standard syntax, good for simple queries
- Custom: Better for learning the pattern
- Library: Better for production code
Chapter 11: Introduction to Tree-sitter
Exercise: Visualizing a Simple Python File's AST
Task: Parse a Python file and print its AST structure to understand the tree.
Solution:
import tree_sitter_python as tspython
from tree_sitter import Language, Parser
def visualize_ast(source_code, max_depth=3):
    """Print AST structure with indentation.

    Named (semantic) nodes are shown uppercased; anonymous syntax tokens
    keep their literal type. Traversal stops below max_depth.
    """
    PY_LANGUAGE = Language(tspython.language())
    parser = Parser(PY_LANGUAGE)
    # tree-sitter parses bytes, not str
    tree = parser.parse(bytes(source_code, 'utf8'))

    def _print_node(node, depth=0):
        if depth > max_depth:
            return
        indent = " " * depth
        # Show named nodes in bold (simulated with uppercase)
        node_type = node.type.upper() if node.is_named else node.type
        # Get a short, single-line preview of the node's source text
        text = node.text.decode('utf8')
        preview = text[:30].replace('\n', '\\n')
        if len(text) > 30:
            preview += "..."
        print(f"{indent}{node_type}: {repr(preview)}")
        # Print children one level deeper
        for child in node.children:
            _print_node(child, depth + 1)

    _print_node(tree.root_node)
# Test with sample code
sample_code = """
def greet(name):
    message = f"Hello, {name}!"
    return message

result = greet("World")
"""

visualize_ast(sample_code)
Output interpretation:
MODULE: 'def greet(name):\n messag...'
FUNCTION_DEFINITION: 'def greet(name):\n messag...'
def: 'def'
IDENTIFIER: 'greet'
PARAMETERS: '(name)'
(: '('
IDENTIFIER: 'name'
): ')'
:: ':'
BLOCK: 'message = f"Hello, {name}!...'
...
Key observations:
- Named nodes (UPPERCASE) are semantic elements
- Syntax tokens (lowercase) are punctuation
- Each level of indentation is one level deeper in tree
- Text shows what source code the node represents
Extended version with fields:
def visualize_with_fields(source_code, max_depth=3):
    """Show field names for semantic access."""
    PY_LANGUAGE = Language(tspython.language())
    parser = Parser(PY_LANGUAGE)
    tree = parser.parse(bytes(source_code, 'utf8'))

    def _print_node(node, depth=0, field_name=None):
        if depth > max_depth:
            return
        indent = " " * depth
        prefix = f"[{field_name}] " if field_name else ""
        print(f"{indent}{prefix}{node.type}")
        # For interesting node types, show how to access children by field
        if node.type == 'function_definition':
            name = node.child_by_field_name('name')
            params = node.child_by_field_name('parameters')
            print(f"{indent} → name: {name.text.decode('utf8') if name else None}")
            print(f"{indent} → params: {params.text.decode('utf8') if params else None}")
        # Print children with field names. Use a distinct loop variable so
        # we don't shadow (and clobber) the field_name parameter.
        for child in node.children:
            field = None
            for candidate in ['name', 'parameters', 'body', 'value', 'left', 'right']:
                if node.child_by_field_name(candidate) == child:
                    field = candidate
                    break
            _print_node(child, depth + 1, field)

    _print_node(tree.root_node)
Chapter 13: Building AST Analysis Tools
Exercise: Building a Function Documentation Extractor
Task: Extract all functions with their docstrings and parameter lists.
Solution:
import tree_sitter_python as tspython
from tree_sitter import Language, Parser
def extract_function_docs(source_code):
    """
    Extract function documentation.

    Returns: [
        {
            'name': 'function_name',
            'params': ['param1', 'param2'],
            'docstring': 'Function description',
            'line': 10
        },
        ...
    ]
    """
    PY_LANGUAGE = Language(tspython.language())
    parser = Parser(PY_LANGUAGE)
    tree = parser.parse(bytes(source_code, 'utf8'))
    # Keep the source as bytes so node byte offsets index it directly
    source_bytes = bytes(source_code, 'utf8')
    functions = []

    def extract_docstring(body_node):
        """Extract docstring from function body if present."""
        if not body_node or not body_node.children:
            return None
        # Only the *first* statement can be a docstring; scanning the whole
        # body would wrongly pick up string expressions deeper in the function.
        for child in body_node.children:
            if child.type == 'expression_statement':
                for expr_child in child.children:
                    if expr_child.type == 'string':
                        # Extract string content (strip quotes and whitespace)
                        text = expr_child.text.decode('utf8')
                        return text.strip('"\'').strip()
                return None  # first statement is an expression but not a string
            if child.is_named:
                return None  # first real statement is not an expression
        return None

    def extract_params(params_node):
        """Extract parameter names from a parameters node."""
        if not params_node:
            return []
        params = []
        for child in params_node.children:
            if child.type == 'identifier':
                params.append(child.text.decode('utf8'))
            elif child.type in ('typed_parameter', 'default_parameter',
                                'typed_default_parameter'):
                # Parameter with a type annotation and/or a default value
                name_node = child.child_by_field_name('name')
                if name_node:
                    params.append(name_node.text.decode('utf8'))
        return params

    def traverse(node):
        if node.type == 'function_definition':
            func_info = {}
            # Name
            name_node = node.child_by_field_name('name')
            func_info['name'] = (name_node.text.decode('utf8')
                                 if name_node else '<anonymous>')
            # Parameters
            params_node = node.child_by_field_name('parameters')
            func_info['params'] = extract_params(params_node)
            # Docstring
            body_node = node.child_by_field_name('body')
            func_info['docstring'] = extract_docstring(body_node)
            # tree-sitter rows are 0-based; report 1-based line numbers
            func_info['line'] = node.start_point[0] + 1
            # Full source of the definition
            func_info['source'] = source_bytes[node.start_byte:node.end_byte].decode('utf8')
            functions.append(func_info)
        # Recurse on children (also finds nested/inner functions)
        for child in node.children:
            traverse(child)

    traverse(tree.root_node)
    return functions
# Test
sample_code = '''
def greet(name, greeting="Hello"):
    """Greet someone with a custom message."""
    return f"{greeting}, {name}!"

def calculate_sum(a, b):
    """
    Calculate the sum of two numbers.

    Args:
        a: First number
        b: Second number
    """
    return a + b

def no_docstring():
    return None
'''

docs = extract_function_docs(sample_code)
for doc in docs:
    print(f"\nFunction: {doc['name']} (line {doc['line']})")
    print(f"Parameters: {', '.join(doc['params']) if doc['params'] else 'none'}")
    print(f"Docstring: {doc['docstring'] or 'No docstring'}")
Alternative: Generate Markdown documentation:
def generate_markdown_docs(source_code, title="API Documentation"):
    """Generate Markdown documentation from Python code.

    Builds on extract_function_docs(): one H2 section per function with
    its parameters, docstring, and definition line.
    """
    docs = extract_function_docs(source_code)
    md = [f"# {title}\n"]
    for doc in docs:
        md.append(f"## `{doc['name']}()`\n")
        if doc['params']:
            md.append("**Parameters:**")
            for param in doc['params']:
                md.append(f"- `{param}`")
            md.append("")
        if doc['docstring']:
            md.append(doc['docstring'])
            md.append("")
        md.append(f"*Defined at line {doc['line']}*\n")
    return '\n'.join(md)
Performance notes:
- Tree-sitter parsing is fast (handles files with 10,000+ lines easily)
- Keep source as bytes to avoid encoding issues
- This approach works for any language tree-sitter supports
Chapter 15: Problem Type Recognition
Exercise: Classifying Real-World Problems
Task: Classify these problems and recommend approaches:
- "Get the user's email from API response"
- "Find all TODO comments in a codebase"
- "List all files in a directory tree"
- "Extract all chapter titles from an HTML book"
Solutions:
Problem 1: Get user email (Path Navigation)
# Known path → Direct access (returns None at the first missing key)
email = response.get('data', {}).get('user', {}).get('email')

# Or EAFP with explicit error handling
try:
    email = response['data']['user']['email']
except (KeyError, TypeError):
    email = None
Problem 2: Find TODO comments (Search/Collection)
# Recursive search through the AST for comment nodes containing TODO
def find_todos(source_code):
    parser = Parser(PY_LANGUAGE)
    tree = parser.parse(bytes(source_code, 'utf8'))
    todos = []

    def traverse(node):
        if node.type == 'comment':
            text = node.text.decode('utf8')
            if 'TODO' in text:
                todos.append({
                    # rows are 0-based; report 1-based line numbers
                    'line': node.start_point[0] + 1,
                    'text': text.strip('#').strip()
                })
        for child in node.children:
            traverse(child)

    traverse(tree.root_node)
    return todos
Problem 3: List files (Library Feature)
# Use os.walk - no custom traversal needed
import os

files = []
for root, dirs, filenames in os.walk(start_path):
    for filename in filenames:
        files.append(os.path.join(root, filename))
Problem 4: Extract chapter titles (Contextual Navigation)
# Need to track which chapter we're in while walking the document in order
from bs4 import BeautifulSoup

def extract_chapters(html):
    """Group h2 section titles under their preceding chapter h1."""
    soup = BeautifulSoup(html, 'html.parser')
    chapters = []
    current_chapter = None
    # find_all preserves document order, so headings arrive in sequence
    for element in soup.find_all(['h1', 'h2', 'p']):
        if element.name == 'h1' and 'chapter' in element.get_text().lower():
            current_chapter = {
                'title': element.get_text(strip=True),
                'sections': []
            }
            chapters.append(current_chapter)
        elif element.name == 'h2' and current_chapter:
            # h2 before any chapter h1 is ignored by design
            current_chapter['sections'].append(element.get_text(strip=True))
    return chapters
Performance Comparison Examples
Generator vs List Return
Problem: Does it matter if we return a generator vs a list?
Test:
import time
import os
# Generator version: yields matches lazily as the walk proceeds
def find_files_gen(path, ext):
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(ext):
                yield os.path.join(root, file)
# List version: materializes every match before returning
def find_files_list(path, ext):
    results = []
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(ext):
                results.append(os.path.join(root, file))
    return results
# Test: time-to-first-10-results (generator) vs. full collection (list)
import itertools  # required for islice; missing from the original snippet

start = time.time()
gen_files = find_files_gen('/usr', '.py')
first_10 = list(itertools.islice(gen_files, 10))
gen_time = time.time() - start

start = time.time()
list_files = find_files_list('/usr', '.py')
first_10 = list_files[:10]
list_time = time.time() - start

print(f"Generator (first 10): {gen_time:.4f}s")
print(f"List (all files): {list_time:.4f}s")
Results: Generator returns first results immediately; list must complete entirely first.
Use generators when:
- Processing results one at a time
- Might not need all results
- Memory is constrained
Use lists when:
- Need to iterate multiple times
- Need length before iterating
- Results fit comfortably in memory