Appendix D: Exercise Solutions

This appendix provides detailed solutions to exercises throughout the guide, along with alternative approaches and performance considerations.


Chapter 2: The Universal Traversal Pattern

Exercise: Identifying the Pattern in Familiar Code

Task: Analyze this code and identify the four questions being answered:

for root, dirs, files in os.walk('/projects'):
    for file in files:
        if file.endswith('.py'):
            print(os.path.join(root, file))

Solution:

  1. WHERE AM I? → root (current directory path)
  2. WHAT'S HERE? → files (list of files in current directory)
  3. WHERE CAN I GO? → dirs (subdirectories to visit next)
  4. WHAT AM I LOOKING FOR? → Files ending with '.py' (file.endswith('.py'))

Key insight: Even though os.walk() handles the traversal mechanics, these four questions are still being answered—just by the library instead of your code.
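
For comparison, here is a minimal hand-rolled sketch of the same search using an explicit stack and os.scandir() instead of os.walk(); each of the four questions shows up as a concrete line of code:

import os

def find_py_files(start_path):
    """Hand-rolled traversal that makes the four questions explicit."""
    to_visit = [start_path]                      # WHERE CAN I GO? (directories left to process)
    while to_visit:
        current = to_visit.pop()                 # WHERE AM I?
        with os.scandir(current) as entries:
            for entry in entries:                # WHAT'S HERE?
                if entry.is_dir(follow_symlinks=False):
                    to_visit.append(entry.path)  # queue subdirectory for later
                elif entry.name.endswith('.py'): # WHAT AM I LOOKING FOR?
                    print(entry.path)

find_py_files('/projects')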


Chapter 4: Mastering os.walk()

Exercise: Building a Smart File Finder

Task: Build a file finder that:

  1. Searches for files matching a set of extensions
  2. Skips noise directories such as .git, __pycache__, .venv, and node_modules
  3. Returns each match as a (relative_path, size) tuple

Solution:

import os

def smart_find(start_path, extensions, skip_dirs=None):
    """
    Find files by extension with smart filtering.

    Args:
        start_path: Where to start searching
        extensions: Tuple of extensions like ('.py', '.txt')
        skip_dirs: Set of directory names to skip

    Returns:
        List of (relative_path, size) tuples
    """
    if skip_dirs is None:
        skip_dirs = {'.git', '__pycache__', '.venv', 'node_modules'}

    results = []

    for root, dirs, files in os.walk(start_path):
        # Prune in place: modifying dirs controls which subdirectories os.walk visits
        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in skip_dirs]

        for file in files:
            if file.endswith(extensions):
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, start_path)
                size = os.path.getsize(full_path)
                results.append((rel_path, size))

    return results

# Usage
files = smart_find('/projects/myapp', ('.py', '.js'))
print(f"Found {len(files)} files")
for path, size in files[:10]:
    print(f"  {path} ({size} bytes)")

Alternative approach using pathlib:

from pathlib import Path

def smart_find_pathlib(start_path, extensions, skip_dirs=None):
    if skip_dirs is None:
        skip_dirs = {'.git', '__pycache__', '.venv'}

    results = []
    start = Path(start_path)

    for path in start.rglob('*'):
        # Skip if in excluded directory
        if any(part in skip_dirs for part in path.parts):
            continue

        if path.is_file() and path.suffix in extensions:
            rel_path = path.relative_to(start)
            size = path.stat().st_size
            results.append((str(rel_path), size))

    return results

Performance comparison: on large trees the os.walk() version is usually faster, because pruning dirs in place means skipped directories are never descended into, while rglob('*') visits every entry and filters matches afterwards. The pathlib version also creates a Path object and performs an is_file() check for each entry.
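
A quick way to check this on your own tree is to time both functions (a sketch; the path is illustrative):

import time

def time_it(func, *args):
    """Run func once and report elapsed wall-clock time."""
    start = time.perf_counter()
    result = func(*args)
    elapsed = time.perf_counter() - start
    print(f"{func.__name__}: {len(result)} files in {elapsed:.3f}s")
    return result

# Compare both implementations on the same tree
time_it(smart_find, '/projects/myapp', ('.py', '.js'))
time_it(smart_find_pathlib, '/projects/myapp', ('.py', '.js'))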

When to use each:

  - os.walk() when the tree is large or you need directory pruning for speed
  - pathlib when the tree is small and you prefer the cleaner, object-oriented Path API over raw speed


Chapter 7: Searching Nested JSON

Exercise: Building a JSON Query Tool

Task: Build a tool that can:

  1. Find all occurrences of a key
  2. Find all values matching a condition
  3. Return results with paths

Solution:

class JSONQuery:
    """Simple JSON query tool."""

    def __init__(self, data):
        self.data = data

    def find_key(self, target_key):
        """Find all values for a specific key."""
        results = []

        def _search(obj, path):
            if isinstance(obj, dict):
                for key, value in obj.items():
                    current_path = path + [key]
                    if key == target_key:
                        results.append({
                            'path': current_path,
                            'value': value
                        })
                    _search(value, current_path)
            elif isinstance(obj, list):
                for idx, item in enumerate(obj):
                    _search(item, path + [idx])

        _search(self.data, [])
        return results

    def find_where(self, condition):
        """Find all values matching condition."""
        results = []

        def _search(obj, path):
            # Check current object
            try:
                if condition(obj):
                    results.append({
                        'path': path,
                        'value': obj
                    })
            except Exception:
                # Condition may raise on unexpected value types; treat as no match
                pass

            # Recurse
            if isinstance(obj, dict):
                for key, value in obj.items():
                    _search(value, path + [key])
            elif isinstance(obj, list):
                for idx, item in enumerate(obj):
                    _search(item, path + [idx])

        _search(self.data, [])
        return results

    def get_path(self, path):
        """Get value at specific path, or None if the path doesn't exist."""
        current = self.data
        for key in path:
            if isinstance(current, dict):
                current = current.get(key)
            elif isinstance(current, list):
                # Guard against non-integer keys and out-of-range indexes
                if not isinstance(key, int) or key >= len(current) or key < -len(current):
                    return None
                current = current[key]
            else:
                return None
            if current is None:
                return None
        return current

# Usage example
data = {
    'users': [
        {'name': 'Alice', 'email': 'alice@example.com', 'age': 30},
        {'name': 'Bob', 'email': 'bob@example.com', 'age': 25}
    ],
    'admin': {
        'email': 'admin@example.com',
        'permissions': ['read', 'write']
    }
}

query = JSONQuery(data)

# Find all emails
emails = query.find_key('email')
print("All emails:")
for result in emails:
    print(f"  {' -> '.join(map(str, result['path']))}: {result['value']}")

# Find users over 25
adults = query.find_where(lambda x: isinstance(x, dict) and x.get('age', 0) > 25)
print("\nUsers over 25:")
for result in adults:
    print(f"  {result['value']['name']}")

Alternative using jsonpath-ng:

from jsonpath_ng import parse

# Find all emails
email_path = parse('$..email')
emails = [match.value for match in email_path.find(data)]

# More complex: find names of users over 25
# (Note: jsonpath-ng has limited support for complex conditions)

When to use custom vs library:

  - Custom traversal: no extra dependency, arbitrary Python conditions (find_where accepts any callable), and full path information for every match
  - jsonpath-ng: queries that fit JSONPath syntax, especially when the query string comes from configuration or must be shared with non-Python tools


Chapter 11: Introduction to Tree-sitter

Exercise: Visualizing a Simple Python File's AST

Task: Parse a Python file and print its AST structure to understand the tree.

Solution:

import tree_sitter_python as tspython
from tree_sitter import Language, Parser

def visualize_ast(source_code, max_depth=3):
    """Print AST structure with indentation."""
    PY_LANGUAGE = Language(tspython.language())
    parser = Parser(PY_LANGUAGE)
    tree = parser.parse(bytes(source_code, 'utf8'))

    def _print_node(node, depth=0):
        if depth > max_depth:
            return

        indent = "  " * depth

        # Show named nodes in bold (simulated with uppercase)
        node_type = node.type.upper() if node.is_named else node.type

        # Get text preview
        text = node.text.decode('utf8')
        preview = text[:30].replace('\n', '\\n')
        if len(text) > 30:
            preview += "..."

        print(f"{indent}{node_type}: {repr(preview)}")

        # Print children
        for child in node.children:
            _print_node(child, depth + 1)

    _print_node(tree.root_node)

# Test with sample code
sample_code = """
def greet(name):
    message = f"Hello, {name}!"
    return message

result = greet("World")
"""

visualize_ast(sample_code)

Output interpretation:

MODULE: 'def greet(name):\n    messag...'
  FUNCTION_DEFINITION: 'def greet(name):\n    messag...'
    def: 'def'
    IDENTIFIER: 'greet'
    PARAMETERS: '(name)'
      (: '('
      IDENTIFIER: 'name'
      ): ')'
    :: ':'
    BLOCK: 'message = f"Hello, {name}!...'
      ...

Key observations:

  - Named nodes (shown in uppercase) carry the grammar's semantic structure: module, function_definition, identifier, block
  - Anonymous nodes are the literal tokens: 'def', '(', ')', ':'
  - node.children includes both kinds, so every character of the source is accounted for somewhere in the tree
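
You can verify the named/anonymous split directly with the node API; this small sketch reuses the parser setup and sample_code from above:

PY_LANGUAGE = Language(tspython.language())
parser = Parser(PY_LANGUAGE)
tree = parser.parse(bytes(sample_code, 'utf8'))

# Grab the function_definition node from the module's children
func = next(c for c in tree.root_node.children if c.type == 'function_definition')
print(func.child_count)                       # every child, literal tokens included
print(func.named_child_count)                 # only the semantic (named) children
print([c.type for c in func.named_children])  # e.g. ['identifier', 'parameters', 'block']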

Extended version with fields:

def visualize_with_fields(source_code, max_depth=3):
    """Show field names for semantic access."""
    PY_LANGUAGE = Language(tspython.language())
    parser = Parser(PY_LANGUAGE)
    tree = parser.parse(bytes(source_code, 'utf8'))

    def _print_node(node, depth=0, field_name=None):
        if depth > max_depth:
            return

        indent = "  " * depth
        prefix = f"[{field_name}] " if field_name else ""

        print(f"{indent}{prefix}{node.type}")

        # For interesting node types, show how to access children
        if node.type == 'function_definition':
            name = node.child_by_field_name('name')
            params = node.child_by_field_name('parameters')
            print(f"{indent}  → name: {name.text.decode('utf8') if name else None}")
            print(f"{indent}  → params: {params.text.decode('utf8') if params else None}")

        # Print children with field names
        for child in node.children:
            # Try to find field name for this child
            field = None
            for candidate in ('name', 'parameters', 'body', 'value', 'left', 'right'):
                # 'candidate' avoids shadowing the field_name parameter above
                if node.child_by_field_name(candidate) == child:
                    field = candidate
                    break
            _print_node(child, depth + 1, field)

    _print_node(tree.root_node)

Chapter 13: Building AST Analysis Tools

Exercise: Building a Function Documentation Extractor

Task: Extract all functions with their docstrings and parameter lists.

Solution:

import tree_sitter_python as tspython
from tree_sitter import Language, Parser

def extract_function_docs(source_code):
    """
    Extract function documentation.

    Returns: [
        {
            'name': 'function_name',
            'params': ['param1', 'param2'],
            'docstring': 'Function description',
            'line': 10
        },
        ...
    ]
    """
    PY_LANGUAGE = Language(tspython.language())
    parser = Parser(PY_LANGUAGE)
    tree = parser.parse(bytes(source_code, 'utf8'))
    source_bytes = bytes(source_code, 'utf8')

    functions = []

    def extract_docstring(body_node):
        """Extract docstring from function body if present."""
        if not body_node or not body_node.children:
            return None

        # First statement might be docstring
        for child in body_node.children:
            if child.type == 'expression_statement':
                # Check if it contains a string
                for expr_child in child.children:
                    if expr_child.type == 'string':
                        # Extract string content (remove quotes)
                        text = expr_child.text.decode('utf8')
                        # Strip surrounding quote characters (handles single,
                        # double, and triple quotes; no unescaping is done)
                        return text.strip('"\'').strip()
        return None

    def extract_params(params_node):
        """Extract parameter names from parameters node."""
        if not params_node:
            return []

        params = []
        for child in params_node.children:
            if child.type == 'identifier':
                params.append(child.text.decode('utf8'))
            elif child.type.endswith('parameter'):
                # Typed and/or defaulted parameters wrap the name: prefer the
                # 'name' field, fall back to the first identifier child
                name_node = child.child_by_field_name('name')
                if name_node is None:
                    name_node = next(
                        (c for c in child.children if c.type == 'identifier'),
                        None
                    )
                if name_node:
                    params.append(name_node.text.decode('utf8'))
        return params

    def traverse(node):
        if node.type == 'function_definition':
            func_info = {}

            # Get name
            name_node = node.child_by_field_name('name')
            if name_node:
                func_info['name'] = name_node.text.decode('utf8')
            else:
                func_info['name'] = '<anonymous>'

            # Get parameters
            params_node = node.child_by_field_name('parameters')
            func_info['params'] = extract_params(params_node)

            # Get docstring
            body_node = node.child_by_field_name('body')
            func_info['docstring'] = extract_docstring(body_node)

            # Get line number
            func_info['line'] = node.start_point[0] + 1

            # Get full source
            func_info['source'] = source_bytes[node.start_byte:node.end_byte].decode('utf8')

            functions.append(func_info)

        # Recurse on children
        for child in node.children:
            traverse(child)

    traverse(tree.root_node)
    return functions

# Test
sample_code = '''
def greet(name, greeting="Hello"):
    """Greet someone with a custom message."""
    return f"{greeting}, {name}!"

def calculate_sum(a, b):
    """
    Calculate the sum of two numbers.

    Args:
        a: First number
        b: Second number
    """
    return a + b

def no_docstring():
    return None
'''

docs = extract_function_docs(sample_code)
for doc in docs:
    print(f"\nFunction: {doc['name']} (line {doc['line']})")
    print(f"Parameters: {', '.join(doc['params']) if doc['params'] else 'none'}")
    print(f"Docstring: {doc['docstring'] or 'No docstring'}")

Alternative: Generate Markdown documentation:

def generate_markdown_docs(source_code, title="API Documentation"):
    """Generate Markdown documentation from Python code."""
    docs = extract_function_docs(source_code)

    md = [f"# {title}\n"]

    for doc in docs:
        md.append(f"## `{doc['name']}()`\n")

        if doc['params']:
            md.append("**Parameters:**")
            for param in doc['params']:
                md.append(f"- `{param}`")
            md.append("")

        if doc['docstring']:
            md.append(doc['docstring'])
            md.append("")

        md.append(f"*Defined at line {doc['line']}*\n")

    return '\n'.join(md)

Performance notes: Tree-sitter parsing is fast (typically a few milliseconds for an ordinary source file), and the traversal visits each node exactly once, so extraction cost is linear in the size of the tree. For many files, simply apply the extractor per file, as in the sketch below.
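
A sketch of running the extractor over a whole directory tree (the start path is illustrative):

import os

def document_tree(start_path):
    """Run extract_function_docs over every .py file under start_path."""
    all_docs = {}
    for root, dirs, files in os.walk(start_path):
        # Prune noise directories before descending
        dirs[:] = [d for d in dirs if d not in {'.git', '__pycache__', '.venv'}]
        for file in files:
            if file.endswith('.py'):
                full_path = os.path.join(root, file)
                with open(full_path, encoding='utf8') as f:
                    source = f.read()
                all_docs[full_path] = extract_function_docs(source)
    return all_docs

docs_by_file = document_tree('/projects/myapp')
print(f"Documented {len(docs_by_file)} files")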


Chapter 15: Problem Type Recognition

Exercise: Classifying Real-World Problems

Task: Classify these problems and recommend approaches:

  1. "Get the user's email from API response"
  2. "Find all TODO comments in a codebase"
  3. "List all files in a directory tree"
  4. "Extract all chapter titles from an HTML book"

Solutions:

Problem 1: Get user email (Path Navigation)

# Known path → Direct access
email = response.get('data', {}).get('user', {}).get('email')

# Or with error handling
try:
    email = response['data']['user']['email']
except (KeyError, TypeError):
    email = None
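
If this kind of lookup repeats throughout a codebase, a tiny helper keeps it readable. The get_nested function below is a hypothetical sketch, not a standard library call:

def get_nested(obj, *keys, default=None):
    """Follow a fixed sequence of dictionary keys, returning default on any miss."""
    for key in keys:
        if isinstance(obj, dict) and key in obj:
            obj = obj[key]
        else:
            return default
    return obj

email = get_nested(response, 'data', 'user', 'email')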

Problem 2: Find TODO comments (Search/Collection)

# Recursive search through the AST (tree-sitter setup as in Chapter 11)
import tree_sitter_python as tspython
from tree_sitter import Language, Parser

def find_todos(source_code):
    PY_LANGUAGE = Language(tspython.language())
    parser = Parser(PY_LANGUAGE)
    tree = parser.parse(bytes(source_code, 'utf8'))
    todos = []

    def traverse(node):
        if node.type == 'comment':
            text = node.text.decode('utf8')
            if 'TODO' in text:
                todos.append({
                    'line': node.start_point[0] + 1,
                    'text': text.strip('#').strip()
                })
        for child in node.children:
            traverse(child)

    traverse(tree.root_node)
    return todos

Problem 3: List files (Library Feature)

# Use os.walk - no custom traversal needed
import os

files = []
for root, dirs, filenames in os.walk(start_path):
    for filename in filenames:
        files.append(os.path.join(root, filename))
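
An equivalent pathlib sketch, if you prefer Path objects (note that rglob visits every entry, so there is no directory pruning here):

from pathlib import Path

files = [str(p) for p in Path(start_path).rglob('*') if p.is_file()]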

Problem 4: Extract chapter titles (Contextual Navigation)

# Need to track which section we're in
from bs4 import BeautifulSoup

def extract_chapters(html):
    soup = BeautifulSoup(html, 'html.parser')
    chapters = []
    current_chapter = None

    for element in soup.find_all(['h1', 'h2', 'p']):
        if element.name == 'h1' and 'chapter' in element.get_text().lower():
            current_chapter = {
                'title': element.get_text(strip=True),
                'sections': []
            }
            chapters.append(current_chapter)
        elif element.name == 'h2' and current_chapter:
            current_chapter['sections'].append(element.get_text(strip=True))

    return chapters
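
Example usage with a small made-up HTML fragment:

sample_html = """
<h1>Chapter 1: Getting Started</h1>
<h2>Installing the Tools</h2>
<h2>First Steps</h2>
<h1>Chapter 2: Going Deeper</h1>
<h2>Advanced Patterns</h2>
"""

for chapter in extract_chapters(sample_html):
    print(chapter['title'])
    for section in chapter['sections']:
        print(f"  - {section}")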

Performance Comparison Examples

Generator vs List Return

Problem: Does it matter if we return a generator vs a list?

Test:

import itertools
import os
import time

# Generator version
def find_files_gen(path, ext):
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(ext):
                yield os.path.join(root, file)

# List version
def find_files_list(path, ext):
    results = []
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(ext):
                results.append(os.path.join(root, file))
    return results

# Test
start = time.time()
gen_files = find_files_gen('/usr', '.py')
first_10 = list(itertools.islice(gen_files, 10))
gen_time = time.time() - start

start = time.time()
list_files = find_files_list('/usr', '.py')
first_10 = list_files[:10]
list_time = time.time() - start

print(f"Generator (first 10): {gen_time:.4f}s")
print(f"List (all files): {list_time:.4f}s")

Results: Generator returns first results immediately; list must complete entirely first.

Use generators when:

  - You only need the first few matches, or may stop early
  - The result set is large and you don't want to hold it all in memory
  - Results feed directly into another streaming step (filtering, writing to a file)

Use lists when:

  - You need len(), sorting, random access, or multiple passes over the results
  - The result set is small enough that materializing it is cheap
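
One related caveat: a generator can be consumed only once, so a second iteration over the same generator object yields nothing. A quick illustration using the functions above:

gen = find_files_gen('/usr', '.py')
first_pass = list(gen)     # consumes the generator
second_pass = list(gen)    # already exhausted: always []

# Need to iterate more than once? Materialize a list instead.
all_files = find_files_list('/usr', '.py')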