Appendix D: Exercise Solutions
This appendix provides detailed solutions to exercises throughout the guide, along with alternative approaches and performance considerations.
Chapter 2: The Universal Traversal Pattern
Exercise: Identifying the Pattern in Familiar Code
Task: Analyze this code and identify the four questions being answered:
# The four traversal questions, answered by os.walk():
#   WHERE AM I? -> root   WHAT'S HERE? -> files
#   WHERE CAN I GO? -> dirs   WHAT AM I LOOKING FOR? -> the '.py' filter
for root, dirs, files in os.walk('/projects'):
    for file in files:
        if file.endswith('.py'):
            print(os.path.join(root, file))
Solution:
- WHERE AM I? → `root` (the current directory path)
- WHAT'S HERE? → `files` (the list of files in the current directory)
- WHERE CAN I GO? → `dirs` (the subdirectories to visit next)
- WHAT AM I LOOKING FOR? → files ending with '.py' (`file.endswith('.py')`)
Key insight: Even though os.walk() handles the traversal mechanics, these four questions are still being answered—just by the library instead of your code.
Chapter 4: Mastering os.walk()
Exercise: Building a Smart File Finder
Task: Build a file finder that:
- Searches for files by multiple extensions
- Skips hidden directories and `__pycache__`
- Shows relative paths from the start location
- Counts total files found
Solution:
import os
def smart_find(start_path, extensions, skip_dirs=None):
    """
    Find files by extension with smart filtering.

    Args:
        start_path: Where to start searching
        extensions: Tuple of extensions like ('.py', '.txt')
        skip_dirs: Set of directory names to skip (defaults to common
            noise directories such as .git and __pycache__)

    Returns:
        List of (relative_path, size) tuples
    """
    if skip_dirs is None:
        skip_dirs = {'.git', '__pycache__', '.venv', 'node_modules'}
    results = []
    for root, dirs, files in os.walk(start_path):
        # Prune in place so os.walk never descends into skipped directories
        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in skip_dirs]
        for file in files:
            # str.endswith accepts a tuple of suffixes
            if file.endswith(extensions):
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, start_path)
                size = os.path.getsize(full_path)
                results.append((rel_path, size))
    return results
# Usage: search a project tree for Python and JavaScript sources
files = smart_find('/projects/myapp', ('.py', '.js'))
print(f"Found {len(files)} files")
for path, size in files[:10]:
    print(f"  {path} ({size} bytes)")
Alternative approach using pathlib:
from pathlib import Path
def smart_find_pathlib(start_path, extensions, skip_dirs=None):
    """
    pathlib-based variant of smart_find.

    Args:
        start_path: Directory to search from.
        extensions: Tuple of suffixes like ('.py', '.txt').
        skip_dirs: Set of directory names to exclude.

    Returns:
        List of (relative_path_str, size) tuples.
    """
    if skip_dirs is None:
        skip_dirs = {'.git', '__pycache__', '.venv'}
    results = []
    start = Path(start_path)
    for path in start.rglob('*'):
        # Compare only the parts *below* start: checking path.parts would
        # also match skip names appearing in start_path's own ancestors,
        # which would wrongly exclude everything.
        rel = path.relative_to(start)
        if any(part in skip_dirs for part in rel.parts):
            continue
        if path.is_file() and path.suffix in extensions:
            results.append((str(rel), path.stat().st_size))
    return results
Performance comparison:
The `os.walk()` version is faster because it skips excluded directories early; the `pathlib` version has cleaner code but must check every path.
When to use each:
- Use `os.walk()` for large directory trees where skipping directories matters
- Use `pathlib` for simpler cases or when you want the Path API
Chapter 7: Searching Nested JSON
Exercise: Building a JSON Query Tool
Task: Build a tool that can:
- Find all occurrences of a key
- Find all values matching a condition
- Return results with paths
Solution:
class JSONQuery:
    """Simple JSON query tool over nested dict/list structures."""

    def __init__(self, data):
        # data: a parsed JSON document (nested dicts/lists/scalars)
        self.data = data

    def find_key(self, target_key):
        """Find all values stored under target_key.

        Returns a list of {'path': [...], 'value': ...} dicts; paths mix
        dict keys (str) and list indices (int).
        """
        results = []

        def _search(obj, path):
            if isinstance(obj, dict):
                for key, value in obj.items():
                    current_path = path + [key]
                    if key == target_key:
                        results.append({
                            'path': current_path,
                            'value': value
                        })
                    # Recurse even on a match: nested objects may repeat the key
                    _search(value, current_path)
            elif isinstance(obj, list):
                for idx, item in enumerate(obj):
                    _search(item, path + [idx])

        _search(self.data, [])
        return results

    def find_where(self, condition):
        """Find all values for which condition(value) is truthy."""
        results = []

        def _search(obj, path):
            # Check the current object first; a predicate that raises
            # (e.g. wrong type) simply counts as "no match".
            try:
                if condition(obj):
                    results.append({
                        'path': path,
                        'value': obj
                    })
            except Exception:
                pass
            # Recurse into containers
            if isinstance(obj, dict):
                for key, value in obj.items():
                    _search(value, path + [key])
            elif isinstance(obj, list):
                for idx, item in enumerate(obj):
                    _search(item, path + [idx])

        _search(self.data, [])
        return results

    def get_path(self, path):
        """Get the value at a specific path, or None if unreachable."""
        current = self.data
        for key in path:
            if isinstance(current, dict):
                current = current.get(key)
            elif isinstance(current, list):
                # Guard against non-int or out-of-range indices instead of
                # raising IndexError/TypeError on malformed paths
                if not isinstance(key, int) or not -len(current) <= key < len(current):
                    return None
                current = current[key]
            else:
                return None
            if current is None:
                return None
        return current
# Usage example
data = {
    'users': [
        {'name': 'Alice', 'email': 'alice@example.com', 'age': 30},
        {'name': 'Bob', 'email': 'bob@example.com', 'age': 25}
    ],
    'admin': {
        'email': 'admin@example.com',
        'permissions': ['read', 'write']
    }
}

query = JSONQuery(data)

# Find all emails
emails = query.find_key('email')
print("All emails:")
for result in emails:
    print(f"  {' -> '.join(map(str, result['path']))}: {result['value']}")

# Find users over 25
adults = query.find_where(lambda x: isinstance(x, dict) and x.get('age', 0) > 25)
print("\nUsers over 25:")
for result in adults:
    print(f"  {result['value']['name']}")
Alternative using jsonpath-ng:
# Third-party alternative (pip install jsonpath-ng)
from jsonpath_ng import parse
# Find all emails ('$..' is recursive descent from the root)
email_path = parse('$..email')
emails = [match.value for match in email_path.find(data)]
# More complex: find names of users over 25
# (Note: jsonpath-ng has limited support for complex conditions)
When to use custom vs library:
- Custom: More control, can implement complex conditions
- jsonpath-ng: Standard syntax, good for simple queries
- Custom: Better for learning the pattern
- Library: Better for production code
Chapter 11: Introduction to Tree-sitter
Exercise: Visualizing a Simple Python File's AST
Task: Parse a Python file and print its AST structure to understand the tree.
Solution:
import tree_sitter_python as tspython
from tree_sitter import Language, Parser
def visualize_ast(source_code, max_depth=3):
    """Print AST structure with indentation.

    Named (semantic) nodes are shown uppercased; anonymous syntax tokens
    keep their literal type. Traversal stops below max_depth.
    """
    PY_LANGUAGE = Language(tspython.language())
    parser = Parser(PY_LANGUAGE)
    # tree-sitter parses bytes, not str
    tree = parser.parse(bytes(source_code, 'utf8'))

    def _print_node(node, depth=0):
        if depth > max_depth:
            return
        indent = " " * depth
        # Show named nodes in bold (simulated with uppercase)
        node_type = node.type.upper() if node.is_named else node.type
        # Get a short, single-line preview of the node's source text
        text = node.text.decode('utf8')
        preview = text[:30].replace('\n', '\\n')
        if len(text) > 30:
            preview += "..."
        print(f"{indent}{node_type}: {repr(preview)}")
        # Print children one level deeper
        for child in node.children:
            _print_node(child, depth + 1)

    _print_node(tree.root_node)
# Test with sample code
sample_code = """
def greet(name):
    message = f"Hello, {name}!"
    return message

result = greet("World")
"""

visualize_ast(sample_code)
Output interpretation:
MODULE: 'def greet(name):\n messag...'
FUNCTION_DEFINITION: 'def greet(name):\n messag...'
def: 'def'
IDENTIFIER: 'greet'
PARAMETERS: '(name)'
(: '('
IDENTIFIER: 'name'
): ')'
:: ':'
BLOCK: 'message = f"Hello, {name}!...'
...
Key observations:
- Named nodes (UPPERCASE) are semantic elements
- Syntax tokens (lowercase) are punctuation
- Each level of indentation is one level deeper in tree
- Text shows what source code the node represents
Extended version with fields:
def visualize_with_fields(source_code, max_depth=3):
    """Show field names for semantic access."""
    PY_LANGUAGE = Language(tspython.language())
    parser = Parser(PY_LANGUAGE)
    tree = parser.parse(bytes(source_code, 'utf8'))

    def _print_node(node, depth=0, field_name=None):
        if depth > max_depth:
            return
        indent = " " * depth
        prefix = f"[{field_name}] " if field_name else ""
        print(f"{indent}{prefix}{node.type}")
        # For interesting node types, show how to access children by field
        if node.type == 'function_definition':
            name = node.child_by_field_name('name')
            params = node.child_by_field_name('parameters')
            print(f"{indent} → name: {name.text.decode('utf8') if name else None}")
            print(f"{indent} → params: {params.text.decode('utf8') if params else None}")
        # Print children with field names. Use a distinct loop variable so
        # we don't shadow (and clobber) the field_name parameter.
        for child in node.children:
            field = None
            for candidate in ['name', 'parameters', 'body', 'value', 'left', 'right']:
                if node.child_by_field_name(candidate) == child:
                    field = candidate
                    break
            _print_node(child, depth + 1, field)

    _print_node(tree.root_node)
Chapter 13: Building AST Analysis Tools
Exercise: Building a Function Documentation Extractor
Task: Extract all functions with their docstrings and parameter lists.
Solution:
import tree_sitter_python as tspython
from tree_sitter import Language, Parser
def extract_function_docs(source_code):
    """
    Extract function documentation.

    Returns: [
        {
            'name': 'function_name',
            'params': ['param1', 'param2'],
            'docstring': 'Function description',
            'line': 10
        },
        ...
    ]
    """
    PY_LANGUAGE = Language(tspython.language())
    parser = Parser(PY_LANGUAGE)
    tree = parser.parse(bytes(source_code, 'utf8'))
    # Keep the source as bytes so node byte offsets index it directly
    source_bytes = bytes(source_code, 'utf8')
    functions = []

    def extract_docstring(body_node):
        """Extract docstring from function body if present."""
        if not body_node or not body_node.children:
            return None
        # Only the *first* statement can be a docstring; scanning the whole
        # body would wrongly pick up string expressions deeper in the function.
        for child in body_node.children:
            if child.type == 'expression_statement':
                for expr_child in child.children:
                    if expr_child.type == 'string':
                        # Extract string content (strip quotes and whitespace)
                        text = expr_child.text.decode('utf8')
                        return text.strip('"\'').strip()
                return None  # first statement is an expression but not a string
            if child.is_named:
                return None  # first real statement is not an expression
        return None

    def extract_params(params_node):
        """Extract parameter names from a parameters node."""
        if not params_node:
            return []
        params = []
        for child in params_node.children:
            if child.type == 'identifier':
                params.append(child.text.decode('utf8'))
            elif child.type in ('typed_parameter', 'default_parameter',
                                'typed_default_parameter'):
                # Parameter with a type annotation and/or a default value
                name_node = child.child_by_field_name('name')
                if name_node:
                    params.append(name_node.text.decode('utf8'))
        return params

    def traverse(node):
        if node.type == 'function_definition':
            func_info = {}
            # Name
            name_node = node.child_by_field_name('name')
            func_info['name'] = (name_node.text.decode('utf8')
                                 if name_node else '<anonymous>')
            # Parameters
            params_node = node.child_by_field_name('parameters')
            func_info['params'] = extract_params(params_node)
            # Docstring
            body_node = node.child_by_field_name('body')
            func_info['docstring'] = extract_docstring(body_node)
            # tree-sitter rows are 0-based; report 1-based line numbers
            func_info['line'] = node.start_point[0] + 1
            # Full source of the definition
            func_info['source'] = source_bytes[node.start_byte:node.end_byte].decode('utf8')
            functions.append(func_info)
        # Recurse on children (also finds nested/inner functions)
        for child in node.children:
            traverse(child)

    traverse(tree.root_node)
    return functions
# Test
sample_code = '''
def greet(name, greeting="Hello"):
    """Greet someone with a custom message."""
    return f"{greeting}, {name}!"

def calculate_sum(a, b):
    """
    Calculate the sum of two numbers.

    Args:
        a: First number
        b: Second number
    """
    return a + b

def no_docstring():
    return None
'''

docs = extract_function_docs(sample_code)
for doc in docs:
    print(f"\nFunction: {doc['name']} (line {doc['line']})")
    print(f"Parameters: {', '.join(doc['params']) if doc['params'] else 'none'}")
    print(f"Docstring: {doc['docstring'] or 'No docstring'}")
Alternative: Generate Markdown documentation:
def generate_markdown_docs(source_code, title="API Documentation"):
    """Generate Markdown documentation from Python code.

    Builds on extract_function_docs(): one H2 section per function with
    its parameters, docstring, and definition line.
    """
    docs = extract_function_docs(source_code)
    md = [f"# {title}\n"]
    for doc in docs:
        md.append(f"## `{doc['name']}()`\n")
        if doc['params']:
            md.append("**Parameters:**")
            for param in doc['params']:
                md.append(f"- `{param}`")
            md.append("")
        if doc['docstring']:
            md.append(doc['docstring'])
            md.append("")
        md.append(f"*Defined at line {doc['line']}*\n")
    return '\n'.join(md)
Performance notes:
- Tree-sitter parsing is fast (handles files with 10,000+ lines easily)
- Keep source as bytes to avoid encoding issues
- This approach works for any language tree-sitter supports
Chapter 15: Problem Type Recognition
Exercise: Classifying Real-World Problems
Task: Classify these problems and recommend approaches:
- "Get the user's email from API response"
- "Find all TODO comments in a codebase"
- "List all files in a directory tree"
- "Extract all chapter titles from an HTML book"
Solutions:
Problem 1: Get user email (Path Navigation)
# Known path → Direct access (returns None at the first missing key)
email = response.get('data', {}).get('user', {}).get('email')

# Or EAFP with explicit error handling
try:
    email = response['data']['user']['email']
except (KeyError, TypeError):
    email = None
Problem 2: Find TODO comments (Search/Collection)
# Recursive search through the AST for comment nodes containing TODO
def find_todos(source_code):
    parser = Parser(PY_LANGUAGE)
    tree = parser.parse(bytes(source_code, 'utf8'))
    todos = []

    def traverse(node):
        if node.type == 'comment':
            text = node.text.decode('utf8')
            if 'TODO' in text:
                todos.append({
                    # rows are 0-based; report 1-based line numbers
                    'line': node.start_point[0] + 1,
                    'text': text.strip('#').strip()
                })
        for child in node.children:
            traverse(child)

    traverse(tree.root_node)
    return todos
Problem 3: List files (Library Feature)
# Use os.walk - no custom traversal needed
import os

files = []
for root, dirs, filenames in os.walk(start_path):
    for filename in filenames:
        files.append(os.path.join(root, filename))
Problem 4: Extract chapter titles (Contextual Navigation)
# Need to track which chapter we're in while walking the document in order
from bs4 import BeautifulSoup

def extract_chapters(html):
    """Group h2 section titles under their preceding chapter h1."""
    soup = BeautifulSoup(html, 'html.parser')
    chapters = []
    current_chapter = None
    # find_all preserves document order, so headings arrive in sequence
    for element in soup.find_all(['h1', 'h2', 'p']):
        if element.name == 'h1' and 'chapter' in element.get_text().lower():
            current_chapter = {
                'title': element.get_text(strip=True),
                'sections': []
            }
            chapters.append(current_chapter)
        elif element.name == 'h2' and current_chapter:
            # h2 before any chapter h1 is ignored by design
            current_chapter['sections'].append(element.get_text(strip=True))
    return chapters
Performance Comparison Examples
Generator vs List Return
Problem: Does it matter if we return a generator vs a list?
Test:
import time
import os
# Generator version: yields matches lazily as the walk proceeds
def find_files_gen(path, ext):
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(ext):
                yield os.path.join(root, file)
# List version: materializes every match before returning
def find_files_list(path, ext):
    results = []
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(ext):
                results.append(os.path.join(root, file))
    return results
# Test: time-to-first-10-results (generator) vs. full collection (list)
import itertools  # required for islice; missing from the original snippet

start = time.time()
gen_files = find_files_gen('/usr', '.py')
first_10 = list(itertools.islice(gen_files, 10))
gen_time = time.time() - start

start = time.time()
list_files = find_files_list('/usr', '.py')
first_10 = list_files[:10]
list_time = time.time() - start

print(f"Generator (first 10): {gen_time:.4f}s")
print(f"List (all files): {list_time:.4f}s")
Results: Generator returns first results immediately; list must complete entirely first.
Use generators when:
- Processing results one at a time
- Might not need all results
- Memory is constrained
Use lists when:
- Need to iterate multiple times
- Need length before iterating
- Results fit comfortably in memory