feat: add stella-callgraph-node for JavaScript/TypeScript call graph extraction

- Implemented a new tool `stella-callgraph-node` that extracts call graphs from JavaScript/TypeScript projects using Babel AST.
- Added command-line interface with options for JSON output and help.
- Included functionality to analyze project structure, detect functions, and build call graphs.
- Created a package.json file for dependency management.

feat: introduce stella-callgraph-python for Python call graph extraction

- Developed `stella-callgraph-python` to extract call graphs from Python projects using AST analysis.
- Implemented command-line interface with options for JSON output and verbose logging.
- Added framework detection to identify popular web frameworks and their entry points.
- Created an AST analyzer to traverse Python code and extract function definitions and calls.
- Included requirements.txt for project dependencies.

chore: add framework detection for Python projects

- Implemented framework detection logic to identify frameworks like Flask, FastAPI, Django, and others based on project files and import patterns.
- Enhanced the AST analyzer to recognize entry points based on decorators and function definitions.
This commit is contained in:
master
2025-12-19 18:11:59 +02:00
parent 951a38d561
commit 8779e9226f
130 changed files with 19011 additions and 422 deletions

View File

@@ -0,0 +1,168 @@
#!/usr/bin/env python3
"""
stella-callgraph-python
Call graph extraction tool for Python projects using AST analysis.
"""
import argparse
import ast
import json
import os
import sys
from pathlib import Path
from typing import Any
from ast_analyzer import PythonASTAnalyzer
from framework_detect import detect_frameworks
def main() -> int:
    """Run the command-line interface.

    Parses ``sys.argv``, analyzes the requested path and prints the resulting
    call graph to stdout as JSON.

    Returns:
        ``0`` when the graph was printed, ``1`` when anything raised; in the
        latter case the error message is written to stderr.
    """
    cli = argparse.ArgumentParser(
        description="Extract call graphs from Python projects"
    )
    cli.add_argument("path", help="Path to Python project or file")
    cli.add_argument("--json", action="store_true", help="Output formatted JSON")
    cli.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    options = cli.parse_args()

    try:
        graph = analyze_project(Path(options.path), verbose=options.verbose)
        # The output is JSON either way; --json only switches on indentation.
        indent = 2 if options.json else None
        print(json.dumps(graph, indent=indent))
        return 0
    except Exception as exc:
        # Top-level boundary: report any failure and signal it via the exit code.
        print(f"Error: {exc}", file=sys.stderr)
        return 1
def analyze_project(project_path: Path, verbose: bool = False) -> dict[str, Any]:
    """Build the call graph for the project that contains ``project_path``.

    The project root is located first, so passing a single file still analyzes
    every Python file found under that root.

    Args:
        project_path: A file or directory inside the project.
        verbose: When true, progress and per-file warnings go to stderr.

    Returns:
        The dictionary produced by ``PythonASTAnalyzer.get_result()``.

    Raises:
        FileNotFoundError: If ``project_path`` does not exist.
    """
    if not project_path.exists():
        raise FileNotFoundError(f"Path not found: {project_path}")

    # Locate the root via marker files (pyproject.toml, setup.py, ...).
    root = find_project_root(project_path)
    package_name = extract_package_name(root)
    frameworks = detect_frameworks(root)
    source_files = find_python_files(root)

    if verbose:
        print(f"Found {len(source_files)} Python files", file=sys.stderr)

    analyzer = PythonASTAnalyzer(package_name, root, frameworks)
    for source_file in source_files:
        # A file that cannot be read or parsed is skipped; the run continues.
        try:
            content = source_file.read_text(encoding='utf-8')
            tree = ast.parse(content, filename=str(source_file))
            analyzer.analyze_file(tree, str(source_file.relative_to(root)))
        except SyntaxError as e:
            if verbose:
                print(f"Warning: Syntax error in {source_file}: {e}", file=sys.stderr)
        except Exception as e:
            if verbose:
                print(f"Warning: Failed to parse {source_file}: {e}", file=sys.stderr)

    return analyzer.get_result()
def find_project_root(path: Path) -> Path:
    """Locate the nearest ancestor directory that looks like a project root.

    Starting at ``path`` (or its parent when ``path`` is a file), each
    directory on the way up is checked for one of the marker entries.

    Args:
        path: File or directory to start from.

    Returns:
        The first directory containing a marker.  If none is found, the
        resolved ``path`` itself when it is a directory, otherwise the
        resolved parent of ``path``.
    """
    markers = ('pyproject.toml', 'setup.py', 'setup.cfg', 'requirements.txt', '.git')

    start = path.resolve()
    if start.is_file():
        start = start.parent

    for candidate in (start, *start.parents):
        # The filesystem root (the directory that is its own parent) is never
        # inspected for markers.
        if candidate == candidate.parent:
            break
        if any((candidate / marker).exists() for marker in markers):
            return candidate

    if path.is_dir():
        return path.resolve()
    return path.parent.resolve()
def extract_package_name(root: Path) -> str:
"""Extract package name from project metadata."""
# Try pyproject.toml
pyproject = root / 'pyproject.toml'
if pyproject.exists():
try:
import tomllib
with open(pyproject, 'rb') as f:
data = tomllib.load(f)
return data.get('project', {}).get('name', root.name)
except Exception:
pass
# Try setup.py
setup_py = root / 'setup.py'
if setup_py.exists():
try:
with open(setup_py, 'r') as f:
content = f.read()
# Simple regex-based extraction
import re
match = re.search(r"name\s*=\s*['\"]([^'\"]+)['\"]", content)
if match:
return match.group(1)
except Exception:
pass
return root.name
def find_python_files(root: Path) -> list[Path]:
    """Collect the Python source files of a project.

    Files are skipped when any directory *below* ``root`` on their path is a
    known tooling/virtualenv/build directory or ends in ``.egg-info``, or when
    the file name itself starts with a dot.  Only the path relative to
    ``root`` is inspected, so a project that happens to live under a
    directory called ``build`` or ``env`` is still analyzed.

    Args:
        root: Project root directory to search recursively.

    Returns:
        Sorted list of paths to ``*.py`` files.
    """
    exclude_dirs = {
        '__pycache__', '.git', '.tox', '.nox', '.mypy_cache',
        '.pytest_cache', 'venv', '.venv', 'env', '.env',
        'node_modules', 'dist', 'build', 'eggs',
    }

    files: list[Path] = []
    for path in root.rglob('*.py'):
        if path.name.startswith('.') or not path.is_file():
            continue
        # Directory components between root and the file (file name excluded).
        parent_dirs = path.relative_to(root).parts[:-1]
        if any(
            part in exclude_dirs or part.endswith('.egg-info')
            for part in parent_dirs
        ):
            continue
        files.append(path)
    return sorted(files)
if __name__ == '__main__':
    # Script entry: main()'s return value becomes the process exit status.
    raise SystemExit(main())

View File

@@ -0,0 +1,322 @@
"""
AST analyzer for Python call graph extraction.
"""
import ast
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional
@dataclass
class FunctionNode:
    """A function or method recorded as a node of the call graph."""

    id: str  # symbol ID built by PythonASTAnalyzer.make_symbol_id
    package: str  # package name the analyzer was created with
    name: str  # bare function name
    qualified_name: str  # "Class.name" for methods, otherwise the bare name
    file: str  # path of the defining file as handed to analyze_file
    line: int  # line number of the definition
    visibility: str  # "public" or "private"
    annotations: list[str] = field(default_factory=list)  # decorator source strings
    is_entrypoint: bool = False  # set when entrypoint detection matched
    entrypoint_type: Optional[str] = None  # type of the matched entrypoint, if any
@dataclass
class CallEdge:
    """A directed caller-to-callee edge of the call graph."""

    from_id: str  # symbol ID of the calling function
    to_id: str  # symbol ID of the callee, or a "py:external/<name>" placeholder
    kind: str  # edge kind; the analyzer in this file only emits "direct"
    file: str  # file containing the call site
    line: int  # line number of the call site
@dataclass
class Entrypoint:
    """A function identified as an externally triggered entrypoint."""

    id: str  # symbol ID of the entrypoint function
    type: str  # e.g. "http_handler", "background_job" or "cli_command"
    route: Optional[str] = None  # route path, when one could be extracted
    method: Optional[str] = None  # HTTP method, when one could be extracted
class PythonASTAnalyzer:
    """Collect call-graph nodes, edges and entrypoints from parsed Python files.

    One instance is fed every file of a project through :meth:`analyze_file`;
    :meth:`get_result` then serialises everything collected so far.  The
    ``current_file`` / ``current_class`` / ``current_function`` attributes are
    cursor state that ``FunctionVisitor`` updates while it walks a file.
    """

    # Decorator names treated as HTTP-verb route shortcuts, e.g. ``@app.get("/x")``.
    _HTTP_VERBS = ('get', 'post', 'put', 'delete', 'patch')
    # Prefix of the placeholder IDs given to call targets that are not known.
    _EXTERNAL_PREFIX = "py:external/"

    def __init__(self, package_name: str, root: Path, frameworks: list[str]):
        """Create an empty analyzer.

        Args:
            package_name: Name used as the package part of every symbol ID.
            root: Project root directory (stored, not otherwise used here).
            frameworks: Detected framework names (stored, not otherwise used here).
        """
        self.package_name = package_name
        self.root = root
        self.frameworks = frameworks
        self.nodes: dict[str, FunctionNode] = {}
        self.edges: list[CallEdge] = []
        self.entrypoints: list[Entrypoint] = []
        self.current_function: Optional[str] = None
        self.current_file: str = ""
        self.current_class: Optional[str] = None

    def analyze_file(self, tree: ast.AST, relative_path: str) -> None:
        """Walk one parsed module and record its functions and calls.

        Args:
            tree: Parsed module.
            relative_path: Path of the module, used in symbol IDs and positions.
        """
        self.current_file = relative_path
        self.current_function = None
        self.current_class = None
        visitor = FunctionVisitor(self)
        visitor.visit(tree)

    def get_result(self) -> dict[str, Any]:
        """Return the collected graph as a JSON-serialisable dictionary.

        Keys: ``module``, ``nodes``, ``edges`` (de-duplicated) and ``entrypoints``.
        """
        return {
            "module": self.package_name,
            "nodes": [self._node_to_dict(n) for n in self.nodes.values()],
            "edges": [self._edge_to_dict(e) for e in self._dedupe_edges()],
            "entrypoints": [self._entrypoint_to_dict(e) for e in self.entrypoints],
        }

    def _node_to_dict(self, node: FunctionNode) -> dict[str, Any]:
        """Serialise a node; ``column`` is always reported as 0."""
        return {
            "id": node.id,
            "package": node.package,
            "name": node.name,
            "signature": node.qualified_name,
            "position": {
                "file": node.file,
                "line": node.line,
                "column": 0,
            },
            "visibility": node.visibility,
            "annotations": node.annotations,
        }

    def _edge_to_dict(self, edge: CallEdge) -> dict[str, Any]:
        """Serialise an edge together with its call site."""
        return {
            "from": edge.from_id,
            "to": edge.to_id,
            "kind": edge.kind,
            "site": {
                "file": edge.file,
                "line": edge.line,
            },
        }

    def _entrypoint_to_dict(self, ep: Entrypoint) -> dict[str, Any]:
        """Serialise an entrypoint, omitting ``route``/``method`` when unset."""
        result: dict[str, Any] = {
            "id": ep.id,
            "type": ep.type,
        }
        if ep.route:
            result["route"] = ep.route
        if ep.method:
            result["method"] = ep.method
        return result

    def _dedupe_edges(self) -> list[CallEdge]:
        """Return one edge per (caller, callee) pair, first occurrence wins.

        Edges are resolved eagerly while files are walked, so a call to a
        function that is defined later (in the same file or in a file analyzed
        afterwards) is initially recorded as external.  Those placeholders are
        resolved again here, now that every node is known, using the same
        first-match-by-name rule as :meth:`_resolve_target`.  ``self.edges``
        itself is left untouched.
        """
        # setdefault keeps the first-registered node per name, which mirrors
        # the iteration order used by _resolve_target.
        by_name: dict[str, str] = {}
        for node_id, node in self.nodes.items():
            by_name.setdefault(node.name, node_id)
            by_name.setdefault(node.qualified_name, node_id)

        prefix = self._EXTERNAL_PREFIX
        seen: set[tuple[str, str]] = set()
        result: list[CallEdge] = []
        for edge in self.edges:
            to_id = edge.to_id
            if to_id.startswith(prefix):
                to_id = by_name.get(to_id[len(prefix):], to_id)
            key = (edge.from_id, to_id)
            if key in seen:
                continue
            seen.add(key)
            if to_id != edge.to_id:
                edge = CallEdge(
                    from_id=edge.from_id,
                    to_id=to_id,
                    kind=edge.kind,
                    file=edge.file,
                    line=edge.line,
                )
            result.append(edge)
        return result

    def make_symbol_id(self, name: str, class_name: Optional[str] = None) -> str:
        """Build the symbol ID of a function or method in the current file.

        The ID has the form ``py:<package>/<dotted.module>[.<Class>].<name>``.
        Only a trailing ``.py`` is removed from the file path; a ``.py``
        occurring elsewhere in the path is kept.
        """
        module_path = self.current_file
        if module_path.endswith('.py'):
            module_path = module_path[:-len('.py')]
        module_base = module_path.replace('/', '.').replace('\\', '.')
        if class_name:
            return f"py:{self.package_name}/{module_base}.{class_name}.{name}"
        return f"py:{self.package_name}/{module_base}.{name}"

    def add_function(
        self,
        name: str,
        line: int,
        decorators: list[str],
        class_name: Optional[str] = None,
        is_private: bool = False
    ) -> str:
        """Register a function node and, if applicable, its entrypoint.

        Args:
            name: Bare function name.
            line: Line number of the definition.
            decorators: Source text of each decorator, without the ``@``.
            class_name: Enclosing class, if the function is a method.
            is_private: Forces ``"private"`` visibility; names starting with
                an underscore are private regardless.

        Returns:
            The symbol ID of the new node.  A node with the same ID replaces
            the earlier one.
        """
        symbol_id = self.make_symbol_id(name, class_name)
        qualified_name = f"{class_name}.{name}" if class_name else name
        visibility = "private" if is_private or name.startswith('_') else "public"
        node = FunctionNode(
            id=symbol_id,
            package=self.package_name,
            name=name,
            qualified_name=qualified_name,
            file=self.current_file,
            line=line,
            visibility=visibility,
            annotations=decorators,
        )
        self.nodes[symbol_id] = node

        entrypoint = self._detect_entrypoint(name, decorators, class_name)
        if entrypoint:
            node.is_entrypoint = True
            node.entrypoint_type = entrypoint.type
            self.entrypoints.append(entrypoint)
        return symbol_id

    def add_call(self, target_name: str, line: int) -> None:
        """Record a call from ``current_function`` to ``target_name``.

        Calls seen while no function is current (module level) are ignored.
        """
        if not self.current_function:
            return
        target_id = self._resolve_target(target_name)
        self.edges.append(CallEdge(
            from_id=self.current_function,
            to_id=target_id,
            kind="direct",
            file=self.current_file,
            line=line,
        ))

    def _resolve_target(self, name: str) -> str:
        """Map a call target to a known node ID or an external placeholder.

        The first registered node whose bare or qualified name equals ``name``
        wins; anything else becomes ``py:external/<name>``.
        """
        for node_id, node in self.nodes.items():
            if node.name == name or node.qualified_name == name:
                return node_id
        return f"{self._EXTERNAL_PREFIX}{name}"

    @staticmethod
    def _split_decorator(decorator: str) -> tuple[str, str]:
        """Split decorator text into lower-cased ``(owner, callee)``.

        ``"app.route('/x')"`` gives ``("app", "route")`` and ``"shared_task"``
        gives ``("", "shared_task")``.  Only the text before the first ``(``
        is considered, so arguments can never influence the result.
        """
        head = decorator.split('(', 1)[0].strip().lower()
        owner, _, callee = head.rpartition('.')
        return owner, callee

    def _detect_entrypoint(
        self,
        name: str,
        decorators: list[str],
        class_name: Optional[str]
    ) -> Optional[Entrypoint]:
        """Classify a function as an entrypoint, if it looks like one.

        Decorators are checked in order and the first match wins.  Matching is
        done on the decorator's callee name (the last dotted component before
        any argument list), not on the whole decorator text, so argument
        values and owner names such as ``router`` cannot trigger a match.
        After the decorators, ``get``/``post``/... methods of classes named
        ``*View`` and a module-level ``main`` are recognised.

        Returns:
            The detected :class:`Entrypoint`, or ``None``.
        """
        symbol_id = self.make_symbol_id(name, class_name)

        for decorator in decorators:
            owner, callee = self._split_decorator(decorator)

            # Verb shortcuts such as @app.get("/x") or @router.post("/x").
            # An owner is required for the call form so that a bare
            # ``@patch("pkg.func")`` is not mistaken for a route, and owners
            # containing "mock" (``@mock.patch(...)``) are skipped for the
            # same reason.  A bare ``@get`` without arguments still matches.
            if callee in self._HTTP_VERBS:
                is_bare_name = decorator.strip().lower() == callee
                if is_bare_name or (owner and 'mock' not in owner):
                    return Entrypoint(
                        id=symbol_id,
                        type="http_handler",
                        route=self._extract_route_from_decorator(decorator),
                        method=callee.upper(),
                    )

            # Generic route registration: @app.route(...), @router.api_route(...)
            if 'route' in callee:
                return Entrypoint(
                    id=symbol_id,
                    type="http_handler",
                    route=self._extract_route_from_decorator(decorator),
                    method=self._extract_method_from_decorator(decorator),
                )

            # Task decorators: @app.task, @shared_task, @celery.task(bind=True)
            if callee == 'task' or callee.endswith('_task'):
                return Entrypoint(id=symbol_id, type="background_job")

            # CLI decorators: @click.command(), @cli.group(), @app.command()
            if callee in ('command', 'group'):
                return Entrypoint(id=symbol_id, type="cli_command")

        # HTTP-verb methods on classes whose name ends in "View".
        if class_name and class_name.endswith('View'):
            if name in self._HTTP_VERBS:
                return Entrypoint(id=symbol_id, type="http_handler", method=name.upper())

        # Module-level main() function.
        if name == 'main' and not class_name:
            return Entrypoint(id=symbol_id, type="cli_command")

        return None

    def _extract_route_from_decorator(self, decorator: str) -> Optional[str]:
        """Return the first quoted path-like string in the decorator text.

        Returns ``None`` when the decorator contains no such literal.
        """
        import re
        match = re.search(r"['\"]([/\w{}<>:.-]+)['\"]", decorator)
        return match.group(1) if match else None

    def _extract_method_from_decorator(self, decorator: str) -> Optional[str]:
        """Return the HTTP method a route decorator declares, upper-cased.

        The callee name is used when it is itself a verb (``app.post(...)``);
        otherwise the first entry of a literal ``methods=[...]`` argument is
        used.  ``None`` is returned when neither is present, so a verb-like
        substring inside the route path no longer produces a method.
        """
        import re
        verbs = ('GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS')

        callee = self._split_decorator(decorator)[1].upper()
        if callee in verbs:
            return callee

        # methods=[...], methods=(...) or methods={...}
        match = re.search(r"methods\s*=\s*[\[({]([^\])}]*)[\])}]", decorator)
        if match:
            declared = re.findall(r"['\"]\s*([A-Za-z]+)\s*['\"]", match.group(1))
            if declared:
                # Entrypoint carries a single method; report the first listed.
                return declared[0].upper()
        return None
class FunctionVisitor(ast.NodeVisitor):
    """AST visitor that reports function definitions and calls to an analyzer.

    Every (async) function definition reached by the visitor is registered via
    ``analyzer.add_function``; every call expression inside that function's
    body is reported via ``analyzer.add_call``.  Function bodies are scanned
    with ``ast.walk`` rather than visited, so definitions nested inside a
    function are not registered as nodes of their own and their calls are
    attributed to the enclosing function.
    """

    def __init__(self, analyzer: PythonASTAnalyzer):
        self.analyzer = analyzer

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """Visit a class body with ``analyzer.current_class`` set to its name."""
        old_class = self.analyzer.current_class
        self.analyzer.current_class = node.name
        try:
            self.generic_visit(node)
        finally:
            # Restore even if a callback raised, so later files start clean.
            self.analyzer.current_class = old_class

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Visit function definitions."""
        self._visit_function(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        """Visit async function definitions."""
        self._visit_function(node)

    def _visit_function(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
        """Register a function and the calls made in its body.

        Only ``node.body`` is scanned.  Decorators and default-argument
        expressions are evaluated when the function is defined, not when it
        runs, so calls appearing there are not edges of this function.
        Calls are reported in source order, which makes the call site kept by
        edge de-duplication the earliest one.
        """
        decorators = [ast.unparse(d) for d in node.decorator_list]
        is_private = node.name.startswith('_') and not node.name.startswith('__')
        symbol_id = self.analyzer.add_function(
            name=node.name,
            line=node.lineno,
            decorators=decorators,
            class_name=self.analyzer.current_class,
            is_private=is_private,
        )

        old_function = self.analyzer.current_function
        self.analyzer.current_function = symbol_id
        try:
            calls = [
                child
                for statement in node.body
                for child in ast.walk(statement)
                if isinstance(child, ast.Call)
            ]
            calls.sort(key=lambda call: (call.lineno, call.col_offset))
            for call in calls:
                target_name = self._get_call_target(call)
                if target_name:
                    self.analyzer.add_call(target_name, call.lineno)
        finally:
            self.analyzer.current_function = old_function

    def _get_call_target(self, node: ast.Call) -> Optional[str]:
        """Return the dotted name being called, or ``None`` if it has no name.

        ``f()`` gives ``"f"`` and ``a.b.c()`` gives ``"a.b.c"``.  For a chain
        that does not start at a plain name (e.g. ``make().run()``) only the
        attribute part (``"run"``) is returned.  Calls of other expressions,
        such as ``handlers[0]()``, give ``None``.
        """
        func = node.func
        if isinstance(func, ast.Name):
            return func.id
        if isinstance(func, ast.Attribute):
            return '.'.join(self._get_attribute_parts(func))
        return None

    def _get_attribute_parts(self, node: ast.Attribute) -> list[str]:
        """Return the components of an attribute chain, left to right."""
        reversed_parts: list[str] = []
        current: ast.expr = node
        while isinstance(current, ast.Attribute):
            reversed_parts.append(current.attr)
            current = current.value
        if isinstance(current, ast.Name):
            reversed_parts.append(current.id)
        reversed_parts.reverse()
        return reversed_parts

View File

@@ -0,0 +1,250 @@
"""
Framework detection for Python projects.
"""
from pathlib import Path
from typing import Any
import re
# Framework detection table, keyed by framework identifier.
#
# Each entry provides:
#   "packages"        - dependency names looked up in pyproject.toml,
#                       requirements.txt and setup.py.  Manifests list PyPI
#                       distribution names, which can differ from the import
#                       name (e.g. "azure-functions" vs "azure.functions"),
#                       so both spellings are listed where they differ.
#   "imports"         - regexes searched in source files
#   "patterns"        - further regexes searched in source files
#   "entrypoint_type" - value returned by get_entrypoint_type()
#
# NOTE(review): several source patterns are shared between frameworks
# (e.g. r"@\w+\.route\(" for flask and sanic, r"@\w+\.command\(" for click
# and typer), so source scanning can report more than one of them.
FRAMEWORK_PATTERNS: dict[str, dict[str, Any]] = {
    "flask": {
        "packages": ["flask"],
        "imports": [r"from flask import", r"import flask"],
        "patterns": [r"@\w+\.route\(", r"Flask\(__name__\)"],
        "entrypoint_type": "http_handler",
    },
    "fastapi": {
        "packages": ["fastapi"],
        "imports": [r"from fastapi import", r"import fastapi"],
        "patterns": [r"@\w+\.(get|post|put|delete|patch)\(", r"FastAPI\("],
        "entrypoint_type": "http_handler",
    },
    "django": {
        "packages": ["django"],
        "imports": [r"from django", r"import django"],
        "patterns": [r"urlpatterns\s*=", r"class \w+View\(", r"@api_view\("],
        "entrypoint_type": "http_handler",
    },
    "click": {
        "packages": ["click"],
        "imports": [r"from click import", r"import click"],
        "patterns": [r"@click\.command\(", r"@click\.group\(", r"@\w+\.command\("],
        "entrypoint_type": "cli_command",
    },
    "typer": {
        "packages": ["typer"],
        "imports": [r"from typer import", r"import typer"],
        "patterns": [r"typer\.Typer\(", r"@\w+\.command\("],
        "entrypoint_type": "cli_command",
    },
    "celery": {
        "packages": ["celery"],
        "imports": [r"from celery import", r"import celery"],
        "patterns": [r"@\w+\.task\(", r"@shared_task\(", r"Celery\("],
        "entrypoint_type": "background_job",
    },
    "dramatiq": {
        "packages": ["dramatiq"],
        "imports": [r"from dramatiq import", r"import dramatiq"],
        "patterns": [r"@dramatiq\.actor\("],
        "entrypoint_type": "background_job",
    },
    "rq": {
        "packages": ["rq"],
        "imports": [r"from rq import", r"import rq"],
        "patterns": [r"@job\(", r"queue\.enqueue\("],
        "entrypoint_type": "background_job",
    },
    "sanic": {
        "packages": ["sanic"],
        "imports": [r"from sanic import", r"import sanic"],
        "patterns": [r"@\w+\.route\(", r"Sanic\("],
        "entrypoint_type": "http_handler",
    },
    "aiohttp": {
        "packages": ["aiohttp"],
        "imports": [r"from aiohttp import", r"import aiohttp"],
        "patterns": [r"web\.Application\(", r"@routes\.(get|post|put|delete)\("],
        "entrypoint_type": "http_handler",
    },
    "tornado": {
        "packages": ["tornado"],
        "imports": [r"from tornado import", r"import tornado"],
        "patterns": [r"class \w+Handler\(", r"tornado\.web\.Application\("],
        "entrypoint_type": "http_handler",
    },
    "aws_lambda": {
        # "aws-lambda-powertools" is the PyPI spelling of the first entry.
        "packages": ["aws_lambda_powertools", "aws-lambda-powertools", "boto3"],
        "imports": [r"def handler\(event", r"def lambda_handler\("],
        "patterns": [r"def handler\(event,\s*context\)", r"@logger\.inject_lambda_context"],
        "entrypoint_type": "lambda",
    },
    "azure_functions": {
        # "azure-functions" is the PyPI distribution providing azure.functions.
        "packages": ["azure.functions", "azure-functions"],
        "imports": [r"import azure\.functions"],
        "patterns": [r"@func\.route\(", r"func\.HttpRequest"],
        "entrypoint_type": "cloud_function",
    },
    "grpc": {
        "packages": ["grpcio", "grpc"],
        "imports": [r"import grpc", r"from grpc import"],
        "patterns": [r"_pb2_grpc\.add_\w+Servicer_to_server\("],
        "entrypoint_type": "grpc_method",
    },
    "graphql": {
        # Strawberry is published on PyPI as "strawberry-graphql".
        "packages": ["graphene", "strawberry", "strawberry-graphql", "ariadne"],
        "imports": [r"import graphene", r"import strawberry", r"import ariadne"],
        "patterns": [r"@strawberry\.(type|mutation|query)\(", r"class \w+\(graphene\.ObjectType\)"],
        "entrypoint_type": "graphql_resolver",
    },
}
def detect_frameworks(project_root: Path) -> list[str]:
    """Return the sorted names of all frameworks detected in a project.

    Combines what the dependency manifests present in ``project_root``
    declare with what a scan of the source files finds.

    Args:
        project_root: Project root directory.

    Returns:
        Sorted list of framework identifiers, without duplicates.
    """
    manifest_scanners = (
        ("pyproject.toml", _detect_from_pyproject),
        ("requirements.txt", _detect_from_requirements),
        ("setup.py", _detect_from_setup_py),
    )

    found: set[str] = set()
    for filename, scan in manifest_scanners:
        manifest = project_root / filename
        if manifest.exists():
            found.update(scan(manifest))

    # The source scan always runs, whether or not a manifest exists.
    found.update(_detect_from_source(project_root))
    return sorted(found)
def _detect_from_pyproject(path: Path) -> set[str]:
    """Detect frameworks from the dependencies declared in pyproject.toml.

    Dependencies are gathered from ``[project].dependencies``, every group of
    ``[project.optional-dependencies]``, and Poetry's ``dependencies``,
    ``dev-dependencies`` and ``group.<name>.dependencies`` tables.  Names are
    compared after lower-casing and collapsing runs of ``-``, ``_`` and ``.``,
    so ``Flask``, ``aws-lambda-powertools`` and ``aws_lambda_powertools`` all
    match their table entries.

    Args:
        path: Path to the pyproject.toml file.

    Returns:
        Set of detected framework identifiers; empty when the file cannot be
        read or parsed.
    """
    detected: set[str] = set()
    try:
        import tomllib  # standard library from Python 3.11 onwards
        with open(path, 'rb') as f:
            data = tomllib.load(f)
    except (ImportError, OSError, ValueError):
        # Best effort: no tomllib, unreadable file, or invalid TOML.
        return detected

    def as_dict(value: object) -> dict:
        return value if isinstance(value, dict) else {}

    def as_specs(value: object) -> list[str]:
        if isinstance(value, list):
            return [item for item in value if isinstance(item, str)]
        return []

    def normalize(name: str) -> str:
        return re.sub(r"[-_.]+", "-", name).lower()

    specs: list[str] = []

    project = as_dict(data.get("project"))
    specs.extend(as_specs(project.get("dependencies")))
    for group in as_dict(project.get("optional-dependencies")).values():
        specs.extend(as_specs(group))

    # Poetry stores dependencies as tables whose keys are the package names.
    poetry = as_dict(as_dict(data.get("tool")).get("poetry"))
    specs.extend(as_dict(poetry.get("dependencies")))
    specs.extend(as_dict(poetry.get("dev-dependencies")))
    for group in as_dict(poetry.get("group")).values():
        specs.extend(as_dict(as_dict(group).get("dependencies")))

    # Leading name of each requirement; extras, versions and markers are dropped.
    name_re = re.compile(r"\s*([A-Za-z0-9][A-Za-z0-9._-]*)")
    names = {
        normalize(match.group(1))
        for spec in specs
        if (match := name_re.match(spec))
    }

    for framework, config in FRAMEWORK_PATTERNS.items():
        if any(normalize(pkg) in names for pkg in config["packages"]):
            detected.add(framework)
    return detected
def _detect_from_requirements(path: Path) -> set[str]:
    """Detect frameworks from the packages listed in requirements.txt.

    Blank lines, comments (whole-line or trailing) and option lines starting
    with ``-`` are ignored.  Only the leading package name of each remaining
    line is used, so extras, version specifiers and environment markers do
    not interfere.  Names are compared after lower-casing and collapsing runs
    of ``-``, ``_`` and ``.``.

    Args:
        path: Path to the requirements file.

    Returns:
        Set of detected framework identifiers; empty when the file cannot be
        read.
    """
    detected: set[str] = set()
    try:
        with open(path, 'r', encoding='utf-8', errors='replace') as f:
            lines = f.read().splitlines()
    except OSError:
        return detected

    def normalize(name: str) -> str:
        return re.sub(r"[-_.]+", "-", name).lower()

    name_re = re.compile(r"[A-Za-z0-9][A-Za-z0-9._-]*")
    names: set[str] = set()
    for line in lines:
        # A comment starts at '#' at line start or after whitespace; a '#'
        # inside a URL fragment is therefore left alone.
        line = re.sub(r"(^|\s)#.*$", "", line).strip()
        if not line or line.startswith('-'):
            continue
        match = name_re.match(line)
        if match:
            names.add(normalize(match.group(0)))

    for framework, config in FRAMEWORK_PATTERNS.items():
        if any(normalize(pkg) in names for pkg in config["packages"]):
            detected.add(framework)
    return detected
def _detect_from_setup_py(path: Path) -> set[str]:
    """Detect frameworks from requirement-like string literals in setup.py.

    The file is not executed.  Every quoted literal that starts with a package
    name followed directly by the closing quote or by a specifier character
    (``<``, ``>``, ``=``, ``!``, ``~``, ``[``, ``;``, ``(``, ``@``) counts as
    a requirement, so ``'flask'`` and ``"flask>=2.0"`` are both recognised.
    Names are compared after lower-casing and collapsing runs of ``-``, ``_``
    and ``.``.

    Args:
        path: Path to the setup.py file.

    Returns:
        Set of detected framework identifiers; empty when the file cannot be
        read.
    """
    detected: set[str] = set()
    try:
        with open(path, 'r', encoding='utf-8', errors='replace') as f:
            content = f.read()
    except OSError:
        return detected

    def normalize(name: str) -> str:
        return re.sub(r"[-_.]+", "-", name).lower()

    literal_re = re.compile(
        r"""['"]\s*([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?=['"<>=!~\[;(@])"""
    )
    names = {normalize(match.group(1)) for match in literal_re.finditer(content)}

    for framework, config in FRAMEWORK_PATTERNS.items():
        if any(normalize(pkg) in names for pkg in config["packages"]):
            detected.add(framework)
    return detected
def _detect_from_source(project_root: Path) -> set[str]:
    """Detect frameworks by scanning the start of Python source files.

    At most ``max_files`` readable files are examined, and only the first
    4096 characters of each, to keep startup fast.  Files below tooling,
    virtualenv or build directories are skipped; only the path relative to
    ``project_root`` is inspected, so a project that lives under a directory
    called ``build`` or ``env`` is still scanned.

    Args:
        project_root: Project root directory to search recursively.

    Returns:
        Set of framework identifiers whose import or usage patterns matched.
    """
    detected: set[str] = set()
    exclude_dirs = {
        '__pycache__', '.git', '.tox', '.nox', '.mypy_cache', '.pytest_cache',
        'venv', '.venv', 'env', '.env', 'node_modules', 'dist', 'build', 'eggs',
    }
    max_files = 50
    scanned = 0

    for py_file in project_root.rglob('*.py'):
        if scanned >= max_files:
            break

        # Directory components between the root and the file.
        parent_dirs = py_file.relative_to(project_root).parts[:-1]
        if any(
            part in exclude_dirs or part.endswith('.egg-info')
            for part in parent_dirs
        ):
            continue

        try:
            with open(py_file, 'r', encoding='utf-8') as f:
                content = f.read(4096)  # imports normally sit at the top
        except (OSError, ValueError):
            # Unreadable or not valid UTF-8: skip without counting it.
            continue
        scanned += 1

        for framework, config in FRAMEWORK_PATTERNS.items():
            if framework in detected:
                continue
            if any(
                re.search(pattern, content)
                for pattern in config["imports"] + config["patterns"]
            ):
                detected.add(framework)

    return detected
def get_entrypoint_type(framework: str) -> str:
    """Look up the entrypoint type registered for ``framework``.

    Returns:
        The framework's ``"entrypoint_type"`` value, or ``"unknown"`` when the
        framework is not in the table or has no such value.
    """
    config = FRAMEWORK_PATTERNS.get(framework)
    if config is None:
        return "unknown"
    return config.get("entrypoint_type", "unknown")

View File

@@ -0,0 +1,2 @@
# stella-callgraph-python requirements
# No external dependencies - uses Python 3.11+ stdlib only