up
Some checks failed
Docs CI / lint-and-preview (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Notify Smoke Test / Notify Unit Tests (push) Has been cancelled
Notify Smoke Test / Notifier Service Tests (push) Has been cancelled
Notify Smoke Test / Notification Smoke Test (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Scanner Analyzers / Discover Analyzers (push) Has been cancelled
Scanner Analyzers / Build Analyzers (push) Has been cancelled
Scanner Analyzers / Test Language Analyzers (push) Has been cancelled
Scanner Analyzers / Validate Test Fixtures (push) Has been cancelled
Scanner Analyzers / Verify Deterministic Output (push) Has been cancelled
Signals CI & Image / signals-ci (push) Has been cancelled
Signals Reachability Scoring & Events / reachability-smoke (push) Has been cancelled
Signals Reachability Scoring & Events / sign-and-upload (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-12-13 00:20:26 +02:00
parent e1f1bef4c1
commit 564df71bfb
2376 changed files with 334389 additions and 328032 deletions

View File

@@ -1,485 +1,485 @@
using System.Collections.Immutable;
using System.Globalization;
using System.Linq;
using System.Text;
namespace StellaOps.Scanner.EntryTrace.Parsing;
/// <summary>
/// Deterministic parser producing a lightweight AST for Bourne shell constructs needed by EntryTrace.
/// Supports: simple commands, exec, source/dot, run-parts, if/elif/else/fi, case/esac.
/// </summary>
/// <remarks>
/// The parser walks the token list produced by <see cref="ShellTokenizer"/> from left to right.
/// Malformed <c>if</c>/<c>case</c> constructs are reported as <see cref="FormatException"/>.
/// </remarks>
public sealed class ShellParser
{
    // Tokens that end the body of an "if"/"elif" branch.
    private static readonly HashSet<string> IfBranchTerminators = new(StringComparer.Ordinal)
    {
        "elif",
        "else",
        "fi"
    };

    // Tokens that end the body of an "else" branch.
    private static readonly HashSet<string> ElseBranchTerminators = new(StringComparer.Ordinal)
    {
        "fi"
    };

    // Tokens that end the body of a case arm. ";;" is matched as an Operator token, "esac" as a Word.
    private static readonly HashSet<string> CaseArmTerminators = new(StringComparer.Ordinal)
    {
        ";;",
        "esac"
    };

    private readonly IReadOnlyList<ShellToken> _tokens;
    private int _index;

    private ShellParser(IReadOnlyList<ShellToken> tokens)
    {
        _tokens = tokens;
    }

    /// <summary>
    /// Tokenizes <paramref name="source"/> and parses it into a <see cref="ShellScript"/>.
    /// </summary>
    /// <param name="source">Shell script text.</param>
    /// <returns>The script with its top-level nodes in source order.</returns>
    /// <exception cref="FormatException">
    /// An <c>if</c> or <c>case</c> construct is missing an expected keyword or ends prematurely.
    /// </exception>
    public static ShellScript Parse(string source)
    {
        var tokenizer = new ShellTokenizer();
        var tokens = tokenizer.Tokenize(source);
        var parser = new ShellParser(tokens);
        var nodes = parser.ParseNodes(untilKeywords: null);
        return new ShellScript(nodes.ToImmutableArray());
    }

    /// <summary>
    /// Parses consecutive nodes until end of file or until a node would start with one of
    /// <paramref name="untilKeywords"/>. The terminating token is left unconsumed for the caller.
    /// </summary>
    /// <param name="untilKeywords">
    /// Values that end the sequence when they appear at the start of a node as a Word or Operator
    /// token; <c>null</c> to read to end of file.
    /// </param>
    private List<ShellNode> ParseNodes(HashSet<string>? untilKeywords)
    {
        var nodes = new List<ShellNode>();
        while (true)
        {
            SkipNewLines();
            var token = Peek();
            if (token.Kind == ShellTokenKind.EndOfFile)
            {
                break;
            }

            // Terminators may be keywords (Word) or punctuation such as ";;" (Operator).
            if (untilKeywords is not null
                && (token.Kind is ShellTokenKind.Word or ShellTokenKind.Operator)
                && untilKeywords.Contains(token.Value))
            {
                break;
            }

            var startIndex = _index;
            ShellNode node = token.Kind switch
            {
                ShellTokenKind.Word when token.Value == "if" => ParseIf(),
                ShellTokenKind.Word when token.Value == "case" => ParseCase(),
                _ => ParseCommandLike()
            };
            SkipCommandSeparators();

            if (_index == startIndex)
            {
                // Nothing was consumed: the current token terminates a command but is not a
                // separator we skip (e.g. a stray ";;" outside a case arm). Drop the empty
                // placeholder node and step over the token so the loop always makes progress.
                Advance();
                continue;
            }

            nodes.Add(node);
        }

        return nodes;
    }

    /// <summary>
    /// Reads one simple command (up to the next terminator) and classifies it by command name:
    /// <c>exec</c>, <c>source</c>/<c>.</c>, <c>run-parts</c>, or a generic command.
    /// Returns an empty <see cref="ShellCommandNode"/> when no tokens precede the terminator.
    /// </summary>
    private ShellNode ParseCommandLike()
    {
        var start = Peek();
        var tokens = ReadUntilTerminator();
        if (tokens.Count == 0)
        {
            return new ShellCommandNode(string.Empty, ImmutableArray<ShellToken>.Empty, CreateSpan(start, start));
        }

        var normalizedName = ExtractCommandName(tokens);
        var immutableTokens = tokens.ToImmutableArray();
        var span = CreateSpan(tokens[0], tokens[^1]);
        return normalizedName switch
        {
            "exec" => new ShellExecNode(immutableTokens, span),
            "source" or "." => new ShellIncludeNode(
                ExtractPrimaryArgument(immutableTokens),
                immutableTokens,
                span),
            "run-parts" => new ShellRunPartsNode(
                ExtractPrimaryArgument(immutableTokens),
                immutableTokens,
                span),
            _ => new ShellCommandNode(normalizedName, immutableTokens, span)
        };
    }

    /// <summary>
    /// Parses <c>if … then … [elif … then …]* [else …] fi</c> into one branch per clause.
    /// </summary>
    /// <exception cref="FormatException">A <c>then</c> or the closing <c>fi</c> is missing.</exception>
    private ShellIfNode ParseIf()
    {
        var start = Expect(ShellTokenKind.Word, "if");
        var predicateTokens = ReadUntilKeyword("then");
        Expect(ShellTokenKind.Word, "then");

        var branches = new List<ShellConditionalBranch>();
        var thenNodes = ParseNodes(IfBranchTerminators);
        branches.Add(new ShellConditionalBranch(
            ShellConditionalKind.If,
            thenNodes.ToImmutableArray(),
            CreateSpan(start, thenNodes.LastOrDefault()?.Span ?? CreateSpan(start, start)),
            JoinTokens(predicateTokens)));

        while (true)
        {
            SkipNewLines();
            var next = Peek();
            if (next.Kind != ShellTokenKind.Word)
            {
                break;
            }

            if (next.Value == "elif")
            {
                var elifStart = Advance();
                var elifPredicate = ReadUntilKeyword("then");
                Expect(ShellTokenKind.Word, "then");
                var elifBody = ParseNodes(IfBranchTerminators);
                var elifSpan = elifBody.Count > 0
                    ? CreateSpan(elifStart, elifBody[^1].Span)
                    : CreateSpan(elifStart, elifStart);
                branches.Add(new ShellConditionalBranch(
                    ShellConditionalKind.Elif,
                    elifBody.ToImmutableArray(),
                    elifSpan,
                    JoinTokens(elifPredicate)));
                continue;
            }

            if (next.Value == "else")
            {
                var elseStart = Advance();
                var elseBody = ParseNodes(ElseBranchTerminators);
                var elseSpan = elseBody.Count > 0
                    ? CreateSpan(elseStart, elseBody[^1].Span)
                    : CreateSpan(elseStart, elseStart);
                branches.Add(new ShellConditionalBranch(
                    ShellConditionalKind.Else,
                    elseBody.ToImmutableArray(),
                    elseSpan,
                    null));
            }

            // "else" is always the last clause; any other word must be the closing "fi".
            break;
        }

        var end = Expect(ShellTokenKind.Word, "fi");
        return new ShellIfNode(branches.ToImmutableArray(), CreateSpan(start, end));
    }

    /// <summary>
    /// Parses <c>case … in pattern[|pattern]…) body ;; … esac</c> into one arm per pattern list.
    /// The selector word between <c>case</c> and <c>in</c> is consumed but not kept in the AST.
    /// </summary>
    /// <exception cref="FormatException">
    /// <c>in</c>, a closing <c>)</c> or <c>esac</c> is missing, or the input ends inside the construct.
    /// </exception>
    private ShellCaseNode ParseCase()
    {
        var start = Expect(ShellTokenKind.Word, "case");
        _ = ReadUntilKeyword("in");
        Expect(ShellTokenKind.Word, "in");

        var arms = new List<ShellCaseArm>();
        while (true)
        {
            SkipNewLines();
            var armStart = Peek();
            if (armStart.Kind == ShellTokenKind.Word && armStart.Value == "esac")
            {
                break;
            }

            if (armStart.Kind == ShellTokenKind.EndOfFile)
            {
                throw new FormatException("Unexpected end of file while parsing case arms.");
            }

            var patterns = ReadPatterns();
            Expect(ShellTokenKind.Operator, ")");
            var body = ParseNodes(CaseArmTerminators);

            var spanStart = patterns.FirstToken ?? armStart;
            var span = body.Count > 0
                ? CreateSpan(spanStart, body[^1].Span)
                : CreateSpan(spanStart, spanStart);
            arms.Add(new ShellCaseArm(
                patterns.Values.ToImmutableArray(),
                body.ToImmutableArray(),
                span));

            SkipNewLines();
            var separator = Peek();
            if (separator.Kind == ShellTokenKind.Operator && separator.Value == ";;")
            {
                Advance();
            }

            // "esac" and end of file are handled at the top of the next iteration.
        }

        var end = Expect(ShellTokenKind.Word, "esac");
        return new ShellCaseNode(arms.ToImmutableArray(), CreateSpan(start, end));

        // Reads the "|"-separated patterns of one arm, stopping in front of the closing ")".
        // Adjacent tokens belonging to the same pattern are concatenated without a separator.
        (List<string> Values, ShellToken? FirstToken) ReadPatterns()
        {
            var values = new List<string>();
            ShellToken? first = null;
            var sb = new StringBuilder();
            while (true)
            {
                var current = Peek();
                if (current.Kind == ShellTokenKind.EndOfFile)
                {
                    throw new FormatException("Unexpected EOF in case arm pattern.");
                }

                if (current.Kind == ShellTokenKind.Operator)
                {
                    if (current.Value == "(" && first is null)
                    {
                        // POSIX permits an optional "(" before the pattern list; it is not part of the pattern.
                        Advance();
                        continue;
                    }

                    if (current.Value is ")" or "|")
                    {
                        if (sb.Length > 0)
                        {
                            values.Add(sb.ToString());
                            sb.Clear();
                        }

                        if (current.Value == "|")
                        {
                            Advance();
                            continue;
                        }

                        break;
                    }
                }

                first ??= current;
                sb.Append(current.Value);
                Advance();
            }

            return (values, first);
        }
    }

    /// <summary>
    /// Collects tokens up to, but not including, the next command terminator:
    /// end of file, a new line, or one of the operators <c>;</c>, <c>;;</c>, <c>&amp;&amp;</c>, <c>||</c>.
    /// </summary>
    private List<ShellToken> ReadUntilTerminator()
    {
        var tokens = new List<ShellToken>();
        while (true)
        {
            var token = Peek();
            if (token.Kind is ShellTokenKind.EndOfFile or ShellTokenKind.NewLine)
            {
                break;
            }

            if (token.Kind == ShellTokenKind.Operator && (token.Value is ";" or ";;" or "&&" or "||"))
            {
                break;
            }

            tokens.Add(Advance());
        }

        return tokens;
    }

    /// <summary>
    /// Collects every token up to, but not including, the next Word token equal to <paramref name="keyword"/>.
    /// </summary>
    /// <exception cref="FormatException">End of file is reached before the keyword.</exception>
    private ImmutableArray<ShellToken> ReadUntilKeyword(string keyword)
    {
        var tokens = new List<ShellToken>();
        while (true)
        {
            var token = Peek();
            if (token.Kind == ShellTokenKind.EndOfFile)
            {
                throw new FormatException($"Unexpected EOF while looking for keyword '{keyword}'.");
            }

            if (token.Kind == ShellTokenKind.Word && token.Value == keyword)
            {
                break;
            }

            tokens.Add(Advance());
        }

        return tokens.ToImmutableArray();
    }

    /// <summary>
    /// Returns the trimmed value of the first word or quoted token that is not a leading
    /// <c>NAME=value</c> assignment, or an empty string when there is none.
    /// </summary>
    private static string ExtractCommandName(IReadOnlyList<ShellToken> tokens)
    {
        foreach (var token in tokens)
        {
            if (token.Kind is not ShellTokenKind.Word and not ShellTokenKind.SingleQuoted and not ShellTokenKind.DoubleQuoted)
            {
                continue;
            }

            // Skip environment assignments e.g. FOO=bar exec /app
            var eqIndex = token.Value.IndexOf('=', StringComparison.Ordinal);
            if (eqIndex > 0 && token.Value[..eqIndex].All(IsIdentifierChar))
            {
                continue;
            }

            return NormalizeCommandName(token.Value);
        }

        return string.Empty;

        static bool IsIdentifierChar(char c) => char.IsLetterOrDigit(c) || c == '_';
    }

    /// <summary>Trims surrounding white space from a command name; null or empty yields an empty string.</summary>
    private static string NormalizeCommandName(string value)
        => string.IsNullOrEmpty(value) ? string.Empty : value.Trim();

    /// <summary>
    /// Consumes new lines and the operators that separate one command from the next:
    /// <c>;</c>, <c>&amp;</c>, <c>&amp;&amp;</c> and <c>||</c>.
    /// </summary>
    private void SkipCommandSeparators()
    {
        while (true)
        {
            var token = Peek();
            var isSeparator = token.Kind == ShellTokenKind.NewLine
                || (token.Kind == ShellTokenKind.Operator && (token.Value is ";" or "&" or "&&" or "||"));
            if (!isSeparator)
            {
                break;
            }

            Advance();
        }
    }

    /// <summary>Consumes consecutive new-line tokens.</summary>
    private void SkipNewLines()
    {
        while (Peek().Kind == ShellTokenKind.NewLine)
        {
            Advance();
        }
    }

    /// <summary>
    /// Consumes and returns the current token if it has the given kind (and value, when supplied).
    /// </summary>
    /// <exception cref="FormatException">The current token does not match.</exception>
    private ShellToken Expect(ShellTokenKind kind, string? value = null)
    {
        var token = Peek();
        if (token.Kind != kind || (value is not null && token.Value != value))
        {
            throw new FormatException($"Unexpected token '{token.Value}' at line {token.Line}, expected {value ?? kind.ToString()}.");
        }

        return Advance();
    }

    /// <summary>Returns the current token and moves past it; past the end, keeps returning the last token.</summary>
    private ShellToken Advance()
    {
        if (_index >= _tokens.Count)
        {
            return _tokens[^1];
        }

        return _tokens[_index++];
    }

    /// <summary>Returns the current token without consuming it; past the end, returns the last token.</summary>
    private ShellToken Peek()
    {
        if (_index >= _tokens.Count)
        {
            return _tokens[^1];
        }

        return _tokens[_index];
    }

    /// <summary>Builds a span from the start of <paramref name="start"/> to the end of <paramref name="end"/>.</summary>
    private static ShellSpan CreateSpan(ShellToken start, ShellToken end)
    {
        return new ShellSpan(start.Line, start.Column, end.Line, end.Column + end.Value.Length);
    }

    /// <summary>Builds a span from the start of <paramref name="start"/> to the end position of <paramref name="end"/>.</summary>
    private static ShellSpan CreateSpan(ShellToken start, ShellSpan end)
    {
        return new ShellSpan(start.Line, start.Column, end.EndLine, end.EndColumn);
    }

    /// <summary>Joins token values with single spaces.</summary>
    private static string JoinTokens(IEnumerable<ShellToken> tokens)
        => string.Join(" ", tokens.Select(token => token.Value));

    /// <summary>
    /// Returns the value of the first word or quoted token after position 0,
    /// or an empty string when there is none.
    /// </summary>
    private static string ExtractPrimaryArgument(ImmutableArray<ShellToken> tokens)
    {
        for (var i = 1; i < tokens.Length; i++)
        {
            var token = tokens[i];
            if (token.Kind is ShellTokenKind.Word or ShellTokenKind.SingleQuoted or ShellTokenKind.DoubleQuoted)
            {
                return token.Value;
            }
        }

        return string.Empty;
    }
}
using System.Collections.Immutable;
using System.Globalization;
using System.Linq;
using System.Text;
namespace StellaOps.Scanner.EntryTrace.Parsing;
/// <summary>
/// Deterministic parser producing a lightweight AST for Bourne shell constructs needed by EntryTrace.
/// Supports: simple commands, exec, source/dot, run-parts, if/elif/else/fi, case/esac.
/// </summary>
/// <remarks>
/// The parser walks the token list produced by <see cref="ShellTokenizer"/> from left to right.
/// Malformed <c>if</c>/<c>case</c> constructs are reported as <see cref="FormatException"/>.
/// </remarks>
public sealed class ShellParser
{
    // Tokens that end the body of an "if"/"elif" branch.
    private static readonly HashSet<string> IfBranchTerminators = new(StringComparer.Ordinal)
    {
        "elif",
        "else",
        "fi"
    };

    // Tokens that end the body of an "else" branch.
    private static readonly HashSet<string> ElseBranchTerminators = new(StringComparer.Ordinal)
    {
        "fi"
    };

    // Tokens that end the body of a case arm. ";;" is matched as an Operator token, "esac" as a Word.
    private static readonly HashSet<string> CaseArmTerminators = new(StringComparer.Ordinal)
    {
        ";;",
        "esac"
    };

    private readonly IReadOnlyList<ShellToken> _tokens;
    private int _index;

    private ShellParser(IReadOnlyList<ShellToken> tokens)
    {
        _tokens = tokens;
    }

    /// <summary>
    /// Tokenizes <paramref name="source"/> and parses it into a <see cref="ShellScript"/>.
    /// </summary>
    /// <param name="source">Shell script text.</param>
    /// <returns>The script with its top-level nodes in source order.</returns>
    /// <exception cref="FormatException">
    /// An <c>if</c> or <c>case</c> construct is missing an expected keyword or ends prematurely.
    /// </exception>
    public static ShellScript Parse(string source)
    {
        var tokenizer = new ShellTokenizer();
        var tokens = tokenizer.Tokenize(source);
        var parser = new ShellParser(tokens);
        var nodes = parser.ParseNodes(untilKeywords: null);
        return new ShellScript(nodes.ToImmutableArray());
    }

    /// <summary>
    /// Parses consecutive nodes until end of file or until a node would start with one of
    /// <paramref name="untilKeywords"/>. The terminating token is left unconsumed for the caller.
    /// </summary>
    /// <param name="untilKeywords">
    /// Values that end the sequence when they appear at the start of a node as a Word or Operator
    /// token; <c>null</c> to read to end of file.
    /// </param>
    private List<ShellNode> ParseNodes(HashSet<string>? untilKeywords)
    {
        var nodes = new List<ShellNode>();
        while (true)
        {
            SkipNewLines();
            var token = Peek();
            if (token.Kind == ShellTokenKind.EndOfFile)
            {
                break;
            }

            // Terminators may be keywords (Word) or punctuation such as ";;" (Operator).
            if (untilKeywords is not null
                && (token.Kind is ShellTokenKind.Word or ShellTokenKind.Operator)
                && untilKeywords.Contains(token.Value))
            {
                break;
            }

            var startIndex = _index;
            ShellNode node = token.Kind switch
            {
                ShellTokenKind.Word when token.Value == "if" => ParseIf(),
                ShellTokenKind.Word when token.Value == "case" => ParseCase(),
                _ => ParseCommandLike()
            };
            SkipCommandSeparators();

            if (_index == startIndex)
            {
                // Nothing was consumed: the current token terminates a command but is not a
                // separator we skip (e.g. a stray ";;" outside a case arm). Drop the empty
                // placeholder node and step over the token so the loop always makes progress.
                Advance();
                continue;
            }

            nodes.Add(node);
        }

        return nodes;
    }

    /// <summary>
    /// Reads one simple command (up to the next terminator) and classifies it by command name:
    /// <c>exec</c>, <c>source</c>/<c>.</c>, <c>run-parts</c>, or a generic command.
    /// Returns an empty <see cref="ShellCommandNode"/> when no tokens precede the terminator.
    /// </summary>
    private ShellNode ParseCommandLike()
    {
        var start = Peek();
        var tokens = ReadUntilTerminator();
        if (tokens.Count == 0)
        {
            return new ShellCommandNode(string.Empty, ImmutableArray<ShellToken>.Empty, CreateSpan(start, start));
        }

        var normalizedName = ExtractCommandName(tokens);
        var immutableTokens = tokens.ToImmutableArray();
        var span = CreateSpan(tokens[0], tokens[^1]);
        return normalizedName switch
        {
            "exec" => new ShellExecNode(immutableTokens, span),
            "source" or "." => new ShellIncludeNode(
                ExtractPrimaryArgument(immutableTokens),
                immutableTokens,
                span),
            "run-parts" => new ShellRunPartsNode(
                ExtractPrimaryArgument(immutableTokens),
                immutableTokens,
                span),
            _ => new ShellCommandNode(normalizedName, immutableTokens, span)
        };
    }

    /// <summary>
    /// Parses <c>if … then … [elif … then …]* [else …] fi</c> into one branch per clause.
    /// </summary>
    /// <exception cref="FormatException">A <c>then</c> or the closing <c>fi</c> is missing.</exception>
    private ShellIfNode ParseIf()
    {
        var start = Expect(ShellTokenKind.Word, "if");
        var predicateTokens = ReadUntilKeyword("then");
        Expect(ShellTokenKind.Word, "then");

        var branches = new List<ShellConditionalBranch>();
        var thenNodes = ParseNodes(IfBranchTerminators);
        branches.Add(new ShellConditionalBranch(
            ShellConditionalKind.If,
            thenNodes.ToImmutableArray(),
            CreateSpan(start, thenNodes.LastOrDefault()?.Span ?? CreateSpan(start, start)),
            JoinTokens(predicateTokens)));

        while (true)
        {
            SkipNewLines();
            var next = Peek();
            if (next.Kind != ShellTokenKind.Word)
            {
                break;
            }

            if (next.Value == "elif")
            {
                var elifStart = Advance();
                var elifPredicate = ReadUntilKeyword("then");
                Expect(ShellTokenKind.Word, "then");
                var elifBody = ParseNodes(IfBranchTerminators);
                var elifSpan = elifBody.Count > 0
                    ? CreateSpan(elifStart, elifBody[^1].Span)
                    : CreateSpan(elifStart, elifStart);
                branches.Add(new ShellConditionalBranch(
                    ShellConditionalKind.Elif,
                    elifBody.ToImmutableArray(),
                    elifSpan,
                    JoinTokens(elifPredicate)));
                continue;
            }

            if (next.Value == "else")
            {
                var elseStart = Advance();
                var elseBody = ParseNodes(ElseBranchTerminators);
                var elseSpan = elseBody.Count > 0
                    ? CreateSpan(elseStart, elseBody[^1].Span)
                    : CreateSpan(elseStart, elseStart);
                branches.Add(new ShellConditionalBranch(
                    ShellConditionalKind.Else,
                    elseBody.ToImmutableArray(),
                    elseSpan,
                    null));
            }

            // "else" is always the last clause; any other word must be the closing "fi".
            break;
        }

        var end = Expect(ShellTokenKind.Word, "fi");
        return new ShellIfNode(branches.ToImmutableArray(), CreateSpan(start, end));
    }

    /// <summary>
    /// Parses <c>case … in pattern[|pattern]…) body ;; … esac</c> into one arm per pattern list.
    /// The selector word between <c>case</c> and <c>in</c> is consumed but not kept in the AST.
    /// </summary>
    /// <exception cref="FormatException">
    /// <c>in</c>, a closing <c>)</c> or <c>esac</c> is missing, or the input ends inside the construct.
    /// </exception>
    private ShellCaseNode ParseCase()
    {
        var start = Expect(ShellTokenKind.Word, "case");
        _ = ReadUntilKeyword("in");
        Expect(ShellTokenKind.Word, "in");

        var arms = new List<ShellCaseArm>();
        while (true)
        {
            SkipNewLines();
            var armStart = Peek();
            if (armStart.Kind == ShellTokenKind.Word && armStart.Value == "esac")
            {
                break;
            }

            if (armStart.Kind == ShellTokenKind.EndOfFile)
            {
                throw new FormatException("Unexpected end of file while parsing case arms.");
            }

            var patterns = ReadPatterns();
            Expect(ShellTokenKind.Operator, ")");
            var body = ParseNodes(CaseArmTerminators);

            var spanStart = patterns.FirstToken ?? armStart;
            var span = body.Count > 0
                ? CreateSpan(spanStart, body[^1].Span)
                : CreateSpan(spanStart, spanStart);
            arms.Add(new ShellCaseArm(
                patterns.Values.ToImmutableArray(),
                body.ToImmutableArray(),
                span));

            SkipNewLines();
            var separator = Peek();
            if (separator.Kind == ShellTokenKind.Operator && separator.Value == ";;")
            {
                Advance();
            }

            // "esac" and end of file are handled at the top of the next iteration.
        }

        var end = Expect(ShellTokenKind.Word, "esac");
        return new ShellCaseNode(arms.ToImmutableArray(), CreateSpan(start, end));

        // Reads the "|"-separated patterns of one arm, stopping in front of the closing ")".
        // Adjacent tokens belonging to the same pattern are concatenated without a separator.
        (List<string> Values, ShellToken? FirstToken) ReadPatterns()
        {
            var values = new List<string>();
            ShellToken? first = null;
            var sb = new StringBuilder();
            while (true)
            {
                var current = Peek();
                if (current.Kind == ShellTokenKind.EndOfFile)
                {
                    throw new FormatException("Unexpected EOF in case arm pattern.");
                }

                if (current.Kind == ShellTokenKind.Operator)
                {
                    if (current.Value == "(" && first is null)
                    {
                        // POSIX permits an optional "(" before the pattern list; it is not part of the pattern.
                        Advance();
                        continue;
                    }

                    if (current.Value is ")" or "|")
                    {
                        if (sb.Length > 0)
                        {
                            values.Add(sb.ToString());
                            sb.Clear();
                        }

                        if (current.Value == "|")
                        {
                            Advance();
                            continue;
                        }

                        break;
                    }
                }

                first ??= current;
                sb.Append(current.Value);
                Advance();
            }

            return (values, first);
        }
    }

    /// <summary>
    /// Collects tokens up to, but not including, the next command terminator:
    /// end of file, a new line, or one of the operators <c>;</c>, <c>;;</c>, <c>&amp;&amp;</c>, <c>||</c>.
    /// </summary>
    private List<ShellToken> ReadUntilTerminator()
    {
        var tokens = new List<ShellToken>();
        while (true)
        {
            var token = Peek();
            if (token.Kind is ShellTokenKind.EndOfFile or ShellTokenKind.NewLine)
            {
                break;
            }

            if (token.Kind == ShellTokenKind.Operator && (token.Value is ";" or ";;" or "&&" or "||"))
            {
                break;
            }

            tokens.Add(Advance());
        }

        return tokens;
    }

    /// <summary>
    /// Collects every token up to, but not including, the next Word token equal to <paramref name="keyword"/>.
    /// </summary>
    /// <exception cref="FormatException">End of file is reached before the keyword.</exception>
    private ImmutableArray<ShellToken> ReadUntilKeyword(string keyword)
    {
        var tokens = new List<ShellToken>();
        while (true)
        {
            var token = Peek();
            if (token.Kind == ShellTokenKind.EndOfFile)
            {
                throw new FormatException($"Unexpected EOF while looking for keyword '{keyword}'.");
            }

            if (token.Kind == ShellTokenKind.Word && token.Value == keyword)
            {
                break;
            }

            tokens.Add(Advance());
        }

        return tokens.ToImmutableArray();
    }

    /// <summary>
    /// Returns the trimmed value of the first word or quoted token that is not a leading
    /// <c>NAME=value</c> assignment, or an empty string when there is none.
    /// </summary>
    private static string ExtractCommandName(IReadOnlyList<ShellToken> tokens)
    {
        foreach (var token in tokens)
        {
            if (token.Kind is not ShellTokenKind.Word and not ShellTokenKind.SingleQuoted and not ShellTokenKind.DoubleQuoted)
            {
                continue;
            }

            // Skip environment assignments e.g. FOO=bar exec /app
            var eqIndex = token.Value.IndexOf('=', StringComparison.Ordinal);
            if (eqIndex > 0 && token.Value[..eqIndex].All(IsIdentifierChar))
            {
                continue;
            }

            return NormalizeCommandName(token.Value);
        }

        return string.Empty;

        static bool IsIdentifierChar(char c) => char.IsLetterOrDigit(c) || c == '_';
    }

    /// <summary>Trims surrounding white space from a command name; null or empty yields an empty string.</summary>
    private static string NormalizeCommandName(string value)
        => string.IsNullOrEmpty(value) ? string.Empty : value.Trim();

    /// <summary>
    /// Consumes new lines and the operators that separate one command from the next:
    /// <c>;</c>, <c>&amp;</c>, <c>&amp;&amp;</c> and <c>||</c>.
    /// </summary>
    private void SkipCommandSeparators()
    {
        while (true)
        {
            var token = Peek();
            var isSeparator = token.Kind == ShellTokenKind.NewLine
                || (token.Kind == ShellTokenKind.Operator && (token.Value is ";" or "&" or "&&" or "||"));
            if (!isSeparator)
            {
                break;
            }

            Advance();
        }
    }

    /// <summary>Consumes consecutive new-line tokens.</summary>
    private void SkipNewLines()
    {
        while (Peek().Kind == ShellTokenKind.NewLine)
        {
            Advance();
        }
    }

    /// <summary>
    /// Consumes and returns the current token if it has the given kind (and value, when supplied).
    /// </summary>
    /// <exception cref="FormatException">The current token does not match.</exception>
    private ShellToken Expect(ShellTokenKind kind, string? value = null)
    {
        var token = Peek();
        if (token.Kind != kind || (value is not null && token.Value != value))
        {
            throw new FormatException($"Unexpected token '{token.Value}' at line {token.Line}, expected {value ?? kind.ToString()}.");
        }

        return Advance();
    }

    /// <summary>Returns the current token and moves past it; past the end, keeps returning the last token.</summary>
    private ShellToken Advance()
    {
        if (_index >= _tokens.Count)
        {
            return _tokens[^1];
        }

        return _tokens[_index++];
    }

    /// <summary>Returns the current token without consuming it; past the end, returns the last token.</summary>
    private ShellToken Peek()
    {
        if (_index >= _tokens.Count)
        {
            return _tokens[^1];
        }

        return _tokens[_index];
    }

    /// <summary>Builds a span from the start of <paramref name="start"/> to the end of <paramref name="end"/>.</summary>
    private static ShellSpan CreateSpan(ShellToken start, ShellToken end)
    {
        return new ShellSpan(start.Line, start.Column, end.Line, end.Column + end.Value.Length);
    }

    /// <summary>Builds a span from the start of <paramref name="start"/> to the end position of <paramref name="end"/>.</summary>
    private static ShellSpan CreateSpan(ShellToken start, ShellSpan end)
    {
        return new ShellSpan(start.Line, start.Column, end.EndLine, end.EndColumn);
    }

    /// <summary>Joins token values with single spaces.</summary>
    private static string JoinTokens(IEnumerable<ShellToken> tokens)
        => string.Join(" ", tokens.Select(token => token.Value));

    /// <summary>
    /// Returns the value of the first word or quoted token after position 0,
    /// or an empty string when there is none.
    /// </summary>
    private static string ExtractPrimaryArgument(ImmutableArray<ShellToken> tokens)
    {
        for (var i = 1; i < tokens.Length; i++)
        {
            var token = tokens[i];
            if (token.Kind is ShellTokenKind.Word or ShellTokenKind.SingleQuoted or ShellTokenKind.DoubleQuoted)
            {
                return token.Value;
            }
        }

        return string.Empty;
    }
}