using System.Collections.Immutable;
using System.Globalization;
using System.Linq;
using System.Text;

namespace StellaOps.Scanner.EntryTrace.Parsing;

/// <summary>
/// Deterministic parser producing a lightweight AST for Bourne shell constructs needed by EntryTrace.
/// Supports: simple commands, exec, source/dot, run-parts, if/elif/else/fi, case/esac.
/// </summary>
public sealed class ShellParser
{
    /// <summary>Keywords that end the body of an <c>if</c>/<c>elif</c> branch.</summary>
    private static readonly HashSet<string> IfBodyTerminators =
        new(StringComparer.Ordinal) { "elif", "else", "fi" };

    /// <summary>Keywords that end the body of an <c>else</c> branch.</summary>
    private static readonly HashSet<string> ElseBodyTerminators =
        new(StringComparer.Ordinal) { "fi" };

    /// <summary>Tokens that end the body of a <c>case</c> arm.</summary>
    private static readonly HashSet<string> CaseArmTerminators =
        new(StringComparer.Ordinal) { ";;", "esac" };

    private readonly IReadOnlyList<ShellToken> _tokens;
    private int _index;

    private ShellParser(IReadOnlyList<ShellToken> tokens)
    {
        _tokens = tokens;
    }

    /// <summary>
    /// Tokenizes <paramref name="source"/> with <see cref="ShellTokenizer"/> and parses the
    /// resulting tokens into a <see cref="ShellScript"/>.
    /// </summary>
    /// <param name="source">Shell script text to parse.</param>
    /// <returns>The script containing the parsed top-level nodes in source order.</returns>
    /// <exception cref="FormatException">
    /// An <c>if</c> or <c>case</c> construct is missing an expected keyword or is cut off by end of file.
    /// </exception>
    public static ShellScript Parse(string source)
    {
        var tokenizer = new ShellTokenizer();
        var tokens = tokenizer.Tokenize(source);
        var parser = new ShellParser(tokens);
        var nodes = parser.ParseNodes(untilKeywords: null);
        return new ShellScript(nodes.ToImmutableArray());
    }

    /// <summary>
    /// Parses consecutive nodes until end of file or until the next token's value is one of
    /// <paramref name="untilKeywords"/>. The terminating token is left unconsumed.
    /// </summary>
    /// <param name="untilKeywords">Terminator values, or <c>null</c> to parse to end of file.</param>
    private List<ShellNode> ParseNodes(HashSet<string>? untilKeywords)
    {
        var nodes = new List<ShellNode>();
        while (true)
        {
            SkipNewLines();
            var token = Peek();
            if (token.Kind == ShellTokenKind.EndOfFile)
            {
                break;
            }

            // Terminators may be keywords (Word) or operators: ParseCase passes ";;" and then
            // expects to find it as an Operator token, so both kinds have to be compared here.
            if (untilKeywords is not null
                && (token.Kind is ShellTokenKind.Word or ShellTokenKind.Operator)
                && untilKeywords.Contains(token.Value))
            {
                break;
            }

            var positionBefore = _index;
            ShellNode node = token switch
            {
                { Kind: ShellTokenKind.Word, Value: "if" } => ParseIf(),
                { Kind: ShellTokenKind.Word, Value: "case" } => ParseCase(),
                _ => ParseCommandLike(),
            };

            SkipCommandSeparators();

            if (_index == positionBefore)
            {
                // Nothing was consumed: the current token is a terminator that is not a
                // separator in this context (a stray ";;" outside a case arm). Drop it so the
                // loop always moves forward instead of emitting empty nodes indefinitely.
                Advance();
                continue;
            }

            nodes.Add(node);
        }

        return nodes;
    }

    /// <summary>
    /// Reads one command up to the next terminator and classifies it by its command word:
    /// <c>exec</c>, <c>source</c>/<c>.</c>, <c>run-parts</c>, or a plain command.
    /// </summary>
    /// <returns>
    /// The classified node; an empty <see cref="ShellCommandNode"/> when no tokens precede the terminator.
    /// </returns>
    private ShellNode ParseCommandLike()
    {
        var start = Peek();
        var tokens = ReadUntilTerminator();
        if (tokens.Count == 0)
        {
            return new ShellCommandNode(string.Empty, ImmutableArray<ShellToken>.Empty, CreateSpan(start, start));
        }

        var normalizedName = ExtractCommandName(tokens);
        var immutableTokens = tokens.ToImmutableArray();
        var span = CreateSpan(tokens[0], tokens[^1]);

        return normalizedName switch
        {
            "exec" => new ShellExecNode(immutableTokens, span),
            "source" or "." => new ShellIncludeNode(
                ExtractPrimaryArgument(immutableTokens),
                immutableTokens,
                span),
            "run-parts" => new ShellRunPartsNode(
                ExtractPrimaryArgument(immutableTokens),
                immutableTokens,
                span),
            _ => new ShellCommandNode(normalizedName, immutableTokens, span),
        };
    }

    /// <summary>
    /// Parses an <c>if … then … [elif … then …]* [else …] fi</c> construct starting at the <c>if</c> keyword.
    /// </summary>
    /// <exception cref="FormatException">A <c>then</c> or the closing <c>fi</c> is missing.</exception>
    private ShellIfNode ParseIf()
    {
        var start = Expect(ShellTokenKind.Word, "if");
        var branches = new List<ShellConditionalBranch>
        {
            ParseConditionalBranch(ShellConditionalKind.If, start),
        };

        while (true)
        {
            SkipNewLines();
            var next = Peek();
            if (next.Kind != ShellTokenKind.Word)
            {
                break;
            }

            if (next.Value == "elif")
            {
                branches.Add(ParseConditionalBranch(ShellConditionalKind.Elif, Advance()));
                continue;
            }

            if (next.Value == "else")
            {
                var elseStart = Advance();
                var elseBody = ParseNodes(ElseBodyTerminators);
                branches.Add(new ShellConditionalBranch(
                    ShellConditionalKind.Else,
                    elseBody.ToImmutableArray(),
                    CreateBodySpan(elseStart, elseBody),
                    null));
            }

            // Either the else branch was just consumed or the next word is neither "elif" nor
            // "else"; in both cases the only valid continuation is "fi".
            break;
        }

        Expect(ShellTokenKind.Word, "fi");
        return new ShellIfNode(branches.ToImmutableArray(), CreateSpan(start, Previous()));
    }

    /// <summary>
    /// Parses the remainder of an <c>if</c>/<c>elif</c> branch after its keyword has been consumed:
    /// the predicate tokens, the <c>then</c> keyword, and the body.
    /// </summary>
    /// <param name="kind">Branch kind recorded on the result.</param>
    /// <param name="keyword">The already-consumed <c>if</c>/<c>elif</c> token; anchors the branch span.</param>
    private ShellConditionalBranch ParseConditionalBranch(ShellConditionalKind kind, ShellToken keyword)
    {
        var predicate = ReadUntilKeyword("then");
        Expect(ShellTokenKind.Word, "then");
        var body = ParseNodes(IfBodyTerminators);

        return new ShellConditionalBranch(
            kind,
            body.ToImmutableArray(),
            CreateBodySpan(keyword, body),
            JoinTokens(predicate));
    }

    /// <summary>
    /// Parses a <c>case … in pattern) body ;; … esac</c> construct starting at the <c>case</c> keyword.
    /// The selector between <c>case</c> and <c>in</c> is consumed but not stored on the node.
    /// </summary>
    /// <exception cref="FormatException">
    /// <c>in</c>, a pattern's closing <c>)</c>, or <c>esac</c> is missing, or end of file is reached first.
    /// </exception>
    private ShellCaseNode ParseCase()
    {
        var start = Expect(ShellTokenKind.Word, "case");
        _ = ReadUntilKeyword("in");
        Expect(ShellTokenKind.Word, "in");

        var arms = new List<ShellCaseArm>();
        while (true)
        {
            SkipNewLines();
            var token = Peek();
            if (token.Kind == ShellTokenKind.Word && token.Value == "esac")
            {
                break;
            }

            if (token.Kind == ShellTokenKind.EndOfFile)
            {
                throw new FormatException("Unexpected end of file while parsing case arms.");
            }

            var patterns = ReadPatterns();
            Expect(ShellTokenKind.Operator, ")");

            var body = ParseNodes(CaseArmTerminators);
            var armStart = patterns.FirstToken ?? token;
            var span = body.Count > 0
                ? CreateSpan(armStart, body[^1].Span)
                : CreateSpan(armStart, token);

            arms.Add(new ShellCaseArm(
                patterns.Values.ToImmutableArray(),
                body.ToImmutableArray(),
                span));

            SkipNewLines();
            var separator = Peek();
            if (separator.Kind == ShellTokenKind.Operator && separator.Value == ";;")
            {
                Advance();
            }

            // "esac" and end of file are handled at the top of the next iteration.
        }

        Expect(ShellTokenKind.Word, "esac");
        return new ShellCaseNode(arms.ToImmutableArray(), CreateSpan(start, Previous()));

        // Reads the "|"-separated alternatives of one arm, stopping in front of the closing ")".
        // Adjacent tokens within one alternative are concatenated without separators.
        (List<string> Values, ShellToken? FirstToken) ReadPatterns()
        {
            var values = new List<string>();
            ShellToken? first = null;
            var sb = new StringBuilder();

            while (true)
            {
                var current = Peek();
                if (current.Kind is ShellTokenKind.Operator && current.Value is ")" or "|")
                {
                    // Every exit from this loop passes through this flush, so nothing is
                    // left in the builder once the loop ends.
                    if (sb.Length > 0)
                    {
                        values.Add(sb.ToString());
                        sb.Clear();
                    }

                    if (current.Value == "|")
                    {
                        Advance();
                        continue;
                    }

                    break;
                }

                if (current.Kind == ShellTokenKind.EndOfFile)
                {
                    throw new FormatException("Unexpected EOF in case arm pattern.");
                }

                first ??= current;
                sb.Append(current.Value);
                Advance();
            }

            return (values, first);
        }
    }

    /// <summary>
    /// Consumes tokens up to, but not including, the next newline, end of file, or one of the
    /// operators <c>;</c>, <c>;;</c>, <c>&amp;&amp;</c>, <c>||</c>.
    /// </summary>
    private List<ShellToken> ReadUntilTerminator()
    {
        var tokens = new List<ShellToken>();
        while (true)
        {
            var token = Peek();
            if (token.Kind is ShellTokenKind.EndOfFile or ShellTokenKind.NewLine)
            {
                break;
            }

            // ";;" is included so that "cmd ;;" on a single line leaves the separator for ParseCase.
            if (token.Kind == ShellTokenKind.Operator && token.Value is ";" or ";;" or "&&" or "||")
            {
                break;
            }

            tokens.Add(Advance());
        }

        return tokens;
    }

    /// <summary>
    /// Consumes tokens up to, but not including, the next Word token equal to <paramref name="keyword"/>.
    /// </summary>
    /// <exception cref="FormatException">End of file is reached before the keyword.</exception>
    private ImmutableArray<ShellToken> ReadUntilKeyword(string keyword)
    {
        var tokens = new List<ShellToken>();
        while (true)
        {
            var token = Peek();
            if (token.Kind == ShellTokenKind.EndOfFile)
            {
                throw new FormatException($"Unexpected EOF while looking for keyword '{keyword}'.");
            }

            if (token.Kind == ShellTokenKind.Word && token.Value == keyword)
            {
                break;
            }

            tokens.Add(Advance());
        }

        return tokens.ToImmutableArray();
    }

    /// <summary>
    /// Returns the trimmed command word of <paramref name="tokens"/>, skipping leading environment
    /// assignments, or an empty string when there is none.
    /// </summary>
    private static string ExtractCommandName(IReadOnlyList<ShellToken> tokens)
    {
        var index = FindCommandNameIndex(tokens);
        return index < 0 ? string.Empty : NormalizeCommandName(tokens[index].Value);
    }

    /// <summary>
    /// Returns the index of the first word-like token that is not an environment assignment
    /// (e.g. the <c>exec</c> in <c>FOO=bar exec /app</c>), or -1 when there is none.
    /// </summary>
    private static int FindCommandNameIndex(IReadOnlyList<ShellToken> tokens)
    {
        for (var i = 0; i < tokens.Count; i++)
        {
            var token = tokens[i];
            if (!IsWordLike(token))
            {
                continue;
            }

            // NAME=value where NAME consists only of letters, digits and underscores.
            var eqIndex = token.Value.IndexOf('=', StringComparison.Ordinal);
            if (eqIndex > 0 && token.Value[..eqIndex].All(IsIdentifierChar))
            {
                continue;
            }

            return i;
        }

        return -1;

        static bool IsIdentifierChar(char c) => char.IsLetterOrDigit(c) || c == '_';
    }

    /// <summary>Returns whether the token is a bare word or a single/double-quoted string.</summary>
    private static bool IsWordLike(ShellToken token) =>
        token.Kind is ShellTokenKind.Word or ShellTokenKind.SingleQuoted or ShellTokenKind.DoubleQuoted;

    /// <summary>Returns <paramref name="value"/> with surrounding whitespace removed; empty for null or empty input.</summary>
    private static string NormalizeCommandName(string value) =>
        string.IsNullOrEmpty(value) ? string.Empty : value.Trim();

    /// <summary>
    /// Skips newlines and the operators <c>;</c>, <c>&amp;</c>, <c>&amp;&amp;</c> and <c>||</c>.
    /// <c>&amp;&amp;</c> and <c>||</c> must be consumed here because <see cref="ReadUntilTerminator"/>
    /// stops in front of them. <c>;;</c> is deliberately left in place for <see cref="ParseCase"/>.
    /// </summary>
    private void SkipCommandSeparators()
    {
        while (true)
        {
            var token = Peek();
            var isSeparator =
                token.Kind == ShellTokenKind.NewLine
                || (token.Kind == ShellTokenKind.Operator && token.Value is ";" or "&" or "&&" or "||");

            if (!isSeparator)
            {
                break;
            }

            Advance();
        }
    }

    /// <summary>Skips consecutive newline tokens.</summary>
    private void SkipNewLines()
    {
        while (Peek().Kind == ShellTokenKind.NewLine)
        {
            Advance();
        }
    }

    /// <summary>
    /// Consumes and returns the current token if it has the given kind and, when
    /// <paramref name="value"/> is not null, the given value.
    /// </summary>
    /// <exception cref="FormatException">The current token does not match.</exception>
    private ShellToken Expect(ShellTokenKind kind, string? value = null)
    {
        var token = Peek();
        if (token.Kind != kind || (value is not null && token.Value != value))
        {
            throw new FormatException($"Unexpected token '{token.Value}' at line {token.Line}, expected {value ?? kind.ToString()}.");
        }

        return Advance();
    }

    /// <summary>
    /// Returns the current token and moves past it. Once the index is past the end, the last
    /// token is returned repeatedly without moving.
    /// </summary>
    // NOTE(review): assumes the tokenizer always yields at least one token (presumably a
    // trailing EndOfFile) - confirm against ShellTokenizer.
    private ShellToken Advance() =>
        _index < _tokens.Count ? _tokens[_index++] : _tokens[^1];

    /// <summary>Returns the current token without consuming it; the last token once past the end.</summary>
    private ShellToken Peek() =>
        _index < _tokens.Count ? _tokens[_index] : _tokens[^1];

    /// <summary>Returns the most recently consumed token, or the first token when nothing has been consumed.</summary>
    private ShellToken Previous() =>
        _index == 0 ? _tokens[0] : _tokens[_index - 1];

    /// <summary>Creates a span from the start of <paramref name="start"/> to the end of <paramref name="end"/>'s value.</summary>
    private static ShellSpan CreateSpan(ShellToken start, ShellToken end)
    {
        return new ShellSpan(start.Line, start.Column, end.Line, end.Column + end.Value.Length);
    }

    /// <summary>Creates a span from the start of <paramref name="start"/> to the end position of <paramref name="end"/>.</summary>
    private static ShellSpan CreateSpan(ShellToken start, ShellSpan end)
    {
        return new ShellSpan(start.Line, start.Column, end.EndLine, end.EndColumn);
    }

    /// <summary>
    /// Creates the span of a branch: from <paramref name="start"/> to the end of the last body
    /// node, or covering only <paramref name="start"/> when the body is empty.
    /// </summary>
    private static ShellSpan CreateBodySpan(ShellToken start, List<ShellNode> body) =>
        body.Count > 0 ? CreateSpan(start, body[^1].Span) : CreateSpan(start, start);

    /// <summary>Joins the token values with single spaces.</summary>
    private static string JoinTokens(IEnumerable<ShellToken> tokens) =>
        string.Join(" ", tokens.Select(token => token.Value));

    /// <summary>
    /// Returns the value of the first word-like token after the command word, or an empty string
    /// when there is none. Leading environment assignments are skipped in the same way as in
    /// <see cref="ExtractCommandName"/>, so <c>FOO=bar source /x</c> yields <c>/x</c>.
    /// </summary>
    private static string ExtractPrimaryArgument(ImmutableArray<ShellToken> tokens)
    {
        if (tokens.Length <= 1)
        {
            return string.Empty;
        }

        var commandIndex = FindCommandNameIndex(tokens);
        if (commandIndex < 0)
        {
            return string.Empty;
        }

        for (var i = commandIndex + 1; i < tokens.Length; i++)
        {
            var token = tokens[i];
            if (IsWordLike(token))
            {
                return token.Value;
            }
        }

        return string.Empty;
    }
}