using System.Globalization;
using System.Text;

namespace StellaOps.Scanner.EntryTrace.Parsing;

/// <summary>
/// Lightweight Bourne shell tokenizer sufficient for ENTRYPOINT scripts.
/// Deterministic: emits tokens in source order without normalization.
/// </summary>
/// <remarks>
/// Known simplifications, all visible in the implementation below:
/// only <c>;</c>, the ampersand, <c>|</c>, <c>(</c> and <c>)</c> start an operator
/// (redirection characters are treated as ordinary word characters); a quote
/// character that appears in the middle of a word does not start a quoted token;
/// here-documents are not recognised.
/// </remarks>
public sealed class ShellTokenizer
{
    /// <summary>
    /// Splits <paramref name="source"/> into tokens, always terminated by a single
    /// <see cref="ShellTokenKind.EndOfFile"/> token.
    /// </summary>
    /// <param name="source">Shell script text.</param>
    /// <returns>The tokens in source order, each tagged with its 1-based line and column.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="FormatException">A single- or double-quoted string is not terminated.</exception>
    public IReadOnlyList<ShellToken> Tokenize(string source)
    {
        if (source is null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        var tokens = new List<ShellToken>();
        var line = 1;
        var column = 1;
        var index = 0;

        while (index < source.Length)
        {
            var ch = source[index];

            // Carriage returns are dropped silently and occupy no column.
            if (ch == '\r')
            {
                index++;
                continue;
            }

            if (ch == '\n')
            {
                tokens.Add(new ShellToken(ShellTokenKind.NewLine, "\n", line, column));
                index++;
                line++;
                column = 1;
                continue;
            }

            if (char.IsWhiteSpace(ch))
            {
                index++;
                column++;
                continue;
            }

            // A '#' seen here is always at the start of a token (words no longer stop
            // at '#'), so it introduces a comment running to the end of the line.
            if (ch == '#')
            {
                var commentStart = index;
                while (index < source.Length && source[index] != '\n')
                {
                    index++;
                }

                // Keep the column in step so a trailing comment does not skew the
                // position reported for the EndOfFile token.
                column += index - commentStart;
                continue;
            }

            if (IsOperatorStart(ch))
            {
                var opStartColumn = column;
                var op = ConsumeOperator(source, ref index, ref column);
                tokens.Add(new ShellToken(ShellTokenKind.Operator, op, line, opStartColumn));
                continue;
            }

            if (ch == '\'')
            {
                var (value, length) = ConsumeSingleQuoted(source, index + 1);
                tokens.Add(new ShellToken(ShellTokenKind.SingleQuoted, value, line, column));

                // +2 accounts for the opening and closing quote characters.
                AdvancePosition(source, index, length + 2, ref line, ref column);
                index += length + 2;
                continue;
            }

            if (ch == '"')
            {
                var (value, length) = ConsumeDoubleQuoted(source, index + 1);
                tokens.Add(new ShellToken(ShellTokenKind.DoubleQuoted, value, line, column));

                AdvancePosition(source, index, length + 2, ref line, ref column);
                index += length + 2;
                continue;
            }

            var (word, consumed) = ConsumeWord(source, index);
            tokens.Add(new ShellToken(ShellTokenKind.Word, word, line, column));

            AdvancePosition(source, index, consumed, ref line, ref column);
            index += consumed;
        }

        tokens.Add(new ShellToken(ShellTokenKind.EndOfFile, string.Empty, line, column));
        return tokens;
    }

    /// <summary>
    /// Moves <paramref name="line"/>/<paramref name="column"/> past <paramref name="count"/>
    /// characters of <paramref name="source"/> starting at <paramref name="start"/>.
    /// Needed because quoted strings and line continuations can span several lines.
    /// </summary>
    private static void AdvancePosition(string source, int start, int count, ref int line, ref int column)
    {
        var end = start + count;
        for (var i = start; i < end; i++)
        {
            switch (source[i])
            {
                case '\n':
                    line++;
                    column = 1;
                    break;

                case '\r':
                    // Zero-width, matching how the main loop treats carriage returns.
                    break;

                default:
                    column++;
                    break;
            }
        }
    }

    /// <summary>Returns true for the characters that can begin an operator token.</summary>
    private static bool IsOperatorStart(char ch) => ch is ';' or '&' or '|' or '(' or ')';

    /// <summary>
    /// Consumes a one-character operator, or a two-character one when the same
    /// ampersand, pipe or semicolon is doubled, advancing <paramref name="index"/>
    /// and <paramref name="column"/> past it.
    /// </summary>
    /// <returns>The operator text exactly as it appears in the source.</returns>
    private static string ConsumeOperator(string source, ref int index, ref int column)
    {
        var start = index;
        var ch = source[index];
        index++;
        column++;

        if (index < source.Length)
        {
            var next = source[index];
            var isDoubled = next == ch && ch is '&' or '|' or ';' && next == ch;
            if (isDoubled)
            {
                index++;
                column++;
            }
        }

        return source[start..index];
    }

    /// <summary>
    /// Reads the body of a single-quoted string. <paramref name="startIndex"/> is the
    /// first character after the opening quote.
    /// </summary>
    /// <returns>The literal contents and their length in source characters (quotes excluded).</returns>
    /// <exception cref="FormatException">No closing quote was found.</exception>
    private static (string Value, int Length) ConsumeSingleQuoted(string source, int startIndex)
    {
        var end = startIndex;
        while (end < source.Length && source[end] != '\'')
        {
            end++;
        }

        if (end >= source.Length)
        {
            throw new FormatException("Unterminated single-quoted string in entrypoint script.");
        }

        return (source[startIndex..end], end - startIndex);
    }

    /// <summary>
    /// Reads the body of a double-quoted string, resolving backslash escapes.
    /// <paramref name="startIndex"/> is the first character after the opening quote.
    /// </summary>
    /// <returns>
    /// The unescaped contents, and the number of raw source characters between the
    /// quotes (which can exceed the length of the unescaped value).
    /// </returns>
    /// <exception cref="FormatException">No closing quote was found.</exception>
    private static (string Value, int Length) ConsumeDoubleQuoted(string source, int startIndex)
    {
        var builder = new StringBuilder();
        var index = startIndex;

        while (index < source.Length)
        {
            var ch = source[index];
            if (ch == '"')
            {
                return (builder.ToString(), index - startIndex);
            }

            if (ch == '\\' && index + 1 < source.Length)
            {
                var next = source[index + 1];

                if (next == '\n')
                {
                    // Backslash-newline is a line continuation: both characters are
                    // removed rather than contributing a newline to the value.
                    index += 2;
                    continue;
                }

                if (next is '"' or '\\' or '$' or '`')
                {
                    builder.Append(next);
                    index += 2;
                    continue;
                }
            }

            // Any other character, including a backslash that does not form a
            // recognised escape, is kept verbatim.
            builder.Append(ch);
            index++;
        }

        throw new FormatException("Unterminated double-quoted string in entrypoint script.");
    }

    /// <summary>
    /// Reads an unquoted word starting at <paramref name="startIndex"/>. The word ends
    /// at whitespace or at an operator character. A backslash-newline pair is kept
    /// inside the word text and does not end it.
    /// </summary>
    /// <returns>The raw word text and the number of source characters it spans.</returns>
    /// <exception cref="InvalidOperationException">No character could be consumed.</exception>
    private static (string Value, int Length) ConsumeWord(string source, int startIndex)
    {
        var index = startIndex;

        while (index < source.Length)
        {
            var ch = source[index];

            // '#' is deliberately NOT a terminator: inside a word it is an ordinary
            // character (as in "$#", "${#var}", "a#b"). Comments are only recognised
            // by the caller, at the start of a token.
            if (char.IsWhiteSpace(ch) || IsOperatorStart(ch))
            {
                break;
            }

            if (ch == '\\' && index + 1 < source.Length && source[index + 1] == '\n')
            {
                // Line continuation.
                index += 2;
                continue;
            }

            index++;
        }

        if (index == startIndex)
        {
            throw new InvalidOperationException("Tokenizer failed to advance while consuming word.");
        }

        return (source[startIndex..index], index - startIndex);
    }
}