- Cleaned up code formatting and organization across multiple files for improved readability. - Introduced `OsScanAnalyzerDispatcher` to handle OS analyzer execution and plugin loading. - Updated `ScanJobContext` to include an `Analysis` property for storing scan results. - Enhanced `ScanJobProcessor` to utilize the new `OsScanAnalyzerDispatcher`. - Improved logging and error handling in `ScanProgressReporter` for better traceability. - Updated project dependencies and added references to new analyzer plugins. - Revised task documentation to reflect current status and dependencies.
201 lines
5.5 KiB
C#
201 lines
5.5 KiB
C#
using System.Globalization;
|
|
using System.Text;
|
|
|
|
namespace StellaOps.Scanner.EntryTrace.Parsing;
|
|
|
|
/// <summary>
/// Lightweight Bourne shell tokenizer sufficient for ENTRYPOINT scripts.
/// Deterministic: emits tokens in source order without normalization.
/// </summary>
/// <remarks>
/// Recognised token kinds are new lines, operators (<c>;</c>, <c>&amp;</c>, <c>|</c>,
/// <c>(</c>, <c>)</c> and the doubled forms <c>&amp;&amp;</c>, <c>||</c>, <c>;;</c>),
/// single-quoted strings, double-quoted strings and bare words. Comments and
/// horizontal whitespace produce no tokens. Every token carries the 1-based line
/// and column at which it starts; carriage returns are skipped without advancing
/// the column.
/// </remarks>
public sealed class ShellTokenizer
{
    /// <summary>
    /// Splits <paramref name="source"/> into shell tokens, always terminated by a
    /// single <c>EndOfFile</c> token.
    /// </summary>
    /// <param name="source">Script text to tokenize.</param>
    /// <returns>The tokens in source order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
    /// <exception cref="FormatException">A quoted string is not terminated.</exception>
    public IReadOnlyList<ShellToken> Tokenize(string source)
    {
        if (source is null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        var tokens = new List<ShellToken>();
        var line = 1;
        var column = 1;
        var index = 0;

        while (index < source.Length)
        {
            var ch = source[index];

            if (ch == '\r')
            {
                // Carriage returns are dropped and do not occupy a column.
                index++;
                continue;
            }

            if (ch == '\n')
            {
                tokens.Add(new ShellToken(ShellTokenKind.NewLine, "\n", line, column));
                index++;
                line++;
                column = 1;
                continue;
            }

            if (char.IsWhiteSpace(ch))
            {
                index++;
                column++;
                continue;
            }

            if (ch == '\\' && index + 1 < source.Length && source[index + 1] == '\n')
            {
                // A backslash-newline between tokens is a line continuation: it joins
                // the two physical lines and must not surface as a Word or NewLine token.
                index += 2;
                line++;
                column = 1;
                continue;
            }

            if (ch == '#')
            {
                // Comment: skip until newline, keeping the column in step so the
                // following NewLine/EndOfFile token reports its real position.
                var commentStart = index;
                while (index < source.Length && source[index] != '\n')
                {
                    index++;
                }

                column += index - commentStart;
                continue;
            }

            if (IsOperatorStart(ch))
            {
                var opStartColumn = column;
                var op = ConsumeOperator(source, ref index, ref column);
                tokens.Add(new ShellToken(ShellTokenKind.Operator, op, line, opStartColumn));
                continue;
            }

            if (ch == '\'')
            {
                var (value, length) = ConsumeSingleQuoted(source, index + 1);
                tokens.Add(new ShellToken(ShellTokenKind.SingleQuoted, value, line, column));

                // +2 accounts for the opening and closing quote characters.
                Advance(source, ref index, length + 2, ref line, ref column);
                continue;
            }

            if (ch == '"')
            {
                var (value, length) = ConsumeDoubleQuoted(source, index + 1);
                tokens.Add(new ShellToken(ShellTokenKind.DoubleQuoted, value, line, column));

                // +2 accounts for the opening and closing quote characters.
                Advance(source, ref index, length + 2, ref line, ref column);
                continue;
            }

            var (word, consumed) = ConsumeWord(source, index);
            tokens.Add(new ShellToken(ShellTokenKind.Word, word, line, column));
            Advance(source, ref index, consumed, ref line, ref column);
        }

        tokens.Add(new ShellToken(ShellTokenKind.EndOfFile, string.Empty, line, column));
        return tokens;
    }

    /// <summary>
    /// Moves <paramref name="index"/> forward by <paramref name="count"/> characters,
    /// updating <paramref name="line"/> and <paramref name="column"/> for every newline
    /// contained in the consumed span (quoted strings and continued words may span lines).
    /// </summary>
    private static void Advance(string source, ref int index, int count, ref int line, ref int column)
    {
        var end = index + count;
        while (index < end)
        {
            if (source[index] == '\n')
            {
                line++;
                column = 1;
            }
            else
            {
                column++;
            }

            index++;
        }
    }

    /// <summary>Returns <c>true</c> when <paramref name="ch"/> begins an operator token.</summary>
    private static bool IsOperatorStart(char ch) => ch switch
    {
        ';' or '&' or '|' or '(' or ')' => true,
        _ => false
    };

    /// <summary>
    /// Consumes the operator starting at <paramref name="index"/> and returns its text.
    /// The doubled forms <c>&amp;&amp;</c>, <c>||</c> and <c>;;</c> are returned as one
    /// two-character operator; everything else is a single character.
    /// </summary>
    /// <param name="source">Script text.</param>
    /// <param name="index">Position of the operator; advanced past it on return.</param>
    /// <param name="column">Current column; advanced by the operator length on return.</param>
    private static string ConsumeOperator(string source, ref int index, ref int column)
    {
        var start = index;
        var ch = source[index];
        index++;
        column++;

        if (index < source.Length)
        {
            var next = source[index];
            if ((ch == '&' && next == '&') ||
                (ch == '|' && next == '|') ||
                (ch == ';' && next == ';'))
            {
                index++;
                column++;
            }
        }

        return source[start..index];
    }

    /// <summary>
    /// Reads a single-quoted string body. No escape processing is performed; the
    /// body ends at the next single quote.
    /// </summary>
    /// <param name="source">Script text.</param>
    /// <param name="startIndex">Index of the first character after the opening quote.</param>
    /// <returns>The body text and its length in source characters (quotes excluded).</returns>
    /// <exception cref="FormatException">No closing quote was found.</exception>
    private static (string Value, int Length) ConsumeSingleQuoted(string source, int startIndex)
    {
        var end = startIndex;
        while (end < source.Length && source[end] != '\'')
        {
            end++;
        }

        if (end >= source.Length)
        {
            throw new FormatException("Unterminated single-quoted string in entrypoint script.");
        }

        return (source[startIndex..end], end - startIndex);
    }

    /// <summary>
    /// Reads a double-quoted string body, resolving the backslash escapes that keep
    /// their meaning inside double quotes (<c>\"</c>, <c>\\</c>, <c>\$</c>, <c>\`</c>)
    /// and dropping backslash-newline line continuations. Any other backslash is
    /// kept literally.
    /// </summary>
    /// <param name="source">Script text.</param>
    /// <param name="startIndex">Index of the first character after the opening quote.</param>
    /// <returns>
    /// The unescaped value and the number of source characters between the quotes
    /// (which can exceed the value length when escapes were resolved).
    /// </returns>
    /// <exception cref="FormatException">No closing quote was found.</exception>
    private static (string Value, int Length) ConsumeDoubleQuoted(string source, int startIndex)
    {
        var builder = new StringBuilder();
        var index = startIndex;

        while (index < source.Length)
        {
            var ch = source[index];
            if (ch == '"')
            {
                return (builder.ToString(), index - startIndex);
            }

            if (ch == '\\' && index + 1 < source.Length)
            {
                var next = source[index + 1];
                if (next == '\n')
                {
                    // Line continuation: the shell removes both the backslash and the
                    // newline, so nothing is appended to the value.
                    index += 2;
                    continue;
                }

                if (next is '"' or '\\' or '$' or '`')
                {
                    builder.Append(next);
                    index += 2;
                    continue;
                }
            }

            builder.Append(ch);
            index++;
        }

        throw new FormatException("Unterminated double-quoted string in entrypoint script.");
    }

    /// <summary>
    /// Reads a bare word starting at <paramref name="startIndex"/>. The word ends at
    /// whitespace or at an operator character. A backslash-newline inside the word
    /// is a line continuation and is kept in the returned raw text.
    /// </summary>
    /// <param name="source">Script text.</param>
    /// <param name="startIndex">Index of the first character of the word.</param>
    /// <returns>The raw word text and its length in source characters.</returns>
    /// <exception cref="InvalidOperationException">No character could be consumed.</exception>
    private static (string Value, int Length) ConsumeWord(string source, int startIndex)
    {
        var index = startIndex;
        while (index < source.Length)
        {
            var ch = source[index];

            // '#' is deliberately NOT a terminator here: it only starts a comment at
            // the beginning of a token, so "$#", "a#b" or "http://host/#frag" stay one word.
            if (char.IsWhiteSpace(ch) || IsOperatorStart(ch))
            {
                break;
            }

            if (ch == '\\' && index + 1 < source.Length && source[index + 1] == '\n')
            {
                // Line continuation.
                index += 2;
                continue;
            }

            index++;
        }

        if (index == startIndex)
        {
            throw new InvalidOperationException("Tokenizer failed to advance while consuming word.");
        }

        var text = source[startIndex..index];
        return (text, index - startIndex);
    }
}
|