Refactor and enhance scanner worker functionality

- Cleaned up code formatting and organization across multiple files for improved readability.
- Introduced `OsScanAnalyzerDispatcher` to handle OS analyzer execution and plugin loading.
- Updated `ScanJobContext` to include an `Analysis` property for storing scan results.
- Enhanced `ScanJobProcessor` to utilize the new `OsScanAnalyzerDispatcher`.
- Improved logging and error handling in `ScanProgressReporter` for better traceability.
- Updated project dependencies and added references to new analyzer plugins.
- Revised task documentation to reflect current status and dependencies.
This commit is contained in:
2025-10-19 18:34:15 +03:00
parent daa6a4ae8c
commit 7e2fa0a42a
59 changed files with 5563 additions and 2288 deletions

View File

@@ -0,0 +1,54 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.EntryTrace.Parsing;
public abstract record ShellNode(ShellSpan Span);
public sealed record ShellScript(ImmutableArray<ShellNode> Nodes);
public sealed record ShellSpan(int StartLine, int StartColumn, int EndLine, int EndColumn);
public sealed record ShellCommandNode(
string Command,
ImmutableArray<ShellToken> Arguments,
ShellSpan Span) : ShellNode(Span);
public sealed record ShellIncludeNode(
string PathExpression,
ImmutableArray<ShellToken> Arguments,
ShellSpan Span) : ShellNode(Span);
public sealed record ShellExecNode(
ImmutableArray<ShellToken> Arguments,
ShellSpan Span) : ShellNode(Span);
public sealed record ShellIfNode(
ImmutableArray<ShellConditionalBranch> Branches,
ShellSpan Span) : ShellNode(Span);
public sealed record ShellConditionalBranch(
ShellConditionalKind Kind,
ImmutableArray<ShellNode> Body,
ShellSpan Span,
string? PredicateSummary);
public enum ShellConditionalKind
{
If,
Elif,
Else
}
public sealed record ShellCaseNode(
ImmutableArray<ShellCaseArm> Arms,
ShellSpan Span) : ShellNode(Span);
public sealed record ShellCaseArm(
ImmutableArray<string> Patterns,
ImmutableArray<ShellNode> Body,
ShellSpan Span);
public sealed record ShellRunPartsNode(
string DirectoryExpression,
ImmutableArray<ShellToken> Arguments,
ShellSpan Span) : ShellNode(Span);

View File

@@ -0,0 +1,485 @@
using System.Collections.Immutable;
using System.Globalization;
using System.Linq;
using System.Text;
namespace StellaOps.Scanner.EntryTrace.Parsing;
/// <summary>
/// Deterministic parser producing a lightweight AST for Bourne shell constructs needed by EntryTrace.
/// Supports: simple commands, exec, source/dot, run-parts, if/elif/else/fi, case/esac.
/// </summary>
public sealed class ShellParser
{
private readonly IReadOnlyList<ShellToken> _tokens;
private int _index;
private ShellParser(IReadOnlyList<ShellToken> tokens)
{
_tokens = tokens;
}
public static ShellScript Parse(string source)
{
var tokenizer = new ShellTokenizer();
var tokens = tokenizer.Tokenize(source);
var parser = new ShellParser(tokens);
var nodes = parser.ParseNodes(untilKeywords: null);
return new ShellScript(nodes.ToImmutableArray());
}
private List<ShellNode> ParseNodes(HashSet<string>? untilKeywords)
{
var nodes = new List<ShellNode>();
while (true)
{
SkipNewLines();
var token = Peek();
if (token.Kind == ShellTokenKind.EndOfFile)
{
break;
}
if (token.Kind == ShellTokenKind.Word && untilKeywords is not null && untilKeywords.Contains(token.Value))
{
break;
}
ShellNode? node = token.Kind switch
{
ShellTokenKind.Word when token.Value == "if" => ParseIf(),
ShellTokenKind.Word when token.Value == "case" => ParseCase(),
_ => ParseCommandLike()
};
if (node is not null)
{
nodes.Add(node);
}
SkipCommandSeparators();
}
return nodes;
}
private ShellNode ParseCommandLike()
{
var start = Peek();
var tokens = ReadUntilTerminator();
if (tokens.Count == 0)
{
return new ShellCommandNode(string.Empty, ImmutableArray<ShellToken>.Empty, CreateSpan(start, start));
}
var normalizedName = ExtractCommandName(tokens);
var immutableTokens = tokens.ToImmutableArray();
var span = CreateSpan(tokens[0], tokens[^1]);
return normalizedName switch
{
"exec" => new ShellExecNode(immutableTokens, span),
"source" or "." => new ShellIncludeNode(
ExtractPrimaryArgument(immutableTokens),
immutableTokens,
span),
"run-parts" => new ShellRunPartsNode(
ExtractPrimaryArgument(immutableTokens),
immutableTokens,
span),
_ => new ShellCommandNode(normalizedName, immutableTokens, span)
};
}
private ShellIfNode ParseIf()
{
var start = Expect(ShellTokenKind.Word, "if");
var predicateTokens = ReadUntilKeyword("then");
Expect(ShellTokenKind.Word, "then");
var branches = new List<ShellConditionalBranch>();
var predicateSummary = JoinTokens(predicateTokens);
var thenNodes = ParseNodes(new HashSet<string>(StringComparer.Ordinal)
{
"elif",
"else",
"fi"
});
branches.Add(new ShellConditionalBranch(
ShellConditionalKind.If,
thenNodes.ToImmutableArray(),
CreateSpan(start, thenNodes.LastOrDefault()?.Span ?? CreateSpan(start, start)),
predicateSummary));
while (true)
{
SkipNewLines();
var next = Peek();
if (next.Kind != ShellTokenKind.Word)
{
break;
}
if (next.Value == "elif")
{
var elifStart = Advance();
var elifPredicate = ReadUntilKeyword("then");
Expect(ShellTokenKind.Word, "then");
var elifBody = ParseNodes(new HashSet<string>(StringComparer.Ordinal)
{
"elif",
"else",
"fi"
});
var span = elifBody.Count > 0
? CreateSpan(elifStart, elifBody[^1].Span)
: CreateSpan(elifStart, elifStart);
branches.Add(new ShellConditionalBranch(
ShellConditionalKind.Elif,
elifBody.ToImmutableArray(),
span,
JoinTokens(elifPredicate)));
continue;
}
if (next.Value == "else")
{
var elseStart = Advance();
var elseBody = ParseNodes(new HashSet<string>(StringComparer.Ordinal)
{
"fi"
});
branches.Add(new ShellConditionalBranch(
ShellConditionalKind.Else,
elseBody.ToImmutableArray(),
elseBody.Count > 0 ? CreateSpan(elseStart, elseBody[^1].Span) : CreateSpan(elseStart, elseStart),
null));
break;
}
break;
}
Expect(ShellTokenKind.Word, "fi");
var end = Previous();
return new ShellIfNode(branches.ToImmutableArray(), CreateSpan(start, end));
}
private ShellCaseNode ParseCase()
{
var start = Expect(ShellTokenKind.Word, "case");
var selectorTokens = ReadUntilKeyword("in");
Expect(ShellTokenKind.Word, "in");
var arms = new List<ShellCaseArm>();
while (true)
{
SkipNewLines();
var token = Peek();
if (token.Kind == ShellTokenKind.Word && token.Value == "esac")
{
break;
}
if (token.Kind == ShellTokenKind.EndOfFile)
{
throw new FormatException("Unexpected end of file while parsing case arms.");
}
var patterns = ReadPatterns();
Expect(ShellTokenKind.Operator, ")");
var body = ParseNodes(new HashSet<string>(StringComparer.Ordinal)
{
";;",
"esac"
});
ShellSpan span;
if (body.Count > 0)
{
span = CreateSpan(patterns.FirstToken ?? token, body[^1].Span);
}
else
{
span = CreateSpan(patterns.FirstToken ?? token, token);
}
arms.Add(new ShellCaseArm(
patterns.Values.ToImmutableArray(),
body.ToImmutableArray(),
span));
SkipNewLines();
var separator = Peek();
if (separator.Kind == ShellTokenKind.Operator && separator.Value == ";;")
{
Advance();
continue;
}
if (separator.Kind == ShellTokenKind.Word && separator.Value == "esac")
{
break;
}
}
Expect(ShellTokenKind.Word, "esac");
return new ShellCaseNode(arms.ToImmutableArray(), CreateSpan(start, Previous()));
(List<string> Values, ShellToken? FirstToken) ReadPatterns()
{
var values = new List<string>();
ShellToken? first = null;
var sb = new StringBuilder();
while (true)
{
var current = Peek();
if (current.Kind is ShellTokenKind.Operator && current.Value is ")" or "|")
{
if (sb.Length > 0)
{
values.Add(sb.ToString());
sb.Clear();
}
if (current.Value == "|")
{
Advance();
continue;
}
break;
}
if (current.Kind == ShellTokenKind.EndOfFile)
{
throw new FormatException("Unexpected EOF in case arm pattern.");
}
if (first is null)
{
first = current;
}
sb.Append(current.Value);
Advance();
}
if (values.Count == 0 && sb.Length > 0)
{
values.Add(sb.ToString());
}
return (values, first);
}
}
private List<ShellToken> ReadUntilTerminator()
{
var tokens = new List<ShellToken>();
while (true)
{
var token = Peek();
if (token.Kind is ShellTokenKind.EndOfFile or ShellTokenKind.NewLine)
{
break;
}
if (token.Kind == ShellTokenKind.Operator && token.Value is ";" or "&&" or "||")
{
break;
}
tokens.Add(Advance());
}
return tokens;
}
private ImmutableArray<ShellToken> ReadUntilKeyword(string keyword)
{
var tokens = new List<ShellToken>();
while (true)
{
var token = Peek();
if (token.Kind == ShellTokenKind.EndOfFile)
{
throw new FormatException($"Unexpected EOF while looking for keyword '{keyword}'.");
}
if (token.Kind == ShellTokenKind.Word && token.Value == keyword)
{
break;
}
tokens.Add(Advance());
}
return tokens.ToImmutableArray();
}
private static string ExtractCommandName(IReadOnlyList<ShellToken> tokens)
{
foreach (var token in tokens)
{
if (token.Kind is not ShellTokenKind.Word and not ShellTokenKind.SingleQuoted and not ShellTokenKind.DoubleQuoted)
{
continue;
}
if (token.Value.Contains('=', StringComparison.Ordinal))
{
// Skip environment assignments e.g. FOO=bar exec /app
var eqIndex = token.Value.IndexOf('=', StringComparison.Ordinal);
if (eqIndex > 0 && token.Value[..eqIndex].All(IsIdentifierChar))
{
continue;
}
}
return NormalizeCommandName(token.Value);
}
return string.Empty;
static bool IsIdentifierChar(char c) => char.IsLetterOrDigit(c) || c == '_';
}
private static string NormalizeCommandName(string value)
{
if (string.IsNullOrEmpty(value))
{
return string.Empty;
}
return value switch
{
"." => ".",
_ => value.Trim()
};
}
private void SkipCommandSeparators()
{
while (true)
{
var token = Peek();
if (token.Kind == ShellTokenKind.NewLine)
{
Advance();
continue;
}
if (token.Kind == ShellTokenKind.Operator && (token.Value == ";" || token.Value == "&"))
{
Advance();
continue;
}
break;
}
}
private void SkipNewLines()
{
while (Peek().Kind == ShellTokenKind.NewLine)
{
Advance();
}
}
private ShellToken Expect(ShellTokenKind kind, string? value = null)
{
var token = Peek();
if (token.Kind != kind || (value is not null && token.Value != value))
{
throw new FormatException($"Unexpected token '{token.Value}' at line {token.Line}, expected {value ?? kind.ToString()}.");
}
return Advance();
}
private ShellToken Advance()
{
if (_index >= _tokens.Count)
{
return _tokens[^1];
}
return _tokens[_index++];
}
private ShellToken Peek()
{
if (_index >= _tokens.Count)
{
return _tokens[^1];
}
return _tokens[_index];
}
private ShellToken Previous()
{
if (_index == 0)
{
return _tokens[0];
}
return _tokens[_index - 1];
}
private static ShellSpan CreateSpan(ShellToken start, ShellToken end)
{
return new ShellSpan(start.Line, start.Column, end.Line, end.Column + end.Value.Length);
}
private static ShellSpan CreateSpan(ShellToken start, ShellSpan end)
{
return new ShellSpan(start.Line, start.Column, end.EndLine, end.EndColumn);
}
private static string JoinTokens(IEnumerable<ShellToken> tokens)
{
var builder = new StringBuilder();
var first = true;
foreach (var token in tokens)
{
if (!first)
{
builder.Append(' ');
}
builder.Append(token.Value);
first = false;
}
return builder.ToString();
}
private static string ExtractPrimaryArgument(ImmutableArray<ShellToken> tokens)
{
if (tokens.Length <= 1)
{
return string.Empty;
}
for (var i = 1; i < tokens.Length; i++)
{
var token = tokens[i];
if (token.Kind is ShellTokenKind.Word or ShellTokenKind.SingleQuoted or ShellTokenKind.DoubleQuoted)
{
return token.Value;
}
}
return string.Empty;
}
}

View File

@@ -0,0 +1,16 @@
namespace StellaOps.Scanner.EntryTrace.Parsing;
/// <summary>
/// Token produced by the shell lexer.
/// </summary>
public readonly record struct ShellToken(ShellTokenKind Kind, string Value, int Line, int Column);
public enum ShellTokenKind
{
Word,
SingleQuoted,
DoubleQuoted,
Operator,
NewLine,
EndOfFile
}

View File

@@ -0,0 +1,200 @@
using System.Globalization;
using System.Text;
namespace StellaOps.Scanner.EntryTrace.Parsing;
/// <summary>
/// Lightweight Bourne shell tokenizer sufficient for ENTRYPOINT scripts.
/// Deterministic: emits tokens in source order without normalization.
/// </summary>
public sealed class ShellTokenizer
{
public IReadOnlyList<ShellToken> Tokenize(string source)
{
if (source is null)
{
throw new ArgumentNullException(nameof(source));
}
var tokens = new List<ShellToken>();
var line = 1;
var column = 1;
var index = 0;
while (index < source.Length)
{
var ch = source[index];
if (ch == '\r')
{
index++;
continue;
}
if (ch == '\n')
{
tokens.Add(new ShellToken(ShellTokenKind.NewLine, "\n", line, column));
index++;
line++;
column = 1;
continue;
}
if (char.IsWhiteSpace(ch))
{
index++;
column++;
continue;
}
if (ch == '#')
{
// Comment: skip until newline.
while (index < source.Length && source[index] != '\n')
{
index++;
}
continue;
}
if (IsOperatorStart(ch))
{
var opStartColumn = column;
var op = ConsumeOperator(source, ref index, ref column);
tokens.Add(new ShellToken(ShellTokenKind.Operator, op, line, opStartColumn));
continue;
}
if (ch == '\'')
{
var (value, length) = ConsumeSingleQuoted(source, index + 1);
tokens.Add(new ShellToken(ShellTokenKind.SingleQuoted, value, line, column));
index += length + 2;
column += length + 2;
continue;
}
if (ch == '"')
{
var (value, length) = ConsumeDoubleQuoted(source, index + 1);
tokens.Add(new ShellToken(ShellTokenKind.DoubleQuoted, value, line, column));
index += length + 2;
column += length + 2;
continue;
}
var (word, consumed) = ConsumeWord(source, index);
tokens.Add(new ShellToken(ShellTokenKind.Word, word, line, column));
index += consumed;
column += consumed;
}
tokens.Add(new ShellToken(ShellTokenKind.EndOfFile, string.Empty, line, column));
return tokens;
}
private static bool IsOperatorStart(char ch) => ch switch
{
';' or '&' or '|' or '(' or ')' => true,
_ => false
};
private static string ConsumeOperator(string source, ref int index, ref int column)
{
var start = index;
var ch = source[index];
index++;
column++;
if (index < source.Length)
{
var next = source[index];
if ((ch == '&' && next == '&') ||
(ch == '|' && next == '|') ||
(ch == ';' && next == ';'))
{
index++;
column++;
}
}
return source[start..index];
}
private static (string Value, int Length) ConsumeSingleQuoted(string source, int startIndex)
{
var end = startIndex;
while (end < source.Length && source[end] != '\'')
{
end++;
}
if (end >= source.Length)
{
throw new FormatException("Unterminated single-quoted string in entrypoint script.");
}
return (source[startIndex..end], end - startIndex);
}
private static (string Value, int Length) ConsumeDoubleQuoted(string source, int startIndex)
{
var builder = new StringBuilder();
var index = startIndex;
while (index < source.Length)
{
var ch = source[index];
if (ch == '"')
{
return (builder.ToString(), index - startIndex);
}
if (ch == '\\' && index + 1 < source.Length)
{
var next = source[index + 1];
if (next is '"' or '\\' or '$' or '`' or '\n')
{
builder.Append(next);
index += 2;
continue;
}
}
builder.Append(ch);
index++;
}
throw new FormatException("Unterminated double-quoted string in entrypoint script.");
}
private static (string Value, int Length) ConsumeWord(string source, int startIndex)
{
var index = startIndex;
while (index < source.Length)
{
var ch = source[index];
if (char.IsWhiteSpace(ch) || ch == '\n' || ch == '\r' || IsOperatorStart(ch) || ch == '#' )
{
break;
}
if (ch == '\\' && index + 1 < source.Length && source[index + 1] == '\n')
{
// Line continuation.
index += 2;
continue;
}
index++;
}
if (index == startIndex)
{
throw new InvalidOperationException("Tokenizer failed to advance while consuming word.");
}
var text = source[startIndex..index];
return (text, index - startIndex);
}
}