Refactor and enhance scanner worker functionality
- Cleaned up code formatting and organization across multiple files for improved readability. - Introduced `OsScanAnalyzerDispatcher` to handle OS analyzer execution and plugin loading. - Updated `ScanJobContext` to include an `Analysis` property for storing scan results. - Enhanced `ScanJobProcessor` to utilize the new `OsScanAnalyzerDispatcher`. - Improved logging and error handling in `ScanProgressReporter` for better traceability. - Updated project dependencies and added references to new analyzer plugins. - Revised task documentation to reflect current status and dependencies.
This commit is contained in:
200
src/StellaOps.Scanner.EntryTrace/Parsing/ShellTokenizer.cs
Normal file
200
src/StellaOps.Scanner.EntryTrace/Parsing/ShellTokenizer.cs
Normal file
@@ -0,0 +1,200 @@
|
||||
using System.Globalization;
|
||||
using System.Text;
|
||||
|
||||
namespace StellaOps.Scanner.EntryTrace.Parsing;
|
||||
|
||||
/// <summary>
/// Lightweight Bourne shell tokenizer sufficient for ENTRYPOINT scripts.
/// Deterministic: emits tokens in source order and always ends with an end-of-file token.
/// </summary>
/// <remarks>
/// <para>
/// Comments are dropped, carriage returns are ignored, and a backslash immediately followed by a
/// newline is treated as a line continuation (both characters are removed).
/// </para>
/// <para>
/// A <c>#</c> only starts a comment when it appears at the start of a token; inside a word
/// (for example <c>$#</c> or <c>${name#prefix}</c>) it is kept as part of the word.
/// Quote characters that appear after the first character of a word are kept verbatim in that word.
/// </para>
/// </remarks>
public sealed class ShellTokenizer
{
    /// <summary>
    /// Splits <paramref name="source"/> into shell tokens annotated with 1-based line and column positions.
    /// </summary>
    /// <param name="source">Script text to tokenize.</param>
    /// <returns>The tokens in source order, terminated by a single end-of-file token.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
    /// <exception cref="FormatException">A single- or double-quoted string is not terminated.</exception>
    public IReadOnlyList<ShellToken> Tokenize(string source)
    {
        if (source is null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        var tokens = new List<ShellToken>();
        var line = 1;
        var column = 1;
        var index = 0;

        while (index < source.Length)
        {
            var ch = source[index];

            if (ch == '\r')
            {
                // Carriage returns are skipped without affecting the column counter.
                index++;
                continue;
            }

            if (ch == '\n')
            {
                tokens.Add(new ShellToken(ShellTokenKind.NewLine, "\n", line, column));
                index++;
                line++;
                column = 1;
                continue;
            }

            if (char.IsWhiteSpace(ch))
            {
                index++;
                column++;
                continue;
            }

            if (IsLineContinuation(source, index))
            {
                // Backslash-newline between tokens joins the two physical lines;
                // it must not surface as a token of its own.
                index += 2;
                line++;
                column = 1;
                continue;
            }

            if (ch == '#')
            {
                // Comment: skip until newline (the newline itself is tokenized on the next iteration).
                while (index < source.Length && source[index] != '\n')
                {
                    index++;
                    column++;
                }

                continue;
            }

            if (IsOperatorStart(ch))
            {
                var opStartColumn = column;
                var op = ConsumeOperator(source, ref index, ref column);
                tokens.Add(new ShellToken(ShellTokenKind.Operator, op, line, opStartColumn));
                continue;
            }

            if (ch == '\'')
            {
                var (value, length) = ConsumeSingleQuoted(source, index + 1);
                tokens.Add(new ShellToken(ShellTokenKind.SingleQuoted, value, line, column));

                // +2 accounts for the opening and closing quote characters.
                AdvancePosition(source, ref index, length + 2, ref line, ref column);
                continue;
            }

            if (ch == '"')
            {
                var (value, length) = ConsumeDoubleQuoted(source, index + 1);
                tokens.Add(new ShellToken(ShellTokenKind.DoubleQuoted, value, line, column));

                // +2 accounts for the opening and closing quote characters.
                AdvancePosition(source, ref index, length + 2, ref line, ref column);
                continue;
            }

            var (word, consumed) = ConsumeWord(source, index);
            tokens.Add(new ShellToken(ShellTokenKind.Word, word, line, column));
            AdvancePosition(source, ref index, consumed, ref line, ref column);
        }

        tokens.Add(new ShellToken(ShellTokenKind.EndOfFile, string.Empty, line, column));
        return tokens;
    }

    /// <summary>
    /// Moves <paramref name="index"/> forward by <paramref name="count"/> characters while keeping
    /// <paramref name="line"/> and <paramref name="column"/> in sync with any newlines that were consumed.
    /// </summary>
    private static void AdvancePosition(string source, ref int index, int count, ref int line, ref int column)
    {
        var end = index + count;
        for (; index < end; index++)
        {
            if (source[index] == '\n')
            {
                line++;
                column = 1;
            }
            else
            {
                column++;
            }
        }
    }

    /// <summary>
    /// Returns <c>true</c> when the character at <paramref name="index"/> is a backslash immediately followed by a newline.
    /// </summary>
    private static bool IsLineContinuation(string source, int index) =>
        source[index] == '\\' && index + 1 < source.Length && source[index + 1] == '\n';

    /// <summary>
    /// Returns <c>true</c> for characters that begin an operator token.
    /// </summary>
    private static bool IsOperatorStart(char ch) => ch switch
    {
        ';' or '&' or '|' or '(' or ')' => true,
        _ => false
    };

    /// <summary>
    /// Consumes a one- or two-character operator starting at <paramref name="index"/>,
    /// advancing <paramref name="index"/> and <paramref name="column"/> past it.
    /// </summary>
    /// <returns>The operator text exactly as it appears in <paramref name="source"/>.</returns>
    private static string ConsumeOperator(string source, ref int index, ref int column)
    {
        var start = index;
        var ch = source[index];
        index++;
        column++;

        if (index < source.Length)
        {
            var next = source[index];

            // Only doubled forms of '&', '|' and ';' are combined into a single operator.
            if ((ch == '&' && next == '&') ||
                (ch == '|' && next == '|') ||
                (ch == ';' && next == ';'))
            {
                index++;
                column++;
            }
        }

        return source[start..index];
    }

    /// <summary>
    /// Reads the body of a single-quoted string whose first content character is at <paramref name="startIndex"/>.
    /// </summary>
    /// <returns>The literal content and its length in source characters (quotes excluded).</returns>
    /// <exception cref="FormatException">No closing quote was found.</exception>
    private static (string Value, int Length) ConsumeSingleQuoted(string source, int startIndex)
    {
        var end = startIndex;
        while (end < source.Length && source[end] != '\'')
        {
            end++;
        }

        if (end >= source.Length)
        {
            throw new FormatException("Unterminated single-quoted string in entrypoint script.");
        }

        return (source[startIndex..end], end - startIndex);
    }

    /// <summary>
    /// Reads the body of a double-quoted string whose first content character is at <paramref name="startIndex"/>,
    /// resolving backslash escapes.
    /// </summary>
    /// <returns>
    /// The unescaped content and the number of source characters between the quotes
    /// (which can exceed the length of the returned value when escapes were present).
    /// </returns>
    /// <exception cref="FormatException">No closing quote was found.</exception>
    private static (string Value, int Length) ConsumeDoubleQuoted(string source, int startIndex)
    {
        var builder = new StringBuilder();
        var index = startIndex;

        while (index < source.Length)
        {
            var ch = source[index];
            if (ch == '"')
            {
                return (builder.ToString(), index - startIndex);
            }

            if (ch == '\\' && index + 1 < source.Length)
            {
                var next = source[index + 1];

                if (next == '\n')
                {
                    // Line continuation: the backslash and the newline are both removed.
                    index += 2;
                    continue;
                }

                if (next is '"' or '\\' or '$' or '`')
                {
                    // Escaped special character: keep the character, drop the backslash.
                    builder.Append(next);
                    index += 2;
                    continue;
                }
            }

            // Any other character (including a backslash before a non-special character) is literal.
            builder.Append(ch);
            index++;
        }

        throw new FormatException("Unterminated double-quoted string in entrypoint script.");
    }

    /// <summary>
    /// Reads an unquoted word starting at <paramref name="startIndex"/>. The word ends at whitespace,
    /// a newline, a carriage return, or an operator character.
    /// </summary>
    /// <returns>
    /// The word text with line continuations removed, and the number of source characters consumed
    /// (continuations included).
    /// </returns>
    /// <exception cref="InvalidOperationException">No character could be consumed.</exception>
    private static (string Value, int Length) ConsumeWord(string source, int startIndex)
    {
        var index = startIndex;
        while (index < source.Length)
        {
            var ch = source[index];

            // '#' is intentionally not a terminator here: it only starts a comment at the
            // beginning of a token, which the caller checks before consuming a word.
            if (char.IsWhiteSpace(ch) || ch == '\n' || ch == '\r' || IsOperatorStart(ch))
            {
                break;
            }

            if (IsLineContinuation(source, index))
            {
                // Line continuation inside a word: skip both characters and keep going.
                index += 2;
                continue;
            }

            index++;
        }

        if (index == startIndex)
        {
            throw new InvalidOperationException("Tokenizer failed to advance while consuming word.");
        }

        var text = source[startIndex..index];

        // A bare newline terminates the word above, so any newline left in the raw text
        // belongs to a backslash-newline continuation that must not appear in the value.
        if (text.IndexOf('\n') >= 0)
        {
            text = text.Replace("\\\n", string.Empty);
        }

        return (text, index - startIndex);
    }
}
|
||||
Reference in New Issue
Block a user