Files
git.stella-ops.org/src/StellaOps.Scanner.EntryTrace/Parsing/ShellTokenizer.cs
master 2062da7a8b Refactor and enhance scanner worker functionality
- Cleaned up code formatting and organization across multiple files for improved readability.
- Introduced `OsScanAnalyzerDispatcher` to handle OS analyzer execution and plugin loading.
- Updated `ScanJobContext` to include an `Analysis` property for storing scan results.
- Enhanced `ScanJobProcessor` to utilize the new `OsScanAnalyzerDispatcher`.
- Improved logging and error handling in `ScanProgressReporter` for better traceability.
- Updated project dependencies and added references to new analyzer plugins.
- Revised task documentation to reflect current status and dependencies.
2025-10-19 18:34:15 +03:00

201 lines
5.5 KiB
C#

using System.Globalization;
using System.Text;
namespace StellaOps.Scanner.EntryTrace.Parsing;
/// <summary>
/// Lightweight Bourne shell tokenizer sufficient for ENTRYPOINT scripts.
/// Deterministic: emits tokens in source order. The only rewriting performed is
/// the removal of backslash-newline line continuations and the resolution of
/// backslash escapes inside double-quoted strings.
/// </summary>
public sealed class ShellTokenizer
{
    /// <summary>
    /// Splits <paramref name="source"/> into words, quoted strings, operators and
    /// newline tokens, always terminated by a single <c>EndOfFile</c> token.
    /// </summary>
    /// <param name="source">Script text to tokenize.</param>
    /// <returns>Tokens in source order, each carrying the 1-based line/column where it starts.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="FormatException">A single- or double-quoted string is not terminated.</exception>
    public IReadOnlyList<ShellToken> Tokenize(string source)
    {
        ArgumentNullException.ThrowIfNull(source);

        var tokens = new List<ShellToken>();
        var line = 1;
        var column = 1;
        var index = 0;

        while (index < source.Length)
        {
            var ch = source[index];

            if (ch == '\r')
            {
                // Carriage returns are ignored and do not occupy a column.
                index++;
                continue;
            }

            if (ch == '\n')
            {
                tokens.Add(new ShellToken(ShellTokenKind.NewLine, "\n", line, column));
                index++;
                line++;
                column = 1;
                continue;
            }

            if (char.IsWhiteSpace(ch))
            {
                index++;
                column++;
                continue;
            }

            // A backslash-newline between tokens joins two physical lines; it must
            // neither produce a token nor a NewLine, only move the position forward.
            var continuation = LineContinuationLength(source, index);
            if (continuation > 0)
            {
                index += continuation;
                line++;
                column = 1;
                continue;
            }

            if (ch == '#')
            {
                // Comment: skip until newline (the newline itself is tokenized above).
                var commentStart = index;
                while (index < source.Length && source[index] != '\n')
                {
                    index++;
                }

                AdvancePosition(source, commentStart, index - commentStart, ref line, ref column);
                continue;
            }

            if (IsOperatorStart(ch))
            {
                var opStartColumn = column;
                var op = ConsumeOperator(source, ref index, ref column);
                tokens.Add(new ShellToken(ShellTokenKind.Operator, op, line, opStartColumn));
                continue;
            }

            if (ch == '\'')
            {
                var (value, length) = ConsumeSingleQuoted(source, index + 1);
                tokens.Add(new ShellToken(ShellTokenKind.SingleQuoted, value, line, column));

                // +2 accounts for the opening and closing quote characters.
                AdvancePosition(source, index, length + 2, ref line, ref column);
                index += length + 2;
                continue;
            }

            if (ch == '"')
            {
                var (value, length) = ConsumeDoubleQuoted(source, index + 1);
                tokens.Add(new ShellToken(ShellTokenKind.DoubleQuoted, value, line, column));

                // +2 accounts for the opening and closing quote characters.
                AdvancePosition(source, index, length + 2, ref line, ref column);
                index += length + 2;
                continue;
            }

            var (word, consumed) = ConsumeWord(source, index);
            tokens.Add(new ShellToken(ShellTokenKind.Word, word, line, column));
            AdvancePosition(source, index, consumed, ref line, ref column);
            index += consumed;
        }

        tokens.Add(new ShellToken(ShellTokenKind.EndOfFile, string.Empty, line, column));
        return tokens;
    }

    /// <summary>Returns true when <paramref name="ch"/> can begin an operator token.</summary>
    private static bool IsOperatorStart(char ch) => ch switch
    {
        ';' or '&' or '|' or '(' or ')' => true,
        _ => false
    };

    /// <summary>
    /// Returns the number of characters forming a line continuation at
    /// <paramref name="index"/> (backslash + LF, or backslash + CR LF), or 0 when
    /// there is none.
    /// </summary>
    private static int LineContinuationLength(string source, int index)
    {
        if (source[index] != '\\')
        {
            return 0;
        }

        var next = index + 1;
        if (next < source.Length && source[next] == '\n')
        {
            return 2;
        }

        if (next + 1 < source.Length && source[next] == '\r' && source[next + 1] == '\n')
        {
            return 3;
        }

        return 0;
    }

    /// <summary>
    /// Moves <paramref name="line"/>/<paramref name="column"/> across
    /// <paramref name="length"/> characters starting at <paramref name="start"/>.
    /// A line feed starts a new line; a carriage return occupies no column, matching
    /// how the main loop treats it.
    /// </summary>
    private static void AdvancePosition(string source, int start, int length, ref int line, ref int column)
    {
        var end = start + length;
        for (var i = start; i < end; i++)
        {
            switch (source[i])
            {
                case '\n':
                    line++;
                    column = 1;
                    break;
                case '\r':
                    break;
                default:
                    column++;
                    break;
            }
        }
    }

    /// <summary>
    /// Consumes a one-character operator, or one of the doubled forms
    /// <c>&amp;&amp;</c>, <c>||</c>, <c>;;</c>, advancing <paramref name="index"/> and
    /// <paramref name="column"/> past it.
    /// </summary>
    /// <returns>The operator text exactly as it appears in the source.</returns>
    private static string ConsumeOperator(string source, ref int index, ref int column)
    {
        var start = index;
        var ch = source[index];
        index++;
        column++;

        if (index < source.Length)
        {
            var next = source[index];
            if ((ch == '&' && next == '&') ||
                (ch == '|' && next == '|') ||
                (ch == ';' && next == ';'))
            {
                index++;
                column++;
            }
        }

        return source[start..index];
    }

    /// <summary>
    /// Reads the body of a single-quoted string. <paramref name="startIndex"/> is the
    /// first character after the opening quote. No escapes are interpreted.
    /// </summary>
    /// <returns>The literal body and its length in source characters (quotes excluded).</returns>
    /// <exception cref="FormatException">No closing quote was found.</exception>
    private static (string Value, int Length) ConsumeSingleQuoted(string source, int startIndex)
    {
        var end = startIndex;
        while (end < source.Length && source[end] != '\'')
        {
            end++;
        }

        if (end >= source.Length)
        {
            throw new FormatException("Unterminated single-quoted string in entrypoint script.");
        }

        return (source[startIndex..end], end - startIndex);
    }

    /// <summary>
    /// Reads the body of a double-quoted string. <paramref name="startIndex"/> is the
    /// first character after the opening quote. A backslash before <c>"</c>,
    /// <c>\</c>, <c>$</c> or a backtick yields just that character; a backslash-newline
    /// is a line continuation and contributes nothing; any other backslash is literal.
    /// </summary>
    /// <returns>
    /// The unescaped body and the number of source characters between the quotes
    /// (which may exceed the body length when escapes were present).
    /// </returns>
    /// <exception cref="FormatException">No closing quote was found.</exception>
    private static (string Value, int Length) ConsumeDoubleQuoted(string source, int startIndex)
    {
        var builder = new StringBuilder();
        var index = startIndex;

        while (index < source.Length)
        {
            var ch = source[index];
            if (ch == '"')
            {
                return (builder.ToString(), index - startIndex);
            }

            if (ch == '\\' && index + 1 < source.Length)
            {
                var continuation = LineContinuationLength(source, index);
                if (continuation > 0)
                {
                    // Backslash-newline inside double quotes is removed entirely.
                    index += continuation;
                    continue;
                }

                var next = source[index + 1];
                if (next is '"' or '\\' or '$' or '`')
                {
                    builder.Append(next);
                    index += 2;
                    continue;
                }
            }

            builder.Append(ch);
            index++;
        }

        throw new FormatException("Unterminated double-quoted string in entrypoint script.");
    }

    /// <summary>
    /// Reads an unquoted word starting at <paramref name="startIndex"/>. The word ends
    /// at whitespace or an operator character. A <c>#</c> inside a word is ordinary
    /// text (so <c>$#</c> and <c>${#var}</c> stay intact); comments are only recognised
    /// at the start of a token by the caller. Line continuations inside the word are
    /// dropped from the returned text.
    /// </summary>
    /// <returns>
    /// The word text and the number of source characters consumed (which may exceed
    /// the text length when line continuations were removed).
    /// </returns>
    /// <exception cref="InvalidOperationException">Nothing could be consumed at the start position.</exception>
    private static (string Value, int Length) ConsumeWord(string source, int startIndex)
    {
        var builder = new StringBuilder();
        var index = startIndex;

        while (index < source.Length)
        {
            var ch = source[index];

            // char.IsWhiteSpace also covers '\n' and '\r'.
            if (char.IsWhiteSpace(ch) || IsOperatorStart(ch))
            {
                break;
            }

            var continuation = LineContinuationLength(source, index);
            if (continuation > 0)
            {
                // Line continuation: joins the physical lines without adding text.
                index += continuation;
                continue;
            }

            builder.Append(ch);
            index++;
        }

        if (index == startIndex)
        {
            throw new InvalidOperationException("Tokenizer failed to advance while consuming word.");
        }

        return (builder.ToString(), index - startIndex);
    }
}