583 lines
19 KiB
C#
583 lines
19 KiB
C#
using System.Collections.Immutable;
|
|
using System.Globalization;
|
|
using System.Text;
|
|
using StellaOps.Policy;
|
|
|
|
namespace StellaOps.PolicyDsl;
|
|
|
|
/// <summary>
|
|
/// Tokenizes policy DSL source code into a stream of tokens.
|
|
/// </summary>
|
|
public static class DslTokenizer
|
|
{
|
|
/// <summary>
/// Scans <paramref name="source"/> from start to end and produces the token list
/// together with any diagnostics raised while scanning.
/// </summary>
/// <param name="source">The DSL text to tokenize.</param>
/// <returns>
/// A <see cref="TokenizerResult"/> whose token list always ends with an
/// <c>EndOfFile</c> token positioned at the end of the input.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
public static TokenizerResult Tokenize(string source)
{
    _ = source ?? throw new ArgumentNullException(nameof(source));

    var tokens = ImmutableArray.CreateBuilder<DslToken>();
    var diagnostics = ImmutableArray.CreateBuilder<PolicyIssue>();

    int index = 0, line = 1, column = 1;

    while (index < source.Length)
    {
        var current = source[index];

        if (char.IsWhiteSpace(current))
        {
            (index, line, column) = AdvanceWhitespace(source, index, line, column);
            continue;
        }

        // One character of lookahead. '\0' stands in for "no next character";
        // it is only ever compared against concrete operator/digit characters,
        // so the sentinel can never produce a false match.
        var next = index + 1 < source.Length ? source[index + 1] : '\0';

        if (current == '/' && next == '/')
        {
            (index, line, column) = SkipSingleLineComment(source, index + 2, line, column + 2);
            continue;
        }

        if (current == '/' && next == '*')
        {
            (index, line, column) = SkipMultiLineComment(source, index + 2, line, column + 2, diagnostics);
            continue;
        }

        var startLocation = new SourceLocation(index, line, column);

        // Emits a fixed-text token and advances past it (one or two characters).
        void Emit(TokenKind kind, string text) =>
            tokens.Add(CreateToken(kind, text, startLocation, ref index, ref column, advance: text.Length));

        // Records an unexpected-character diagnostic and skips the offending character.
        void Unexpected()
        {
            ReportUnexpectedCharacter(diagnostics, current, startLocation);
            index++;
            column++;
        }

        switch (current)
        {
            case '{':
                Emit(TokenKind.LeftBrace, "{");
                break;
            case '}':
                Emit(TokenKind.RightBrace, "}");
                break;
            case '(':
                Emit(TokenKind.LeftParen, "(");
                break;
            case ')':
                Emit(TokenKind.RightParen, ")");
                break;
            case '[':
                Emit(TokenKind.LeftBracket, "[");
                break;
            case ']':
                Emit(TokenKind.RightBracket, "]");
                break;
            case ',':
                Emit(TokenKind.Comma, ",");
                break;
            case ';':
                Emit(TokenKind.Semicolon, ";");
                break;
            case '.':
                Emit(TokenKind.Dot, ".");
                break;

            case ':' when next == '=':
                Emit(TokenKind.Define, ":=");
                break;
            case ':':
                Emit(TokenKind.Colon, ":");
                break;

            case '=' when next == '>':
                Emit(TokenKind.Arrow, "=>");
                break;
            case '=' when next == '=':
                Emit(TokenKind.EqualEqual, "==");
                break;
            case '=':
                Emit(TokenKind.Assign, "=");
                break;

            case '!' when next == '=':
                Emit(TokenKind.NotEqual, "!=");
                break;
            case '!':
                // A lone '!' is not an operator in this DSL.
                Unexpected();
                break;

            case '<' when next == '=':
                Emit(TokenKind.LessThanOrEqual, "<=");
                break;
            case '<':
                Emit(TokenKind.LessThan, "<");
                break;

            case '>' when next == '=':
                Emit(TokenKind.GreaterThanOrEqual, ">=");
                break;
            case '>':
                Emit(TokenKind.GreaterThan, ">");
                break;

            case '"':
                TokenizeString(source, ref index, ref line, ref column, startLocation, tokens, diagnostics);
                break;

            // A sign is only accepted as the prefix of a numeric literal.
            case '+' or '-' when char.IsDigit(next):
                TokenizeNumber(source, ref index, ref line, ref column, startLocation, tokens, diagnostics);
                break;
            case '+' or '-':
                Unexpected();
                break;

            default:
                if (char.IsDigit(current))
                {
                    TokenizeNumber(source, ref index, ref line, ref column, startLocation, tokens, diagnostics);
                }
                else if (IsIdentifierStart(current))
                {
                    TokenizeIdentifierOrKeyword(source, ref index, ref line, ref column, startLocation, tokens);
                }
                else
                {
                    Unexpected();
                }

                break;
        }
    }

    // The end-of-file token has an empty span located at the final scan position.
    var eofLocation = new SourceLocation(index, line, column);
    tokens.Add(new DslToken(TokenKind.EndOfFile, string.Empty, new SourceSpan(eofLocation, eofLocation)));

    return new TokenizerResult(tokens.ToImmutable(), diagnostics.ToImmutable());
}
|
|
|
|
/// <summary>
/// Lexes a double-quoted string literal whose opening quote is at <paramref name="index"/>.
/// On success a <c>StringLiteral</c> token carrying the unescaped text is appended to
/// <paramref name="tokens"/>; otherwise a diagnostic is appended to <paramref name="diagnostics"/>.
/// </summary>
/// <param name="source">The full DSL text.</param>
/// <param name="index">Position of the opening quote on entry; position after the consumed text on exit.</param>
/// <param name="line">Current line; updated when scanning stops at a line break or at end of input.</param>
/// <param name="column">Column of the opening quote on entry; column after the consumed text on exit.</param>
/// <param name="start">Location of the opening quote, used for the token span and for diagnostics.</param>
/// <param name="tokens">Receives the string token when the literal is well formed.</param>
/// <param name="diagnostics">Receives invalid-escape and unterminated-string errors.</param>
/// <remarks>
/// Recognised escapes are <c>\\</c>, <c>\"</c>, <c>\n</c>, <c>\r</c> and <c>\t</c>. Any other
/// escaped character is reported as invalid and then kept literally. A string may not span
/// lines: reaching a CR or LF before the closing quote is reported as unterminated.
/// </remarks>
private static void TokenizeString(
    string source,
    ref int index,
    ref int line,
    ref int column,
    SourceLocation start,
    ImmutableArray<DslToken>.Builder tokens,
    ImmutableArray<PolicyIssue>.Builder diagnostics)
{
    var builder = new StringBuilder();
    var i = index + 1; // first character after the opening quote
    var currentLine = line;
    var currentColumn = column + 1;

    while (i < source.Length)
    {
        var ch = source[i];
        if (ch == '"')
        {
            var end = new SourceLocation(i + 1, currentLine, currentColumn + 1);
            var value = builder.ToString();
            index = i + 1;
            column = currentColumn + 1;
            tokens.Add(new DslToken(TokenKind.StringLiteral, value, new SourceSpan(start, end), value));
            return;
        }

        if (ch == '\\')
        {
            if (i + 1 >= source.Length)
            {
                // Backslash is the last character of the input: nothing left to escape.
                diagnostics.Add(PolicyIssue.Error(DiagnosticCodes.UnterminatedString, "Unterminated string literal.", $"@{start.Line}:{start.Column}"));
                index = source.Length;
                line = currentLine;
                column = currentColumn;
                return;
            }

            var escape = source[i + 1];
            if (escape == '\r' || escape == '\n')
            {
                // A backslash directly before a line break must not swallow the break:
                // doing so would let the literal span lines without the line counter
                // being advanced, corrupting every later source location. Step over the
                // backslash only; the next iteration sees the line break and reports the
                // string as unterminated through the normal path.
                i++;
                currentColumn++;
                continue;
            }

            switch (escape)
            {
                case '\\':
                    builder.Append('\\');
                    break;
                case '"':
                    builder.Append('"');
                    break;
                case 'n':
                    builder.Append('\n');
                    break;
                case 'r':
                    builder.Append('\r');
                    break;
                case 't':
                    builder.Append('\t');
                    break;
                default:
                    // Unknown escape: report it, then keep the escaped character as-is
                    // so scanning can continue to the closing quote.
                    diagnostics.Add(PolicyIssue.Error(DiagnosticCodes.InvalidEscapeSequence, $"Invalid escape sequence '\\{escape}'.", $"@{currentLine}:{currentColumn}"));
                    builder.Append(escape);
                    break;
            }

            i += 2;
            currentColumn += 2;
            continue;
        }

        if (ch == '\r' || ch == '\n')
        {
            // Strings are single-line. Report at the opening quote, then resume
            // scanning after the line break (and any whitespace that follows it).
            diagnostics.Add(PolicyIssue.Error(DiagnosticCodes.UnterminatedString, "Unterminated string literal.", $"@{start.Line}:{start.Column}"));
            (index, line, column) = AdvanceWhitespace(source, i, currentLine, currentColumn);
            return;
        }

        builder.Append(ch);
        i++;
        currentColumn++;
    }

    // Ran off the end of the input without finding the closing quote.
    diagnostics.Add(PolicyIssue.Error(DiagnosticCodes.UnterminatedString, "Unterminated string literal.", $"@{start.Line}:{start.Column}"));
    index = source.Length;
    line = currentLine;
    column = currentColumn;
}
|
|
|
|
/// <summary>
/// Lexes a numeric literal starting at <paramref name="index"/>: an optional leading
/// <c>+</c>/<c>-</c>, digits with at most one decimal point, and an optional trailing
/// <c>%</c>. A percent literal is stored as its value divided by 100.
/// </summary>
/// <param name="source">The full DSL text.</param>
/// <param name="index">Position of the first character of the literal on entry; position after it on exit.</param>
/// <param name="line">Current line; read for the token's end location and never modified here.</param>
/// <param name="column">Column of the first character on entry; column after the literal on exit.</param>
/// <param name="start">Location of the first character, used for the token span and for diagnostics.</param>
/// <param name="tokens">Receives a <c>NumberLiteral</c> token whose text is the full lexeme (including any <c>%</c>).</param>
/// <param name="diagnostics">Receives an invalid-number error when the digits cannot be parsed as a decimal.</param>
private static void TokenizeNumber(
    string source,
    ref int index,
    ref int line,
    ref int column,
    SourceLocation start,
    ImmutableArray<DslToken>.Builder tokens,
    ImmutableArray<PolicyIssue>.Builder diagnostics)
{
    var i = index;
    var hasDecimal = false;

    if (source[i] == '+' || source[i] == '-')
    {
        i++;
    }

    while (i < source.Length)
    {
        var ch = source[i];
        if (char.IsDigit(ch))
        {
            i++;
            continue;
        }

        // Only the first '.' belongs to the literal; a second one ends it.
        if (ch == '.' && !hasDecimal)
        {
            hasDecimal = true;
            i++;
            continue;
        }

        break;
    }

    // Everything up to here is the part decimal.TryParse can understand.
    var numericEnd = i;

    var percent = i < source.Length && source[i] == '%';
    if (percent)
    {
        i++;
    }

    var length = i - index;
    var text = source.Substring(index, length);

    // The '%' suffix must be excluded from the parsed text: the number styles used
    // below do not accept it, so including it would reject every percent literal.
    var numericText = percent ? source.Substring(index, numericEnd - index) : text;

    if (!decimal.TryParse(numericText, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out var value))
    {
        diagnostics.Add(PolicyIssue.Error(DiagnosticCodes.InvalidNumber, $"Invalid numeric literal '{text}'.", $"@{start.Line}:{start.Column}"));

        // Advance the column before the index: the width is derived from the old index.
        column += length;
        index = i;
        return;
    }

    if (percent)
    {
        value /= 100m;
    }

    var end = new SourceLocation(i, line, column + length);
    tokens.Add(new DslToken(TokenKind.NumberLiteral, text, new SourceSpan(start, end), value));
    column += length;
    index = i;
}
|
|
|
|
/// <summary>
/// Lexes an identifier-shaped word starting at <paramref name="index"/> and appends the
/// matching token: a boolean literal (with its parsed value), a keyword, or a plain identifier.
/// </summary>
/// <param name="source">The full DSL text.</param>
/// <param name="index">Position of the first character of the word on entry; position after it on exit.</param>
/// <param name="line">Current line; read for the token's end location and never modified here.</param>
/// <param name="column">Column of the first character on entry; column after the word on exit.</param>
/// <param name="start">Location of the first character, used as the start of the token span.</param>
/// <param name="tokens">Receives the resulting token.</param>
private static void TokenizeIdentifierOrKeyword(
    string source,
    ref int index,
    ref int line,
    ref int column,
    SourceLocation start,
    ImmutableArray<DslToken>.Builder tokens)
{
    // The caller has already validated the first character; extend over the rest.
    var scan = index + 1;
    while (scan < source.Length && IsIdentifierPart(source[scan]))
    {
        scan++;
    }

    var length = scan - index;
    var lexeme = source.Substring(index, length);
    var kind = GetKeywordKind(lexeme);
    var span = new SourceSpan(start, new SourceLocation(scan, line, column + length));

    // Boolean literals carry their parsed value; every other kind carries text only.
    var token = kind == TokenKind.BooleanLiteral
        ? new DslToken(kind, lexeme, span, string.Equals(lexeme, "true", StringComparison.Ordinal))
        : new DslToken(kind, lexeme, span);

    tokens.Add(token);

    column += length;
    index = scan;
}
|
|
|
|
/// <summary>
/// Classifies an identifier-shaped word: reserved words map to their keyword kind,
/// <c>true</c>/<c>false</c> map to <c>BooleanLiteral</c>, and anything else is an
/// <c>Identifier</c>. Matching is case-sensitive.
/// </summary>
/// <param name="text">The word to classify.</param>
/// <returns>The token kind for <paramref name="text"/>.</returns>
private static TokenKind GetKeywordKind(string text)
{
    switch (text)
    {
        case "policy":
            return TokenKind.KeywordPolicy;
        case "syntax":
            return TokenKind.KeywordSyntax;
        case "metadata":
            return TokenKind.KeywordMetadata;
        case "profile":
            return TokenKind.KeywordProfile;
        case "rule":
            return TokenKind.KeywordRule;
        case "map":
            return TokenKind.KeywordMap;
        case "source":
            return TokenKind.KeywordSource;
        case "if":
            return TokenKind.KeywordIf;
        case "then":
            return TokenKind.KeywordThen;
        case "when":
            return TokenKind.KeywordWhen;
        case "and":
            return TokenKind.KeywordAnd;
        case "or":
            return TokenKind.KeywordOr;
        case "not":
            return TokenKind.KeywordNot;
        case "priority":
            return TokenKind.KeywordPriority;
        case "else":
            return TokenKind.KeywordElse;
        case "because":
            return TokenKind.KeywordBecause;
        case "settings":
            return TokenKind.KeywordSettings;
        case "ignore":
            return TokenKind.KeywordIgnore;
        case "until":
            return TokenKind.KeywordUntil;
        case "escalate":
            return TokenKind.KeywordEscalate;
        case "to":
            return TokenKind.KeywordTo;
        case "requireVex":
            return TokenKind.KeywordRequireVex;
        case "warn":
            return TokenKind.KeywordWarn;
        case "message":
            return TokenKind.KeywordMessage;
        case "defer":
            return TokenKind.KeywordDefer;
        case "annotate":
            return TokenKind.KeywordAnnotate;
        case "in":
            return TokenKind.KeywordIn;
        case "true":
        case "false":
            return TokenKind.BooleanLiteral;
        default:
            // Non-reserved words land here; that includes "env", which lexes as an
            // ordinary identifier.
            return TokenKind.Identifier;
    }
}
|
|
|
|
/// <summary>
/// Returns <c>true</c> when <paramref name="ch"/> may begin an identifier:
/// an underscore or any character <see cref="char.IsLetter(char)"/> accepts.
/// </summary>
private static bool IsIdentifierStart(char ch)
{
    return ch == '_' || char.IsLetter(ch);
}
|
|
|
|
/// <summary>
/// Returns <c>true</c> when <paramref name="ch"/> may continue an identifier:
/// an underscore, a hyphen, or any character <see cref="char.IsLetterOrDigit(char)"/> accepts.
/// </summary>
private static bool IsIdentifierPart(char ch)
{
    if (ch == '_' || ch == '-')
    {
        return true;
    }

    return char.IsLetterOrDigit(ch);
}
|
|
|
|
/// <summary>
/// Skips a run of whitespace beginning at <paramref name="index"/>, keeping the
/// line/column position in step with what is consumed.
/// </summary>
/// <param name="source">The full DSL text.</param>
/// <param name="index">Position at which to start skipping.</param>
/// <param name="line">Line number at <paramref name="index"/>.</param>
/// <param name="column">Column number at <paramref name="index"/>.</param>
/// <returns>
/// The position, line and column of the first non-whitespace character, or of the end of
/// the input. <c>\r\n</c>, a lone <c>\r</c> and a lone <c>\n</c> each count as one line
/// break and reset the column to 1; any other whitespace advances the column by one.
/// </returns>
private static (int Index, int Line, int Column) AdvanceWhitespace(string source, int index, int line, int column)
{
    var position = index;
    var row = line;
    var col = column;

    while (position < source.Length)
    {
        var c = source[position];

        if (c == '\r' || c == '\n')
        {
            // Treat CR LF as a single line break; a lone CR or LF is one as well.
            var isCrLf = c == '\r' && position + 1 < source.Length && source[position + 1] == '\n';
            position += isCrLf ? 2 : 1;
            row++;
            col = 1;
        }
        else if (char.IsWhiteSpace(c))
        {
            position++;
            col++;
        }
        else
        {
            break;
        }
    }

    return (position, row, col);
}
|
|
|
|
/// <summary>
/// Skips the body of a <c>//</c> comment. <paramref name="index"/> and
/// <paramref name="column"/> refer to the first character after the <c>//</c> marker.
/// </summary>
/// <param name="source">The full DSL text.</param>
/// <param name="index">Position of the first character of the comment body.</param>
/// <param name="line">Line number of the comment.</param>
/// <param name="column">Column of the first character of the comment body.</param>
/// <returns>
/// The position reached: the end of the input when the comment is the last thing in it,
/// otherwise whatever <c>AdvanceWhitespace</c> returns when started at the terminating
/// line break (so the break and any whitespace after it are consumed as well).
/// </returns>
private static (int Index, int Line, int Column) SkipSingleLineComment(string source, int index, int line, int column)
{
    // Run forward to the line break that ends the comment, or to the end of the input.
    var cursor = index;
    while (cursor < source.Length && source[cursor] != '\r' && source[cursor] != '\n')
    {
        cursor++;
    }

    var endColumn = column + (cursor - index);

    if (cursor < source.Length)
    {
        return AdvanceWhitespace(source, cursor, line, endColumn);
    }

    return (cursor, line, endColumn);
}
|
|
|
|
/// <summary>
/// Skips the body of a <c>/* ... */</c> comment. <paramref name="index"/> and
/// <paramref name="column"/> refer to the first character after the <c>/*</c> marker.
/// </summary>
/// <param name="source">The full DSL text.</param>
/// <param name="index">Position of the first character of the comment body.</param>
/// <param name="line">Line number at <paramref name="index"/>.</param>
/// <param name="column">Column number at <paramref name="index"/>.</param>
/// <param name="diagnostics">Receives an error when the input ends before <c>*/</c> is found.</param>
/// <returns>
/// The position, line and column just after the closing <c>*/</c>, or the end of the
/// input when the comment is never closed.
/// </returns>
private static (int Index, int Line, int Column) SkipMultiLineComment(
    string source,
    int index,
    int line,
    int column,
    ImmutableArray<PolicyIssue>.Builder diagnostics)
{
    var cursor = index;
    var row = line;
    var col = column;

    while (cursor < source.Length)
    {
        var c = source[cursor];
        var hasNext = cursor + 1 < source.Length;

        if (c == '*' && hasNext && source[cursor + 1] == '/')
        {
            return (cursor + 2, row, col + 2);
        }

        switch (c)
        {
            case '\r':
                // CR LF counts as one line break, as does a lone CR.
                cursor += hasNext && source[cursor + 1] == '\n' ? 2 : 1;
                row++;
                col = 1;
                break;

            case '\n':
                cursor++;
                row++;
                col = 1;
                break;

            default:
                cursor++;
                col++;
                break;
        }
    }

    // NOTE(review): the reported position is the one passed in, i.e. just past the
    // "/*" marker rather than the marker itself - confirm that is intended.
    diagnostics.Add(PolicyIssue.Error(DiagnosticCodes.UnexpectedCharacter, "Unterminated comment block.", $"@{line}:{column}"));
    return (source.Length, row, col);
}
|
|
|
|
/// <summary>
/// Builds a fixed-text token that begins at <paramref name="start"/> and moves the
/// scan position past it.
/// </summary>
/// <param name="kind">Kind of token to create.</param>
/// <param name="text">Token text.</param>
/// <param name="start">Location of the token's first character.</param>
/// <param name="index">Scan position; advanced by <paramref name="advance"/>.</param>
/// <param name="column">Scan column; advanced by <paramref name="advance"/>.</param>
/// <param name="advance">Number of characters the token occupies (defaults to 1).</param>
/// <returns>The new token, spanning from <paramref name="start"/> to the advanced position on the same line.</returns>
private static DslToken CreateToken(
    TokenKind kind,
    string text,
    SourceLocation start,
    ref int index,
    ref int column,
    int advance = 1)
{
    var endIndex = index + advance;
    var span = new SourceSpan(
        start,
        new SourceLocation(endIndex, start.Line, start.Column + advance));

    index = endIndex;
    column += advance;

    return new DslToken(kind, text, span);
}
|
|
|
|
/// <summary>
/// Appends an unexpected-character error for <paramref name="ch"/> to
/// <paramref name="diagnostics"/>, tagged with the line and column of
/// <paramref name="location"/>. The scan position is not changed here.
/// </summary>
/// <param name="diagnostics">Diagnostic list to append to.</param>
/// <param name="ch">The offending character, quoted in the message.</param>
/// <param name="location">Where the character was found.</param>
private static void ReportUnexpectedCharacter(
    ImmutableArray<PolicyIssue>.Builder diagnostics,
    char ch,
    SourceLocation location)
{
    var message = $"Unexpected character '{ch}'.";
    var position = $"@{location.Line}:{location.Column}";

    var issue = PolicyIssue.Error(DiagnosticCodes.UnexpectedCharacter, message, position);
    diagnostics.Add(issue);
}
|
|
|
|
/// <summary>
/// Tests whether the character at <paramref name="index"/> equals <paramref name="expected"/>.
/// </summary>
/// <param name="source">The text to inspect.</param>
/// <param name="index">Position to look at; positions at or past the end yield <c>false</c>.</param>
/// <param name="expected">Character to compare against.</param>
/// <returns><c>true</c> when the position is before the end of the text and holds <paramref name="expected"/>.</returns>
private static bool Match(string source, int index, char expected)
{
    if (index >= source.Length)
    {
        return false;
    }

    return source[index] == expected;
}
|
|
}
|
|
|
|
/// <summary>
/// Outcome of one tokenizer run over policy DSL text.
/// </summary>
/// <param name="Tokens">The tokens produced by the run, in source order.</param>
/// <param name="Diagnostics">The issues recorded while tokenizing.</param>
public readonly record struct TokenizerResult(ImmutableArray<DslToken> Tokens, ImmutableArray<PolicyIssue> Diagnostics);
|