
Hi,
I am trying to parse a language that allows strings with embedded quotes represented by C-style escapes:
"Embedded \"quote\""
Largely by pattern matching, I have created a programmable lexer with the following code:
// Initialize the PrimaryString lexical state.
lexicalState = lexicalStates["PrimaryString"];
lexicalState.DefaultClassificationType = classificationTypeProvider.String;
lexicalState.DefaultTokenId = NXSLTokenId.StringText;
lexicalState.DefaultTokenKey = "StringText";
DynamicLexicalScope lexicalScope = new DynamicLexicalScope();
lexicalState.LexicalScopes.Add(lexicalScope);
lexicalPatternGroup = new DynamicLexicalPatternGroup(DynamicLexicalPatternType.Explicit, "StringStartDelimiter", null)
{
TokenId = NXSLTokenId.StringStartDelimiter
};
lexicalPatternGroup.Patterns.Add(new DynamicLexicalPattern("\""));
lexicalPatternGroup.Patterns.Add(new DynamicLexicalPattern("#\""));
lexicalScope.StartLexicalPatternGroup = lexicalPatternGroup;
lexicalPatternGroup = new DynamicLexicalPatternGroup(DynamicLexicalPatternType.Explicit, "StringEndDelimiter", null)
{
TokenId = NXSLTokenId.StringEndDelimiter
};
lexicalPatternGroup.Patterns.Add(new DynamicLexicalPattern("\""));
lexicalScope.EndLexicalPatternGroup = lexicalPatternGroup;
lexicalPatternGroup = new DynamicLexicalPatternGroup(DynamicLexicalPatternType.Regex, "StringLineTerminator", null)
{
TokenId = NXSLTokenId.StringLineTerminator
};
lexicalPatternGroup.Patterns.Add(new DynamicLexicalPattern("\\n"));
lexicalState.LexicalPatternGroups.Add(lexicalPatternGroup);
lexicalPatternGroup = new DynamicLexicalPatternGroup(DynamicLexicalPatternType.Explicit, "StringEscapedDelimiter", null)
{
TokenId = NXSLTokenId.StringEscapedDelimiter
};
lexicalPatternGroup.Patterns.Add(new DynamicLexicalPattern("\\\""));
lexicalState.LexicalPatternGroups.Add(lexicalPatternGroup);
lexicalPatternGroup = new DynamicLexicalPatternGroup(DynamicLexicalPatternType.Regex, "StringText", null)
{
TokenId = NXSLTokenId.StringText
};
lexicalPatternGroup.Patterns.Add(new DynamicLexicalPattern("[^\\\"\\\\\\n]+"));
lexicalState.LexicalPatternGroups.Add(lexicalPatternGroup);
It is quite possible that my mistake is in the grammar, which looks like this:
var @openQuote = new Terminal(NXSLTokenId.StringStartDelimiter, "OpenQuote") { ErrorAlias = "'\"'" };
var @string = new Terminal(NXSLTokenId.StringText, "StringText");
var @closeQuote = new Terminal(NXSLTokenId.StringEndDelimiter, "CloseQuote") { ErrorAlias = "'\"'" };
primaryExpression.Production = idExpression
| builtinExpression
| parenExpression
| curlyExpression
| @openQuote + @string.Optional() + @closeQuote
| @real
| @integer
| @false
| @true;
Unfortunately, I get an error every time I use a \" in a string. The error is '"': expected.
I would greatly appreciate any suggestions regarding possible misuses of the grammar and/or lexer.
Thanks in advance,
Scott Haney