Karl,
Are you really sure you want to do that? The reason I ask is that I've never seen another language that supports nested comments - I don't think most SQL implementations do either.
Are you using a dynamic language? If not, it's easy... If you are, I can't help you (but somebody else will).
To handle this in a non-dynamic language, use a stack or a depth "counter" in the multi-line comment lexer routine.
For instance, let's say your lexer looks like (from SimpleLanguage example):
/// <summary>
/// Reads one character from <paramref name="reader"/> and tries to recognize the token that begins with it.
/// </summary>
/// <param name="reader">The text buffer reader the token characters are read from.</param>
/// <param name="lexicalState">The lexical state stored in the parse data when a token is recognized.</param>
/// <param name="lexicalParseData">
/// Receives a new <c>LexicalStateAndIDTokenLexicalParseData</c> when a token is recognized; left untouched otherwise.
/// </param>
/// <returns>
/// <c>MatchType.ExactMatch</c> when a token ID other than <c>SimpleTokenID.Invalid</c> was produced;
/// otherwise <c>MatchType.NoMatch</c>, after calling <c>reader.ReadReverse()</c>.
/// </returns>
public MatchType GetNextTokenLexicalParseData(ITextBufferReader reader, ILexicalState lexicalState, ref ITokenLexicalParseData lexicalParseData) {
    // Pull the character that decides which kind of token this is
    char first = reader.Read();
    int id = SimpleTokenID.Invalid;

    if (Char.IsLetter(first) || (first == '_')) {
        // Identifiers start with a letter or an underscore
        id = this.ParseIdentifier(reader, first);
    }
    else if ((first != '\n') && Char.IsWhiteSpace(first)) {
        // Swallow the rest of the whitespace run; '\n' is excluded because it is its own token
        while (true) {
            char upcoming = reader.Peek();
            if ((upcoming == '\n') || !Char.IsWhiteSpace(upcoming))
                break;
            reader.Read();
        }
        id = SimpleTokenID.Whitespace;
    }
    else {
        switch (first) {
            case ',':
                id = SimpleTokenID.Comma;
                break;
            case ';':
                id = SimpleTokenID.SemiColon;
                break;
            case '(':
                id = SimpleTokenID.OpenParenthesis;
                break;
            case ')':
                id = SimpleTokenID.CloseParenthesis;
                break;
            case '{':
                id = SimpleTokenID.OpenCurlyBrace;
                break;
            case '}':
                id = SimpleTokenID.CloseCurlyBrace;
                break;
            case '\n':
                id = SimpleTokenID.LineTerminator;
                break;
            case '+':
                id = SimpleTokenID.Addition;
                break;
            case '-':
                id = SimpleTokenID.Subtraction;
                break;
            case '*':
                id = SimpleTokenID.Multiplication;
                break;
            case '/': {
                // A '/' is a division operator unless the next character starts a comment
                char afterSlash = reader.Peek();
                if (afterSlash == '/')
                    id = this.ParseSingleLineComment(reader);
                else if (afterSlash == '*')
                    id = this.ParseMultiLineComment(reader);
                else
                    id = SimpleTokenID.Division;
                break;
            }
            case '=':
                // "==" is equality, a lone '=' is assignment
                id = SimpleTokenID.Assignment;
                if (reader.Peek() == '=') {
                    reader.Read();
                    id = SimpleTokenID.Equality;
                }
                break;
            case '!':
                // Only "!=" is recognized; a lone '!' leaves the token invalid
                if (reader.Peek() == '=') {
                    reader.Read();
                    id = SimpleTokenID.Inequality;
                }
                break;
            default:
                // ASCII digits start a number; anything else stays invalid
                if (('0' <= first) && (first <= '9'))
                    id = this.ParseNumber(reader, first);
                break;
        }
    }

    if (id == SimpleTokenID.Invalid) {
        // Nothing recognized: reverse the reader before reporting no match
        // (presumably this un-reads the character consumed above - confirm against the reader API)
        reader.ReadReverse();
        return MatchType.NoMatch;
    }

    lexicalParseData = new LexicalStateAndIDTokenLexicalParseData(lexicalState, (byte)id);
    return MatchType.ExactMatch;
}
with the "this.ParseMultiLineComment(ITextBufferReader reader)" implemented as
/// <summary>
/// Consumes the rest of a multi-line comment. Stops after the first "*/" it finds, or at the
/// end of the buffer, so nested comments are not supported.
/// </summary>
/// <param name="reader">The text buffer reader the comment characters are read from.</param>
/// <returns>Always <c>SimpleTokenID.MultiLineComment</c>, whether or not a closing delimiter was found.</returns>
protected virtual int ParseMultiLineComment(ITextBufferReader reader) {
    // Skip the '*' of the opening delimiter
    reader.Read();

    while (reader.Offset < reader.Length) {
        if (reader.Peek() == '*') {
            // A '*' that is the last character in the buffer: consume it and stop
            if (reader.Offset + 1 >= reader.Length) {
                reader.Read();
                break;
            }

            // NOTE(review): Peek(2) is presumably the character right after the '*' - confirm
            if (reader.Peek(2) == '/') {
                // Consume both characters of the closing delimiter and stop
                reader.Read();
                reader.Read();
                break;
            }
        }

        // Not a closing delimiter: advance one character
        reader.Read();
    }

    return SimpleTokenID.MultiLineComment;
}
You can change your lexer to handle it by making ParseMultiLineComment a bit smarter...
/// <summary>
/// Consumes the rest of a multi-line comment, tracking nesting depth so that every nested "/*"
/// must be matched by its own "*/" before the comment ends.
/// </summary>
/// <param name="reader">The text buffer reader the comment characters are read from.</param>
/// <returns>
/// Always <c>SimpleTokenID.MultiLineComment</c>. Reaching the end of the buffer before the depth
/// returns to zero is treated as an unfinished comment, not an error.
/// </returns>
protected virtual int ParseMultiLineComment(ITextBufferReader reader) {
    // Skip the '*' of the opening delimiter; that delimiter accounts for the first level
    reader.Read();
    int openCount = 1;

    while (!reader.IsAtEnd) {
        // Always consume one character so the loop makes progress
        char current = reader.Peek();
        reader.Read();

        // Only '/' and '*' can begin a two-character delimiter
        bool startsOpen = (current == '/');
        if (!startsOpen && (current != '*'))
            continue;

        // Never look past the end of the buffer
        if (reader.IsAtEnd)
            break;

        // "/*" opens a nested comment, "*/" closes one
        char partner = startsOpen ? '*' : '/';
        if (reader.Peek() != partner)
            continue;

        // Consume the second character of the delimiter
        reader.Read();

        if (startsOpen) {
            openCount++;
            continue;
        }

        // Back at zero means the outermost comment has just been closed
        openCount--;
        if (openCount == 0)
            break;
    }

    return SimpleTokenID.MultiLineComment;
}
This code is untested, but I'm pretty sure it'll work properly... It may need a few simple changes though - feel free to ask if you run into trouble with it.
[Modified at 03/09/2007 03:36 PM]
Kelly Leahy
Software Architect
Milliman, USA