1. Michael Tindal
  2. Silver

Source

Silver / Classes / Parser / silver.rl

#import "SVParserContext.h"
#import "SVToken.h"
#import "silver_parser.h"
#import "SVObjcExtensions.h"

// Accumulates the text of the literal or comment currently being scanned.
// SB replaces it with a fresh, empty mutable string.
NSMutableString* string_buf;
#define SB string_buf = [NSMutableString string]
// Heredoc terminator text collected by the heredoc scanner; DL resets it.
NSMutableString* delimiter;
// NO while the heredoc scanner is still collecting the terminator text,
// YES once the first whitespace character after it has been seen.
BOOL delimiterFound;
// One-character string holding the character that followed a two-character
// %-prefix (%w, %q, %Q, %x, %r); set by DC.
NSString* delimiterChar;
#define DL delimiter = [NSMutableString string]
// AP appends the first character of the current match (*ts) to string_buf.
#define AP [string_buf appendCharacter:*ts]
// DC captures the third character of the current match (ts[2]) as a string.
#define DC delimiterChar = [NSString stringWithFormat:@"%c",*(ts+2)]

// Allocation routine handed to ParseAlloc() by compile(); defined elsewhere.
__strong void * mallocAllocatable(size_t size);

// Entry points of the generated parser, defined elsewhere.
// NOTE(review): the ParseAlloc/ParseFree/Parse naming looks like a Lemon-generated
// parser -- confirm against silver_parser.h.
void *ParseAlloc(void *(*mallocProc)(size_t));
void ParseFree(void *p,void (*freeProc)(void*));
void Parse(void *yyp,int yymajor,SVToken* yyminor,SVParserContext* context);

// Used by the %-literal scanners to decide when a literal is complete; called
// with the accumulated buffer and the captured delimiter character. Defined elsewhere.
BOOL __sv_delimiter_char_matched(NSString *,NSString *);

%%{
    
    machine silver_lexer;

    # Line counting: each line feed consumed through this definition bumps the
    # line counter of the context exactly once.
    newline = '\n' @{context.currentLine++;};
    # Any single character. A line feed is still counted here through the action
    # carried by the newline definition; embedding a second increment on top of
    # it would advance the counter twice per line break.
    any_count_line = any | newline;

    # Body of a block comment: consume characters through any_count_line until
    # the closing star-slash is seen, then jump back to the main scanner.
    c_comment := any_count_line* :>> '*/' @{fgoto main;};

    # Body of an attach comment. Every character is appended to string_buf until
    # the closing star-slash, at which point a TK_ATTACHCOMMENT token carrying a
    # copy of the buffer is handed to the parser and scanning returns to main.
    attach_comment := |*
        '*/' { Parse(context.parser,TK_ATTACHCOMMENT,CFRetain([SVToken token:TK_ATTACHCOMMENT data:[string_buf copy]]),context); fgoto main; };
        
        # Anything else is comment text (AP appends the matched character).
        any { AP; };
    *|;
        
    # Heredoc body. The characters up to the first whitespace form the
    # terminator text; everything after that is collected into string_buf until
    # the buffer ends with the terminator, which is then stripped from the
    # token data before the TK_STRING token is sent to the parser.
    heredoc := |*
        space       {
                        if(delimiterFound) {
                            // Whitespace inside the body is ordinary content.
                            AP;
                        } else {
                            // First whitespace ends the terminator text.
                            delimiterFound = YES;
                        }
                    };

        any         {
                        if(delimiterFound) {
                            AP;
                            if([string_buf hasSuffix:delimiter]) {
                                NSString* heredocBody = [string_buf stringByReplacingOccurrencesOfString:delimiter withString:@""];
                                Parse(context.parser,TK_STRING,CFRetain([SVToken token:TK_STRING data:heredocBody]),context);
                                fgoto main;
                            }
                        } else {
                            // Still reading the terminator text.
                            [delimiter appendCharacter:*ts];
                        }
                    };
    *|;

    # Body of a %w literal. Each character is appended to string_buf; once
    # __sv_delimiter_char_matched reports completion, the buffer minus its last
    # character is sent to the parser as a TK_STRINGARRAY token.
    stringarray := |*
        any         {
                        AP;
                        BOOL literalClosed = __sv_delimiter_char_matched(string_buf,delimiterChar);
                        if(literalClosed) {
                            NSString* contents = [string_buf substringToIndex:[string_buf length]-1];
                            Parse(context.parser,TK_STRINGARRAY,CFRetain([SVToken token:TK_STRINGARRAY data:contents]),context);
                            fgoto main;
                        }
                    };
    *|;

    # Body of a %q / %Q / %x literal. Each character is appended to string_buf;
    # once __sv_delimiter_char_matched reports completion, the buffer minus its
    # last character is sent to the parser as a TK_STRING token. The token
    # object is built with the same type that is passed to Parse (it previously
    # carried TK_STRINGARRAY, copied over from the stringarray scanner).
    string := |*
        any         { 
                        AP; 
                        if(__sv_delimiter_char_matched(string_buf,delimiterChar)) {
                            Parse(context.parser,TK_STRING,CFRetain([SVToken token:TK_STRING data:[string_buf substringToIndex:[string_buf length]-1]]),context);
                            fgoto main;
                        }
                    };
    *|;

    # Body of a %r literal. Each character is appended to string_buf; once
    # __sv_delimiter_char_matched reports completion, the buffer minus its last
    # character is sent to the parser. The token object now carries the same
    # type that is passed to Parse (it previously carried TK_STRINGARRAY).
    # NOTE(review): the literal is still reported as TK_STRING, as before; if the
    # grammar has a dedicated regex token, both arguments should probably use it.
    regex := |*
        any         { 
                        AP; 
                        if(__sv_delimiter_char_matched(string_buf,delimiterChar)) {
                            Parse(context.parser,TK_STRING,CFRetain([SVToken token:TK_STRING data:[string_buf substringToIndex:[string_buf length]-1]]),context);
                            fgoto main;
                        }
                    };
    *|;

    # Body of an @selector( ... ) expression. Characters are appended to
    # string_buf until the closing parenthesis, then a TK_SELECTOR token carrying
    # a copy of the buffer is sent to the parser and scanning returns to main.
    selector := |*
        ')'         { Parse(context.parser,TK_SELECTOR,CFRetain([SVToken token:TK_SELECTOR data:[string_buf copy]]),context); fgoto main; };

        any         { AP; };
    *|;

    # Top-level scanner. Longest match wins; on ties the pattern listed first
    # wins, which is why keywords and symbols precede the identifier and
    # operator catch-alls. Sub-scanners are entered with fgoto and jump back
    # here when their construct is complete.
    main := |*

    alnum_u = alnum | '_' | '@' | '?' | '!';
    alpha_u = alpha | '_' | '@' | '$';

    # more sane EOL comments, and the only ones we need 
    # The line counter is advanced by the action embedded in the newline
    # definition, so these patterns carry no action of their own; incrementing
    # again here would count every comment line twice.
    # NOTE(review): the slash-slash comment pattern also matches input that
    # starts with slash-slash-star and, being longer, appears to shadow the
    # attach comment entry below whenever a line feed follows -- confirm intent.
    '#' [^\n]* newline;
    '//' [^\n]* newline;
    '/*' { fgoto c_comment; };
    '//*' { SB; fgoto attach_comment; };

    # Keywords
    'class'         { KW(CLASS)     };
    'sync'          { KW(SYNC)      };
    'throw'         { KW(THROW)     };
    'set'           { KW(SET)       };
    'for'           { KW(FOR)       };
    'in'            { KW(IN)        };
    'if'            { KW(IF)        };
    'unless'        { KW(UNLESS)    };
    'else'          { KW(ELSE)      };
    'ensure'        { KW(ENSURE)    };
    'do'            { KW(DO)        };
    'begin'         { KW(BEGIN)     };
    'while'         { KW(WHILE)     };
    'until'         { KW(UNTIL)     };
    'def'           { KW(DEF)       };
    'self'          { KW(SELF)      };
    'super'         { KW(SUPER)     };
    'yield'         { KW(YIELD)     };
    'nil'           { KW(NIL)       };    
    'const'         { KW(CONST)     };
    'return'        { KW(RETURN)    };
    'break'         { KW(BREAK)     };
    'continue'      { KW(CONTINUE)  };
    'switch'        { KW(SWITCH)    };
    'case'          { KW(CASE)      };
    'default'       { KW(DEFAULT)   };
    'rescue'        { KW(RESCUE)    };
    'true'          { KW(TRUE)      };
    'false'         { KW(FALSE)     };
    'and'           { KW(TAND)      };
    'or'            { KW(TOR)       };
    'not'           { KW(TNOT)      };
    'defined?'      { KW(DEFINED)   };
    'undef_method'  { KW(UNDEF)     };
    'remove_method' { KW(REMOVE)    };
    'alias'         { KW(ALIAS)     };
    '$:'            { KW(CONTEXT)   };
    '$#'            { KW(EXCEPTION) };
    'protocol'      { KW(PROTOCOL)  };
    'req'           { KW(REQ)       };
    'opt'           { KW(OPT)       };
    'require'       { KW(REQUIRE)   };
    'module'        { KW(MODULE)    };
    'include'       { KW(INCLUDE)   };

    # symbols
    '|>'             { SYM(FORWARDPIPE) };
    '<|'             { SYM(BACKWARDPIPE) };
    '->'            { SYM(LAMBDA)   };
    ':'             { SYM(COLON)    };
    '::'            { SYM(DCOLON)   };
    ':='            { SYM(REPF)     };
    ';'             { SYM(SEMICOLON) };
    '#'             { SYM(POUND) };
    '${'            { SYM(REP) };
    '['             { SYM(LBRACKET) };
    ']'             { SYM(RBRACKET) };
    '{'             { SYM(LBRACE)   };
    '}'             { SYM(RBRACE)   };
    '('             { SYM(LPAREN)   };
    ')'             { SYM(RPAREN)   };
    ','             { context.inMRHS ? SYM_(RCOMMA) : SYM(COMMA)    };
    '='             { context.inMRHS = YES; SYM(ASSIGN)   };
    '+='            { SYM(ADDASSIGN) };
    '-='            { SYM(SUBASSIGN) };
    '*='            { SYM(MULASSIGN) };
    '/='            { SYM(DIVASSIGN) };
    '%='            { SYM(MODASSIGN) };
    '**='           { SYM(EXPASSIGN) };
    '<<='           { SYM(SHLASSIGN) };
    '>>='           { SYM(SHRASSIGN) };
    '&='            { SYM(ANDASSIGN) };
    '|='            { SYM(ORASSIGN) };
    '^='            { SYM(XORASSIGN) };
    '||='           { SYM(CONDASSIGNO)};
    '&&='           { SYM(CONDASSIGNA)};
    '.'             { SYM(DOT)      };
    '..'            { SYM(IRANGE)   };
    '...'           { SYM(ERANGE)   };
    '=>'            { SYM(HASH)     };
    '?'             { SYM(QUESTION) };
    '%%'            { SYM(PERCENT)  };
    '=~'            { SYM(REGEXMATCH) };
    '!~'            { SYM(REGEXNOMATCH) };
    '+@'            { SYM(UPLUS) };
    '-@'            { SYM(UMINUS) };
    '=='            { SYM(EQUAL)    };
    '!='            { SYM(NOTEQUAL) };
    '==='           { SYM(WHENCOMP)  };
    '!=='           { SYM(WHENNOT) };
    '<=>'           { SYM(COMPARE)  };
    '<'             { SYM(LESSTHAN) };
    '<='            { SYM(LESSTHANEQUAL) };
    '>'             { SYM(GREATERTHAN) };
    '>='            { SYM(GREATERTHANEQUAL) };
    '&&'            { SYM(LOGICALAND) };
    '||'            { SYM(LOGICALOR) };
    '(+)'           { SYM(LOGICALXOR) };
    '!'             { SYM(NOT)      };
    '+'             { SYM(PLUS)     };
    '-'             { SYM(MINUS)    };
    '*'             { SYM(MULTIPLY) };
    '/'             { SYM(DIVIDE)   };
    '%'             { SYM(MODULUS)  };
    '**'             { SYM(EXPONENT) };
    '<<'            { SYM(SHIFTLEFT)};
    '>>'            { SYM(SHIFTRIGHT) };
    '&'             { SYM(BITWISEAND) };
    '|'             { SYM(BITWISEOR) };
    '^'           { SYM(BITWISEXOR) };
    '~'             { SYM(BITWISEINVERSE) };
    '[]'            { SYM(BRACKETS) };
    '[]='            { SYM(BRACKETSASSIGN) };

    # identifiers for things like variables and such
    alpha_u alnum_u* { TOK(IDENTIFIER) };

    '<|' . alpha_u alnum_u* . '|>' { RI() };
    
    # single quoted string
    sliteralChar = [^'\\] | newline | ( '\\' . any_count_line );
    '\'' . sliteralChar* . '\'' { STRING() };

    # double quoted string
    dliteralChar = [^"\\] | newline | ( '\\' . any_count_line );
    '"' . dliteralChar* . '"' { STRING() };

    # heredoc
    '<<-' { delimiterFound = NO; DL; SB; fgoto heredoc; };

    # special characters
    # The character after the two-character prefix is captured by DC before
    # the matching sub-scanner takes over.
    '%w' . any { SB; DC; fgoto stringarray; };
    ('%q' | '%Q' | '%x') . any { SB; DC; fgoto string; };
    '%r' . any { SB; DC; fgoto regex; };

    # regular expression string
    '%/' . [^/]* . '/' { REGEX() };

    ('+' | '-' | '=' | '!' | '~' | '<' | '>' | '&' | '|' | '^' | '%' | '*' | '/' | '@' | '?' )+ { OP() };

    '@selector(' { SB; fgoto selector; };

    newline { SYM(NEWLINE) };

    # Whitespace is standard ws, newlines and control codes.
    # Plain any is used so that this pattern embeds no line-counting action: a
    # line feed is claimed (and counted) by the newline pattern above, and
    # embedded actions would otherwise also run here for the same character.
    any - 0x21..0x7e;

    # integers, floats, and hex encoded values
    digit+ { NUM() };
    digit+ '.' digit+ { NUM() };
    '0x' xdigit+ { HEX() };
    *|;

}%%

// Emit the static tables of the silver_lexer machine (final-state data omitted).
%% write data nofinal;

// Parser tracing hook, defined elsewhere; only referenced from a
// commented-out call in compile().
void ParseTrace(FILE*,char*);

/**
 * Lexes `source` with the silver_lexer machine, handing each token to the
 * generated parser through Parse() as it is recognised.
 *
 * A fresh parser is allocated with ParseAlloc() and stored in context.parser
 * for the duration of the call; the property is reset to NULL before returning.
 * If the scanner stops in its error state, "Lexer error" is logged and
 * context.errors is incremented; otherwise the parser receives the
 * end-of-input token (major 0).
 *
 * @param context Parser context that receives tokens, line counts and errors.
 * @param source  Program text to compile.
 * @return 1 if context.errors is greater than zero afterwards, otherwise 0.
 */
int compile(SVParserContext* context, NSString* source)
{
    int cs, act;
    const char *ts = 0, *te = 0;
    const char *p = [source UTF8String];
    context.parser = ParseAlloc((void*(*)(size_t))mallocAllocatable);
    //ParseTrace(stdout,"");

    %% write init;

    // The scanner walks the UTF-8 bytes behind p, so the end pointer has to be
    // derived from the encoded byte count. -length reports UTF-16 code units,
    // which is smaller than the byte count as soon as the source contains
    // non-ASCII text and would cut the input short.
    const char *pe = p+[source lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
    const char *eof = pe;

    %% write exec;

    if(cs == silver_lexer_error) {
        NSLog(@"Lexer error");
        context.errors++;
    } else
        Parse(context.parser, 0, nil, context);

    // NOTE(review): the parser is dropped without a ParseFree() call; this
    // presumably relies on how mallocAllocatable manages memory -- confirm.
    context.parser = NULL;

    return context.errors > 0 ? 1 : 0;
}