Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

Meta Syntax

This is a fully functional meta-syntax that can bootstrap the parser generator. The non-terminals `action` and `eof` are handwritten. The former embeds a foreign language, so its parser has to maintain a brace counter, which is non-standard.

{
    use crate::ast::*;
    #[allow(unused_imports)]
    use crate::builder::common::s2c;
}

// Entry rule of the meta-syntax: an optional leading action block (bound as
// `import`), followed by zero or more callables, followed by the handwritten
// `eof` non-terminal. The `$` items are explicit clean points (they parse as
// `Assignment::Clean`); here one follows the import block and one follows
// every callable.
peg grammar -> { Grammar }:
    / import=action? $ callables=(callable=callable $)* #eof {
        Grammar { import, callables }
    }
    ;

// A top-level definition, in one of three forms:
//   1. Rule   - optional decorator, a `peg`/`lex` PREFIX, a name, `->`, the
//               result type written as an action block, `:`, the rule body
//               and a closing `;`.
//   2. Regex  - optional decorator, a name, `:`, a regex union and `;`.
//   3. Shared - a mandatory decorator followed by a braced group of one or
//               more `name: regex;` entries that all share that decorator.
// Items written with a leading `#` use the eager form described in `item`.
peg callable -> { Callable }:
    / decorator=decorator? prefix=PREFIX name=#NAME #'->' ty=#action #':' rule=#rule #';' {
        Callable::Rule(decorator, prefix, name, ty, rule)
    }
    / decorator=decorator? name=NAME #':' regex=#union #';' {
        Callable::Regex(decorator, name, regex)
    }
    / decorator=decorator #'{' shared=#(name=NAME #':' regex=#union #';')+ #'}' {
        Callable::Shared(decorator, shared)
    }
    ;

// Decorator syntax: `@(` tag (`,` tag)* `)`. At least one tag is required;
// the first tag is stored in `first` and any further tags in `more`.
peg decorator -> { Decorator }:
    / '@' #'(' first=#tag more=(',' tag=#tag)* #')' { Decorator { first, more } }
    ;

// The four keywords accepted inside a decorator and the `Tag` variant each
// one produces. Note that the `ws` keyword maps to `Tag::Whitespace`.
peg tag -> { Tag }:
    / 'memo' { Tag::Memo }
    / 'left' { Tag::Left }
    / 'intern' { Tag::Intern }
    / 'ws' { Tag::Whitespace }
    ;

// Body of a rule: one or more alternatives separated by `/`. A leading `/`
// before the first alternative is optional (it is the style this file uses).
// The first alternative goes to `first`, the rest to `more`.
peg rule -> { Rule }:
    / '/'? first=#alter more=('/' alter=#alter)* {
        Rule { first, more }
    }
    ;

// A single alternative: one or more assignments, optionally followed by an
// action block.
peg alter -> { Alter }:
    / assignments=#assignment+ action=action? { Alter { assignments, action } }
    ;

// One element of an alternative:
//   `name=item`       -> Assignment::Named
//   `&atom` / `!atom` -> Assignment::Lookahead
//   bare item         -> Assignment::Anonymous
//   `$`               -> Assignment::Clean
// The named form is listed before the bare-item form; a bare item can itself
// begin with a NAME (via `atom`), so the order of these alternatives matters
// under PEG ordered choice.
peg assignment -> { Assignment }:
    / name=NAME '=' item=#item { Assignment::Named(name, item) }
    / lookahead=lookahead { Assignment::Lookahead(lookahead) }
    / item=item { Assignment::Anonymous(item) }
    / '$' { Assignment::Clean }
    ;

// Lookahead predicates: `&atom` builds Lookahead::Positive and `!atom` builds
// Lookahead::Negative. The atom after the operator is written in eager form.
peg lookahead -> { Lookahead }:
    / '&' atom=#atom { Lookahead::Positive(atom) }
    / '!' atom=#atom { Lookahead::Negative(atom) }
    ;

// An atom with an optional repetition suffix:
//   atom `?`       -> Item::Optional
//   atom `*`       -> Item::ZeroOrMore
//   [`#`] atom `+` -> Item::OnceOrMore(eager, atom)
//   [`#`] atom     -> Item::Name(eager, atom)
// Only the last two forms accept the `#` prefix; whether it was present is
// recorded as the boolean `eager.is_some()`. The `?` and `*` forms take no
// prefix.
peg item -> { Item }:
    / atom=atom '?' { Item::Optional(atom) }
    / atom=atom '*' { Item::ZeroOrMore(atom) }
    / eager='#'? atom=atom '+' { Item::OnceOrMore(eager.is_some(), atom) }
    / eager='#'? atom=atom { Item::Name(eager.is_some(), atom) }
    ;

// The smallest unit of a rule body: a parenthesised nested rule, a quoted
// EXPECT literal, or a reference to another definition by NAME.
peg atom -> { Atom }:
    / '(' rule=#rule #')' { Atom::Nested(rule) }
    / expect=EXPECT { Atom::Expect(expect) }
    / name=NAME { Atom::Name(name) }
    ;

// Regex alternation with `|`. The rule is written left-recursively: the left
// operand is itself a `union` and the right operand is a `concat`. Without a
// `|` the result is simply the inner `concat`.
peg union -> { Regex }:
    / lhs=union '|' rhs=#concat { Regex::Union(lhs.into(), rhs.into()) }
    / concat=concat { concat }
    ;

// Regex concatenation by juxtaposition. Also left-recursive: the left operand
// is a `concat`, the right operand a `repeat`. A lone `repeat` is passed
// through unchanged.
peg concat -> { Regex }:
    / lhs=concat rhs=repeat { Regex::Concat(lhs.into(), rhs.into()) }
    / repeat=repeat { repeat }
    ;

// Postfix repetition on a regex: `+` builds Regex::OnceOrMore and `*` builds
// Regex::ZeroOrMore. The operand is itself a `repeat`, so suffixes can be
// stacked. With no suffix, a `primary` is wrapped in Regex::Primary.
peg repeat -> { Regex }:
    / inner=repeat '+' { Regex::OnceOrMore(inner.into()) }
    / inner=repeat '*' { Regex::ZeroOrMore(inner.into()) }
    / primary=primary { Regex::Primary(primary) }
    ;

// Leaf of a regex: a parenthesised union, a single-quoted STRING literal, a
// reference by NAME, or a bracketed character SET. SET already produces a
// `Primary`, so its value is returned as-is.
peg primary -> { Primary }:
    / '(' regex=#union #')' { Primary::Parentheses(regex.into()) }
    / string=STRING { Primary::Literal(string) }
    / name=NAME { Primary::Name(name) }
    / set=SET { set }
    ;

// Keyword that selects the kind of a rule definition: `peg` or `lex`.
// The `!TAIL` negative lookahead rejects the keyword when an identifier
// character follows it, so a name such as `pegs` is not read as `peg`.
lex PREFIX -> { Prefix }:
    / 'peg' !TAIL { Prefix::Peg }
    / 'lex' !TAIL { Prefix::Lex }
    ;

// Bracketed character set made of one or more RANGE entries. A `^` directly
// after the opening bracket yields Primary::Exclude; otherwise the set yields
// Primary::Include. The `^` form is listed first.
lex SET -> { Primary }:
    / '[' '^' set=#RANGE+ #']' { Primary::Exclude(set) }
    / '[' set=#RANGE+ #']' { Primary::Include(set) }
    ;

// Quoted literal usable inside a peg rule. Single quotes produce Expect::Once
// and double quotes produce Expect::Keyword; the two actions are otherwise
// identical. Each captured element (SQC or DQC) is looked up with
// `x.intern.get`, passed through `s2c`, and the results are collected into a
// String; that String is then handed to `x.intern.id` and the returned id is
// stored in the Expect value.
// NOTE(review): `x` is not declared in this file - presumably the parser
// context supplied by the generator; confirm.
// NOTE(review): the `.unwrap()` assumes every captured id is present in the
// interner; `s2c` is presumably a string-to-char conversion - confirm both.
lex EXPECT -> { Expect }:
    / '\'' expect=#SQC+ #'\'' { {
        let expect = expect
            .iter()
            .map(|c| s2c(x.intern.get(c).unwrap()))
            .collect::<String>();
        let id = x.intern.id(expect.as_str());
        Expect::Once(id)
    } }
    / '"' expect=#DQC+ #'"' { {
        let expect = expect
            .iter()
            .map(|c| s2c(x.intern.get(c).unwrap()))
            .collect::<String>();
        let id = x.intern.id(expect.as_str());
        Expect::Keyword(id)
    } }
    ;

// Single-quoted string literal used in regex definitions (see `primary`).
// Returns the captured SQC values unchanged; the declared result type is
// Vec<usize>. Unlike EXPECT, there is no double-quoted form.
lex STRING -> { Vec<usize> }:
    / '\'' string=#SQC+ #'\'' { string }
    ;

// One entry of a character set. `a-b` yields the pair (a, b); a single
// character yields (ch, ch), i.e. a range containing only itself.
lex RANGE -> { (usize, usize) }:
    / start=BC '-' end=#BC { (start, end) }
    / ch=BC { (ch, ch) }
    ;

// Identifier: an ASCII letter or underscore followed by any number of TAIL
// characters. Carries the `intern` and `memo` tags.
@(intern, memo)
NAME: [a-zA-Z_] TAIL* ;
// Identifier continuation character (ASCII letter, digit or underscore).
// Also used by PREFIX as a negative lookahead.
TAIL: [a-zA-Z0-9_] ;

// Character classes sharing the `intern` tag:
//   DQC - a character inside a double-quoted literal
//   SQC - a character inside a single-quoted literal
//   BC  - a character inside a bracketed set
// Each accepts a UNICODE escape, an ESCAPE sequence, or any single character
// other than a backslash and that context's own closing delimiter.
@(intern) {
    DQC: UNICODE | ESCAPE | [^\"\\] ;
    SQC: UNICODE | ESCAPE | [^\'\\] ;
    BC: UNICODE | ESCAPE | [^\]\\] ;
}

// Definitions carrying the `ws` tag. WS is a run of one or more space,
// newline, tab or carriage-return characters; COMMENT starts with `//` and
// runs up to, but not including, the next newline.
@(ws) {
    WS: [\u{20}\n\t\r]+ ;
    COMMENT: '//' [^\n]* ;
}

// Unicode escape: a backslash, `u`, then lowercase hex digits in braces.
// NOTE(review): the `*` also admits an empty digit list, and uppercase hex
// digits are not accepted - confirm both are intended.
UNICODE: '\\' 'u' '{' [0-9a-f]* '}' ;
// Backslash escape: a backslash followed by a single quote, a double quote,
// a square bracket, `n`, `t`, `r`, or another backslash.
ESCAPE: '\\' [\'\"\[\]ntr\\] ;