forked from KolibriOS/kolibrios
67 lines
2.5 KiB
Plaintext
67 lines
2.5 KiB
Plaintext
|
Production rules for lexical tokens
|
||
|
===================================
|
||
|
|
||
|
This file provides a complete set of production rules for the tokens generated
|
||
|
by the lexer. In case of ambiguity, the longest match wins.
|
||
|
|
||
|
Components
|
||
|
----------
|
||
|
|
||
|
ident ::= '-'? nmstart nmchar*
|
||
|
name ::= nmchar+
|
||
|
nmstart ::= [a-zA-Z] | '_' | nonascii | escape
|
||
|
nonascii ::= [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
|
||
|
unicode ::= '\' [0-9a-fA-F]{1,6} wc?
|
||
|
escape ::= unicode | '\' [^\n\r\f0-9a-fA-F]
|
||
|
nmchar ::= [a-zA-Z0-9] | '-' | '_' | nonascii | escape
|
||
|
num ::= [-+]? ([0-9]+ | [0-9]* '.' [0-9]+)
|
||
|
string ::= '"' (stringchar | "'")* '"' | "'" (stringchar | '"')* "'"
|
||
|
stringchar ::= urlchar | #x20 | #x29 | '\' nl
|
||
|
urlchar ::= [#x9#x21#x23-#x26#x28#x2A-#x7E] | nonascii | escape
|
||
|
nl ::= #xA | #xD #xA | #xD | #xC
|
||
|
w ::= wc*
|
||
|
wc ::= #x9 | #xA | #xC | #xD | #x20
|
||
|
|
||
|
Tokens
|
||
|
------
|
||
|
|
||
|
IDENT ::= ident
|
||
|
ATKEYWORD ::= '@' ident
|
||
|
STRING ::= string
|
||
|
INVALID_STRING ::= '"' (stringchar | "'")* [^"] | "'" (stringchar | '"')* [^']
|
||
|
HASH ::= '#' name
|
||
|
NUMBER ::= num
|
||
|
PERCENTAGE ::= num '%'
|
||
|
DIMENSION ::= num ident
|
||
|
URI ::= "url(" w (string | urlchar*) w ')'
|
||
|
UNICODE-RANGE ::= [Uu] '+' [0-9a-fA-F?]{1,6} ('-' [0-9a-fA-F]{1,6})?
|
||
|
CDO ::= "<!--"
|
||
|
CDC ::= "-->"
|
||
|
S ::= wc+
|
||
|
COMMENT ::= "/*" [^*]* '*'+ ([^/] [^*]* '*'+)* '/'
|
||
|
FUNCTION ::= ident '('
|
||
|
INCLUDES ::= "~="
|
||
|
DASHMATCH ::= "|="
|
||
|
PREFIXMATCH ::= "^="
|
||
|
SUFFIXMATCH ::= "$="
|
||
|
SUBSTRINGMATCH ::= "*="
|
||
|
CHAR ::= any other character, except " or '
|
||
|
|
||
|
Differences from the CSS3 Syntax module specification
|
||
|
-----------------------------------------------------
|
||
|
|
||
|
1) UNICODE-RANGE is case insensitive (it's uppercase only in the spec)
|
||
|
2) escape follows CSS2.1. CSS3 defines it as:
|
||
|
escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
|
||
|
3) urlchar omits ' and ):
|
||
|
a) If ' is permitted verbatim then, as stringchar inherits from urlchar,
|
||
|
single quoted strings may contain verbatim single quotes. This is
|
||
|
clearly nonsense.
|
||
|
b) If ) is permitted verbatim then it becomes impossible to determine the
|
||
|
true end of URI. Thus, for sanity's sake, it's omitted here.
|
||
|
4) stringchar explicitly includes ). See 3(b) for why it won't inherit it
|
||
|
from urlchar as the spec implies.
|
||
|
5) BOM ::= #xFEFF is omitted. It is assumed that any leading BOM will be
|
||
|
stripped from the document before lexing occurs.
|
||
|
|