bb2bbc6b91
git-svn-id: svn://kolibrios.org@4364 a494cfbc-eb01-0410-851d-a64ba20cac60
67 lines
2.5 KiB
Plaintext
67 lines
2.5 KiB
Plaintext
Production rules for lexical tokens
|
|
===================================
|
|
|
|
This file provides a complete set of production rules for the tokens generated
|
|
by the lexer. In case of ambiguity, the longest match wins.
|
|
|
|
Components
|
|
----------
|
|
|
|
ident ::= '-'? nmstart nmchar*
|
|
name ::= nmchar+
|
|
nmstart ::= [a-zA-Z] | '_' | nonascii | escape
|
|
nonascii ::= [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
|
|
unicode ::= '\' [0-9a-fA-F]{1,6} wc?
|
|
escape ::= unicode | '\' [^\n\r\f0-9a-fA-F]
|
|
nmchar ::= [a-zA-Z0-9] | '-' | '_' | nonascii | escape
|
|
num ::= [-+]? ([0-9]+ | [0-9]* '.' [0-9]+)
|
|
string ::= '"' (stringchar | "'")* '"' | "'" (stringchar | '"')* "'"
|
|
stringchar ::= urlchar | #x20 | #x29 | '\' nl
|
|
urlchar ::= [#x9#x21#x23-#x26#x28#x2A-#x7E] | nonascii | escape
|
|
nl ::= #xA | #xD #xA | #xD | #xC
|
|
w ::= wc*
|
|
wc ::= #x9 | #xA | #xC | #xD | #x20
|
|
|
|
Tokens
|
|
------
|
|
|
|
IDENT ::= ident
|
|
ATKEYWORD ::= '@' ident
|
|
STRING ::= string
|
|
INVALID_STRING ::= '"' (stringchar | "'")* [^"] | "'" (stringchar | '"')* [^']
|
|
HASH ::= '#' name
|
|
NUMBER ::= num
|
|
PERCENTAGE ::= num '%'
|
|
DIMENSION ::= num ident
|
|
URI ::= "url(" w (string | urlchar*) w ')'
|
|
UNICODE-RANGE ::= [Uu] '+' [0-9a-fA-F?]{1,6} ('-' [0-9a-fA-F]{1,6})?
|
|
CDO ::= "<!--"
|
|
CDC ::= "-->"
|
|
S ::= wc+
|
|
COMMENT ::= "/*" [^*]* '*'+ ([^/] [^*]* '*'+)* '/'
|
|
FUNCTION ::= ident '('
|
|
INCLUDES ::= "~="
|
|
DASHMATCH ::= "|="
|
|
PREFIXMATCH ::= "^="
|
|
SUFFIXMATCH ::= "$="
|
|
SUBSTRINGMATCH ::= "*="
|
|
CHAR ::= any other character, except " or '
|
|
|
|
Differences from the CSS3 Syntax module specification
|
|
-----------------------------------------------------
|
|
|
|
1) UNICODE-RANGE is case insensitive (it's uppercase only in the spec)
|
|
2) escape follows CSS2.1. CSS3 defines it as:
|
|
escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
|
|
3) urlchar omits ' and ):
|
|
a) If ' is permitted verbatim then, as stringchar inherits from urlchar,
|
|
single quoted strings may contain verbatim single quotes. This is
|
|
clearly nonsense.
|
|
b) If ) is permitted verbatim then it becomes impossible to determine the
|
|
true end of URI. Thus, for sanity's sake, it's omitted here.
|
|
4) stringchar explicitly includes ). See 3(b) for why it won't inherit it
|
|
from urlchar as the spec implies.
|
|
5) BOM ::= #xFEFF is omitted. It is assumed that any leading BOM will be
|
|
stripped from the document before lexing occurs.
|
|
|