Here's the grammar:
class XmlGrammar = ExecutableGrammar(
"Xml 1.0 grammar with namespaces.
Every slot below defines one production, built from the parser combinators inherited from ExecutableGrammar. Production names written in the style of the XML 1.0 recommendation (VersionInfo, EncName, STag, ...) follow the production of the same name there."
|
"Single characters, plus token variants (t prefix) built with tokenFromChar:."
openAB = char: $<.
closeAB = char: $>.
amp = char: $&.
semicolon = char: $; .
slash = char: $/.
topenAB = tokenFromChar: $<.
tcloseAB = tokenFromChar: $>.
tslash = tokenFromChar: $/.
"Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'  (XML 1.0, production 15).
The opening delimiter includes the exclamation mark, and the body uses star so that the empty comment is accepted.
NOTE(review): no production in this slot list references comment; presumably the inherited token combinators use it when skipping input -- confirm against ExecutableGrammar."
comment = openAB , (char: $!), (char: $-),(char: $-),
((charExceptFor: $-) | ((char: $-), (charExceptFor: $-))) star,
(char: $-),(char: $-),closeAB .
letter = (charBetween: $a and: $z) | (charBetween: $A and: $Z).
digit = charBetween: $0 and: $9.
colon = char: $:.
quote = (char:$') .
dquote = (char:$") .
eq = tokenFromChar: $= .
"XML declaration: version, optional encoding, optional standalone flag."
VersionNum = (char:$1), (char: $.) , (charBetween: $0 and: $9) plus.
VersionInfo = (tokenFromSymbol: #version), eq, ((quote, VersionNum,quote) | (dquote, VersionNum, dquote )).
"EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*  (production 81); the extra characters are needed for names such as UTF-8 or ISO-8859-1."
EncName = letter, (letter | digit | (char: $.) | (char: $_) | (char: $-)) star.
"The keyword is encoding (production 80)."
EncodingDecl = (tokenFromSymbol: #encoding) , eq , ((quote, EncName ,quote) | (dquote , EncName , dquote )).
yesNo = (tokenFromSymbol: #yes) | (tokenFromSymbol: #no).
SDDecl = (tokenFromSymbol: #standalone), eq, ((quote, yesNo ,quote) | (dquote , yesNo , dquote )).
XMLDecl = (char: $<) , (char: $?) ,(tokenFromSymbol: #xml), VersionInfo , EncodingDecl opt, SDDecl opt,
(tokenFromChar: $?), (char: $>).
dprolog = XMLDecl.
"Names: ASCII letters, digits, underscore, hyphen and dot only; a qualified name is an optional prefix, a colon and a local part."
NameStartChar = letter | (char: $_) .
NameChar = NameStartChar | (char: $-) | (char: $.) | digit.
Name = NameStartChar, NameChar star.
QName = (Name, colon, Name)| Name.
TQName = tokenFor: QName.
"Entity and character references."
EntityRef = amp, Name ,semicolon .
CharRef = amp, (char: $#), (((char:$x), (digit | letter) plus) | (digit plus)) ,semicolon .
Reference = EntityRef | CharRef.
"Attribute values (production 10). Each quoting style excludes only its own delimiter, so the repetition stops at the closing quote: AttributeContent1 is used between double quotes, AttributeContent2 between single quotes."
AttributeContent1 = (charExceptForCharIn: {$< . $". $&. }) | Reference.
AttributeContent2 = (charExceptForCharIn: {$< . $'. $&. }) | Reference.
AttributeValue = (dquote, AttributeContent1 star,dquote) |
(quote, AttributeContent2 star,quote).
Attribute = TQName ,eq, AttributeValue.
"Tags."
EmptyElemTag = topenAB ,QName, Attribute star, tslash , closeAB .
STag = topenAB ,QName, Attribute star, tcloseAB .
ETag = topenAB ,slash,QName, closeAB .
"CDATA sections.
NOTE(review): the body rejects every ] character, which is stricter than production 20 (anything not containing the closing delimiter); left unchanged here."
CDStart = topenAB ,(char: $!),(char:$[),(tokenFromSymbol: #CDATA),(char:$[).
CDEnd = (char: $]),(char: $]),(char: $>).
CDSect = CDStart, (charExceptFor: $]) star , CDEnd.
"Element content; content and element refer to each other."
CharData = tokenFor: ((charExceptForCharIn: {$&. $<}) plus).
content = CharData opt, ((element | Reference | CDSect), CharData opt) star.
ComplexElement = STag, content , ETag.
element = EmptyElemTag | ComplexElement .
|
)
...
Code for this experiment can be found here.