1698 lines
44 KiB
JavaScript
1698 lines
44 KiB
JavaScript
;(function (sax) {
|
|
// wrapper for non-node envs
|
|
sax.parser = function (strict, opt) {
|
|
return new SAXParser(strict, opt)
|
|
}
|
|
sax.SAXParser = SAXParser
|
|
sax.SAXStream = SAXStream
|
|
sax.createStream = createStream
|
|
|
|
// When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
|
|
// When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
|
|
// since that's the earliest that a buffer overrun could occur. This way, checks are
|
|
// as rare as required, but as often as necessary to ensure never crossing this bound.
|
|
// Furthermore, buffers are only tested at most once per write(), so passing a very
|
|
// large string into write() might have undesirable effects, but this is manageable by
|
|
// the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
|
|
// edge case, result in creating at most one complete copy of the string passed in.
|
|
// Set to Infinity to have unlimited buffers.
|
|
sax.MAX_BUFFER_LENGTH = 64 * 1024
|
|
|
|
var buffers = [
|
|
'comment',
|
|
'sgmlDecl',
|
|
'textNode',
|
|
'tagName',
|
|
'doctype',
|
|
'procInstName',
|
|
'procInstBody',
|
|
'entity',
|
|
'attribName',
|
|
'attribValue',
|
|
'cdata',
|
|
'script',
|
|
]
|
|
|
|
sax.EVENTS = [
|
|
'text',
|
|
'processinginstruction',
|
|
'sgmldeclaration',
|
|
'doctype',
|
|
'comment',
|
|
'opentagstart',
|
|
'attribute',
|
|
'opentag',
|
|
'closetag',
|
|
'opencdata',
|
|
'cdata',
|
|
'closecdata',
|
|
'error',
|
|
'end',
|
|
'ready',
|
|
'script',
|
|
'opennamespace',
|
|
'closenamespace',
|
|
]
|
|
|
|
function SAXParser(strict, opt) {
|
|
if (!(this instanceof SAXParser)) {
|
|
return new SAXParser(strict, opt)
|
|
}
|
|
|
|
var parser = this
|
|
clearBuffers(parser)
|
|
parser.q = parser.c = ''
|
|
parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
|
|
parser.opt = opt || {}
|
|
parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags
|
|
parser.looseCase = parser.opt.lowercase ? 'toLowerCase' : 'toUpperCase'
|
|
parser.tags = []
|
|
parser.closed = parser.closedRoot = parser.sawRoot = false
|
|
parser.tag = parser.error = null
|
|
parser.strict = !!strict
|
|
parser.noscript = !!(strict || parser.opt.noscript)
|
|
parser.state = S.BEGIN
|
|
parser.strictEntities = parser.opt.strictEntities
|
|
parser.ENTITIES =
|
|
parser.strictEntities ?
|
|
Object.create(sax.XML_ENTITIES)
|
|
: Object.create(sax.ENTITIES)
|
|
parser.attribList = []
|
|
|
|
// namespaces form a prototype chain.
|
|
// it always points at the current tag,
|
|
// which protos to its parent tag.
|
|
if (parser.opt.xmlns) {
|
|
parser.ns = Object.create(rootNS)
|
|
}
|
|
|
|
// disallow unquoted attribute values if not otherwise configured
|
|
// and strict mode is true
|
|
if (parser.opt.unquotedAttributeValues === undefined) {
|
|
parser.opt.unquotedAttributeValues = !strict
|
|
}
|
|
|
|
// mostly just for error reporting
|
|
parser.trackPosition = parser.opt.position !== false
|
|
if (parser.trackPosition) {
|
|
parser.position = parser.line = parser.column = 0
|
|
}
|
|
emit(parser, 'onready')
|
|
}
|
|
|
|
if (!Object.create) {
|
|
Object.create = function (o) {
|
|
function F() {}
|
|
F.prototype = o
|
|
var newf = new F()
|
|
return newf
|
|
}
|
|
}
|
|
|
|
if (!Object.keys) {
|
|
Object.keys = function (o) {
|
|
var a = []
|
|
for (var i in o) if (o.hasOwnProperty(i)) a.push(i)
|
|
return a
|
|
}
|
|
}
|
|
|
|
function checkBufferLength(parser) {
|
|
var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
|
|
var maxActual = 0
|
|
for (var i = 0, l = buffers.length; i < l; i++) {
|
|
var len = parser[buffers[i]].length
|
|
if (len > maxAllowed) {
|
|
// Text/cdata nodes can get big, and since they're buffered,
|
|
// we can get here under normal conditions.
|
|
// Avoid issues by emitting the text node now,
|
|
// so at least it won't get any bigger.
|
|
switch (buffers[i]) {
|
|
case 'textNode':
|
|
closeText(parser)
|
|
break
|
|
|
|
case 'cdata':
|
|
emitNode(parser, 'oncdata', parser.cdata)
|
|
parser.cdata = ''
|
|
break
|
|
|
|
case 'script':
|
|
emitNode(parser, 'onscript', parser.script)
|
|
parser.script = ''
|
|
break
|
|
|
|
default:
|
|
error(parser, 'Max buffer length exceeded: ' + buffers[i])
|
|
}
|
|
}
|
|
maxActual = Math.max(maxActual, len)
|
|
}
|
|
// schedule the next check for the earliest possible buffer overrun.
|
|
var m = sax.MAX_BUFFER_LENGTH - maxActual
|
|
parser.bufferCheckPosition = m + parser.position
|
|
}
|
|
|
|
function clearBuffers(parser) {
|
|
for (var i = 0, l = buffers.length; i < l; i++) {
|
|
parser[buffers[i]] = ''
|
|
}
|
|
}
|
|
|
|
function flushBuffers(parser) {
|
|
closeText(parser)
|
|
if (parser.cdata !== '') {
|
|
emitNode(parser, 'oncdata', parser.cdata)
|
|
parser.cdata = ''
|
|
}
|
|
if (parser.script !== '') {
|
|
emitNode(parser, 'onscript', parser.script)
|
|
parser.script = ''
|
|
}
|
|
}
|
|
|
|
SAXParser.prototype = {
|
|
end: function () {
|
|
end(this)
|
|
},
|
|
write: write,
|
|
resume: function () {
|
|
this.error = null
|
|
return this
|
|
},
|
|
close: function () {
|
|
return this.write(null)
|
|
},
|
|
flush: function () {
|
|
flushBuffers(this)
|
|
},
|
|
}
|
|
|
|
var Stream
|
|
try {
|
|
Stream = require('stream').Stream
|
|
} catch (ex) {
|
|
Stream = function () {}
|
|
}
|
|
if (!Stream) Stream = function () {}
|
|
|
|
var streamWraps = sax.EVENTS.filter(function (ev) {
|
|
return ev !== 'error' && ev !== 'end'
|
|
})
|
|
|
|
function createStream(strict, opt) {
|
|
return new SAXStream(strict, opt)
|
|
}
|
|
|
|
function SAXStream(strict, opt) {
|
|
if (!(this instanceof SAXStream)) {
|
|
return new SAXStream(strict, opt)
|
|
}
|
|
|
|
Stream.apply(this)
|
|
|
|
this._parser = new SAXParser(strict, opt)
|
|
this.writable = true
|
|
this.readable = true
|
|
|
|
var me = this
|
|
|
|
this._parser.onend = function () {
|
|
me.emit('end')
|
|
}
|
|
|
|
this._parser.onerror = function (er) {
|
|
me.emit('error', er)
|
|
|
|
// if didn't throw, then means error was handled.
|
|
// go ahead and clear error, so we can write again.
|
|
me._parser.error = null
|
|
}
|
|
|
|
this._decoder = null
|
|
|
|
streamWraps.forEach(function (ev) {
|
|
Object.defineProperty(me, 'on' + ev, {
|
|
get: function () {
|
|
return me._parser['on' + ev]
|
|
},
|
|
set: function (h) {
|
|
if (!h) {
|
|
me.removeAllListeners(ev)
|
|
me._parser['on' + ev] = h
|
|
return h
|
|
}
|
|
me.on(ev, h)
|
|
},
|
|
enumerable: true,
|
|
configurable: false,
|
|
})
|
|
})
|
|
}
|
|
|
|
SAXStream.prototype = Object.create(Stream.prototype, {
|
|
constructor: {
|
|
value: SAXStream,
|
|
},
|
|
})
|
|
|
|
SAXStream.prototype.write = function (data) {
|
|
if (
|
|
typeof Buffer === 'function' &&
|
|
typeof Buffer.isBuffer === 'function' &&
|
|
Buffer.isBuffer(data)
|
|
) {
|
|
if (!this._decoder) {
|
|
var SD = require('string_decoder').StringDecoder
|
|
this._decoder = new SD('utf8')
|
|
}
|
|
data = this._decoder.write(data)
|
|
}
|
|
|
|
this._parser.write(data.toString())
|
|
this.emit('data', data)
|
|
return true
|
|
}
|
|
|
|
SAXStream.prototype.end = function (chunk) {
|
|
if (chunk && chunk.length) {
|
|
this.write(chunk)
|
|
}
|
|
this._parser.end()
|
|
return true
|
|
}
|
|
|
|
SAXStream.prototype.on = function (ev, handler) {
|
|
var me = this
|
|
if (!me._parser['on' + ev] && streamWraps.indexOf(ev) !== -1) {
|
|
me._parser['on' + ev] = function () {
|
|
var args =
|
|
arguments.length === 1 ?
|
|
[arguments[0]]
|
|
: Array.apply(null, arguments)
|
|
args.splice(0, 0, ev)
|
|
me.emit.apply(me, args)
|
|
}
|
|
}
|
|
|
|
return Stream.prototype.on.call(me, ev, handler)
|
|
}
|
|
|
|
// this really needs to be replaced with character classes.
|
|
// XML allows all manner of ridiculous numbers and digits.
|
|
var CDATA = '[CDATA['
|
|
var DOCTYPE = 'DOCTYPE'
|
|
var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
|
|
var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
|
|
var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
|
|
|
|
// http://www.w3.org/TR/REC-xml/#NT-NameStartChar
|
|
// This implementation works on strings, a single character at a time
|
|
// as such, it cannot ever support astral-plane characters (10000-EFFFF)
|
|
// without a significant breaking change to either this parser, or the
|
|
// JavaScript language. Implementation of an emoji-capable xml parser
|
|
// is left as an exercise for the reader.
|
|
var nameStart =
|
|
/[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
|
|
|
|
var nameBody =
|
|
/[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
|
|
|
|
var entityStart =
|
|
/[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
|
|
var entityBody =
|
|
/[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/
|
|
|
|
function isWhitespace(c) {
|
|
return c === ' ' || c === '\n' || c === '\r' || c === '\t'
|
|
}
|
|
|
|
function isQuote(c) {
|
|
return c === '"' || c === "'"
|
|
}
|
|
|
|
function isAttribEnd(c) {
|
|
return c === '>' || isWhitespace(c)
|
|
}
|
|
|
|
function isMatch(regex, c) {
|
|
return regex.test(c)
|
|
}
|
|
|
|
function notMatch(regex, c) {
|
|
return !isMatch(regex, c)
|
|
}
|
|
|
|
var S = 0
|
|
sax.STATE = {
|
|
BEGIN: S++, // leading byte order mark or whitespace
|
|
BEGIN_WHITESPACE: S++, // leading whitespace
|
|
TEXT: S++, // general stuff
|
|
TEXT_ENTITY: S++, // & and such.
|
|
OPEN_WAKA: S++, // <
|
|
SGML_DECL: S++, // <!BLARG
|
|
SGML_DECL_QUOTED: S++, // <!BLARG foo "bar
|
|
DOCTYPE: S++, // <!DOCTYPE
|
|
DOCTYPE_QUOTED: S++, // <!DOCTYPE "//blah
|
|
DOCTYPE_DTD: S++, // <!DOCTYPE "//blah" [ ...
|
|
DOCTYPE_DTD_QUOTED: S++, // <!DOCTYPE "//blah" [ "foo
|
|
COMMENT_STARTING: S++, // <!-
|
|
COMMENT: S++, // <!--
|
|
COMMENT_ENDING: S++, // <!-- blah -
|
|
COMMENT_ENDED: S++, // <!-- blah --
|
|
CDATA: S++, // <![CDATA[ something
|
|
CDATA_ENDING: S++, // ]
|
|
CDATA_ENDING_2: S++, // ]]
|
|
PROC_INST: S++, // <?hi
|
|
PROC_INST_BODY: S++, // <?hi there
|
|
PROC_INST_ENDING: S++, // <?hi "there" ?
|
|
OPEN_TAG: S++, // <strong
|
|
OPEN_TAG_SLASH: S++, // <strong /
|
|
ATTRIB: S++, // <a
|
|
ATTRIB_NAME: S++, // <a foo
|
|
ATTRIB_NAME_SAW_WHITE: S++, // <a foo _
|
|
ATTRIB_VALUE: S++, // <a foo=
|
|
ATTRIB_VALUE_QUOTED: S++, // <a foo="bar
|
|
ATTRIB_VALUE_CLOSED: S++, // <a foo="bar"
|
|
ATTRIB_VALUE_UNQUOTED: S++, // <a foo=bar
|
|
ATTRIB_VALUE_ENTITY_Q: S++, // <foo bar="""
|
|
ATTRIB_VALUE_ENTITY_U: S++, // <foo bar="
|
|
CLOSE_TAG: S++, // </a
|
|
CLOSE_TAG_SAW_WHITE: S++, // </a >
|
|
SCRIPT: S++, // <script> ...
|
|
SCRIPT_ENDING: S++, // <script> ... <
|
|
}
|
|
|
|
sax.XML_ENTITIES = {
|
|
amp: '&',
|
|
gt: '>',
|
|
lt: '<',
|
|
quot: '"',
|
|
apos: "'",
|
|
}
|
|
|
|
sax.ENTITIES = {
|
|
amp: '&',
|
|
gt: '>',
|
|
lt: '<',
|
|
quot: '"',
|
|
apos: "'",
|
|
AElig: 198,
|
|
Aacute: 193,
|
|
Acirc: 194,
|
|
Agrave: 192,
|
|
Aring: 197,
|
|
Atilde: 195,
|
|
Auml: 196,
|
|
Ccedil: 199,
|
|
ETH: 208,
|
|
Eacute: 201,
|
|
Ecirc: 202,
|
|
Egrave: 200,
|
|
Euml: 203,
|
|
Iacute: 205,
|
|
Icirc: 206,
|
|
Igrave: 204,
|
|
Iuml: 207,
|
|
Ntilde: 209,
|
|
Oacute: 211,
|
|
Ocirc: 212,
|
|
Ograve: 210,
|
|
Oslash: 216,
|
|
Otilde: 213,
|
|
Ouml: 214,
|
|
THORN: 222,
|
|
Uacute: 218,
|
|
Ucirc: 219,
|
|
Ugrave: 217,
|
|
Uuml: 220,
|
|
Yacute: 221,
|
|
aacute: 225,
|
|
acirc: 226,
|
|
aelig: 230,
|
|
agrave: 224,
|
|
aring: 229,
|
|
atilde: 227,
|
|
auml: 228,
|
|
ccedil: 231,
|
|
eacute: 233,
|
|
ecirc: 234,
|
|
egrave: 232,
|
|
eth: 240,
|
|
euml: 235,
|
|
iacute: 237,
|
|
icirc: 238,
|
|
igrave: 236,
|
|
iuml: 239,
|
|
ntilde: 241,
|
|
oacute: 243,
|
|
ocirc: 244,
|
|
ograve: 242,
|
|
oslash: 248,
|
|
otilde: 245,
|
|
ouml: 246,
|
|
szlig: 223,
|
|
thorn: 254,
|
|
uacute: 250,
|
|
ucirc: 251,
|
|
ugrave: 249,
|
|
uuml: 252,
|
|
yacute: 253,
|
|
yuml: 255,
|
|
copy: 169,
|
|
reg: 174,
|
|
nbsp: 160,
|
|
iexcl: 161,
|
|
cent: 162,
|
|
pound: 163,
|
|
curren: 164,
|
|
yen: 165,
|
|
brvbar: 166,
|
|
sect: 167,
|
|
uml: 168,
|
|
ordf: 170,
|
|
laquo: 171,
|
|
not: 172,
|
|
shy: 173,
|
|
macr: 175,
|
|
deg: 176,
|
|
plusmn: 177,
|
|
sup1: 185,
|
|
sup2: 178,
|
|
sup3: 179,
|
|
acute: 180,
|
|
micro: 181,
|
|
para: 182,
|
|
middot: 183,
|
|
cedil: 184,
|
|
ordm: 186,
|
|
raquo: 187,
|
|
frac14: 188,
|
|
frac12: 189,
|
|
frac34: 190,
|
|
iquest: 191,
|
|
times: 215,
|
|
divide: 247,
|
|
OElig: 338,
|
|
oelig: 339,
|
|
Scaron: 352,
|
|
scaron: 353,
|
|
Yuml: 376,
|
|
fnof: 402,
|
|
circ: 710,
|
|
tilde: 732,
|
|
Alpha: 913,
|
|
Beta: 914,
|
|
Gamma: 915,
|
|
Delta: 916,
|
|
Epsilon: 917,
|
|
Zeta: 918,
|
|
Eta: 919,
|
|
Theta: 920,
|
|
Iota: 921,
|
|
Kappa: 922,
|
|
Lambda: 923,
|
|
Mu: 924,
|
|
Nu: 925,
|
|
Xi: 926,
|
|
Omicron: 927,
|
|
Pi: 928,
|
|
Rho: 929,
|
|
Sigma: 931,
|
|
Tau: 932,
|
|
Upsilon: 933,
|
|
Phi: 934,
|
|
Chi: 935,
|
|
Psi: 936,
|
|
Omega: 937,
|
|
alpha: 945,
|
|
beta: 946,
|
|
gamma: 947,
|
|
delta: 948,
|
|
epsilon: 949,
|
|
zeta: 950,
|
|
eta: 951,
|
|
theta: 952,
|
|
iota: 953,
|
|
kappa: 954,
|
|
lambda: 955,
|
|
mu: 956,
|
|
nu: 957,
|
|
xi: 958,
|
|
omicron: 959,
|
|
pi: 960,
|
|
rho: 961,
|
|
sigmaf: 962,
|
|
sigma: 963,
|
|
tau: 964,
|
|
upsilon: 965,
|
|
phi: 966,
|
|
chi: 967,
|
|
psi: 968,
|
|
omega: 969,
|
|
thetasym: 977,
|
|
upsih: 978,
|
|
piv: 982,
|
|
ensp: 8194,
|
|
emsp: 8195,
|
|
thinsp: 8201,
|
|
zwnj: 8204,
|
|
zwj: 8205,
|
|
lrm: 8206,
|
|
rlm: 8207,
|
|
ndash: 8211,
|
|
mdash: 8212,
|
|
lsquo: 8216,
|
|
rsquo: 8217,
|
|
sbquo: 8218,
|
|
ldquo: 8220,
|
|
rdquo: 8221,
|
|
bdquo: 8222,
|
|
dagger: 8224,
|
|
Dagger: 8225,
|
|
bull: 8226,
|
|
hellip: 8230,
|
|
permil: 8240,
|
|
prime: 8242,
|
|
Prime: 8243,
|
|
lsaquo: 8249,
|
|
rsaquo: 8250,
|
|
oline: 8254,
|
|
frasl: 8260,
|
|
euro: 8364,
|
|
image: 8465,
|
|
weierp: 8472,
|
|
real: 8476,
|
|
trade: 8482,
|
|
alefsym: 8501,
|
|
larr: 8592,
|
|
uarr: 8593,
|
|
rarr: 8594,
|
|
darr: 8595,
|
|
harr: 8596,
|
|
crarr: 8629,
|
|
lArr: 8656,
|
|
uArr: 8657,
|
|
rArr: 8658,
|
|
dArr: 8659,
|
|
hArr: 8660,
|
|
forall: 8704,
|
|
part: 8706,
|
|
exist: 8707,
|
|
empty: 8709,
|
|
nabla: 8711,
|
|
isin: 8712,
|
|
notin: 8713,
|
|
ni: 8715,
|
|
prod: 8719,
|
|
sum: 8721,
|
|
minus: 8722,
|
|
lowast: 8727,
|
|
radic: 8730,
|
|
prop: 8733,
|
|
infin: 8734,
|
|
ang: 8736,
|
|
and: 8743,
|
|
or: 8744,
|
|
cap: 8745,
|
|
cup: 8746,
|
|
int: 8747,
|
|
there4: 8756,
|
|
sim: 8764,
|
|
cong: 8773,
|
|
asymp: 8776,
|
|
ne: 8800,
|
|
equiv: 8801,
|
|
le: 8804,
|
|
ge: 8805,
|
|
sub: 8834,
|
|
sup: 8835,
|
|
nsub: 8836,
|
|
sube: 8838,
|
|
supe: 8839,
|
|
oplus: 8853,
|
|
otimes: 8855,
|
|
perp: 8869,
|
|
sdot: 8901,
|
|
lceil: 8968,
|
|
rceil: 8969,
|
|
lfloor: 8970,
|
|
rfloor: 8971,
|
|
lang: 9001,
|
|
rang: 9002,
|
|
loz: 9674,
|
|
spades: 9824,
|
|
clubs: 9827,
|
|
hearts: 9829,
|
|
diams: 9830,
|
|
}
|
|
|
|
Object.keys(sax.ENTITIES).forEach(function (key) {
|
|
var e = sax.ENTITIES[key]
|
|
var s = typeof e === 'number' ? String.fromCharCode(e) : e
|
|
sax.ENTITIES[key] = s
|
|
})
|
|
|
|
for (var s in sax.STATE) {
|
|
sax.STATE[sax.STATE[s]] = s
|
|
}
|
|
|
|
// shorthand
|
|
S = sax.STATE
|
|
|
|
function emit(parser, event, data) {
|
|
parser[event] && parser[event](data)
|
|
}
|
|
|
|
function emitNode(parser, nodeType, data) {
|
|
if (parser.textNode) closeText(parser)
|
|
emit(parser, nodeType, data)
|
|
}
|
|
|
|
function closeText(parser) {
|
|
parser.textNode = textopts(parser.opt, parser.textNode)
|
|
if (parser.textNode) emit(parser, 'ontext', parser.textNode)
|
|
parser.textNode = ''
|
|
}
|
|
|
|
function textopts(opt, text) {
|
|
if (opt.trim) text = text.trim()
|
|
if (opt.normalize) text = text.replace(/\s+/g, ' ')
|
|
return text
|
|
}
|
|
|
|
function error(parser, er) {
|
|
closeText(parser)
|
|
if (parser.trackPosition) {
|
|
er +=
|
|
'\nLine: ' +
|
|
parser.line +
|
|
'\nColumn: ' +
|
|
parser.column +
|
|
'\nChar: ' +
|
|
parser.c
|
|
}
|
|
er = new Error(er)
|
|
parser.error = er
|
|
emit(parser, 'onerror', er)
|
|
return parser
|
|
}
|
|
|
|
function end(parser) {
|
|
if (parser.sawRoot && !parser.closedRoot)
|
|
strictFail(parser, 'Unclosed root tag')
|
|
if (
|
|
parser.state !== S.BEGIN &&
|
|
parser.state !== S.BEGIN_WHITESPACE &&
|
|
parser.state !== S.TEXT
|
|
) {
|
|
error(parser, 'Unexpected end')
|
|
}
|
|
closeText(parser)
|
|
parser.c = ''
|
|
parser.closed = true
|
|
emit(parser, 'onend')
|
|
SAXParser.call(parser, parser.strict, parser.opt)
|
|
return parser
|
|
}
|
|
|
|
function strictFail(parser, message) {
|
|
if (typeof parser !== 'object' || !(parser instanceof SAXParser)) {
|
|
throw new Error('bad call to strictFail')
|
|
}
|
|
if (parser.strict) {
|
|
error(parser, message)
|
|
}
|
|
}
|
|
|
|
function newTag(parser) {
|
|
if (!parser.strict) parser.tagName = parser.tagName[parser.looseCase]()
|
|
var parent = parser.tags[parser.tags.length - 1] || parser
|
|
var tag = (parser.tag = { name: parser.tagName, attributes: {} })
|
|
|
|
// will be overridden if tag contails an xmlns="foo" or xmlns:foo="bar"
|
|
if (parser.opt.xmlns) {
|
|
tag.ns = parent.ns
|
|
}
|
|
parser.attribList.length = 0
|
|
emitNode(parser, 'onopentagstart', tag)
|
|
}
|
|
|
|
function qname(name, attribute) {
|
|
var i = name.indexOf(':')
|
|
var qualName = i < 0 ? ['', name] : name.split(':')
|
|
var prefix = qualName[0]
|
|
var local = qualName[1]
|
|
|
|
// <x "xmlns"="http://foo">
|
|
if (attribute && name === 'xmlns') {
|
|
prefix = 'xmlns'
|
|
local = ''
|
|
}
|
|
|
|
return { prefix: prefix, local: local }
|
|
}
|
|
|
|
function attrib(parser) {
|
|
if (!parser.strict) {
|
|
parser.attribName = parser.attribName[parser.looseCase]()
|
|
}
|
|
|
|
if (
|
|
parser.attribList.indexOf(parser.attribName) !== -1 ||
|
|
parser.tag.attributes.hasOwnProperty(parser.attribName)
|
|
) {
|
|
parser.attribName = parser.attribValue = ''
|
|
return
|
|
}
|
|
|
|
if (parser.opt.xmlns) {
|
|
var qn = qname(parser.attribName, true)
|
|
var prefix = qn.prefix
|
|
var local = qn.local
|
|
|
|
if (prefix === 'xmlns') {
|
|
// namespace binding attribute. push the binding into scope
|
|
if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
|
|
strictFail(
|
|
parser,
|
|
'xml: prefix must be bound to ' +
|
|
XML_NAMESPACE +
|
|
'\n' +
|
|
'Actual: ' +
|
|
parser.attribValue
|
|
)
|
|
} else if (
|
|
local === 'xmlns' &&
|
|
parser.attribValue !== XMLNS_NAMESPACE
|
|
) {
|
|
strictFail(
|
|
parser,
|
|
'xmlns: prefix must be bound to ' +
|
|
XMLNS_NAMESPACE +
|
|
'\n' +
|
|
'Actual: ' +
|
|
parser.attribValue
|
|
)
|
|
} else {
|
|
var tag = parser.tag
|
|
var parent = parser.tags[parser.tags.length - 1] || parser
|
|
if (tag.ns === parent.ns) {
|
|
tag.ns = Object.create(parent.ns)
|
|
}
|
|
tag.ns[local] = parser.attribValue
|
|
}
|
|
}
|
|
|
|
// defer onattribute events until all attributes have been seen
|
|
// so any new bindings can take effect. preserve attribute order
|
|
// so deferred events can be emitted in document order
|
|
parser.attribList.push([parser.attribName, parser.attribValue])
|
|
} else {
|
|
// in non-xmlns mode, we can emit the event right away
|
|
parser.tag.attributes[parser.attribName] = parser.attribValue
|
|
emitNode(parser, 'onattribute', {
|
|
name: parser.attribName,
|
|
value: parser.attribValue,
|
|
})
|
|
}
|
|
|
|
parser.attribName = parser.attribValue = ''
|
|
}
|
|
|
|
function openTag(parser, selfClosing) {
|
|
if (parser.opt.xmlns) {
|
|
// emit namespace binding events
|
|
var tag = parser.tag
|
|
|
|
// add namespace info to tag
|
|
var qn = qname(parser.tagName)
|
|
tag.prefix = qn.prefix
|
|
tag.local = qn.local
|
|
tag.uri = tag.ns[qn.prefix] || ''
|
|
|
|
if (tag.prefix && !tag.uri) {
|
|
strictFail(
|
|
parser,
|
|
'Unbound namespace prefix: ' + JSON.stringify(parser.tagName)
|
|
)
|
|
tag.uri = qn.prefix
|
|
}
|
|
|
|
var parent = parser.tags[parser.tags.length - 1] || parser
|
|
if (tag.ns && parent.ns !== tag.ns) {
|
|
Object.keys(tag.ns).forEach(function (p) {
|
|
emitNode(parser, 'onopennamespace', {
|
|
prefix: p,
|
|
uri: tag.ns[p],
|
|
})
|
|
})
|
|
}
|
|
|
|
// handle deferred onattribute events
|
|
// Note: do not apply default ns to attributes:
|
|
// http://www.w3.org/TR/REC-xml-names/#defaulting
|
|
for (var i = 0, l = parser.attribList.length; i < l; i++) {
|
|
var nv = parser.attribList[i]
|
|
var name = nv[0]
|
|
var value = nv[1]
|
|
var qualName = qname(name, true)
|
|
var prefix = qualName.prefix
|
|
var local = qualName.local
|
|
var uri = prefix === '' ? '' : tag.ns[prefix] || ''
|
|
var a = {
|
|
name: name,
|
|
value: value,
|
|
prefix: prefix,
|
|
local: local,
|
|
uri: uri,
|
|
}
|
|
|
|
// if there's any attributes with an undefined namespace,
|
|
// then fail on them now.
|
|
if (prefix && prefix !== 'xmlns' && !uri) {
|
|
strictFail(
|
|
parser,
|
|
'Unbound namespace prefix: ' + JSON.stringify(prefix)
|
|
)
|
|
a.uri = prefix
|
|
}
|
|
parser.tag.attributes[name] = a
|
|
emitNode(parser, 'onattribute', a)
|
|
}
|
|
parser.attribList.length = 0
|
|
}
|
|
|
|
parser.tag.isSelfClosing = !!selfClosing
|
|
|
|
// process the tag
|
|
parser.sawRoot = true
|
|
parser.tags.push(parser.tag)
|
|
emitNode(parser, 'onopentag', parser.tag)
|
|
if (!selfClosing) {
|
|
// special case for <script> in non-strict mode.
|
|
if (!parser.noscript && parser.tagName.toLowerCase() === 'script') {
|
|
parser.state = S.SCRIPT
|
|
} else {
|
|
parser.state = S.TEXT
|
|
}
|
|
parser.tag = null
|
|
parser.tagName = ''
|
|
}
|
|
parser.attribName = parser.attribValue = ''
|
|
parser.attribList.length = 0
|
|
}
|
|
|
|
function closeTag(parser) {
|
|
if (!parser.tagName) {
|
|
strictFail(parser, 'Weird empty close tag.')
|
|
parser.textNode += '</>'
|
|
parser.state = S.TEXT
|
|
return
|
|
}
|
|
|
|
if (parser.script) {
|
|
if (parser.tagName !== 'script') {
|
|
parser.script += '</' + parser.tagName + '>'
|
|
parser.tagName = ''
|
|
parser.state = S.SCRIPT
|
|
return
|
|
}
|
|
emitNode(parser, 'onscript', parser.script)
|
|
parser.script = ''
|
|
}
|
|
|
|
// first make sure that the closing tag actually exists.
|
|
// <a><b></c></b></a> will close everything, otherwise.
|
|
var t = parser.tags.length
|
|
var tagName = parser.tagName
|
|
if (!parser.strict) {
|
|
tagName = tagName[parser.looseCase]()
|
|
}
|
|
var closeTo = tagName
|
|
while (t--) {
|
|
var close = parser.tags[t]
|
|
if (close.name !== closeTo) {
|
|
// fail the first time in strict mode
|
|
strictFail(parser, 'Unexpected close tag')
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
// didn't find it. we already failed for strict, so just abort.
|
|
if (t < 0) {
|
|
strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
|
|
parser.textNode += '</' + parser.tagName + '>'
|
|
parser.state = S.TEXT
|
|
return
|
|
}
|
|
parser.tagName = tagName
|
|
var s = parser.tags.length
|
|
while (s-- > t) {
|
|
var tag = (parser.tag = parser.tags.pop())
|
|
parser.tagName = parser.tag.name
|
|
emitNode(parser, 'onclosetag', parser.tagName)
|
|
|
|
var x = {}
|
|
for (var i in tag.ns) {
|
|
x[i] = tag.ns[i]
|
|
}
|
|
|
|
var parent = parser.tags[parser.tags.length - 1] || parser
|
|
if (parser.opt.xmlns && tag.ns !== parent.ns) {
|
|
// remove namespace bindings introduced by tag
|
|
Object.keys(tag.ns).forEach(function (p) {
|
|
var n = tag.ns[p]
|
|
emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
|
|
})
|
|
}
|
|
}
|
|
if (t === 0) parser.closedRoot = true
|
|
parser.tagName = parser.attribValue = parser.attribName = ''
|
|
parser.attribList.length = 0
|
|
parser.state = S.TEXT
|
|
}
|
|
|
|
function parseEntity(parser) {
|
|
var entity = parser.entity
|
|
var entityLC = entity.toLowerCase()
|
|
var num
|
|
var numStr = ''
|
|
|
|
if (parser.ENTITIES[entity]) {
|
|
return parser.ENTITIES[entity]
|
|
}
|
|
if (parser.ENTITIES[entityLC]) {
|
|
return parser.ENTITIES[entityLC]
|
|
}
|
|
entity = entityLC
|
|
if (entity.charAt(0) === '#') {
|
|
if (entity.charAt(1) === 'x') {
|
|
entity = entity.slice(2)
|
|
num = parseInt(entity, 16)
|
|
numStr = num.toString(16)
|
|
} else {
|
|
entity = entity.slice(1)
|
|
num = parseInt(entity, 10)
|
|
numStr = num.toString(10)
|
|
}
|
|
}
|
|
entity = entity.replace(/^0+/, '')
|
|
if (
|
|
isNaN(num) ||
|
|
numStr.toLowerCase() !== entity ||
|
|
num < 0 ||
|
|
num > 0x10ffff
|
|
) {
|
|
strictFail(parser, 'Invalid character entity')
|
|
return '&' + parser.entity + ';'
|
|
}
|
|
|
|
return String.fromCodePoint(num)
|
|
}
|
|
|
|
function beginWhiteSpace(parser, c) {
|
|
if (c === '<') {
|
|
parser.state = S.OPEN_WAKA
|
|
parser.startTagPosition = parser.position
|
|
} else if (!isWhitespace(c)) {
|
|
// have to process this as a text node.
|
|
// weird, but happens.
|
|
strictFail(parser, 'Non-whitespace before first tag.')
|
|
parser.textNode = c
|
|
parser.state = S.TEXT
|
|
}
|
|
}
|
|
|
|
function charAt(chunk, i) {
|
|
var result = ''
|
|
if (i < chunk.length) {
|
|
result = chunk.charAt(i)
|
|
}
|
|
return result
|
|
}
|
|
|
|
function write(chunk) {
|
|
var parser = this
|
|
if (this.error) {
|
|
throw this.error
|
|
}
|
|
if (parser.closed) {
|
|
return error(
|
|
parser,
|
|
'Cannot write after close. Assign an onready handler.'
|
|
)
|
|
}
|
|
if (chunk === null) {
|
|
return end(parser)
|
|
}
|
|
if (typeof chunk === 'object') {
|
|
chunk = chunk.toString()
|
|
}
|
|
var i = 0
|
|
var c = ''
|
|
while (true) {
|
|
c = charAt(chunk, i++)
|
|
parser.c = c
|
|
|
|
if (!c) {
|
|
break
|
|
}
|
|
|
|
if (parser.trackPosition) {
|
|
parser.position++
|
|
if (c === '\n') {
|
|
parser.line++
|
|
parser.column = 0
|
|
} else {
|
|
parser.column++
|
|
}
|
|
}
|
|
|
|
switch (parser.state) {
|
|
case S.BEGIN:
|
|
parser.state = S.BEGIN_WHITESPACE
|
|
if (c === '\uFEFF') {
|
|
continue
|
|
}
|
|
beginWhiteSpace(parser, c)
|
|
continue
|
|
|
|
case S.BEGIN_WHITESPACE:
|
|
beginWhiteSpace(parser, c)
|
|
continue
|
|
|
|
case S.TEXT:
|
|
if (parser.sawRoot && !parser.closedRoot) {
|
|
var starti = i - 1
|
|
while (c && c !== '<' && c !== '&') {
|
|
c = charAt(chunk, i++)
|
|
if (c && parser.trackPosition) {
|
|
parser.position++
|
|
if (c === '\n') {
|
|
parser.line++
|
|
parser.column = 0
|
|
} else {
|
|
parser.column++
|
|
}
|
|
}
|
|
}
|
|
parser.textNode += chunk.substring(starti, i - 1)
|
|
}
|
|
if (
|
|
c === '<' &&
|
|
!(parser.sawRoot && parser.closedRoot && !parser.strict)
|
|
) {
|
|
parser.state = S.OPEN_WAKA
|
|
parser.startTagPosition = parser.position
|
|
} else {
|
|
if (
|
|
!isWhitespace(c) &&
|
|
(!parser.sawRoot || parser.closedRoot)
|
|
) {
|
|
strictFail(parser, 'Text data outside of root node.')
|
|
}
|
|
if (c === '&') {
|
|
parser.state = S.TEXT_ENTITY
|
|
} else {
|
|
parser.textNode += c
|
|
}
|
|
}
|
|
continue
|
|
|
|
case S.SCRIPT:
|
|
// only non-strict
|
|
if (c === '<') {
|
|
parser.state = S.SCRIPT_ENDING
|
|
} else {
|
|
parser.script += c
|
|
}
|
|
continue
|
|
|
|
case S.SCRIPT_ENDING:
|
|
if (c === '/') {
|
|
parser.state = S.CLOSE_TAG
|
|
} else {
|
|
parser.script += '<' + c
|
|
parser.state = S.SCRIPT
|
|
}
|
|
continue
|
|
|
|
case S.OPEN_WAKA:
|
|
// either a /, ?, !, or text is coming next.
|
|
if (c === '!') {
|
|
parser.state = S.SGML_DECL
|
|
parser.sgmlDecl = ''
|
|
} else if (isWhitespace(c)) {
|
|
// wait for it...
|
|
} else if (isMatch(nameStart, c)) {
|
|
parser.state = S.OPEN_TAG
|
|
parser.tagName = c
|
|
} else if (c === '/') {
|
|
parser.state = S.CLOSE_TAG
|
|
parser.tagName = ''
|
|
} else if (c === '?') {
|
|
parser.state = S.PROC_INST
|
|
parser.procInstName = parser.procInstBody = ''
|
|
} else {
|
|
strictFail(parser, 'Unencoded <')
|
|
// if there was some whitespace, then add that in.
|
|
if (parser.startTagPosition + 1 < parser.position) {
|
|
var pad = parser.position - parser.startTagPosition
|
|
c = new Array(pad).join(' ') + c
|
|
}
|
|
parser.textNode += '<' + c
|
|
parser.state = S.TEXT
|
|
}
|
|
continue
|
|
|
|
case S.SGML_DECL:
|
|
if (parser.sgmlDecl + c === '--') {
|
|
parser.state = S.COMMENT
|
|
parser.comment = ''
|
|
parser.sgmlDecl = ''
|
|
continue
|
|
}
|
|
|
|
if (
|
|
parser.doctype &&
|
|
parser.doctype !== true &&
|
|
parser.sgmlDecl
|
|
) {
|
|
parser.state = S.DOCTYPE_DTD
|
|
parser.doctype += '<!' + parser.sgmlDecl + c
|
|
parser.sgmlDecl = ''
|
|
} else if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
|
|
emitNode(parser, 'onopencdata')
|
|
parser.state = S.CDATA
|
|
parser.sgmlDecl = ''
|
|
parser.cdata = ''
|
|
} else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
|
|
parser.state = S.DOCTYPE
|
|
if (parser.doctype || parser.sawRoot) {
|
|
strictFail(
|
|
parser,
|
|
'Inappropriately located doctype declaration'
|
|
)
|
|
}
|
|
parser.doctype = ''
|
|
parser.sgmlDecl = ''
|
|
} else if (c === '>') {
|
|
emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
|
|
parser.sgmlDecl = ''
|
|
parser.state = S.TEXT
|
|
} else if (isQuote(c)) {
|
|
parser.state = S.SGML_DECL_QUOTED
|
|
parser.sgmlDecl += c
|
|
} else {
|
|
parser.sgmlDecl += c
|
|
}
|
|
continue
|
|
|
|
case S.SGML_DECL_QUOTED:
|
|
if (c === parser.q) {
|
|
parser.state = S.SGML_DECL
|
|
parser.q = ''
|
|
}
|
|
parser.sgmlDecl += c
|
|
continue
|
|
|
|
case S.DOCTYPE:
|
|
if (c === '>') {
|
|
parser.state = S.TEXT
|
|
emitNode(parser, 'ondoctype', parser.doctype)
|
|
parser.doctype = true // just remember that we saw it.
|
|
} else {
|
|
parser.doctype += c
|
|
if (c === '[') {
|
|
parser.state = S.DOCTYPE_DTD
|
|
} else if (isQuote(c)) {
|
|
parser.state = S.DOCTYPE_QUOTED
|
|
parser.q = c
|
|
}
|
|
}
|
|
continue
|
|
|
|
case S.DOCTYPE_QUOTED:
|
|
parser.doctype += c
|
|
if (c === parser.q) {
|
|
parser.q = ''
|
|
parser.state = S.DOCTYPE
|
|
}
|
|
continue
|
|
|
|
case S.DOCTYPE_DTD:
|
|
if (c === ']') {
|
|
parser.doctype += c
|
|
parser.state = S.DOCTYPE
|
|
} else if (c === '<') {
|
|
parser.state = S.OPEN_WAKA
|
|
parser.startTagPosition = parser.position
|
|
} else if (isQuote(c)) {
|
|
parser.doctype += c
|
|
parser.state = S.DOCTYPE_DTD_QUOTED
|
|
parser.q = c
|
|
} else {
|
|
parser.doctype += c
|
|
}
|
|
continue
|
|
|
|
case S.DOCTYPE_DTD_QUOTED:
|
|
parser.doctype += c
|
|
if (c === parser.q) {
|
|
parser.state = S.DOCTYPE_DTD
|
|
parser.q = ''
|
|
}
|
|
continue
|
|
|
|
case S.COMMENT:
|
|
if (c === '-') {
|
|
parser.state = S.COMMENT_ENDING
|
|
} else {
|
|
parser.comment += c
|
|
}
|
|
continue
|
|
|
|
case S.COMMENT_ENDING:
|
|
if (c === '-') {
|
|
parser.state = S.COMMENT_ENDED
|
|
parser.comment = textopts(parser.opt, parser.comment)
|
|
if (parser.comment) {
|
|
emitNode(parser, 'oncomment', parser.comment)
|
|
}
|
|
parser.comment = ''
|
|
} else {
|
|
parser.comment += '-' + c
|
|
parser.state = S.COMMENT
|
|
}
|
|
continue
|
|
|
|
case S.COMMENT_ENDED:
|
|
if (c !== '>') {
|
|
strictFail(parser, 'Malformed comment')
|
|
// allow <!-- blah -- bloo --> in non-strict mode,
|
|
// which is a comment of " blah -- bloo "
|
|
parser.comment += '--' + c
|
|
parser.state = S.COMMENT
|
|
} else if (parser.doctype && parser.doctype !== true) {
|
|
parser.state = S.DOCTYPE_DTD
|
|
} else {
|
|
parser.state = S.TEXT
|
|
}
|
|
continue
|
|
|
|
case S.CDATA:
|
|
var starti = i - 1
|
|
while (c && c !== ']') {
|
|
c = charAt(chunk, i++)
|
|
if (c && parser.trackPosition) {
|
|
parser.position++
|
|
if (c === '\n') {
|
|
parser.line++
|
|
parser.column = 0
|
|
} else {
|
|
parser.column++
|
|
}
|
|
}
|
|
}
|
|
parser.cdata += chunk.substring(starti, i - 1)
|
|
if (c === ']') {
|
|
parser.state = S.CDATA_ENDING
|
|
}
|
|
continue
|
|
|
|
case S.CDATA_ENDING:
|
|
if (c === ']') {
|
|
parser.state = S.CDATA_ENDING_2
|
|
} else {
|
|
parser.cdata += ']' + c
|
|
parser.state = S.CDATA
|
|
}
|
|
continue
|
|
|
|
case S.CDATA_ENDING_2:
|
|
if (c === '>') {
|
|
if (parser.cdata) {
|
|
emitNode(parser, 'oncdata', parser.cdata)
|
|
}
|
|
emitNode(parser, 'onclosecdata')
|
|
parser.cdata = ''
|
|
parser.state = S.TEXT
|
|
} else if (c === ']') {
|
|
parser.cdata += ']'
|
|
} else {
|
|
parser.cdata += ']]' + c
|
|
parser.state = S.CDATA
|
|
}
|
|
continue
|
|
|
|
case S.PROC_INST:
|
|
if (c === '?') {
|
|
parser.state = S.PROC_INST_ENDING
|
|
} else if (isWhitespace(c)) {
|
|
parser.state = S.PROC_INST_BODY
|
|
} else {
|
|
parser.procInstName += c
|
|
}
|
|
continue
|
|
|
|
case S.PROC_INST_BODY:
|
|
if (!parser.procInstBody && isWhitespace(c)) {
|
|
continue
|
|
} else if (c === '?') {
|
|
parser.state = S.PROC_INST_ENDING
|
|
} else {
|
|
parser.procInstBody += c
|
|
}
|
|
continue
|
|
|
|
case S.PROC_INST_ENDING:
|
|
if (c === '>') {
|
|
emitNode(parser, 'onprocessinginstruction', {
|
|
name: parser.procInstName,
|
|
body: parser.procInstBody,
|
|
})
|
|
parser.procInstName = parser.procInstBody = ''
|
|
parser.state = S.TEXT
|
|
} else {
|
|
parser.procInstBody += '?' + c
|
|
parser.state = S.PROC_INST_BODY
|
|
}
|
|
continue
|
|
|
|
case S.OPEN_TAG:
|
|
if (isMatch(nameBody, c)) {
|
|
parser.tagName += c
|
|
} else {
|
|
newTag(parser)
|
|
if (c === '>') {
|
|
openTag(parser)
|
|
} else if (c === '/') {
|
|
parser.state = S.OPEN_TAG_SLASH
|
|
} else {
|
|
if (!isWhitespace(c)) {
|
|
strictFail(parser, 'Invalid character in tag name')
|
|
}
|
|
parser.state = S.ATTRIB
|
|
}
|
|
}
|
|
continue
|
|
|
|
case S.OPEN_TAG_SLASH:
|
|
if (c === '>') {
|
|
openTag(parser, true)
|
|
closeTag(parser)
|
|
} else {
|
|
strictFail(
|
|
parser,
|
|
'Forward-slash in opening tag not followed by >'
|
|
)
|
|
parser.state = S.ATTRIB
|
|
}
|
|
continue
|
|
|
|
case S.ATTRIB:
|
|
// haven't read the attribute name yet.
|
|
if (isWhitespace(c)) {
|
|
continue
|
|
} else if (c === '>') {
|
|
openTag(parser)
|
|
} else if (c === '/') {
|
|
parser.state = S.OPEN_TAG_SLASH
|
|
} else if (isMatch(nameStart, c)) {
|
|
parser.attribName = c
|
|
parser.attribValue = ''
|
|
parser.state = S.ATTRIB_NAME
|
|
} else {
|
|
strictFail(parser, 'Invalid attribute name')
|
|
}
|
|
continue
|
|
|
|
case S.ATTRIB_NAME:
|
|
if (c === '=') {
|
|
parser.state = S.ATTRIB_VALUE
|
|
} else if (c === '>') {
|
|
strictFail(parser, 'Attribute without value')
|
|
parser.attribValue = parser.attribName
|
|
attrib(parser)
|
|
openTag(parser)
|
|
} else if (isWhitespace(c)) {
|
|
parser.state = S.ATTRIB_NAME_SAW_WHITE
|
|
} else if (isMatch(nameBody, c)) {
|
|
parser.attribName += c
|
|
} else {
|
|
strictFail(parser, 'Invalid attribute name')
|
|
}
|
|
continue
|
|
|
|
case S.ATTRIB_NAME_SAW_WHITE:
|
|
if (c === '=') {
|
|
parser.state = S.ATTRIB_VALUE
|
|
} else if (isWhitespace(c)) {
|
|
continue
|
|
} else {
|
|
strictFail(parser, 'Attribute without value')
|
|
parser.tag.attributes[parser.attribName] = ''
|
|
parser.attribValue = ''
|
|
emitNode(parser, 'onattribute', {
|
|
name: parser.attribName,
|
|
value: '',
|
|
})
|
|
parser.attribName = ''
|
|
if (c === '>') {
|
|
openTag(parser)
|
|
} else if (isMatch(nameStart, c)) {
|
|
parser.attribName = c
|
|
parser.state = S.ATTRIB_NAME
|
|
} else {
|
|
strictFail(parser, 'Invalid attribute name')
|
|
parser.state = S.ATTRIB
|
|
}
|
|
}
|
|
continue
|
|
|
|
case S.ATTRIB_VALUE:
|
|
if (isWhitespace(c)) {
|
|
continue
|
|
} else if (isQuote(c)) {
|
|
parser.q = c
|
|
parser.state = S.ATTRIB_VALUE_QUOTED
|
|
} else {
|
|
if (!parser.opt.unquotedAttributeValues) {
|
|
error(parser, 'Unquoted attribute value')
|
|
}
|
|
parser.state = S.ATTRIB_VALUE_UNQUOTED
|
|
parser.attribValue = c
|
|
}
|
|
continue
|
|
|
|
case S.ATTRIB_VALUE_QUOTED:
|
|
if (c !== parser.q) {
|
|
if (c === '&') {
|
|
parser.state = S.ATTRIB_VALUE_ENTITY_Q
|
|
} else {
|
|
parser.attribValue += c
|
|
}
|
|
continue
|
|
}
|
|
attrib(parser)
|
|
parser.q = ''
|
|
parser.state = S.ATTRIB_VALUE_CLOSED
|
|
continue
|
|
|
|
case S.ATTRIB_VALUE_CLOSED:
|
|
if (isWhitespace(c)) {
|
|
parser.state = S.ATTRIB
|
|
} else if (c === '>') {
|
|
openTag(parser)
|
|
} else if (c === '/') {
|
|
parser.state = S.OPEN_TAG_SLASH
|
|
} else if (isMatch(nameStart, c)) {
|
|
strictFail(parser, 'No whitespace between attributes')
|
|
parser.attribName = c
|
|
parser.attribValue = ''
|
|
parser.state = S.ATTRIB_NAME
|
|
} else {
|
|
strictFail(parser, 'Invalid attribute name')
|
|
}
|
|
continue
|
|
|
|
case S.ATTRIB_VALUE_UNQUOTED:
|
|
if (!isAttribEnd(c)) {
|
|
if (c === '&') {
|
|
parser.state = S.ATTRIB_VALUE_ENTITY_U
|
|
} else {
|
|
parser.attribValue += c
|
|
}
|
|
continue
|
|
}
|
|
attrib(parser)
|
|
if (c === '>') {
|
|
openTag(parser)
|
|
} else {
|
|
parser.state = S.ATTRIB
|
|
}
|
|
continue
|
|
|
|
case S.CLOSE_TAG:
|
|
if (!parser.tagName) {
|
|
if (isWhitespace(c)) {
|
|
continue
|
|
} else if (notMatch(nameStart, c)) {
|
|
if (parser.script) {
|
|
parser.script += '</' + c
|
|
parser.state = S.SCRIPT
|
|
} else {
|
|
strictFail(parser, 'Invalid tagname in closing tag.')
|
|
}
|
|
} else {
|
|
parser.tagName = c
|
|
}
|
|
} else if (c === '>') {
|
|
closeTag(parser)
|
|
} else if (isMatch(nameBody, c)) {
|
|
parser.tagName += c
|
|
} else if (parser.script) {
|
|
parser.script += '</' + parser.tagName
|
|
parser.tagName = ''
|
|
parser.state = S.SCRIPT
|
|
} else {
|
|
if (!isWhitespace(c)) {
|
|
strictFail(parser, 'Invalid tagname in closing tag')
|
|
}
|
|
parser.state = S.CLOSE_TAG_SAW_WHITE
|
|
}
|
|
continue
|
|
|
|
case S.CLOSE_TAG_SAW_WHITE:
|
|
if (isWhitespace(c)) {
|
|
continue
|
|
}
|
|
if (c === '>') {
|
|
closeTag(parser)
|
|
} else {
|
|
strictFail(parser, 'Invalid characters in closing tag')
|
|
}
|
|
continue
|
|
|
|
case S.TEXT_ENTITY:
|
|
case S.ATTRIB_VALUE_ENTITY_Q:
|
|
case S.ATTRIB_VALUE_ENTITY_U:
|
|
var returnState
|
|
var buffer
|
|
switch (parser.state) {
|
|
case S.TEXT_ENTITY:
|
|
returnState = S.TEXT
|
|
buffer = 'textNode'
|
|
break
|
|
|
|
case S.ATTRIB_VALUE_ENTITY_Q:
|
|
returnState = S.ATTRIB_VALUE_QUOTED
|
|
buffer = 'attribValue'
|
|
break
|
|
|
|
case S.ATTRIB_VALUE_ENTITY_U:
|
|
returnState = S.ATTRIB_VALUE_UNQUOTED
|
|
buffer = 'attribValue'
|
|
break
|
|
}
|
|
|
|
if (c === ';') {
|
|
var parsedEntity = parseEntity(parser)
|
|
if (
|
|
parser.opt.unparsedEntities &&
|
|
!Object.values(sax.XML_ENTITIES).includes(parsedEntity)
|
|
) {
|
|
parser.entity = ''
|
|
parser.state = returnState
|
|
parser.write(parsedEntity)
|
|
} else {
|
|
parser[buffer] += parsedEntity
|
|
parser.entity = ''
|
|
parser.state = returnState
|
|
}
|
|
} else if (
|
|
isMatch(parser.entity.length ? entityBody : entityStart, c)
|
|
) {
|
|
parser.entity += c
|
|
} else {
|
|
strictFail(parser, 'Invalid character in entity name')
|
|
parser[buffer] += '&' + parser.entity + c
|
|
parser.entity = ''
|
|
parser.state = returnState
|
|
}
|
|
|
|
continue
|
|
|
|
default: /* istanbul ignore next */ {
|
|
throw new Error(parser, 'Unknown state: ' + parser.state)
|
|
}
|
|
}
|
|
} // while
|
|
|
|
if (parser.position >= parser.bufferCheckPosition) {
|
|
checkBufferLength(parser)
|
|
}
|
|
return parser
|
|
}
|
|
|
|
/*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
|
|
/* istanbul ignore next */
|
|
if (!String.fromCodePoint) {
|
|
;(function () {
|
|
var stringFromCharCode = String.fromCharCode
|
|
var floor = Math.floor
|
|
var fromCodePoint = function () {
|
|
var MAX_SIZE = 0x4000
|
|
var codeUnits = []
|
|
var highSurrogate
|
|
var lowSurrogate
|
|
var index = -1
|
|
var length = arguments.length
|
|
if (!length) {
|
|
return ''
|
|
}
|
|
var result = ''
|
|
while (++index < length) {
|
|
var codePoint = Number(arguments[index])
|
|
if (
|
|
!isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
|
|
codePoint < 0 || // not a valid Unicode code point
|
|
codePoint > 0x10ffff || // not a valid Unicode code point
|
|
floor(codePoint) !== codePoint // not an integer
|
|
) {
|
|
throw RangeError('Invalid code point: ' + codePoint)
|
|
}
|
|
if (codePoint <= 0xffff) {
|
|
// BMP code point
|
|
codeUnits.push(codePoint)
|
|
} else {
|
|
// Astral code point; split in surrogate halves
|
|
// http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
|
|
codePoint -= 0x10000
|
|
highSurrogate = (codePoint >> 10) + 0xd800
|
|
lowSurrogate = (codePoint % 0x400) + 0xdc00
|
|
codeUnits.push(highSurrogate, lowSurrogate)
|
|
}
|
|
if (index + 1 === length || codeUnits.length > MAX_SIZE) {
|
|
result += stringFromCharCode.apply(null, codeUnits)
|
|
codeUnits.length = 0
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
/* istanbul ignore next */
|
|
if (Object.defineProperty) {
|
|
Object.defineProperty(String, 'fromCodePoint', {
|
|
value: fromCodePoint,
|
|
configurable: true,
|
|
writable: true,
|
|
})
|
|
} else {
|
|
String.fromCodePoint = fromCodePoint
|
|
}
|
|
})()
|
|
}
|
|
})(typeof exports === 'undefined' ? (this.sax = {}) : exports)
|