sax.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650
  1. var NAMESPACE = require("./conventions").NAMESPACE;
  2. //[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
  3. //[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
  4. //[5] Name ::= NameStartChar (NameChar)*
  5. var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]///\u10000-\uEFFFF
  6. var nameChar = new RegExp("[\\-\\.0-9"+nameStartChar.source.slice(1,-1)+"\\u00B7\\u0300-\\u036F\\u203F-\\u2040]");
  7. var tagNamePattern = new RegExp('^'+nameStartChar.source+nameChar.source+'*(?:\:'+nameStartChar.source+nameChar.source+'*)?$');
  8. //var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/
  9. //var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')
  10. //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
  11. //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
  12. var S_TAG = 0;//tag name offerring
  13. var S_ATTR = 1;//attr name offerring
  14. var S_ATTR_SPACE=2;//attr name end and space offer
  15. var S_EQ = 3;//=space?
  16. var S_ATTR_NOQUOT_VALUE = 4;//attr value(no quot value only)
  17. var S_ATTR_END = 5;//attr value end and no space(quot end)
  18. var S_TAG_SPACE = 6;//(attr value end || tag end ) && (space offer)
  19. var S_TAG_CLOSE = 7;//closed el<el />
  20. /**
  21. * Creates an error that will not be caught by XMLReader aka the SAX parser.
  22. *
  23. * @param {string} message
  24. * @param {any?} locator Optional, can provide details about the location in the source
  25. * @constructor
  26. */
  27. function ParseError(message, locator) {
  28. this.message = message
  29. this.locator = locator
  30. if(Error.captureStackTrace) Error.captureStackTrace(this, ParseError);
  31. }
  32. ParseError.prototype = new Error();
  33. ParseError.prototype.name = ParseError.name
  34. function XMLReader(){
  35. }
  36. XMLReader.prototype = {
  37. parse:function(source,defaultNSMap,entityMap){
  38. var domBuilder = this.domBuilder;
  39. domBuilder.startDocument();
  40. _copy(defaultNSMap ,defaultNSMap = {})
  41. parse(source,defaultNSMap,entityMap,
  42. domBuilder,this.errorHandler);
  43. domBuilder.endDocument();
  44. }
  45. }
/**
 * Scans `source` from beginning to end and reports what it finds to `domBuilder`.
 *
 * The main loop looks for the next '<', flushes the text in front of it through
 * `domBuilder.characters`, and then dispatches on the character that follows the '<':
 * '/' (end tag), '?' (processing instruction), '!' (comment, CDATA section or doctype)
 * or anything else (start tag).
 *
 * @param {string} source the markup to scan
 * @param {Object} defaultNSMapCopy prefix -> namespace URI map; becomes the namespace
 *        context of the bottom entry of the parse stack
 * @param {Object} entityMap entity name -> replacement text, consulted by `entityReplacer`
 * @param {Object} domBuilder receiver of the callbacks; its `locator`, when present,
 *        is updated in place while scanning
 * @param {Object} errorHandler its `warning`, `error` and `fatalError` methods are
 *        called to report problems
 * @throws {ParseError} a ParseError raised while a tag is handled is rethrown; every
 *         other exception is reported through `errorHandler.error` instead
 */
function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){
	// Converts a code point to a string.
	function fixedFromCharCode(code) {
		// String.fromCharCode only handles 16-bit code units, so a code point
		// above 0xffff has to be split into a surrogate pair first
		if (code > 0xffff) {
			code -= 0x10000;
			var surrogate1 = 0xd800 + (code >> 10)
				, surrogate2 = 0xdc00 + (code & 0x3ff);
			return String.fromCharCode(surrogate1, surrogate2);
		} else {
			return String.fromCharCode(code);
		}
	}
	// Replacement callback for a complete reference `a` such as "&amp;" or "&#x41;".
	function entityReplacer(a){
		// strip the leading '&' and the trailing ';'
		var k = a.slice(1,-1);
		if (Object.hasOwnProperty.call(entityMap, k)) {
			return entityMap[k];
		}else if(k.charAt(0) === '#'){
			// numeric character reference: turning a leading 'x' into '0x' makes parseInt read hex
			return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x')))
		}else{
			// unknown entity: report it and leave the reference in the text unchanged
			errorHandler.error('entity not found:'+a);
			return a;
		}
	}
	// Emits source[start, end) as character data with entity references replaced and
	// then moves `start` to `end`. Does nothing when `end` is not beyond `start`.
	function appendText(end){//has some bugs
		if(end>start){
			var xt = source.substring(start,end).replace(/&#?\w+;/g,entityReplacer);
			locator&&position(start);
			// note: the length passed is that of the raw slice, not of the replaced text `xt`
			domBuilder.characters(xt,0,end-start);
			start = end
		}
	}
	// Moves the line bookkeeping forward until the line containing offset `p` has been
	// matched, incrementing locator.lineNumber once per matched line, then stores the
	// 1-based column of `p`. `m` is only a scratch variable for the regex match.
	function position(p,m){
		// linePattern is global, so each exec continues after the previous line
		while(p>=lineEnd && (m = linePattern.exec(source))){
			lineStart = m.index;
			lineEnd = lineStart + m[0].length;
			locator.lineNumber++;
			//console.log('line++:',locator,startPos,endPos)
		}
		locator.columnNumber = p-lineStart+1;
	}
	// offsets of the start and end of the line matched last by position()
	var lineStart = 0;
	var lineEnd = 0;
	// one line including its line break, or the final line without one
	var linePattern = /.*(?:\r\n?|\n)|.*$/g
	var locator = domBuilder.locator;
	// stack of open elements; the bottom entry only carries the initial namespace map
	var parseStack = [{currentNSMap:defaultNSMapCopy}]
	// cache handed to fixSelfClosed, keyed by tag name
	var closeMap = {};
	// offset of the first character that has not been consumed yet
	var start = 0;
	while(true){
		try{
			var tagStart = source.indexOf('<',start);
			if(tagStart<0){
				// no further markup: unless the rest is only whitespace it is added as a
				// text node directly to the document, without going through domBuilder.characters
				if(!source.substr(start).match(/^\s*$/)){
					var doc = domBuilder.doc;
					var text = doc.createTextNode(source.substr(start));
					doc.appendChild(text);
					domBuilder.currentElement = text;
				}
				return;
			}
			if(tagStart>start){
				appendText(tagStart);
			}
			switch(source.charAt(tagStart+1)){
			case '/':
				// end tag: the name is everything up to the next '>' minus trailing whitespace
				var end = source.indexOf('>',tagStart+3);
				var tagName = source.substring(tagStart + 2, end).replace(/[ \t\n\r]+$/g, '');
				var config = parseStack.pop();
				if(end<0){
					// no '>' at all: cut the name at the first whitespace or '<'

					tagName = source.substring(tagStart+2).replace(/[\s<].*/,'');
					errorHandler.error("end tag name: "+tagName+' is not complete:'+config.tagName);
					end = tagStart+1+tagName.length;
				}else if(tagName.match(/\s</)){
					// the "name" runs into another tag, so the '>' found belongs to that tag
					tagName = tagName.replace(/[\s<].*/,'');
					errorHandler.error("end tag name: "+tagName+' maybe not complete');
					end = tagStart+1+tagName.length;
				}
				var localNSMap = config.localNSMap;
				var endMatch = config.tagName == tagName;
				var endIgnoreCaseMach = endMatch || config.tagName&&config.tagName.toLowerCase() == tagName.toLowerCase()
				if(endIgnoreCaseMach){
					domBuilder.endElement(config.uri,config.localName,tagName);
					if(localNSMap){
						for(var prefix in localNSMap){
							domBuilder.endPrefixMapping(prefix) ;
						}
					}
					if(!endMatch){
						// the names differ only in case: the element has been closed above,
						// but the mismatch is still reported as fatal
						errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName ); // No known test case
					}
				}else{
					// the end tag does not belong to the open element: keep that element open
					parseStack.push(config)
				}

				end++;
				break;
				// end element
			case '?':// <?...?>
				locator&&position(tagStart);
				end = parseInstruction(source,tagStart,domBuilder);
				break;
			case '!':// <!doctype,<![CDATA,<!--
				locator&&position(tagStart);
				end = parseDCC(source,tagStart,domBuilder,errorHandler);
				break;
			default:
				// start tag
				locator&&position(tagStart);
				var el = new ElementAttributes();
				var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
				//elStartEnd
				var end = parseElementStartPart(source,tagStart,el,currentNSMap,entityReplacer,errorHandler);
				var len = el.length;


				if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){
					// no end tag for this name follows, so the element is treated as self-closed
					el.closed = true;
					// NOTE(review): the warning is skipped whenever entityMap defines `nbsp`;
					// presumably that is used to recognise HTML input - confirm with the caller
					if(!entityMap.nbsp){
						errorHandler.warning('unclosed xml attribute');
					}
				}
				if(locator && len){
					// snapshot of the element position, taken before the locator is moved
					// along the attributes
					var locator2 = copyLocator(locator,{});
					//try{//attribute position fixed
					for(var i = 0;i<len;i++){
						var a = el[i];
						position(a.offset);
						a.locator = copyLocator(locator,{});
					}
					// the builder sees the element position while the element is appended;
					// afterwards the live locator is put back
					domBuilder.locator = locator2
					if(appendElement(el,domBuilder,currentNSMap)){
						parseStack.push(el)
					}
					domBuilder.locator = locator;
				}else{
					if(appendElement(el,domBuilder,currentNSMap)){
						parseStack.push(el)
					}
				}

				if (NAMESPACE.isHTML(el.uri) && !el.closed) {
					end = parseHtmlSpecialContent(source,end,el.tagName,entityReplacer,domBuilder)
				} else {
					// step over the '>' that ended the start tag
					end++;
				}
			}
		}catch(e){
			if (e instanceof ParseError) {
				throw e;
			}
			errorHandler.error('element parse error: '+e)
			// forces the fallback branch below
			end = -1;
		}
		if(end>start){
			start = end;
		}else{
			//TODO: the sax parser may move backwards here, so positions may be reported wrongly
			// emit at least the '<' as plain text so that the loop always makes progress
			appendText(Math.max(tagStart,start)+1);
		}
	}
}
  202. function copyLocator(f,t){
  203. t.lineNumber = f.lineNumber;
  204. t.columnNumber = f.columnNumber;
  205. return t;
  206. }
/**
 * Parses one start tag: sets the tag name on `el` and adds every attribute found to it.
 * The tag is read character by character with a small state machine whose states are
 * the S_* constants.
 *
 * @see #appendElement(source,elStartEnd,el,selfClosed,entityReplacer,domBuilder,parseStack);
 * @param {string} source the markup being parsed
 * @param {number} start offset of the '<' that opens the tag
 * @param {Object} el receives the result through `setTagName`, `addValue` and `closed`;
 *        its `attributeNames` map is used to detect duplicates
 * @param {Object} currentNSMap namespace map in scope; only its default ('') entry is
 *        read, to decide whether value-less HTML boolean attributes deserve a warning
 * @param {Function} entityReplacer replacement callback applied to entity references
 *        in attribute values
 * @param {Object} errorHandler its `warning`, `error` and `fatalError` methods are
 *        called to report problems
 * @return end of the elementStartPart(end of elementEndPart for selfClosed el), i.e. the
 *         offset of the '>' that ends the tag, or the offset at which the input ran out
 * @throws {Error} for markup the state machine cannot accept (misplaced '=', quote or
 *         '/', missing closing quote, missing value); `setTagName` and `addValue`
 *         may throw as well
 */
function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,errorHandler){

	/**
	 * Adds one attribute to `el`, reporting a fatal error first when the name was
	 * already used on this element (the attribute is still added afterwards).
	 *
	 * @param {string} qname
	 * @param {string} value
	 * @param {number} startIndex
	 */
	function addAttribute(qname, value, startIndex) {
		if (el.attributeNames.hasOwnProperty(qname)) {
			errorHandler.fatalError('Attribute ' + qname + ' redefined')
		}
		el.addValue(
			qname,
			// @see https://www.w3.org/TR/xml/#AVNormalize
			// since the xmldom sax parser does not "interpret" DTD the following is not implemented:
			// - recursive replacement of (DTD) entity references
			// - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA
			value.replace(/[\t\n\r]/g, ' ').replace(/&#?\w+;/g, entityReplacer),
			startIndex
		)
	}
	var attrName;
	var value;
	// `start` is moved past the '<'; from here on it marks where the current token
	// (tag name, attribute name or value) begins, while `p` is the scan position
	var p = ++start;
	var s = S_TAG;//status
	while(true){
		var c = source.charAt(p);
		switch(c){
		case '=':
			if(s === S_ATTR){//attrName
				attrName = source.slice(start,p);
				s = S_EQ;
			}else if(s === S_ATTR_SPACE){
				// the name was already captured when the whitespace was seen
				s = S_EQ;
			}else{
				//fatalError: equal must after attrName or space after attrName
				throw new Error('attribute equal must after attrName'); // No known test case
			}
			break;
		case '\'':
		case '"':
			if(s === S_EQ || s === S_ATTR //|| s == S_ATTR_SPACE
				){//equal
				if(s === S_ATTR){
					// a quote directly after the name: tolerated as a missing '='
					errorHandler.warning('attribute value must after "="')
					attrName = source.slice(start,p)
				}
				start = p+1;
				// jump straight to the matching closing quote
				p = source.indexOf(c,start)
				if(p>0){
					value = source.slice(start, p);
					addAttribute(attrName, value, start-1);
					s = S_ATTR_END;
				}else{
					//fatalError: no end quot match
					throw new Error('attribute value no end \''+c+'\' match');
				}
			}else if(s == S_ATTR_NOQUOT_VALUE){
				// a quote that ends a value which had no opening quote
				value = source.slice(start, p);
				addAttribute(attrName, value, start);
				errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!');
				start = p+1;
				s = S_ATTR_END
			}else{
				//fatalError: no equal before
				throw new Error('attribute value must after "="'); // No known test case
			}
			break;
		case '/':
			switch(s){
			case S_TAG:
				el.setTagName(source.slice(start,p));
				// falls through: a '/' right after the tag name also closes the element
			case S_ATTR_END:
			case S_TAG_SPACE:
			case S_TAG_CLOSE:
				s =S_TAG_CLOSE;
				el.closed = true;
				// falls through to the break below
			case S_ATTR_NOQUOT_VALUE:
			case S_ATTR:
			case S_ATTR_SPACE:
				// in these states the '/' does not change the state
				break;
			//case S_EQ:
			default:
				throw new Error("attribute invalid close char('/')") // No known test case
			}
			break;
		case ''://end document
			// charAt returns '' once p is beyond the last character
			errorHandler.error('unexpected end of input');
			if(s == S_TAG){
				el.setTagName(source.slice(start,p));
			}
			return p;
		case '>':
			switch(s){
			case S_TAG:
				el.setTagName(source.slice(start,p));
				// falls through
			case S_ATTR_END:
			case S_TAG_SPACE:
			case S_TAG_CLOSE:
				break;//normal
			case S_ATTR_NOQUOT_VALUE://Compatible state
			case S_ATTR:
				// the tag ends in the middle of a name or an unquoted value
				value = source.slice(start,p);
				if(value.slice(-1) === '/'){
					// a trailing '/' belongs to the tag ("/>"), not to the token
					el.closed = true;
					value = value.slice(0,-1)
				}
				// falls through to add the pending attribute
			case S_ATTR_SPACE:
				if(s === S_ATTR_SPACE){
					value = attrName;
				}
				if(s == S_ATTR_NOQUOT_VALUE){
					errorHandler.warning('attribute "'+value+'" missed quot(")!');
					addAttribute(attrName, value, start)
				}else{
					// attribute without a value: its name doubles as its value; the warning
					// is skipped for disabled/checked/selected when the default namespace is HTML
					if(!NAMESPACE.isHTML(currentNSMap['']) || !value.match(/^(?:disabled|checked|selected)$/i)){
						errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
					}
					addAttribute(value, value, start)
				}
				break;
			case S_EQ:
				throw new Error('attribute value missed!!');
			}
			// console.log(tagName,tagNamePattern,tagNamePattern.test(tagName))
			return p;
		/*xml space '\x20' | #x9 | #xD | #xA; */
		case '\u0080':
			// treated like a space; falls through into the whitespace handling below
			c = ' ';
		default:
			if(c<= ' '){//space
				switch(s){
				case S_TAG:
					el.setTagName(source.slice(start,p));//tagName
					s = S_TAG_SPACE;
					break;
				case S_ATTR:
					attrName = source.slice(start,p)
					s = S_ATTR_SPACE;
					break;
				case S_ATTR_NOQUOT_VALUE:
					// whitespace ends an unquoted value
					var value = source.slice(start, p);
					errorHandler.warning('attribute "'+value+'" missed quot(")!!');
					addAttribute(attrName, value, start)
					// falls through
				case S_ATTR_END:
					s = S_TAG_SPACE;
					break;
				//case S_TAG_SPACE:
				//case S_EQ:
				//case S_ATTR_SPACE:
				//	void();break;
				//case S_TAG_CLOSE:
					//ignore warning
				}
			}else{//not space
//S_TAG,	S_ATTR,	S_EQ,	S_ATTR_NOQUOT_VALUE
//S_ATTR_SPACE,	S_ATTR_END,	S_TAG_SPACE, S_TAG_CLOSE
				switch(s){
				//case S_TAG:void();break;
				//case S_ATTR:void();break;
				//case S_ATTR_NOQUOT_VALUE:void();break;
				case S_ATTR_SPACE:
					// a new name starts although the previous attribute never got a value:
					// that attribute is added with its name as its value
					var tagName =  el.tagName;
					if (!NAMESPACE.isHTML(currentNSMap['']) || !attrName.match(/^(?:disabled|checked|selected)$/i)) {
						errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!')
					}
					addAttribute(attrName, attrName, start);
					start = p;
					s = S_ATTR;
					break;
				case S_ATTR_END:
					// a name directly after a closing quote, without whitespace in between
					errorHandler.warning('attribute space is required"'+attrName+'"!!')
					// falls through
				case S_TAG_SPACE:
					s = S_ATTR;
					start = p;
					break;
				case S_EQ:
					// first character of a value that has no opening quote
					s = S_ATTR_NOQUOT_VALUE;
					start = p;
					break;
				case S_TAG_CLOSE:
					throw new Error("elements closed character '/' and '>' must be connected to");
				}
			}
		}//end outer switch
		//console.log('p++',p)
		p++;
	}
}
  399. /**
  400. * @return true if has new namespace define
  401. */
  402. function appendElement(el,domBuilder,currentNSMap){
  403. var tagName = el.tagName;
  404. var localNSMap = null;
  405. //var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
  406. var i = el.length;
  407. while(i--){
  408. var a = el[i];
  409. var qName = a.qName;
  410. var value = a.value;
  411. var nsp = qName.indexOf(':');
  412. if(nsp>0){
  413. var prefix = a.prefix = qName.slice(0,nsp);
  414. var localName = qName.slice(nsp+1);
  415. var nsPrefix = prefix === 'xmlns' && localName
  416. }else{
  417. localName = qName;
  418. prefix = null
  419. nsPrefix = qName === 'xmlns' && ''
  420. }
  421. //can not set prefix,because prefix !== ''
  422. a.localName = localName ;
  423. //prefix == null for no ns prefix attribute
  424. if(nsPrefix !== false){//hack!!
  425. if(localNSMap == null){
  426. localNSMap = {}
  427. //console.log(currentNSMap,0)
  428. _copy(currentNSMap,currentNSMap={})
  429. //console.log(currentNSMap,1)
  430. }
  431. currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
  432. a.uri = NAMESPACE.XMLNS
  433. domBuilder.startPrefixMapping(nsPrefix, value)
  434. }
  435. }
  436. var i = el.length;
  437. while(i--){
  438. a = el[i];
  439. var prefix = a.prefix;
  440. if(prefix){//no prefix attribute has no namespace
  441. if(prefix === 'xml'){
  442. a.uri = NAMESPACE.XML;
  443. }if(prefix !== 'xmlns'){
  444. a.uri = currentNSMap[prefix || '']
  445. //{console.log('###'+a.qName,domBuilder.locator.systemId+'',currentNSMap,a.uri)}
  446. }
  447. }
  448. }
  449. var nsp = tagName.indexOf(':');
  450. if(nsp>0){
  451. prefix = el.prefix = tagName.slice(0,nsp);
  452. localName = el.localName = tagName.slice(nsp+1);
  453. }else{
  454. prefix = null;//important!!
  455. localName = el.localName = tagName;
  456. }
  457. //no prefix element has default namespace
  458. var ns = el.uri = currentNSMap[prefix || ''];
  459. domBuilder.startElement(ns,localName,tagName,el);
  460. //endPrefixMapping and startPrefixMapping have not any help for dom builder
  461. //localNSMap = null
  462. if(el.closed){
  463. domBuilder.endElement(ns,localName,tagName);
  464. if(localNSMap){
  465. for(prefix in localNSMap){
  466. domBuilder.endPrefixMapping(prefix)
  467. }
  468. }
  469. }else{
  470. el.currentNSMap = currentNSMap;
  471. el.localNSMap = localNSMap;
  472. //parseStack.push(el);
  473. return true;
  474. }
  475. }
  476. function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){
  477. if(/^(?:script|textarea)$/i.test(tagName)){
  478. var elEndStart = source.indexOf('</'+tagName+'>',elStartEnd);
  479. var text = source.substring(elStartEnd+1,elEndStart);
  480. if(/[&<]/.test(text)){
  481. if(/^script$/i.test(tagName)){
  482. //if(!/\]\]>/.test(text)){
  483. //lexHandler.startCDATA();
  484. domBuilder.characters(text,0,text.length);
  485. //lexHandler.endCDATA();
  486. return elEndStart;
  487. //}
  488. }//}else{//text area
  489. text = text.replace(/&#?\w+;/g,entityReplacer);
  490. domBuilder.characters(text,0,text.length);
  491. return elEndStart;
  492. //}
  493. }
  494. }
  495. return elStartEnd+1;
  496. }
  497. function fixSelfClosed(source,elStartEnd,tagName,closeMap){
  498. //if(tagName in closeMap){
  499. var pos = closeMap[tagName];
  500. if(pos == null){
  501. //console.log(tagName)
  502. pos = source.lastIndexOf('</'+tagName+'>')
  503. if(pos<elStartEnd){//忘记闭合
  504. pos = source.lastIndexOf('</'+tagName)
  505. }
  506. closeMap[tagName] =pos
  507. }
  508. return pos<elStartEnd;
  509. //}
  510. }
  511. function _copy(source,target){
  512. for(var n in source){target[n] = source[n]}
  513. }
  514. function parseDCC(source,start,domBuilder,errorHandler){//sure start with '<!'
  515. var next= source.charAt(start+2)
  516. switch(next){
  517. case '-':
  518. if(source.charAt(start + 3) === '-'){
  519. var end = source.indexOf('-->',start+4);
  520. //append comment source.substring(4,end)//<!--
  521. if(end>start){
  522. domBuilder.comment(source,start+4,end-start-4);
  523. return end+3;
  524. }else{
  525. errorHandler.error("Unclosed comment");
  526. return -1;
  527. }
  528. }else{
  529. //error
  530. return -1;
  531. }
  532. default:
  533. if(source.substr(start+3,6) == 'CDATA['){
  534. var end = source.indexOf(']]>',start+9);
  535. domBuilder.startCDATA();
  536. domBuilder.characters(source,start+9,end-start-9);
  537. domBuilder.endCDATA()
  538. return end+3;
  539. }
  540. //<!DOCTYPE
  541. //startDTD(java.lang.String name, java.lang.String publicId, java.lang.String systemId)
  542. var matchs = split(source,start);
  543. var len = matchs.length;
  544. if(len>1 && /!doctype/i.test(matchs[0][0])){
  545. var name = matchs[1][0];
  546. var pubid = false;
  547. var sysid = false;
  548. if(len>3){
  549. if(/^public$/i.test(matchs[2][0])){
  550. pubid = matchs[3][0];
  551. sysid = len>4 && matchs[4][0];
  552. }else if(/^system$/i.test(matchs[2][0])){
  553. sysid = matchs[3][0];
  554. }
  555. }
  556. var lastMatch = matchs[len-1]
  557. domBuilder.startDTD(name, pubid, sysid);
  558. domBuilder.endDTD();
  559. return lastMatch.index+lastMatch[0].length
  560. }
  561. }
  562. return -1;
  563. }
  564. function parseInstruction(source,start,domBuilder){
  565. var end = source.indexOf('?>',start);
  566. if(end){
  567. var match = source.substring(start,end).match(/^<\?(\S*)\s*([\s\S]*?)\s*$/);
  568. if(match){
  569. var len = match[0].length;
  570. domBuilder.processingInstruction(match[1], match[2]) ;
  571. return end+2;
  572. }else{//error
  573. return -1;
  574. }
  575. }
  576. return -1;
  577. }
  578. function ElementAttributes(){
  579. this.attributeNames = {}
  580. }
  581. ElementAttributes.prototype = {
  582. setTagName:function(tagName){
  583. if(!tagNamePattern.test(tagName)){
  584. throw new Error('invalid tagName:'+tagName)
  585. }
  586. this.tagName = tagName
  587. },
  588. addValue:function(qName, value, offset) {
  589. if(!tagNamePattern.test(qName)){
  590. throw new Error('invalid attribute:'+qName)
  591. }
  592. this.attributeNames[qName] = this.length;
  593. this[this.length++] = {qName:qName,value:value,offset:offset}
  594. },
  595. length:0,
  596. getLocalName:function(i){return this[i].localName},
  597. getLocator:function(i){return this[i].locator},
  598. getQName:function(i){return this[i].qName},
  599. getURI:function(i){return this[i].uri},
  600. getValue:function(i){return this[i].value}
  601. // ,getIndex:function(uri, localName)){
  602. // if(localName){
  603. //
  604. // }else{
  605. // var qName = uri
  606. // }
  607. // },
  608. // getValue:function(){return this.getValue(this.getIndex.apply(this,arguments))},
  609. // getType:function(uri,localName){}
  610. // getType:function(i){},
  611. }
  612. function split(source,start){
  613. var match;
  614. var buf = [];
  615. var reg = /'[^']+'|"[^"]+"|[^\s<>\/=]+=?|(\/?\s*>|<)/g;
  616. reg.lastIndex = start;
  617. reg.exec(source);//skip <
  618. while(match = reg.exec(source)){
  619. buf.push(match);
  620. if(match[1])return buf;
  621. }
  622. }
  623. exports.XMLReader = XMLReader;
  624. exports.ParseError = ParseError;