123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322 |
- var conventions = require("./conventions");
- var dom = require('./dom')
- var entities = require('./entities');
- var sax = require('./sax');
- var DOMImplementation = dom.DOMImplementation;
- var NAMESPACE = conventions.NAMESPACE;
- var ParseError = sax.ParseError;
- var XMLReader = sax.XMLReader;
- /**
- * Normalizes line ending according to https://www.w3.org/TR/xml11/#sec-line-ends:
- *
- * > XML parsed entities are often stored in computer files which,
- * > for editing convenience, are organized into lines.
- * > These lines are typically separated by some combination
- * > of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
- * >
- * > To simplify the tasks of applications, the XML processor must behave
- * > as if it normalized all line breaks in external parsed entities (including the document entity)
- * > on input, before parsing, by translating all of the following to a single #xA character:
- * >
- * > 1. the two-character sequence #xD #xA
- * > 2. the two-character sequence #xD #x85
- * > 3. the single character #x85
- * > 4. the single character #x2028
- * > 5. any #xD character that is not immediately followed by #xA or #x85.
- *
- * @param {string} input
- * @returns {string}
- */
- function normalizeLineEndings(input) {
- return input
- .replace(/\r[\n\u0085]/g, '\n')
- .replace(/[\r\u0085\u2028]/g, '\n')
- }
- /**
- * @typedef Locator
- * @property {number} [columnNumber]
- * @property {number} [lineNumber]
- */
- /**
- * @typedef DOMParserOptions
- * @property {DOMHandler} [domBuilder]
- * @property {Function} [errorHandler]
- * @property {(string) => string} [normalizeLineEndings] used to replace line endings before parsing
- * defaults to `normalizeLineEndings`
- * @property {Locator} [locator]
- * @property {Record<string, string>} [xmlns]
- *
- * @see normalizeLineEndings
- */
- /**
- * The DOMParser interface provides the ability to parse XML or HTML source code
- * from a string into a DOM `Document`.
- *
- * _xmldom is different from the spec in that it allows an `options` parameter,
- * to override the default behavior._
- *
- * @param {DOMParserOptions} [options]
- * @constructor
- *
- * @see https://developer.mozilla.org/en-US/docs/Web/API/DOMParser
- * @see https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization
- */
- function DOMParser(options){
- this.options = options ||{locator:{}};
- }
- DOMParser.prototype.parseFromString = function(source,mimeType){
- var options = this.options;
- var sax = new XMLReader();
- var domBuilder = options.domBuilder || new DOMHandler();//contentHandler and LexicalHandler
- var errorHandler = options.errorHandler;
- var locator = options.locator;
- var defaultNSMap = options.xmlns||{};
- var isHTML = /\/x?html?$/.test(mimeType);//mimeType.toLowerCase().indexOf('html') > -1;
- var entityMap = isHTML ? entities.HTML_ENTITIES : entities.XML_ENTITIES;
- if(locator){
- domBuilder.setDocumentLocator(locator)
- }
- sax.errorHandler = buildErrorHandler(errorHandler,domBuilder,locator);
- sax.domBuilder = options.domBuilder || domBuilder;
- if(isHTML){
- defaultNSMap[''] = NAMESPACE.HTML;
- }
- defaultNSMap.xml = defaultNSMap.xml || NAMESPACE.XML;
- var normalize = options.normalizeLineEndings || normalizeLineEndings;
- if (source && typeof source === 'string') {
- sax.parse(
- normalize(source),
- defaultNSMap,
- entityMap
- )
- } else {
- sax.errorHandler.error('invalid doc source')
- }
- return domBuilder.doc;
- }
- function buildErrorHandler(errorImpl,domBuilder,locator){
- if(!errorImpl){
- if(domBuilder instanceof DOMHandler){
- return domBuilder;
- }
- errorImpl = domBuilder ;
- }
- var errorHandler = {}
- var isCallback = errorImpl instanceof Function;
- locator = locator||{}
- function build(key){
- var fn = errorImpl[key];
- if(!fn && isCallback){
- fn = errorImpl.length == 2?function(msg){errorImpl(key,msg)}:errorImpl;
- }
- errorHandler[key] = fn && function(msg){
- fn('[xmldom '+key+']\t'+msg+_locator(locator));
- }||function(){};
- }
- build('warning');
- build('error');
- build('fatalError');
- return errorHandler;
- }
- //console.log('#\n\n\n\n\n\n\n####')
- /**
- * +ContentHandler+ErrorHandler
- * +LexicalHandler+EntityResolver2
- * -DeclHandler-DTDHandler
- *
- * DefaultHandler:EntityResolver, DTDHandler, ContentHandler, ErrorHandler
- * DefaultHandler2:DefaultHandler,LexicalHandler, DeclHandler, EntityResolver2
- * @link http://www.saxproject.org/apidoc/org/xml/sax/helpers/DefaultHandler.html
- */
- function DOMHandler() {
- this.cdata = false;
- }
- function position(locator,node){
- node.lineNumber = locator.lineNumber;
- node.columnNumber = locator.columnNumber;
- }
- /**
- * @see org.xml.sax.ContentHandler#startDocument
- * @link http://www.saxproject.org/apidoc/org/xml/sax/ContentHandler.html
- */
- DOMHandler.prototype = {
- startDocument : function() {
- this.doc = new DOMImplementation().createDocument(null, null, null);
- if (this.locator) {
- this.doc.documentURI = this.locator.systemId;
- }
- },
- startElement:function(namespaceURI, localName, qName, attrs) {
- var doc = this.doc;
- var el = doc.createElementNS(namespaceURI, qName||localName);
- var len = attrs.length;
- appendElement(this, el);
- this.currentElement = el;
- this.locator && position(this.locator,el)
- for (var i = 0 ; i < len; i++) {
- var namespaceURI = attrs.getURI(i);
- var value = attrs.getValue(i);
- var qName = attrs.getQName(i);
- var attr = doc.createAttributeNS(namespaceURI, qName);
- this.locator &&position(attrs.getLocator(i),attr);
- attr.value = attr.nodeValue = value;
- el.setAttributeNode(attr)
- }
- },
- endElement:function(namespaceURI, localName, qName) {
- var current = this.currentElement
- var tagName = current.tagName;
- this.currentElement = current.parentNode;
- },
- startPrefixMapping:function(prefix, uri) {
- },
- endPrefixMapping:function(prefix) {
- },
- processingInstruction:function(target, data) {
- var ins = this.doc.createProcessingInstruction(target, data);
- this.locator && position(this.locator,ins)
- appendElement(this, ins);
- },
- ignorableWhitespace:function(ch, start, length) {
- },
- characters:function(chars, start, length) {
- chars = _toString.apply(this,arguments)
- //console.log(chars)
- if(chars){
- if (this.cdata) {
- var charNode = this.doc.createCDATASection(chars);
- } else {
- var charNode = this.doc.createTextNode(chars);
- }
- if(this.currentElement){
- this.currentElement.appendChild(charNode);
- }else if(/^\s*$/.test(chars)){
- this.doc.appendChild(charNode);
- //process xml
- }
- this.locator && position(this.locator,charNode)
- }
- },
- skippedEntity:function(name) {
- },
- endDocument:function() {
- this.doc.normalize();
- },
- setDocumentLocator:function (locator) {
- if(this.locator = locator){// && !('lineNumber' in locator)){
- locator.lineNumber = 0;
- }
- },
- //LexicalHandler
- comment:function(chars, start, length) {
- chars = _toString.apply(this,arguments)
- var comm = this.doc.createComment(chars);
- this.locator && position(this.locator,comm)
- appendElement(this, comm);
- },
- startCDATA:function() {
- //used in characters() methods
- this.cdata = true;
- },
- endCDATA:function() {
- this.cdata = false;
- },
- startDTD:function(name, publicId, systemId) {
- var impl = this.doc.implementation;
- if (impl && impl.createDocumentType) {
- var dt = impl.createDocumentType(name, publicId, systemId);
- this.locator && position(this.locator,dt)
- appendElement(this, dt);
- this.doc.doctype = dt;
- }
- },
- /**
- * @see org.xml.sax.ErrorHandler
- * @link http://www.saxproject.org/apidoc/org/xml/sax/ErrorHandler.html
- */
- warning:function(error) {
- console.warn('[xmldom warning]\t'+error,_locator(this.locator));
- },
- error:function(error) {
- console.error('[xmldom error]\t'+error,_locator(this.locator));
- },
- fatalError:function(error) {
- throw new ParseError(error, this.locator);
- }
- }
- function _locator(l){
- if(l){
- return '\n@'+(l.systemId ||'')+'#[line:'+l.lineNumber+',col:'+l.columnNumber+']'
- }
- }
- function _toString(chars,start,length){
- if(typeof chars == 'string'){
- return chars.substr(start,length)
- }else{//java sax connect width xmldom on rhino(what about: "? && !(chars instanceof String)")
- if(chars.length >= start+length || start){
- return new java.lang.String(chars,start,length)+'';
- }
- return chars;
- }
- }
- /*
- * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/LexicalHandler.html
- * used method of org.xml.sax.ext.LexicalHandler:
- * #comment(chars, start, length)
- * #startCDATA()
- * #endCDATA()
- * #startDTD(name, publicId, systemId)
- *
- *
- * IGNORED method of org.xml.sax.ext.LexicalHandler:
- * #endDTD()
- * #startEntity(name)
- * #endEntity(name)
- *
- *
- * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/DeclHandler.html
- * IGNORED method of org.xml.sax.ext.DeclHandler
- * #attributeDecl(eName, aName, type, mode, value)
- * #elementDecl(name, model)
- * #externalEntityDecl(name, publicId, systemId)
- * #internalEntityDecl(name, value)
- * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/EntityResolver2.html
- * IGNORED method of org.xml.sax.EntityResolver2
- * #resolveEntity(String name,String publicId,String baseURI,String systemId)
- * #resolveEntity(publicId, systemId)
- * #getExternalSubset(name, baseURI)
- * @link http://www.saxproject.org/apidoc/org/xml/sax/DTDHandler.html
- * IGNORED method of org.xml.sax.DTDHandler
- * #notationDecl(name, publicId, systemId) {};
- * #unparsedEntityDecl(name, publicId, systemId, notationName) {};
- */
- "endDTD,startEntity,endEntity,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,resolveEntity,getExternalSubset,notationDecl,unparsedEntityDecl".replace(/\w+/g,function(key){
- DOMHandler.prototype[key] = function(){return null}
- })
- /* Private static helpers treated below as private instance methods, so don't need to add these to the public API; we might use a Relator to also get rid of non-standard public properties */
- function appendElement (hander,node) {
- if (!hander.currentElement) {
- hander.doc.appendChild(node);
- } else {
- hander.currentElement.appendChild(node);
- }
- }//appendChild and setAttributeNS are preformance key
- exports.__DOMHandler = DOMHandler;
- exports.normalizeLineEndings = normalizeLineEndings;
- exports.DOMParser = DOMParser;
|