323 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
		
		
			
		
	
	
			323 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
|   | var conventions = require("./conventions"); | ||
|  | var dom = require('./dom') | ||
|  | var entities = require('./entities'); | ||
|  | var sax = require('./sax'); | ||
|  | 
 | ||
|  | var DOMImplementation = dom.DOMImplementation; | ||
|  | 
 | ||
|  | var NAMESPACE = conventions.NAMESPACE; | ||
|  | 
 | ||
|  | var ParseError = sax.ParseError; | ||
|  | var XMLReader = sax.XMLReader; | ||
|  | 
 | ||
/**
 * Collapses every line break form listed in
 * https://www.w3.org/TR/xml11/#sec-line-ends into a single LINE FEED (#xA):
 *
 * 1. the two-character sequence #xD #xA
 * 2. the two-character sequence #xD #x85
 * 3. the single character #x85
 * 4. the single character #x2028
 * 5. any #xD character that is not immediately followed by #xA or #x85
 *
 * The XML specification requires a processor to behave as if this
 * translation happened on input, before parsing.
 *
 * @param {string} input
 * @returns {string} `input` with all line breaks translated to `\n`
 */
function normalizeLineEndings(input) {
	// A #xD greedily swallows a directly following #xA / #x85 (rules 1, 2),
	// otherwise it is replaced on its own (rule 5); the second alternative
	// covers the stand-alone #x85 and #x2028 (rules 3, 4).
	var LINE_BREAK = /\r[\n\u0085]?|[\u0085\u2028]/g;
	return input.replace(LINE_BREAK, '\n');
}
|  | 
 | ||
/**
 * @typedef Locator
 * @property {number} [columnNumber]
 * @property {number} [lineNumber]
 */
|  | 
 | ||
/**
 * @typedef DOMParserOptions
 * @property {DOMHandler} [domBuilder]
 * @property {Function} [errorHandler]
 * @property {(input: string) => string} [normalizeLineEndings] used to replace line endings
 * 						before parsing, defaults to `normalizeLineEndings`
 * @property {Locator} [locator]
 * @property {Record<string, string>} [xmlns]
 *
 * @see normalizeLineEndings
 */
|  | 
 | ||
/**
 * Entry point for turning XML or HTML source text into a DOM `Document`.
 *
 * _Unlike the spec, xmldom accepts an `options` argument that overrides
 * the default behavior._
 *
 * When `options` is omitted (or falsy) the parser is configured with a fresh
 * `{locator: {}}` object; otherwise the given object is stored as-is on
 * `this.options` (it is not copied).
 *
 * @param {DOMParserOptions} [options]
 * @constructor
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/API/DOMParser
 * @see https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization
 */
function DOMParser(options) {
	if (options) {
		this.options = options;
	} else {
		this.options = { locator: {} };
	}
}
|  | 
 | ||
/**
 * Parses `source` and returns the document produced by the DOM builder.
 *
 * A `mimeType` ending in `/html`, `/htm`, `/xhtml` or `/xhtm` switches to
 * HTML handling: the HTML entity table is used and the default namespace is
 * set to the HTML namespace. Anything else is treated as XML.
 *
 * The namespace map handed to the reader is a per-call copy of
 * `options.xmlns`, so the caller's object is never modified and a previous
 * HTML parse cannot leak its default namespace into a later XML parse on the
 * same parser instance.
 *
 * When `source` is empty or not a string, `'invalid doc source'` is reported
 * through the error handler's `error` callback and nothing is parsed.
 *
 * @param {string} source the markup to parse
 * @param {string} [mimeType] the mime type used to select XML or HTML handling
 * @returns {Document | undefined} whatever `domBuilder.doc` holds after parsing
 */
DOMParser.prototype.parseFromString = function (source, mimeType) {
	var options = this.options;
	// Named `reader` so the module-level `sax` import is not shadowed.
	var reader = new XMLReader();
	var domBuilder = options.domBuilder || new DOMHandler(); // contentHandler and LexicalHandler
	var errorHandler = options.errorHandler;
	var locator = options.locator;
	var isHTML = /\/x?html?$/.test(mimeType);
	var entityMap = isHTML ? entities.HTML_ENTITIES : entities.XML_ENTITIES;

	// Work on a copy: writing the defaults below straight into `options.xmlns`
	// would mutate caller-owned state and persist between calls.
	// A plain for-in is used on purpose so that prefixes which were only
	// reachable through the prototype chain keep resolving as before.
	var configuredNSMap = options.xmlns || {};
	var defaultNSMap = {};
	for (var prefix in configuredNSMap) {
		defaultNSMap[prefix] = configuredNSMap[prefix];
	}
	if (isHTML) {
		defaultNSMap[''] = NAMESPACE.HTML;
	}
	defaultNSMap.xml = defaultNSMap.xml || NAMESPACE.XML;

	if (locator) {
		domBuilder.setDocumentLocator(locator);
	}

	reader.errorHandler = buildErrorHandler(errorHandler, domBuilder, locator);
	reader.domBuilder = domBuilder;

	var normalize = options.normalizeLineEndings || normalizeLineEndings;
	if (source && typeof source === 'string') {
		reader.parse(normalize(source), defaultNSMap, entityMap);
	} else {
		reader.errorHandler.error('invalid doc source');
	}
	return domBuilder.doc;
};
/**
 * Produces the object whose `warning`, `error` and `fatalError` methods are
 * installed as the reader's error handler.
 *
 * - Without `errorImpl`, a `DOMHandler` instance is returned unchanged (it
 *   already implements the three methods); any other `domBuilder` takes the
 *   role of `errorImpl`.
 * - `errorImpl` may be an object providing some of the three methods, or a
 *   function. A function declaring exactly two parameters is invoked as
 *   `(level, message)`, any other function as `(message)`.
 * - Levels without an implementation get a no-op.
 *
 * Every message is forwarded as `'[xmldom <level>]\t<message><location>'`.
 * The wrappers call the implementation as a plain function (no `this`
 * binding) and ignore its return value.
 *
 * @param {Function | Object} [errorImpl]
 * @param {Object} domBuilder
 * @param {Locator} [locator] source of the position appended to each message
 * @returns {Object} an object with `warning`, `error` and `fatalError`
 */
function buildErrorHandler(errorImpl, domBuilder, locator) {
	if (!errorImpl) {
		if (domBuilder instanceof DOMHandler) {
			return domBuilder;
		}
		errorImpl = domBuilder;
	}

	var isCallback = errorImpl instanceof Function;
	var where = locator || {};
	var errorHandler = {};

	['warning', 'error', 'fatalError'].forEach(function (key) {
		var fn = errorImpl[key];
		if (!fn && isCallback) {
			if (errorImpl.length == 2) {
				fn = function (msg) {
					errorImpl(key, msg);
				};
			} else {
				fn = errorImpl;
			}
		}

		if (fn) {
			errorHandler[key] = function (msg) {
				fn('[xmldom ' + key + ']\t' + msg + _locator(where));
			};
		} else {
			errorHandler[key] = function () {};
		}
	});

	return errorHandler;
}
|  | 
 | ||
|  | //console.log('#\n\n\n\n\n\n\n####')
 | ||
/**
 * SAX-style handler that builds the DOM tree while the reader walks the source.
 *
 * Implemented handler groups:
 *  +ContentHandler +ErrorHandler
 *  +LexicalHandler +EntityResolver2
 *  -DeclHandler -DTDHandler
 *
 * For reference, the SAX helper classes group the interfaces like this:
 *  DefaultHandler: EntityResolver, DTDHandler, ContentHandler, ErrorHandler
 *  DefaultHandler2: DefaultHandler, LexicalHandler, DeclHandler, EntityResolver2
 *
 * @constructor
 * @link http://www.saxproject.org/apidoc/org/xml/sax/helpers/DefaultHandler.html
 */
function DOMHandler() {
	// Flipped by startCDATA()/endCDATA(); tells characters() which node type to create.
	this.cdata = false;
}
/**
 * Stamps `node` with the line and column currently held by `locator`.
 *
 * @param {Locator} locator
 * @param {Object} node receives `lineNumber` and `columnNumber`
 */
function position(locator, node) {
	var line = locator.lineNumber;
	node.lineNumber = line;
	var column = locator.columnNumber;
	node.columnNumber = column;
}
/**
 * The SAX callbacks that assemble the document.
 *
 * State kept on the handler instance:
 *  - `doc`: the document created by `startDocument`
 *  - `currentElement`: the element that receives new children
 *    (falsy while at document level)
 *  - `cdata`: whether character data is currently inside a CDATA section
 *  - `locator`: optional position source, see `setDocumentLocator`
 *
 * @see org.xml.sax.ContentHandler#startDocument
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ContentHandler.html
 */
DOMHandler.prototype = {
	/** Creates the empty document that all later callbacks fill. */
	startDocument: function () {
		this.doc = new DOMImplementation().createDocument(null, null, null);
		if (this.locator) {
			this.doc.documentURI = this.locator.systemId;
		}
	},

	/**
	 * Creates the element, attaches it below the current element (or the
	 * document), makes it the current element and copies all attributes onto it.
	 */
	startElement: function (namespaceURI, localName, qName, attrs) {
		var doc = this.doc;
		var el = doc.createElementNS(namespaceURI, qName || localName);
		var attrCount = attrs.length;
		appendElement(this, el);
		this.currentElement = el;

		if (this.locator) {
			position(this.locator, el);
		}
		for (var i = 0; i < attrCount; i++) {
			var attrURI = attrs.getURI(i);
			var attrValue = attrs.getValue(i);
			var attrQName = attrs.getQName(i);
			var attr = doc.createAttributeNS(attrURI, attrQName);
			if (this.locator) {
				// attributes carry their own position
				position(attrs.getLocator(i), attr);
			}
			attr.nodeValue = attrValue;
			attr.value = attrValue;
			el.setAttributeNode(attr);
		}
	},

	/** Moves the insertion point back to the parent of the current element. */
	endElement: function (namespaceURI, localName, qName) {
		this.currentElement = this.currentElement.parentNode;
	},

	startPrefixMapping: function (prefix, uri) {},

	endPrefixMapping: function (prefix) {},

	processingInstruction: function (target, data) {
		var ins = this.doc.createProcessingInstruction(target, data);
		if (this.locator) {
			position(this.locator, ins);
		}
		appendElement(this, ins);
	},

	ignorableWhitespace: function (ch, start, length) {},

	/**
	 * Adds character data as a text node, or as a CDATA section while inside
	 * `startCDATA`/`endCDATA`. At document level only whitespace-only data is
	 * appended; any other text there is created but not attached.
	 */
	characters: function (chars, start, length) {
		var text = _toString.apply(this, arguments);
		if (!text) {
			return;
		}
		var charNode = this.cdata
			? this.doc.createCDATASection(text)
			: this.doc.createTextNode(text);
		if (this.currentElement) {
			this.currentElement.appendChild(charNode);
		} else if (/^\s*$/.test(text)) {
			this.doc.appendChild(charNode);
		}
		if (this.locator) {
			position(this.locator, charNode);
		}
	},

	skippedEntity: function (name) {},

	endDocument: function () {
		this.doc.normalize();
	},

	/** Stores the locator and, when one is given, resets its line counter to 0. */
	setDocumentLocator: function (locator) {
		this.locator = locator;
		if (locator) {
			locator.lineNumber = 0;
		}
	},

	// LexicalHandler

	comment: function (chars, start, length) {
		var text = _toString.apply(this, arguments);
		var comm = this.doc.createComment(text);
		if (this.locator) {
			position(this.locator, comm);
		}
		appendElement(this, comm);
	},

	startCDATA: function () {
		// consulted by characters()
		this.cdata = true;
	},

	endCDATA: function () {
		this.cdata = false;
	},

	/**
	 * Creates the doctype node, attaches it and publishes it as `doc.doctype`.
	 * Does nothing when the document's implementation cannot create doctypes.
	 */
	startDTD: function (name, publicId, systemId) {
		var impl = this.doc.implementation;
		if (!impl || !impl.createDocumentType) {
			return;
		}
		var dt = impl.createDocumentType(name, publicId, systemId);
		if (this.locator) {
			position(this.locator, dt);
		}
		appendElement(this, dt);
		this.doc.doctype = dt;
	},

	/**
	 * Warnings and errors are only logged to the console;
	 * a fatal error throws a `ParseError`.
	 *
	 * @see org.xml.sax.ErrorHandler
	 * @link http://www.saxproject.org/apidoc/org/xml/sax/ErrorHandler.html
	 */
	warning: function (error) {
		console.warn('[xmldom warning]\t' + error, _locator(this.locator));
	},

	error: function (error) {
		console.error('[xmldom error]\t' + error, _locator(this.locator));
	},

	fatalError: function (error) {
		throw new ParseError(error, this.locator);
	}
};
/**
 * Formats a locator as the location suffix used in diagnostic messages:
 * `'\n@<systemId>#[line:<lineNumber>,col:<columnNumber>]'`.
 * A missing `systemId` is rendered as an empty string.
 *
 * @param {Locator} [l]
 * @returns {string | undefined} `undefined` when no locator is given
 */
function _locator(l) {
	if (!l) {
		return;
	}
	var systemId = l.systemId || '';
	var lineAndColumn = '[line:' + l.lineNumber + ',col:' + l.columnNumber + ']';
	return '\n@' + systemId + '#' + lineAndColumn;
}
/**
 * Extracts the `(start, length)` window of character data as a string.
 *
 * JavaScript strings are cut with `substr`. Any other value is taken to be a
 * Java character array (Java SAX connected to xmldom on Rhino): it is
 * converted through `java.lang.String` when the window fits into it or
 * `start` is non-zero, and handed back untouched otherwise.
 *
 * @param {string | Object} chars
 * @param {number} start
 * @param {number} length
 * @returns {string | Object}
 */
function _toString(chars, start, length) {
	if (typeof chars == 'string') {
		return chars.substr(start, length);
	}
	// what about: "? && !(chars instanceof String)"
	var needsConversion = chars.length >= start + length || start;
	if (needsConversion) {
		return new java.lang.String(chars, start, length) + '';
	}
	return chars;
}
|  | 
 | ||
/*
 * Handler methods that are part of the SAX interfaces but are ignored by
 * DOMHandler: each one is installed below as a stub that returns `null`.
 *
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/LexicalHandler.html
 * Used methods of org.xml.sax.ext.LexicalHandler:
 *  #comment(chars, start, length)
 *  #startCDATA()
 *  #endCDATA()
 *  #startDTD(name, publicId, systemId)
 *
 * IGNORED methods of org.xml.sax.ext.LexicalHandler:
 *  #endDTD()
 *  #startEntity(name)
 *  #endEntity(name)
 *
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/DeclHandler.html
 * IGNORED methods of org.xml.sax.ext.DeclHandler:
 *  #attributeDecl(eName, aName, type, mode, value)
 *  #elementDecl(name, model)
 *  #externalEntityDecl(name, publicId, systemId)
 *  #internalEntityDecl(name, value)
 *
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/EntityResolver2.html
 * IGNORED methods of org.xml.sax.EntityResolver2:
 *  #resolveEntity(String name, String publicId, String baseURI, String systemId)
 *  #resolveEntity(publicId, systemId)
 *  #getExternalSubset(name, baseURI)
 *
 * @link http://www.saxproject.org/apidoc/org/xml/sax/DTDHandler.html
 * IGNORED methods of org.xml.sax.DTDHandler:
 *  #notationDecl(name, publicId, systemId) {};
 *  #unparsedEntityDecl(name, publicId, systemId, notationName) {};
 */
"endDTD,startEntity,endEntity,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,resolveEntity,getExternalSubset,notationDecl,unparsedEntityDecl"
	.split(',')
	.forEach(function (key) {
		DOMHandler.prototype[key] = function () {
			return null;
		};
	});
|  | 
 | ||
/*
 * Private static helper, used like a private instance method so that it does
 * not have to be added to the public API (a Relator could additionally hide
 * the non-standard public properties).
 *
 * Appends `node` to the handler's current element, or to the document when
 * no element is open.
 *
 * appendChild and setAttributeNS are the performance-critical calls.
 */
function appendElement(handler, node) {
	var parent = handler.currentElement || handler.doc;
	parent.appendChild(node);
}
|  | 
 | ||
// Public API of this module.
exports.DOMParser = DOMParser;
exports.normalizeLineEndings = normalizeLineEndings;
// The double underscore marks the handler as an internal export.
exports.__DOMHandler = DOMHandler;