You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
323 lines
10 KiB
323 lines
10 KiB
var conventions = require("./conventions");
var dom = require('./dom')
var entities = require('./entities');
var sax = require('./sax');

var DOMImplementation = dom.DOMImplementation;

var NAMESPACE = conventions.NAMESPACE;

var ParseError = sax.ParseError;
var XMLReader = sax.XMLReader;

/**
 * Normalizes line endings according to https://www.w3.org/TR/xml11/#sec-line-ends:
 *
 * > XML parsed entities are often stored in computer files which,
 * > for editing convenience, are organized into lines.
 * > These lines are typically separated by some combination
 * > of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
 * >
 * > To simplify the tasks of applications, the XML processor must behave
 * > as if it normalized all line breaks in external parsed entities (including the document entity)
 * > on input, before parsing, by translating all of the following to a single #xA character:
 * >
 * > 1. the two-character sequence #xD #xA
 * > 2. the two-character sequence #xD #x85
 * > 3. the single character #x85
 * > 4. the single character #x2028
 * > 5. any #xD character that is not immediately followed by #xA or #x85.
 *
 * @param {string} input the text whose line breaks should be normalized
 * @returns {string} `input` with every line break listed above replaced by a single `\n`
 */
function normalizeLineEndings(input) {
	return input
		// Rules 1 and 2: collapse the two-character sequences first so that
		// each pair yields exactly one line feed.
		.replace(/\r[\n\u0085]/g, '\n')
		// Rules 3, 4 and 5: every remaining single break character.
		.replace(/[\r\u0085\u2028]/g, '\n');
}

/**
 * @typedef Locator
 * @property {number} [columnNumber]
 * @property {number} [lineNumber]
 */

/**
 * @typedef DOMParserOptions
 * @property {DOMHandler} [domBuilder]
 * @property {Function} [errorHandler]
 * @property {(string) => string} [normalizeLineEndings] used to replace line endings before parsing,
 * 						defaults to `normalizeLineEndings`
 * @property {Locator} [locator]
 * @property {Record<string, string>} [xmlns]
 *
 * @see normalizeLineEndings
 */

/**
 * The DOMParser interface provides the ability to parse XML or HTML source code
 * from a string into a DOM `Document`.
 *
 * _xmldom is different from the spec in that it allows an `options` parameter,
 * to override the default behavior._
 *
 * @param {DOMParserOptions} [options] stored as-is on `this.options`;
 * 		when omitted (or falsy) `{locator: {}}` is used instead
 * @constructor
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/API/DOMParser
 * @see https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization
 */
function DOMParser(options) {
	// The fallback carries an empty locator object, so parsing without
	// explicit options still hands a locator to the DOM builder.
	this.options = options || {locator: {}};
}

/**
 * Parses `source` with a new `XMLReader` and returns the `doc` property of
 * the DOM builder afterwards.
 *
 * A `mimeType` ending in `/html`, `/xhtml`, `/htm` or `/xhtm` selects HTML
 * mode: HTML entities are used and the default namespace is set to
 * `NAMESPACE.HTML`. Everything else is treated as XML.
 *
 * If `source` is empty or not a string, nothing is parsed and
 * 'invalid doc source' is reported through the error handler's `error`.
 *
 * @param {string} source the markup to parse
 * @param {string} [mimeType] only inspected to decide between HTML and XML mode
 * @returns {Document | undefined} whatever the DOM builder exposes as `doc`
 */
DOMParser.prototype.parseFromString = function (source, mimeType) {
	var options = this.options;
	// Named `reader` so the module-level `sax` import is not shadowed.
	var reader = new XMLReader();
	var domBuilder = options.domBuilder || new DOMHandler(); //contentHandler and LexicalHandler
	var errorHandler = options.errorHandler;
	var locator = options.locator;
	// Work on a copy of the configured namespace map: the defaults added
	// below (in particular the HTML default namespace) must not be written
	// back into `options.xmlns`, where they would leak into later calls.
	var configuredNSMap = options.xmlns || {};
	var defaultNSMap = {};
	for (var prefix in configuredNSMap) {
		defaultNSMap[prefix] = configuredNSMap[prefix];
	}
	var isHTML = /\/x?html?$/.test(mimeType); //mimeType.toLowerCase().indexOf('html') > -1;
	var entityMap = isHTML ? entities.HTML_ENTITIES : entities.XML_ENTITIES;
	if (locator) {
		domBuilder.setDocumentLocator(locator);
	}

	reader.errorHandler = buildErrorHandler(errorHandler, domBuilder, locator);
	reader.domBuilder = domBuilder;
	if (isHTML) {
		defaultNSMap[''] = NAMESPACE.HTML;
	}
	defaultNSMap.xml = defaultNSMap.xml || NAMESPACE.XML;
	var normalize = options.normalizeLineEndings || normalizeLineEndings;
	if (source && typeof source === 'string') {
		reader.parse(
			normalize(source),
			defaultNSMap,
			entityMap
		);
	} else {
		reader.errorHandler.error('invalid doc source');
	}
	return domBuilder.doc;
};

/**
 * Builds the object whose `warning`, `error` and `fatalError` methods are
 * used to report parse problems.
 *
 * - Without `errorImpl`, a `domBuilder` that is a `DOMHandler` is returned
 *   unchanged; any other `domBuilder` takes the place of `errorImpl`.
 * - When `errorImpl` has a member named after the level, that member is used.
 * - Otherwise, when `errorImpl` is a function, it is used for every level:
 *   called as `(level, message)` if it declares exactly two parameters,
 *   else as `(message)`.
 * - Levels without any handler become no-ops.
 *
 * Each message is passed on as `'[xmldom <level>]\t' + msg` followed by the
 * location text produced by `_locator`.
 *
 * @param {Function | Object} [errorImpl] callback or object with per-level methods
 * @param {Object} domBuilder fallback when `errorImpl` is missing
 * @param {Locator} [locator] position information appended to each message
 * @returns {{warning: Function, error: Function, fatalError: Function} | DOMHandler}
 */
function buildErrorHandler(errorImpl, domBuilder, locator) {
	if (!errorImpl) {
		if (domBuilder instanceof DOMHandler) {
			return domBuilder;
		}
		errorImpl = domBuilder;
	}
	var errorHandler = {};
	// `typeof` also recognizes functions created in another realm,
	// which an `instanceof Function` check would miss.
	var isCallback = typeof errorImpl === 'function';
	locator = locator || {};
	function build(key) {
		var fn = errorImpl[key];
		if (!fn && isCallback) {
			fn = errorImpl.length === 2
				? function (msg) { errorImpl(key, msg); }
				: errorImpl;
		}
		errorHandler[key] = fn
			? function (msg) {
				fn('[xmldom ' + key + ']\t' + msg + _locator(locator));
			}
			: function () {};
	}
	build('warning');
	build('error');
	build('fatalError');
	return errorHandler;
}

//console.log('#\n\n\n\n\n\n\n####')
/**
 * SAX-style handler that builds a DOM document from parser events.
 *
 * Relation to the Java SAX interfaces, as noted by the original author:
 *
 * +ContentHandler+ErrorHandler
 * +LexicalHandler+EntityResolver2
 * -DeclHandler-DTDHandler
 *
 * DefaultHandler:EntityResolver, DTDHandler, ContentHandler, ErrorHandler
 * DefaultHandler2:DefaultHandler,LexicalHandler, DeclHandler, EntityResolver2
 * @link http://www.saxproject.org/apidoc/org/xml/sax/helpers/DefaultHandler.html
 *
 * @constructor
 */
function DOMHandler() {
	// Toggled by startCDATA()/endCDATA(); read by characters() to decide
	// between creating a CDATA section and a text node.
	this.cdata = false;
}

/**
 * Copies the current line and column from `locator` onto `node`.
 *
 * @param {Locator} locator source of `lineNumber` and `columnNumber`
 * @param {Object} node receives `lineNumber` and `columnNumber` properties
 */
function position(locator, node) {
	node.lineNumber = locator.lineNumber;
	node.columnNumber = locator.columnNumber;
}

/**
 * Event callbacks through which the parser drives construction of the DOM.
 * `this.doc` is the document being built, `this.currentElement` the element
 * that newly created nodes are appended to, and `this.locator` (if set) the
 * source of line/column information copied onto created nodes.
 *
 * @see org.xml.sax.ContentHandler#startDocument
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ContentHandler.html
 */
DOMHandler.prototype = {
	/** Creates the empty document and records the locator's `systemId` as its URI. */
	startDocument: function () {
		this.doc = new DOMImplementation().createDocument(null, null, null);
		if (this.locator) {
			this.doc.documentURI = this.locator.systemId;
		}
	},
	/**
	 * Creates an element, appends it at the current position, makes it the
	 * current element and copies all attributes from `attrs` onto it.
	 */
	startElement: function (namespaceURI, localName, qName, attrs) {
		var doc = this.doc;
		var el = doc.createElementNS(namespaceURI, qName || localName);
		var len = attrs.length;
		appendElement(this, el);
		this.currentElement = el;

		if (this.locator) {
			position(this.locator, el);
		}
		for (var i = 0; i < len; i++) {
			// Distinct names: the element's own namespaceURI/qName parameters
			// must not be overwritten by per-attribute values.
			var attrNamespaceURI = attrs.getURI(i);
			var value = attrs.getValue(i);
			var attrQName = attrs.getQName(i);
			var attr = doc.createAttributeNS(attrNamespaceURI, attrQName);
			if (this.locator) {
				// Attributes carry their own locator, separate from the element's.
				position(attrs.getLocator(i), attr);
			}
			attr.value = attr.nodeValue = value;
			el.setAttributeNode(attr);
		}
	},
	/** Moves the current position back up to the parent of the current element. */
	endElement: function (namespaceURI, localName, qName) {
		var current = this.currentElement;
		this.currentElement = current.parentNode;
	},
	/** Intentionally empty. */
	startPrefixMapping: function (prefix, uri) {
	},
	/** Intentionally empty. */
	endPrefixMapping: function (prefix) {
	},
	/** Creates a processing instruction and appends it at the current position. */
	processingInstruction: function (target, data) {
		var ins = this.doc.createProcessingInstruction(target, data);
		if (this.locator) {
			position(this.locator, ins);
		}
		appendElement(this, ins);
	},
	/** Intentionally empty. */
	ignorableWhitespace: function (ch, start, length) {
	},
	/**
	 * Creates a CDATA section (between startCDATA/endCDATA) or a text node
	 * for non-empty character data. Outside of any element only
	 * whitespace-only data is appended to the document; other data is
	 * created but not attached.
	 */
	characters: function (chars, start, length) {
		chars = _toString.apply(this, arguments);
		//console.log(chars)
		if (chars) {
			var charNode = this.cdata
				? this.doc.createCDATASection(chars)
				: this.doc.createTextNode(chars);
			if (this.currentElement) {
				this.currentElement.appendChild(charNode);
			} else if (/^\s*$/.test(chars)) {
				this.doc.appendChild(charNode);
				//process xml
			}
			if (this.locator) {
				position(this.locator, charNode);
			}
		}
	},
	/** Intentionally empty. */
	skippedEntity: function (name) {
	},
	/** Normalizes the finished document. */
	endDocument: function () {
		this.doc.normalize();
	},
	/** Stores `locator` and, when one is given, resets its `lineNumber` to 0. */
	setDocumentLocator: function (locator) {
		this.locator = locator;
		if (locator) { // && !('lineNumber' in locator)){
			locator.lineNumber = 0;
		}
	},
	//LexicalHandler
	/** Creates a comment node and appends it at the current position. */
	comment: function (chars, start, length) {
		chars = _toString.apply(this, arguments);
		var comm = this.doc.createComment(chars);
		if (this.locator) {
			position(this.locator, comm);
		}
		appendElement(this, comm);
	},

	/** Marks the start of a CDATA section. */
	startCDATA: function () {
		//used in characters() methods
		this.cdata = true;
	},
	/** Marks the end of a CDATA section. */
	endCDATA: function () {
		this.cdata = false;
	},

	/**
	 * Creates the document type node, appends it and stores it as
	 * `doc.doctype`. Does nothing when the document's implementation
	 * offers no `createDocumentType`.
	 */
	startDTD: function (name, publicId, systemId) {
		var impl = this.doc.implementation;
		if (impl && impl.createDocumentType) {
			var dt = impl.createDocumentType(name, publicId, systemId);
			if (this.locator) {
				position(this.locator, dt);
			}
			appendElement(this, dt);
			this.doc.doctype = dt;
		}
	},
	/**
	 * @see org.xml.sax.ErrorHandler
	 * @link http://www.saxproject.org/apidoc/org/xml/sax/ErrorHandler.html
	 */
	/** Logs the warning together with the current location via `console.warn`. */
	warning: function (error) {
		console.warn('[xmldom warning]\t' + error, _locator(this.locator));
	},
	/** Logs the error together with the current location via `console.error`. */
	error: function (error) {
		console.error('[xmldom error]\t' + error, _locator(this.locator));
	},
	/** Aborts by throwing a `ParseError` that carries the current locator. */
	fatalError: function (error) {
		throw new ParseError(error, this.locator);
	}
};

/**
 * Formats a locator as the location suffix used in diagnostics:
 * `'\n@' + systemId + '#[line:' + lineNumber + ',col:' + columnNumber + ']'`,
 * with a missing `systemId` rendered as an empty string.
 *
 * @param {Locator} [l] the locator to format
 * @returns {string | undefined} the suffix, or `undefined` when `l` is falsy
 */
function _locator(l) {
	if (l) {
		return '\n@' + (l.systemId || '') + '#[line:' + l.lineNumber + ',col:' + l.columnNumber + ']';
	}
}

/**
 * Extracts the text of a `characters`/`comment` event.
 *
 * For a primitive string this is `chars.substr(start, length)`, i.e. the
 * whole string when `start` and `length` are omitted. Any other value is
 * handled by the Rhino bridge below.
 *
 * @param {string | Object} chars the character data
 * @param {number} [start] offset of the first character
 * @param {number} [length] number of characters
 * @returns {string | Object} the extracted text, or `chars` itself when a
 * 		non-string value needs no slicing
 */
function _toString(chars, start, length) {
	if (typeof chars === 'string') {
		return chars.substr(start, length);
	} else { //java sax connect width xmldom on rhino(what about: "? && !(chars instanceof String)")
		// Only reached for non-string input; `java` is expected to be a
		// global supplied by the host (Rhino) in that case.
		if (chars.length >= start + length || start) {
			return new java.lang.String(chars, start, length) + '';
		}
		return chars;
	}
}

/*
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/LexicalHandler.html
 * used method of org.xml.sax.ext.LexicalHandler:
 *  #comment(chars, start, length)
 *  #startCDATA()
 *  #endCDATA()
 *  #startDTD(name, publicId, systemId)
 *
 *
 * IGNORED method of org.xml.sax.ext.LexicalHandler:
 *  #endDTD()
 *  #startEntity(name)
 *  #endEntity(name)
 *
 *
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/DeclHandler.html
 * IGNORED method of org.xml.sax.ext.DeclHandler
 * 	#attributeDecl(eName, aName, type, mode, value)
 *  #elementDecl(name, model)
 *  #externalEntityDecl(name, publicId, systemId)
 *  #internalEntityDecl(name, value)
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/EntityResolver2.html
 * IGNORED method of org.xml.sax.EntityResolver2
 *  #resolveEntity(String name,String publicId,String baseURI,String systemId)
 *  #resolveEntity(publicId, systemId)
 *  #getExternalSubset(name, baseURI)
 * @link http://www.saxproject.org/apidoc/org/xml/sax/DTDHandler.html
 * IGNORED method of org.xml.sax.DTDHandler
 *  #notationDecl(name, publicId, systemId) {};
 *  #unparsedEntityDecl(name, publicId, systemId, notationName) {};
 */
// Install a stub returning `null` for every ignored handler method listed above.
"endDTD,startEntity,endEntity,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,resolveEntity,getExternalSubset,notationDecl,unparsedEntityDecl".split(',').forEach(function (key) {
	DOMHandler.prototype[key] = function () { return null; };
});

/* Private static helpers treated below as private instance methods, so don't need to add these to the public API; we might use a Relator to also get rid of non-standard public properties */
/**
 * Appends `node` to the handler's current element, or to the document
 * itself while no element is open.
 *
 * @param {DOMHandler} handler provides `currentElement` and `doc`
 * @param {Node} node the node to append
 */
function appendElement(handler, node) {
	if (!handler.currentElement) {
		handler.doc.appendChild(node);
	} else {
		handler.currentElement.appendChild(node);
	}
} //appendChild and setAttributeNS are performance key

// Public surface of this module.
exports.__DOMHandler = DOMHandler;
exports.normalizeLineEndings = normalizeLineEndings;
exports.DOMParser = DOMParser;