# HG changeset patch # User zuwiki # Date 1266794711 28800 # Node ID aaad945d10ba995f95e7931f7784bc4ec15b3656 # Parent 917f370a76311040d96fd9d2e4e07b2ba7436c86 Adding node-xml.js by Rob Righter. Fixing up xmpp.js to work for me. Can't say exactly what all was wrong, other than apparent changes in the node.js TCP API. diff -r 917f370a7631 -r aaad945d10ba node-xml.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/node-xml.js Sun Feb 21 15:25:11 2010 -0800 @@ -0,0 +1,1249 @@ +// node-xml +// An xml parser for node.js +// (C) Rob Righter (@robrighter) 2009 - 2010, Licensed under the MIT-LICENSE +// Contributions from David Joham + + +(function () { + +// CONSTANTS +var whitespace = "\n\r\t "; + + +//XMLP is a pull-based parser. The calling application passes in an XML string +//to the constructor, then repeatedly calls .next() to parse the next segment. +//.next() returns a flag indicating what type of segment was found, and stores +//data temporarily in a couple of member variables (name, content, array of +//attributes), which can be accessed by several .get____() methods. +// +//Basically, XMLP is the lowest common denominator parser - a very simple +//API which other wrappers can be built against. 
+ + +var XMLP = function(strXML) { + // Normalize line breaks + strXML = SAXStrings.replace(strXML, null, null, "\r\n", "\n"); + strXML = SAXStrings.replace(strXML, null, null, "\r", "\n"); + + this.m_xml = strXML; + this.m_iP = 0; + this.m_iState = XMLP._STATE_PROLOG; + this.m_stack = new Stack(); + this._clearAttributes(); + this.m_pause = false; + this.m_preInterruptIState = XMLP._STATE_PROLOG; + this.m_namespaceList = new Array(); + this.m_chunkTransitionContinuation = null; + +} + + +// CONSTANTS (these must be below the constructor) +XMLP._NONE = 0; +XMLP._ELM_B = 1; +XMLP._ELM_E = 2; +XMLP._ELM_EMP = 3; +XMLP._ATT = 4; +XMLP._TEXT = 5; +XMLP._ENTITY = 6; +XMLP._PI = 7; +XMLP._CDATA = 8; +XMLP._COMMENT = 9; +XMLP._DTD = 10; +XMLP._ERROR = 11; +XMLP._INTERRUPT = 12; + +XMLP._CONT_XML = 0; +XMLP._CONT_ALT = 1; + +XMLP._ATT_NAME = 0; +XMLP._ATT_VAL = 1; + +XMLP._STATE_PROLOG = 1; +XMLP._STATE_DOCUMENT = 2; +XMLP._STATE_MISC = 3; + +XMLP._errs = new Array(); +XMLP._errs[XMLP.ERR_CLOSE_PI = 0 ] = "PI: missing closing sequence"; +XMLP._errs[XMLP.ERR_CLOSE_DTD = 1 ] = "DTD: missing closing sequence"; +XMLP._errs[XMLP.ERR_CLOSE_COMMENT = 2 ] = "Comment: missing closing sequence"; +XMLP._errs[XMLP.ERR_CLOSE_CDATA = 3 ] = "CDATA: missing closing sequence"; +XMLP._errs[XMLP.ERR_CLOSE_ELM = 4 ] = "Element: missing closing sequence"; +XMLP._errs[XMLP.ERR_CLOSE_ENTITY = 5 ] = "Entity: missing closing sequence"; +XMLP._errs[XMLP.ERR_PI_TARGET = 6 ] = "PI: target is required"; +XMLP._errs[XMLP.ERR_ELM_EMPTY = 7 ] = "Element: cannot be both empty and closing"; +XMLP._errs[XMLP.ERR_ELM_NAME = 8 ] = "Element: name must immediatly follow \"<\""; +XMLP._errs[XMLP.ERR_ELM_LT_NAME = 9 ] = "Element: \"<\" not allowed in element names"; +XMLP._errs[XMLP.ERR_ATT_VALUES = 10] = "Attribute: values are required and must be in quotes"; +XMLP._errs[XMLP.ERR_ATT_LT_NAME = 11] = "Element: \"<\" not allowed in attribute names"; +XMLP._errs[XMLP.ERR_ATT_LT_VALUE = 12] = "Attribute: \"<\" not 
allowed in attribute values"; +XMLP._errs[XMLP.ERR_ATT_DUP = 13] = "Attribute: duplicate attributes not allowed"; +XMLP._errs[XMLP.ERR_ENTITY_UNKNOWN = 14] = "Entity: unknown entity"; +XMLP._errs[XMLP.ERR_INFINITELOOP = 15] = "Infininte loop"; +XMLP._errs[XMLP.ERR_DOC_STRUCTURE = 16] = "Document: only comments, processing instructions, or whitespace allowed outside of document element"; +XMLP._errs[XMLP.ERR_ELM_NESTING = 17] = "Element: must be nested correctly"; + + + +XMLP.prototype.continueParsing = function(strXML) { + + if(this.m_chunkTransitionContinuation){ + strXML = this.m_chunkTransitionContinuation + strXML; + } + // Normalize line breaks + strXML = SAXStrings.replace(strXML, null, null, "\r\n", "\n"); + strXML = SAXStrings.replace(strXML, null, null, "\r", "\n"); + + this.m_xml = strXML; + this.m_iP = 0; + this.m_iState = XMLP._STATE_DOCUMENT; + //this.m_stack = new Stack(); + //this._clearAttributes(); + this.m_pause = false; + this.m_preInterruptIState = XMLP._STATE_PROLOG; + this.m_chunkTransitionContinuation = null; + +} + +XMLP.prototype._addAttribute = function(name, value) { + this.m_atts[this.m_atts.length] = new Array(name, value); +} + +XMLP.prototype._checkStructure = function(iEvent) { + if(XMLP._STATE_PROLOG == this.m_iState) { + if((XMLP._TEXT == iEvent) || (XMLP._ENTITY == iEvent)) { + if(SAXStrings.indexOfNonWhitespace(this.getContent(), this.getContentBegin(), this.getContentEnd()) != -1) { + return this._setErr(XMLP.ERR_DOC_STRUCTURE); + } + } + + if((XMLP._ELM_B == iEvent) || (XMLP._ELM_EMP == iEvent)) { + this.m_iState = XMLP._STATE_DOCUMENT; + // Don't return - fall through to next state + } + } + if(XMLP._STATE_DOCUMENT == this.m_iState) { + if((XMLP._ELM_B == iEvent) || (XMLP._ELM_EMP == iEvent)) { + this.m_stack.push(this.getName()); + } + + if((XMLP._ELM_E == iEvent) || (XMLP._ELM_EMP == iEvent)) { + var strTop = this.m_stack.pop(); + if((strTop == null) || (strTop != this.getName())) { + return 
this._setErr(XMLP.ERR_ELM_NESTING); + } + } + + if(this.m_stack.count() == 0) { + this.m_iState = XMLP._STATE_MISC; + return iEvent; + } + } + if(XMLP._STATE_MISC == this.m_iState) { + if((XMLP._ELM_B == iEvent) || (XMLP._ELM_E == iEvent) || (XMLP._ELM_EMP == iEvent) || (XMLP.EVT_DTD == iEvent)) { + return this._setErr(XMLP.ERR_DOC_STRUCTURE); + } + + if((XMLP._TEXT == iEvent) || (XMLP._ENTITY == iEvent)) { + if(SAXStrings.indexOfNonWhitespace(this.getContent(), this.getContentBegin(), this.getContentEnd()) != -1) { + return this._setErr(XMLP.ERR_DOC_STRUCTURE); + } + } + } + + return iEvent; + +} + +XMLP.prototype._clearAttributes = function() { + this.m_atts = new Array(); +} + +XMLP.prototype._findAttributeIndex = function(name) { + for(var i = 0; i < this.m_atts.length; i++) { + if(this.m_atts[i][XMLP._ATT_NAME] == name) { + return i; + } + } + return -1; + +} + +XMLP.prototype.getAttributeCount = function() { + return this.m_atts ? this.m_atts.length : 0; +} + +XMLP.prototype.getAttributeName = function(index) { + return ((index < 0) || (index >= this.m_atts.length)) ? null : this.m_atts[index][XMLP._ATT_NAME]; +} + +XMLP.prototype.getAttributeValue = function(index) { + return ((index < 0) || (index >= this.m_atts.length)) ? null : __unescapeString(this.m_atts[index][XMLP._ATT_VAL]); +} + +XMLP.prototype.getAttributeValueByName = function(name) { + return this.getAttributeValue(this._findAttributeIndex(name)); +} + +XMLP.prototype.getColumnNumber = function() { + return SAXStrings.getColumnNumber(this.m_xml, this.m_iP); +} + +XMLP.prototype.getContent = function() { + return (this.m_cSrc == XMLP._CONT_XML) ? 
this.m_xml : this.m_cAlt; +} + +XMLP.prototype.getContentBegin = function() { + return this.m_cB; +} + +XMLP.prototype.getContentEnd = function() { + return this.m_cE; +} + +XMLP.prototype.getLineNumber = function() { + return SAXStrings.getLineNumber(this.m_xml, this.m_iP); +} + +XMLP.prototype.getName = function() { + return this.m_name; +} + +XMLP.prototype.pause = function(){ + this.m_pause = true; +} + +XMLP.prototype.resume = function(){ + this.m_pause = false; + this.m_iState = this.m_preInterruptIState; +} + +XMLP.prototype.next = function() { + if(!this.m_pause){ + return this._checkStructure(this._parse()); + } + else{ + //save off the current event loop state and set the state to interrupt + this.m_preInterruptIState = this.m_iState; + return XMLP._INTERRUPT; + } +} + +XMLP.prototype._parse = function() { + if(this.m_iP == this.m_xml.length) { + return XMLP._NONE; + } + + if(this.m_iP == this.m_xml.indexOf("= 0; i--){ + var item = this.m_namespaceList[i]; + if(item.prefix === ''){ + return item.uri; + } + } + + //still nothing, lets just return an empty string + return ''; +} + +XMLP.prototype._removeExpiredNamesapces = function (closingtagname) { + //remove the expiring namespaces from the list (you can id them by scopetag) + var keeps = []; + this.m_namespaceList.map(function (item){ + if(item.scopetag !== closingtagname){ + keeps.push(item); + } + }); + + this.m_namespaceList = keeps; + +} + +//////////////////////////////////////////////////////////////////////// + + +XMLP.prototype._parseAttribute = function(iB, iE) { + var iNB, iNE, iEq, iVB, iVE; + var cQuote, strN, strV; + + this.m_cAlt = ""; //resets the value so we don't use an old one by accident (see testAttribute7 in the test suite) + + iNB = SAXStrings.indexOfNonWhitespace(this.m_xml, iB, iE); + if((iNB == -1) ||(iNB >= iE)) { + return iNB; + } + + iEq = this.m_xml.indexOf("=", iNB); + if((iEq == -1) || (iEq > iE)) { + return this._setErr(XMLP.ERR_ATT_VALUES); + } + + iNE = 
SAXStrings.lastIndexOfNonWhitespace(this.m_xml, iNB, iEq); + + iVB = SAXStrings.indexOfNonWhitespace(this.m_xml, iEq + 1, iE); + if((iVB == -1) ||(iVB > iE)) { + return this._setErr(XMLP.ERR_ATT_VALUES); + } + + cQuote = this.m_xml.charAt(iVB); + if(SAXStrings.QUOTES.indexOf(cQuote) == -1) { + return this._setErr(XMLP.ERR_ATT_VALUES); + } + + iVE = this.m_xml.indexOf(cQuote, iVB + 1); + if((iVE == -1) ||(iVE > iE)) { + return this._setErr(XMLP.ERR_ATT_VALUES); + } + + strN = this.m_xml.substring(iNB, iNE + 1); + strV = this.m_xml.substring(iVB + 1, iVE); + + if(strN.indexOf("<") != -1) { + return this._setErr(XMLP.ERR_ATT_LT_NAME); + } + + if(strV.indexOf("<") != -1) { + return this._setErr(XMLP.ERR_ATT_LT_VALUE); + } + + strV = SAXStrings.replace(strV, null, null, "\n", " "); + strV = SAXStrings.replace(strV, null, null, "\t", " "); + iRet = this._replaceEntities(strV); + if(iRet == XMLP._ERROR) { + return iRet; + } + + strV = this.m_cAlt; + + if(this._findAttributeIndex(strN) == -1) { + this._addAttribute(strN, strV); + } + else { + return this._setErr(XMLP.ERR_ATT_DUP); + } + + this.m_iP = iVE + 2; + + return XMLP._ATT; + +} + +XMLP.prototype._parseCDATA = function(iB) { + var iE = this.m_xml.indexOf("]]>", iB); + if (iE == -1) { + //This item never closes, although it could be a malformed document, we will assume that we are mid-chunck, save the string and reurn as interrupted + this.m_chunkTransitionContinuation = this.m_xml.slice(iB-9);//the '-", iB); + if (iE == -1) { + //This item never closes, although it could be a malformed document, we will assume that we are mid-chunck, save the string and reurn as interrupted + this.m_chunkTransitionContinuation = this.m_xml.slice(iB-4);//the '-4' adds the '