# HG changeset patch # User zuwiki # Date 1267079169 28800 # Node ID 74a24eb1fb440a7ed4d156fd8d3ee92309d99a06 # Parent b328899c936aa93428383ea35a56672c25139e73 Making node-xml into a submodule. Gosh I hope I know what I'm doing. diff -r b328899c936a -r 74a24eb1fb44 .gitmodules --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.gitmodules Wed Feb 24 22:26:09 2010 -0800 @@ -0,0 +1,3 @@ +[submodule "node-xml"] + path = node-xml + url = git://github.com/robrighter/node-xml.git diff -r b328899c936a -r 74a24eb1fb44 node-xml.js --- a/node-xml.js Wed Feb 24 22:04:06 2010 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1249 +0,0 @@ -// node-xml -// An xml parser for node.js -// (C) Rob Righter (@robrighter) 2009 - 2010, Licensed under the MIT-LICENSE -// Contributions from David Joham - - -(function () { - -// CONSTANTS -var whitespace = "\n\r\t "; - - -//XMLP is a pull-based parser. The calling application passes in a XML string -//to the constructor, then repeatedly calls .next() to parse the next segment. -//.next() returns a flag indicating what type of segment was found, and stores -//data temporarily in couple member variables (name, content, array of -//attributes), which can be accessed by several .get____() methods. -// -//Basically, XMLP is the lowest common denominator parser - an very simple -//API which other wrappers can be built against. - - -var XMLP = function(strXML) { - // Normalize line breaks - strXML = SAXStrings.replace(strXML, null, null, "\r\n", "\n"); - strXML = SAXStrings.replace(strXML, null, null, "\r", "\n"); - - this.m_xml = strXML; - this.m_iP = 0; - this.m_iState = XMLP._STATE_PROLOG; - this.m_stack = new Stack(); - this._clearAttributes(); - this.m_pause = false; - this.m_preInterruptIState = XMLP._STATE_PROLOG; - this.m_namespaceList = new Array(); - this.m_chunkTransitionContinuation = null; - -} - - -// CONSTANTS (these must be below the constructor) -XMLP._NONE = 0; -XMLP._ELM_B = 1; -XMLP._ELM_E = 2; -XMLP._ELM_EMP = 3; -XMLP._ATT = 4; -XMLP._TEXT = 5; -XMLP._ENTITY = 6; -XMLP._PI = 7; -XMLP._CDATA = 8; -XMLP._COMMENT = 9; -XMLP._DTD = 10; -XMLP._ERROR = 11; -XMLP._INTERRUPT = 12; - -XMLP._CONT_XML = 0; -XMLP._CONT_ALT = 1; - -XMLP._ATT_NAME = 0; -XMLP._ATT_VAL = 1; - -XMLP._STATE_PROLOG = 1; -XMLP._STATE_DOCUMENT = 2; -XMLP._STATE_MISC = 3; - -XMLP._errs = new Array(); -XMLP._errs[XMLP.ERR_CLOSE_PI = 0 ] = "PI: missing closing sequence"; -XMLP._errs[XMLP.ERR_CLOSE_DTD = 1 ] = "DTD: missing closing sequence"; -XMLP._errs[XMLP.ERR_CLOSE_COMMENT = 2 ] = "Comment: missing closing sequence"; -XMLP._errs[XMLP.ERR_CLOSE_CDATA = 3 ] = "CDATA: missing closing sequence"; -XMLP._errs[XMLP.ERR_CLOSE_ELM = 4 ] = "Element: missing closing sequence"; -XMLP._errs[XMLP.ERR_CLOSE_ENTITY = 5 ] = "Entity: missing closing sequence"; -XMLP._errs[XMLP.ERR_PI_TARGET = 6 ] = "PI: target is required"; -XMLP._errs[XMLP.ERR_ELM_EMPTY = 7 ] = "Element: cannot be both empty and closing"; -XMLP._errs[XMLP.ERR_ELM_NAME = 8 ] = "Element: name must immediatly follow \"<\""; -XMLP._errs[XMLP.ERR_ELM_LT_NAME = 9 ] = "Element: \"<\" not allowed in element names"; -XMLP._errs[XMLP.ERR_ATT_VALUES = 10] = "Attribute: values are required and must be in quotes"; -XMLP._errs[XMLP.ERR_ATT_LT_NAME = 11] = "Element: \"<\" not allowed in attribute names"; -XMLP._errs[XMLP.ERR_ATT_LT_VALUE = 12] = "Attribute: \"<\" not allowed in attribute values"; -XMLP._errs[XMLP.ERR_ATT_DUP = 13] = "Attribute: duplicate attributes not allowed"; -XMLP._errs[XMLP.ERR_ENTITY_UNKNOWN = 14] = "Entity: unknown entity"; -XMLP._errs[XMLP.ERR_INFINITELOOP = 15] = "Infininte loop"; -XMLP._errs[XMLP.ERR_DOC_STRUCTURE = 16] = "Document: only comments, processing instructions, or whitespace allowed outside of document element"; -XMLP._errs[XMLP.ERR_ELM_NESTING = 17] = "Element: must be nested correctly"; - - - -XMLP.prototype.continueParsing = function(strXML) { - - if(this.m_chunkTransitionContinuation){ - strXML = this.m_chunkTransitionContinuation + strXML; - } - // Normalize line breaks - strXML = SAXStrings.replace(strXML, null, null, "\r\n", "\n"); - strXML = SAXStrings.replace(strXML, null, null, "\r", "\n"); - - this.m_xml = strXML; - this.m_iP = 0; - this.m_iState = XMLP._STATE_DOCUMENT; - //this.m_stack = new Stack(); - //this._clearAttributes(); - this.m_pause = false; - this.m_preInterruptIState = XMLP._STATE_PROLOG; - this.m_chunkTransitionContinuation = null; - -} - -XMLP.prototype._addAttribute = function(name, value) { - this.m_atts[this.m_atts.length] = new Array(name, value); -} - -XMLP.prototype._checkStructure = function(iEvent) { - if(XMLP._STATE_PROLOG == this.m_iState) { - if((XMLP._TEXT == iEvent) || (XMLP._ENTITY == iEvent)) { - if(SAXStrings.indexOfNonWhitespace(this.getContent(), this.getContentBegin(), this.getContentEnd()) != -1) { - return this._setErr(XMLP.ERR_DOC_STRUCTURE); - } - } - - if((XMLP._ELM_B == iEvent) || (XMLP._ELM_EMP == iEvent)) { - this.m_iState = XMLP._STATE_DOCUMENT; - // Don't return - fall through to next state - } - } - if(XMLP._STATE_DOCUMENT == this.m_iState) { - if((XMLP._ELM_B == iEvent) || (XMLP._ELM_EMP == iEvent)) { - this.m_stack.push(this.getName()); - } - - if((XMLP._ELM_E == iEvent) || (XMLP._ELM_EMP == iEvent)) { - var strTop = this.m_stack.pop(); - if((strTop == null) || (strTop != this.getName())) { - return this._setErr(XMLP.ERR_ELM_NESTING); - } - } - - if(this.m_stack.count() == 0) { - this.m_iState = XMLP._STATE_MISC; - return iEvent; - } - } - if(XMLP._STATE_MISC == this.m_iState) { - if((XMLP._ELM_B == iEvent) || (XMLP._ELM_E == iEvent) || (XMLP._ELM_EMP == iEvent) || (XMLP.EVT_DTD == iEvent)) { - return this._setErr(XMLP.ERR_DOC_STRUCTURE); - } - - if((XMLP._TEXT == iEvent) || (XMLP._ENTITY == iEvent)) { - if(SAXStrings.indexOfNonWhitespace(this.getContent(), this.getContentBegin(), this.getContentEnd()) != -1) { - return this._setErr(XMLP.ERR_DOC_STRUCTURE); - } - } - } - - return iEvent; - -} - -XMLP.prototype._clearAttributes = function() { - this.m_atts = new Array(); -} - -XMLP.prototype._findAttributeIndex = function(name) { - for(var i = 0; i < this.m_atts.length; i++) { - if(this.m_atts[i][XMLP._ATT_NAME] == name) { - return i; - } - } - return -1; - -} - -XMLP.prototype.getAttributeCount = function() { - return this.m_atts ? this.m_atts.length : 0; -} - -XMLP.prototype.getAttributeName = function(index) { - return ((index < 0) || (index >= this.m_atts.length)) ? null : this.m_atts[index][XMLP._ATT_NAME]; -} - -XMLP.prototype.getAttributeValue = function(index) { - return ((index < 0) || (index >= this.m_atts.length)) ? null : __unescapeString(this.m_atts[index][XMLP._ATT_VAL]); -} - -XMLP.prototype.getAttributeValueByName = function(name) { - return this.getAttributeValue(this._findAttributeIndex(name)); -} - -XMLP.prototype.getColumnNumber = function() { - return SAXStrings.getColumnNumber(this.m_xml, this.m_iP); -} - -XMLP.prototype.getContent = function() { - return (this.m_cSrc == XMLP._CONT_XML) ? this.m_xml : this.m_cAlt; -} - -XMLP.prototype.getContentBegin = function() { - return this.m_cB; -} - -XMLP.prototype.getContentEnd = function() { - return this.m_cE; -} - -XMLP.prototype.getLineNumber = function() { - return SAXStrings.getLineNumber(this.m_xml, this.m_iP); -} - -XMLP.prototype.getName = function() { - return this.m_name; -} - -XMLP.prototype.pause = function(){ - this.m_pause = true; -} - -XMLP.prototype.resume = function(){ - this.m_pause = false; - this.m_iState = this.m_preInterruptIState; -} - -XMLP.prototype.next = function() { - if(!this.m_pause){ - return this._checkStructure(this._parse()); - } - else{ - //save off the current event loop state and set the state to interrupt - this.m_preInterruptIState = this.m_iState; - return XMLP._INTERRUPT; - } -} - -XMLP.prototype._parse = function() { - if(this.m_iP == this.m_xml.length) { - return XMLP._NONE; - } - - if(this.m_iP == this.m_xml.indexOf("= 0; i--){ - var item = this.m_namespaceList[i]; - if(item.prefix === ''){ - return item.uri; - } - } - - //still nothing, lets just return an empty string - return ''; -} - -XMLP.prototype._removeExpiredNamesapces = function (closingtagname) { - //remove the expiring namespaces from the list (you can id them by scopetag) - var keeps = []; - this.m_namespaceList.map(function (item){ - if(item.scopetag !== closingtagname){ - keeps.push(item); - } - }); - - this.m_namespaceList = keeps; - -} - -//////////////////////////////////////////////////////////////////////// - - -XMLP.prototype._parseAttribute = function(iB, iE) { - var iNB, iNE, iEq, iVB, iVE; - var cQuote, strN, strV; - - this.m_cAlt = ""; //resets the value so we don't use an old one by accident (see testAttribute7 in the test suite) - - iNB = SAXStrings.indexOfNonWhitespace(this.m_xml, iB, iE); - if((iNB == -1) ||(iNB >= iE)) { - return iNB; - } - - iEq = this.m_xml.indexOf("=", iNB); - if((iEq == -1) || (iEq > iE)) { - return this._setErr(XMLP.ERR_ATT_VALUES); - } - - iNE = SAXStrings.lastIndexOfNonWhitespace(this.m_xml, iNB, iEq); - - iVB = SAXStrings.indexOfNonWhitespace(this.m_xml, iEq + 1, iE); - if((iVB == -1) ||(iVB > iE)) { - return this._setErr(XMLP.ERR_ATT_VALUES); - } - - cQuote = this.m_xml.charAt(iVB); - if(SAXStrings.QUOTES.indexOf(cQuote) == -1) { - return this._setErr(XMLP.ERR_ATT_VALUES); - } - - iVE = this.m_xml.indexOf(cQuote, iVB + 1); - if((iVE == -1) ||(iVE > iE)) { - return this._setErr(XMLP.ERR_ATT_VALUES); - } - - strN = this.m_xml.substring(iNB, iNE + 1); - strV = this.m_xml.substring(iVB + 1, iVE); - - if(strN.indexOf("<") != -1) { - return this._setErr(XMLP.ERR_ATT_LT_NAME); - } - - if(strV.indexOf("<") != -1) { - return this._setErr(XMLP.ERR_ATT_LT_VALUE); - } - - strV = SAXStrings.replace(strV, null, null, "\n", " "); - strV = SAXStrings.replace(strV, null, null, "\t", " "); - iRet = this._replaceEntities(strV); - if(iRet == XMLP._ERROR) { - return iRet; - } - - strV = this.m_cAlt; - - if(this._findAttributeIndex(strN) == -1) { - this._addAttribute(strN, strV); - } - else { - return this._setErr(XMLP.ERR_ATT_DUP); - } - - this.m_iP = iVE + 2; - - return XMLP._ATT; - -} - -XMLP.prototype._parseCDATA = function(iB) { - var iE = this.m_xml.indexOf("]]>", iB); - if (iE == -1) { - //This item never closes, although it could be a malformed document, we will assume that we are mid-chunck, save the string and reurn as interrupted - this.m_chunkTransitionContinuation = this.m_xml.slice(iB-9);//the '-", iB); - if (iE == -1) { - //This item never closes, although it could be a malformed document, we will assume that we are mid-chunck, save the string and reurn as interrupted - this.m_chunkTransitionContinuation = this.m_xml.slice(iB-4);//the '-4' adds the '