node-xml.js

changeset 16
74a24eb1fb44
parent 15
b328899c936a
child 17
701b83c8b687
equal deleted inserted replaced
15:b328899c936a 16:74a24eb1fb44
1 // node-xml
2 // An xml parser for node.js
3 // (C) Rob Righter (@robrighter) 2009 - 2010, Licensed under the MIT-LICENSE
4 // Contributions from David Joham
5
6
7 (function () {
8
// CONSTANTS
// Characters treated as whitespace by the trim() convenience function.
var whitespace = "\n\r\t ";
11
12
/**
 * XMLP is a pull-based parser. The calling application passes an XML string
 * to the constructor, then repeatedly calls .next() to parse the next segment.
 * .next() returns a flag indicating what type of segment was found, and stores
 * data temporarily in a couple of member variables (name, content, array of
 * attributes), which can be accessed through the .get____() methods.
 *
 * Basically, XMLP is the lowest-common-denominator parser - a very simple
 * API which other wrappers can be built against.
 *
 * @param {string} strXML The XML text (or the first chunk of it) to parse.
 */
var XMLP = function(strXML) {
    // Normalize line breaks
    strXML = SAXStrings.replace(strXML, null, null, "\r\n", "\n");
    strXML = SAXStrings.replace(strXML, null, null, "\r", "\n");

    this.m_xml = strXML;                              // buffer being parsed
    this.m_iP = 0;                                    // read position in m_xml
    this.m_iState = XMLP._STATE_PROLOG;               // structure state used by _checkStructure
    this.m_stack = new Stack();                       // names of the currently open elements
    this._clearAttributes();
    this.m_pause = false;
    this.m_preInterruptIState = XMLP._STATE_PROLOG;
    this.m_namespaceList = [];
    this.m_chunkTransitionContinuation = null;        // unparsed tail carried over to the next chunk
};


// CONSTANTS (these must be below the constructor)

// Event codes returned by next()
XMLP._NONE = 0;
XMLP._ELM_B = 1;
XMLP._ELM_E = 2;
XMLP._ELM_EMP = 3;
XMLP._ATT = 4;
XMLP._TEXT = 5;
XMLP._ENTITY = 6;
XMLP._PI = 7;
XMLP._CDATA = 8;
XMLP._COMMENT = 9;
XMLP._DTD = 10;
XMLP._ERROR = 11;
XMLP._INTERRUPT = 12;

// Content source selectors (see _setContent / getContent)
XMLP._CONT_XML = 0;
XMLP._CONT_ALT = 1;

// Indexes into one [name, value] attribute entry
XMLP._ATT_NAME = 0;
XMLP._ATT_VAL = 1;

// Document structure states (see _checkStructure)
XMLP._STATE_PROLOG = 1;
XMLP._STATE_DOCUMENT = 2;
XMLP._STATE_MISC = 3;

// Error codes and their messages; each line defines XMLP.ERR_* and its text.
XMLP._errs = [];
XMLP._errs[XMLP.ERR_CLOSE_PI = 0 ] = "PI: missing closing sequence";
XMLP._errs[XMLP.ERR_CLOSE_DTD = 1 ] = "DTD: missing closing sequence";
XMLP._errs[XMLP.ERR_CLOSE_COMMENT = 2 ] = "Comment: missing closing sequence";
XMLP._errs[XMLP.ERR_CLOSE_CDATA = 3 ] = "CDATA: missing closing sequence";
XMLP._errs[XMLP.ERR_CLOSE_ELM = 4 ] = "Element: missing closing sequence";
XMLP._errs[XMLP.ERR_CLOSE_ENTITY = 5 ] = "Entity: missing closing sequence";
XMLP._errs[XMLP.ERR_PI_TARGET = 6 ] = "PI: target is required";
XMLP._errs[XMLP.ERR_ELM_EMPTY = 7 ] = "Element: cannot be both empty and closing";
XMLP._errs[XMLP.ERR_ELM_NAME = 8 ] = "Element: name must immediately follow \"<\"";
XMLP._errs[XMLP.ERR_ELM_LT_NAME = 9 ] = "Element: \"<\" not allowed in element names";
XMLP._errs[XMLP.ERR_ATT_VALUES = 10] = "Attribute: values are required and must be in quotes";
XMLP._errs[XMLP.ERR_ATT_LT_NAME = 11] = "Element: \"<\" not allowed in attribute names";
XMLP._errs[XMLP.ERR_ATT_LT_VALUE = 12] = "Attribute: \"<\" not allowed in attribute values";
XMLP._errs[XMLP.ERR_ATT_DUP = 13] = "Attribute: duplicate attributes not allowed";
XMLP._errs[XMLP.ERR_ENTITY_UNKNOWN = 14] = "Entity: unknown entity";
XMLP._errs[XMLP.ERR_INFINITELOOP = 15] = "Infinite loop";
XMLP._errs[XMLP.ERR_DOC_STRUCTURE = 16] = "Document: only comments, processing instructions, or whitespace allowed outside of document element";
XMLP._errs[XMLP.ERR_ELM_NESTING = 17] = "Element: must be nested correctly";
85
86
87
/**
 * Loads the next chunk of a document into the parser. Any unparsed tail saved
 * from the previous chunk (m_chunkTransitionContinuation) is prepended first.
 * The element stack and attribute list are deliberately left untouched.
 *
 * NOTE(review): the state is forced to _STATE_DOCUMENT whatever it was before
 * the chunk boundary - confirm this is intended when a chunk ends in the prolog.
 *
 * @param {string} strXML The next chunk of XML text.
 */
XMLP.prototype.continueParsing = function(strXML) {
    var carriedOver = this.m_chunkTransitionContinuation;
    var chunk = carriedOver ? carriedOver + strXML : strXML;

    // Normalize line breaks
    chunk = SAXStrings.replace(chunk, null, null, "\r\n", "\n");
    chunk = SAXStrings.replace(chunk, null, null, "\r", "\n");

    this.m_xml = chunk;
    this.m_iP = 0;
    this.m_iState = XMLP._STATE_DOCUMENT;
    this.m_pause = false;
    this.m_preInterruptIState = XMLP._STATE_PROLOG;
    this.m_chunkTransitionContinuation = null;
};
107
/**
 * Appends one [name, value] pair to the current element's attribute list.
 *
 * @param {string} name  Attribute name.
 * @param {string} value Attribute value.
 */
XMLP.prototype._addAttribute = function(name, value) {
    this.m_atts.push([name, value]);
};
111
/**
 * Validates an event against the document structure state machine
 * (PROLOG -> DOCUMENT -> MISC) and keeps the open-element stack up to date.
 *
 * Fix: the MISC branch compared against XMLP.EVT_DTD, which is never defined,
 * so a DOCTYPE appearing after the document element was not rejected. It now
 * compares against XMLP._DTD.
 *
 * @param {number} iEvent Event code produced by _parse().
 * @returns {number} iEvent unchanged, or XMLP._ERROR when the structure is invalid.
 */
XMLP.prototype._checkStructure = function(iEvent) {
    var isStartTag = (XMLP._ELM_B == iEvent) || (XMLP._ELM_EMP == iEvent);
    var isEndTag = (XMLP._ELM_E == iEvent) || (XMLP._ELM_EMP == iEvent);
    var isCharData = (XMLP._TEXT == iEvent) || (XMLP._ENTITY == iEvent);

    if (XMLP._STATE_PROLOG == this.m_iState) {
        // Only whitespace character data is allowed before the document element.
        if (isCharData) {
            if (SAXStrings.indexOfNonWhitespace(this.getContent(), this.getContentBegin(), this.getContentEnd()) != -1) {
                return this._setErr(XMLP.ERR_DOC_STRUCTURE);
            }
        }

        if (isStartTag) {
            this.m_iState = XMLP._STATE_DOCUMENT;
            // Don't return - fall through to next state
        }
    }

    if (XMLP._STATE_DOCUMENT == this.m_iState) {
        if (isStartTag) {
            this.m_stack.push(this.getName());
        }

        if (isEndTag) {
            var strTop = this.m_stack.pop();
            if ((strTop == null) || (strTop != this.getName())) {
                return this._setErr(XMLP.ERR_ELM_NESTING);
            }
        }

        // No open elements left: everything that follows is trailing "misc".
        if (this.m_stack.count() == 0) {
            this.m_iState = XMLP._STATE_MISC;
            return iEvent;
        }
    }

    if (XMLP._STATE_MISC == this.m_iState) {
        if (isStartTag || (XMLP._ELM_E == iEvent) || (XMLP._DTD == iEvent)) {
            return this._setErr(XMLP.ERR_DOC_STRUCTURE);
        }

        if (isCharData) {
            if (SAXStrings.indexOfNonWhitespace(this.getContent(), this.getContentBegin(), this.getContentEnd()) != -1) {
                return this._setErr(XMLP.ERR_DOC_STRUCTURE);
            }
        }
    }

    return iEvent;
};
157
/** Discards the attributes collected for the previous element. */
XMLP.prototype._clearAttributes = function() {
    this.m_atts = [];
};

/**
 * Finds the position of an attribute in the current attribute list.
 *
 * @param {string} name Attribute name to look for.
 * @returns {number} Index of the first matching attribute, or -1 when absent.
 */
XMLP.prototype._findAttributeIndex = function(name) {
    var atts = this.m_atts;
    var idx = 0;

    while (idx < atts.length) {
        if (atts[idx][XMLP._ATT_NAME] == name) {
            return idx;
        }
        idx++;
    }

    return -1;
};
171
/** @returns {number} Number of attributes on the current element (0 when no list exists). */
XMLP.prototype.getAttributeCount = function() {
    if (!this.m_atts) {
        return 0;
    }
    return this.m_atts.length;
};

/**
 * @param {number} index Position in the attribute list.
 * @returns {?string} The attribute name, or null when index is out of range.
 */
XMLP.prototype.getAttributeName = function(index) {
    if ((index < 0) || (index >= this.m_atts.length)) {
        return null;
    }
    return this.m_atts[index][XMLP._ATT_NAME];
};

/**
 * @param {number} index Position in the attribute list.
 * @returns {?string} The attribute value passed through __unescapeString,
 *     or null when index is out of range.
 */
XMLP.prototype.getAttributeValue = function(index) {
    if ((index < 0) || (index >= this.m_atts.length)) {
        return null;
    }
    return __unescapeString(this.m_atts[index][XMLP._ATT_VAL]);
};

/**
 * @param {string} name Attribute name.
 * @returns {?string} The value of the named attribute, or null when absent.
 */
XMLP.prototype.getAttributeValueByName = function(name) {
    var idx = this._findAttributeIndex(name);
    return this.getAttributeValue(idx);
};
187
/** @returns {number} Column of the current read position, as computed by SAXStrings.getColumnNumber. */
XMLP.prototype.getColumnNumber = function() {
    var pos = this.m_iP;
    return SAXStrings.getColumnNumber(this.m_xml, pos);
};

/**
 * @returns {string} The string holding the current content: the XML buffer
 *     when the content source is _CONT_XML, otherwise the alternate string.
 */
XMLP.prototype.getContent = function() {
    if (this.m_cSrc == XMLP._CONT_XML) {
        return this.m_xml;
    }
    return this.m_cAlt;
};

/** @returns {number} Start offset of the current content inside getContent(). */
XMLP.prototype.getContentBegin = function() {
    return this.m_cB;
};

/** @returns {number} End offset of the current content inside getContent(). */
XMLP.prototype.getContentEnd = function() {
    return this.m_cE;
};

/** @returns {number} Line of the current read position, as computed by SAXStrings.getLineNumber. */
XMLP.prototype.getLineNumber = function() {
    var pos = this.m_iP;
    return SAXStrings.getLineNumber(this.m_xml, pos);
};

/** @returns {string} Name stored by the last element or processing instruction parsed. */
XMLP.prototype.getName = function() {
    return this.m_name;
};
211
/** Requests a pause; the next call to next() returns XMLP._INTERRUPT. */
XMLP.prototype.pause = function() {
    this.m_pause = true;
};

/** Clears the pause flag and restores the state saved when the pause was observed. */
XMLP.prototype.resume = function() {
    this.m_pause = false;
    this.m_iState = this.m_preInterruptIState;
};

/**
 * Parses the next segment of the buffer.
 *
 * @returns {number} The event code of the segment found, or XMLP._INTERRUPT
 *     when the parser is paused.
 */
XMLP.prototype.next = function() {
    if (this.m_pause) {
        // save off the current event loop state and report the interrupt
        this.m_preInterruptIState = this.m_iState;
        return XMLP._INTERRUPT;
    }

    var iEvent = this._parse();
    return this._checkStructure(iEvent);
};
231
/**
 * Looks at the text at the current read position and dispatches to the
 * matching _parseXxx routine.
 *
 * The original tested `m_iP == m_xml.indexOf(token, m_iP)`, which scans the
 * whole remainder of the buffer whenever the token is absent - for every token,
 * on every call. The check is now a fixed-length comparison at the current
 * position, which gives the same answer without the scan.
 *
 * @returns {number} Event code (XMLP._NONE once the buffer is exhausted).
 */
XMLP.prototype._parse = function() {
    var xml = this.m_xml;
    var iP = this.m_iP;

    if (iP == xml.length) {
        return XMLP._NONE;
    }

    // True when `token` occurs exactly at the current read position.
    var startsHere = function(token) {
        return xml.slice(iP, iP + token.length) === token;
    };

    if (startsHere("<?")) {
        return this._parsePI(iP + 2);
    }
    if (startsHere("<!DOCTYPE")) {
        return this._parseDTD(iP + 9);
    }
    if (startsHere("<!--")) {
        return this._parseComment(iP + 4);
    }
    if (startsHere("<![CDATA[")) {
        return this._parseCDATA(iP + 9);
    }
    if (startsHere("<")) {
        return this._parseElement(iP + 1);
    }
    if (startsHere("&")) {
        return this._parseEntity(iP + 1);
    }
    return this._parseText(iP);
};
261
262 ////////// NAMESPACE SUPPORT //////////////////////////////////////////
/**
 * Splits a qualified name such as "ns:tag" into its prefix and local name.
 *
 * Fix: `splits` was assigned without a declaration, creating an implicit
 * global (a ReferenceError in strict mode).
 *
 * @param {string} elementlabel Element name as written in the tag.
 * @returns {{prefix: string, name: string}} prefix is '' when the label has no colon.
 */
XMLP.prototype._parsePrefixAndElementName = function(elementlabel) {
    var splits = elementlabel.split(':', 2);
    var hasPrefix = splits.length !== 1;

    return {
        prefix: hasPrefix ? splits[0] : '',
        name: hasPrefix ? splits[1] : elementlabel
    };
};
267
/**
 * Separates namespace declarations from ordinary attributes. Declarations are
 * recorded in this.m_namespaceList as {prefix, uri, scopetag} objects, where
 * scopetag is the name of the element that declared them.
 *
 * Fixes:
 *  - `that` was an implicit global; the element name is now captured in a local.
 *  - only "xmlns" and "xmlns:<prefix>" are treated as declarations; previously
 *    any attribute whose name merely started with "xmlns" (e.g. "xmlnsfoo")
 *    was swallowed as a namespace.
 *
 * @param {Array} atts List of [name, value] attribute pairs.
 * @returns {Array} [filteredAtts, namespaces] - the attributes without the
 *     declarations, and the new declarations as [prefix, uri] pairs.
 */
XMLP.prototype._parseNamespacesAndAtts = function(atts) {
    var scopetag = this.m_name;
    var newnamespaces = [];
    var filteredatts = [];

    atts.forEach(function(item) {
        var attName = item[0];
        var isDeclaration = (attName === "xmlns") || (attName.slice(0, 6) === "xmlns:");

        if (isDeclaration) {
            newnamespaces.push({
                prefix: attName.slice(6),   // '' for the default namespace
                uri: item[1],
                scopetag: scopetag
            });
        } else {
            filteredatts.push(item);
        }
    });

    this.m_namespaceList = this.m_namespaceList.concat(newnamespaces);

    return [
        filteredatts,
        newnamespaces.map(function(ns) { return [ns.prefix, ns.uri]; })
    ];
};
290
/**
 * Resolves a prefix to a namespace URI using the declarations currently in
 * this.m_namespaceList.
 *
 * Fix: the prefix search used `for...in` over the array in forward order, so
 * the first declaration added won over a later redeclaration of the same
 * prefix. Declarations are appended as elements open, so both searches now run
 * from the end of the list and the most recent declaration wins - matching how
 * the default-namespace search already worked.
 *
 * @param {string} prefix Namespace prefix ('' for none).
 * @returns {string} URI bound to the prefix; when the prefix is empty or has no
 *     binding, the most recent default namespace; '' when nothing applies.
 */
XMLP.prototype._getContextualNamespace = function(prefix) {
    var list = this.m_namespaceList;
    var i;

    if (prefix !== '') {
        for (i = list.length - 1; i >= 0; i--) {
            if (list[i].prefix === prefix) {
                return list[i].uri;
            }
        }
    }

    // no match was found for the prefix, so use the latest non-prefixed namespace
    for (i = list.length - 1; i >= 0; i--) {
        if (list[i].prefix === '') {
            return list[i].uri;
        }
    }

    // still nothing, just return an empty string
    return '';
};
312
/**
 * Drops every namespace declaration whose scopetag equals the closing tag name.
 *
 * @param {string} closingtagname Name of the element being closed.
 */
XMLP.prototype._removeExpiredNamesapces = function(closingtagname) {
    this.m_namespaceList = this.m_namespaceList.filter(function(ns) {
        return ns.scopetag !== closingtagname;
    });
};
325
326 ////////////////////////////////////////////////////////////////////////
327
328
/**
 * Parses one name="value" pair from this.m_xml and appends it to the attribute
 * list. On success the read position is moved past the closing quote.
 *
 * Fix: `iRet` was assigned without a declaration (implicit global, a
 * ReferenceError in strict mode).
 *
 * @param {number} iB Offset at which to start looking for the attribute.
 * @param {number} iE Offset bounding the search.
 * @returns {number} XMLP._ATT on success, XMLP._ERROR on failure, or the result
 *     of the whitespace scan when no attribute name is found before iE.
 */
XMLP.prototype._parseAttribute = function(iB, iE) {
    var iNB, iNE, iEq, iVB, iVE, iRet;
    var cQuote, strN, strV;

    this.m_cAlt = ""; //resets the value so we don't use an old one by accident (see testAttribute7 in the test suite)

    // Start of the attribute name
    iNB = SAXStrings.indexOfNonWhitespace(this.m_xml, iB, iE);
    if ((iNB == -1) || (iNB >= iE)) {
        return iNB;
    }

    iEq = this.m_xml.indexOf("=", iNB);
    if ((iEq == -1) || (iEq > iE)) {
        return this._setErr(XMLP.ERR_ATT_VALUES);
    }

    // End of the attribute name (last non-whitespace before "=")
    iNE = SAXStrings.lastIndexOfNonWhitespace(this.m_xml, iNB, iEq);

    // Opening quote of the value
    iVB = SAXStrings.indexOfNonWhitespace(this.m_xml, iEq + 1, iE);
    if ((iVB == -1) || (iVB > iE)) {
        return this._setErr(XMLP.ERR_ATT_VALUES);
    }

    cQuote = this.m_xml.charAt(iVB);
    if (SAXStrings.QUOTES.indexOf(cQuote) == -1) {
        return this._setErr(XMLP.ERR_ATT_VALUES);
    }

    // Matching closing quote
    iVE = this.m_xml.indexOf(cQuote, iVB + 1);
    if ((iVE == -1) || (iVE > iE)) {
        return this._setErr(XMLP.ERR_ATT_VALUES);
    }

    strN = this.m_xml.substring(iNB, iNE + 1);
    strV = this.m_xml.substring(iVB + 1, iVE);

    if (strN.indexOf("<") != -1) {
        return this._setErr(XMLP.ERR_ATT_LT_NAME);
    }

    if (strV.indexOf("<") != -1) {
        return this._setErr(XMLP.ERR_ATT_LT_VALUE);
    }

    // Line breaks and tabs inside a value become spaces
    strV = SAXStrings.replace(strV, null, null, "\n", " ");
    strV = SAXStrings.replace(strV, null, null, "\t", " ");
    iRet = this._replaceEntities(strV);
    if (iRet == XMLP._ERROR) {
        return iRet;
    }

    // _replaceEntities leaves the expanded value in m_cAlt
    strV = this.m_cAlt;

    if (this._findAttributeIndex(strN) == -1) {
        this._addAttribute(strN, strV);
    }
    else {
        return this._setErr(XMLP.ERR_ATT_DUP);
    }

    this.m_iP = iVE + 2;

    return XMLP._ATT;
};
394
/**
 * Parses a CDATA section.
 *
 * @param {number} iB Offset of the first character after "<![CDATA[".
 * @returns {number} XMLP._CDATA, or XMLP._INTERRUPT when "]]>" is not in the buffer.
 */
XMLP.prototype._parseCDATA = function(iB) {
    var iClose = this.m_xml.indexOf("]]>", iB);

    if (iClose != -1) {
        this._setContent(XMLP._CONT_XML, iB, iClose);
        this.m_iP = iClose + 3;
        return XMLP._CDATA;
    }

    // The section never closes. It could be a malformed document, but we
    // assume we are mid-chunk: keep the unparsed tail and report an interrupt.
    // The '-9' puts the '<![CDATA[' opener back into the saved string.
    this.m_chunkTransitionContinuation = this.m_xml.slice(iB - 9);
    return XMLP._INTERRUPT;
};
411
/**
 * Parses a comment.
 *
 * @param {number} iB Offset of the first character after "<!--".
 * @returns {number} XMLP._COMMENT, or XMLP._INTERRUPT when the closing
 *     sequence is not in the buffer.
 */
XMLP.prototype._parseComment = function(iB) {
    var iClose = this.m_xml.indexOf("-" + "->", iB);

    if (iClose != -1) {
        this._setContent(XMLP._CONT_XML, iB, iClose);
        this.m_iP = iClose + 3;
        return XMLP._COMMENT;
    }

    // The comment never closes. It could be a malformed document, but we
    // assume we are mid-chunk: keep the unparsed tail and report an interrupt.
    // The '-4' puts the '<!--' opener back into the saved string.
    this.m_chunkTransitionContinuation = this.m_xml.slice(iB - 4);
    return XMLP._INTERRUPT;
};
428
/**
 * Skips over a DOCTYPE declaration (its content is not reported).
 *
 * Fixes:
 *  - the closing-sequence search restarted from iB on every iteration, so a
 *    "]]>" inside the internal subset could never be skipped and ended in a
 *    bogus "infinite loop" error; the search now resumes after the rejected
 *    match.
 *  - when the internal subset's "]>" is not in the buffer yet, the declaration
 *    is treated as spanning a chunk boundary (XMLP._INTERRUPT), the same way
 *    every other _parseXxx routine treats a missing terminator.
 *
 * @param {number} iB Offset of the first character after "<!DOCTYPE".
 * @returns {number} XMLP._DTD, or XMLP._INTERRUPT when the declaration is not
 *     complete in the current buffer.
 */
XMLP.prototype._parseDTD = function(iB) {
    var xml = this.m_xml;
    var iE, iInt, iFrom, strClose;

    iE = xml.indexOf(">", iB);
    if (iE == -1) {
        // The declaration never closes. It could be a malformed document, but
        // we assume we are mid-chunk: keep the tail and report an interrupt.
        // The '-9' puts the '<!DOCTYPE' opener back into the saved string.
        this.m_chunkTransitionContinuation = xml.slice(iB - 9);
        return XMLP._INTERRUPT;
    }

    // An internal subset ("[ ... ]") before the first ">" means the
    // declaration really ends at "]>".
    iInt = xml.indexOf("[", iB);
    strClose = ((iInt != -1) && (iInt < iE)) ? "]>" : ">";

    iFrom = iB;
    while (true) {
        iE = xml.indexOf(strClose, iFrom);
        if (iE == -1) {
            this.m_chunkTransitionContinuation = xml.slice(iB - 9);
            return XMLP._INTERRUPT;
        }

        // Make sure it is not the end of a CDATA section
        if (xml.substring(iE - 1, iE + 2) != "]]>") {
            break;
        }
        iFrom = iE + 1;
    }

    this.m_iP = iE + strClose.length;

    return XMLP._DTD;
};
469
/**
 * Parses a start tag, end tag or empty-element tag, including its attributes.
 * The element name is stored in this.m_name.
 *
 * Fixes:
 *  - removed `sys = require('sys')`: the module was never used, the assignment
 *    created an implicit global, and it ran once per element.
 *  - the "name must immediately follow <" check passed the index of the last
 *    non-whitespace character as an exclusive end, so a one-character name
 *    looked empty. The earlier special case only covered "<a>"; "<a/>",
 *    "<a />" and "<a >" were rejected. The check now includes that character.
 *
 * @param {number} iB Offset of the first character after "<".
 * @returns {number} XMLP._ELM_B, XMLP._ELM_E or XMLP._ELM_EMP on success,
 *     XMLP._ERROR on failure, XMLP._INTERRUPT when ">" is not in the buffer.
 */
XMLP.prototype._parseElement = function(iB) {
    var iE, iDE, iNE, iRet;
    var iType, strN, iLast;

    iDE = iE = this.m_xml.indexOf(">", iB);
    if (iE == -1) {
        // The element never closes. It could be a malformed document, but we
        // assume we are mid-chunk: keep the tail and report an interrupt.
        // The '-1' puts the '<' back into the saved string.
        this.m_chunkTransitionContinuation = this.m_xml.slice(iB - 1);
        return XMLP._INTERRUPT;
    }

    if (this.m_xml.charAt(iB) == "/") {
        iType = XMLP._ELM_E;
        iB++;
    } else {
        iType = XMLP._ELM_B;
    }

    if (this.m_xml.charAt(iE - 1) == "/") {
        if (iType == XMLP._ELM_E) {
            return this._setErr(XMLP.ERR_ELM_EMPTY);
        }
        iType = XMLP._ELM_EMP;
        iDE--;
    }

    // iDE becomes the index of the last non-whitespace character of the tag body
    iDE = SAXStrings.lastIndexOfNonWhitespace(this.m_xml, iB, iDE);

    // The name must start right at iB (no leading whitespace, no empty tag).
    if ((iDE < iB) || (SAXStrings.indexOfNonWhitespace(this.m_xml, iB, iDE + 1) != iB)) {
        return this._setErr(XMLP.ERR_ELM_NAME);
    }

    this._clearAttributes();

    iNE = SAXStrings.indexOfWhitespace(this.m_xml, iB, iDE);
    if (iNE == -1) {
        // No whitespace: the whole tag body is the name.
        iNE = iDE + 1;
    }
    else {
        this.m_iP = iNE;
        while (this.m_iP < iDE) {
            // Guard: _parseAttribute must advance m_iP on every pass.
            if (this.m_iP == iLast) return this._setErr(XMLP.ERR_INFINITELOOP);
            iLast = this.m_iP;

            iRet = this._parseAttribute(this.m_iP, iDE);
            if (iRet == XMLP._ERROR) return iRet;
        }
    }

    strN = this.m_xml.substring(iB, iNE);

    if (strN.indexOf("<") != -1) {
        return this._setErr(XMLP.ERR_ELM_LT_NAME);
    }

    this.m_name = strN;
    this.m_iP = iE + 1;

    return iType;
};
546
/**
 * Parses an entity reference found in character data.
 *
 * @param {number} iB Offset of the first character after "&".
 * @returns {number} The result of _replaceEntity, or XMLP._INTERRUPT when the
 *     terminating ";" is not in the buffer.
 */
XMLP.prototype._parseEntity = function(iB) {
    var iSemi = this.m_xml.indexOf(";", iB);

    if (iSemi != -1) {
        this.m_iP = iSemi + 1;
        return this._replaceEntity(this.m_xml, iB, iSemi);
    }

    // The reference never closes. It could be a malformed document, but we
    // assume we are mid-chunk: keep the tail and report an interrupt.
    // The '-1' puts the '&' back into the saved string.
    this.m_chunkTransitionContinuation = this.m_xml.slice(iB - 1);
    return XMLP._INTERRUPT;
};
561
/**
 * Parses a processing instruction. The target is stored in this.m_name and the
 * remaining text becomes the current content.
 *
 * Fix: removed the unreachable `return this._setErr(XMLP.ERR_CLOSE_PI)` that
 * followed `return XMLP._INTERRUPT`, and corrected the comment about what is
 * put back into the saved string.
 *
 * @param {number} iB Offset of the first character after "<?".
 * @returns {number} XMLP._PI on success, XMLP._ERROR when the target is
 *     missing, XMLP._INTERRUPT when "?>" is not in the buffer.
 */
XMLP.prototype._parsePI = function(iB) {
    var iE, iTB, iTE, iCB, iCE;

    iE = this.m_xml.indexOf("?>", iB);
    if (iE == -1) {
        // The instruction never closes. It could be a malformed document, but
        // we assume we are mid-chunk: keep the tail and report an interrupt.
        // The '-2' puts the '<?' opener back into the saved string.
        this.m_chunkTransitionContinuation = this.m_xml.slice(iB - 2);
        return XMLP._INTERRUPT;
    }

    // Target: first run of non-whitespace characters
    iTB = SAXStrings.indexOfNonWhitespace(this.m_xml, iB, iE);
    if (iTB == -1) {
        return this._setErr(XMLP.ERR_PI_TARGET);
    }

    iTE = SAXStrings.indexOfWhitespace(this.m_xml, iTB, iE);
    if (iTE == -1) {
        iTE = iE;
    }

    // Content: everything after the target, trimmed on both sides
    iCB = SAXStrings.indexOfNonWhitespace(this.m_xml, iTE, iE);
    if (iCB == -1) {
        iCB = iE;
    }

    iCE = SAXStrings.lastIndexOfNonWhitespace(this.m_xml, iCB, iE);
    if (iCE == -1) {
        iCE = iE - 1;
    }

    this.m_name = this.m_xml.substring(iTB, iTE);
    this._setContent(XMLP._CONT_XML, iCB, iCE + 1);
    this.m_iP = iE + 2;

    return XMLP._PI;
};
600
/**
 * Consumes character data up to the next "<" or "&" (or the end of the buffer)
 * and makes it the current content.
 *
 * @param {number} iB Offset at which the text starts.
 * @returns {number} XMLP._TEXT.
 */
XMLP.prototype._parseText = function(iB) {
    var xml = this.m_xml;
    var iNextTag = xml.indexOf("<", iB);
    var iNextEntity = xml.indexOf("&", iB);

    var iStop = (iNextTag == -1) ? xml.length : iNextTag;
    if ((iNextEntity != -1) && (iNextEntity <= iStop)) {
        iStop = iNextEntity;
    }

    this._setContent(XMLP._CONT_XML, iB, iStop);
    this.m_iP = iStop;

    return XMLP._TEXT;
};
621
/**
 * Expands every entity reference in a string and stores the result as the
 * current (alternate) content.
 *
 * Fixes:
 *  - the loop condition was `iEB > 0`, so a reference at index 0 (for example
 *    a value starting with "&lt;") was never expanded; it now tests for -1.
 *  - `iRet` was an implicit global.
 *
 * @param {string} strD  Text that may contain entity references.
 * @param {number} [iB]  Start offset; defaults to 0.
 * @param {number} [iE]  End offset; defaults to strD.length.
 * @returns {number|string} XMLP._ENTITY on success, XMLP._ERROR on a malformed
 *     or unterminated reference, or "" when strD is empty (in that case the
 *     current content is not touched).
 */
XMLP.prototype._replaceEntities = function(strD, iB, iE) {
    if (SAXStrings.isEmpty(strD)) return "";
    iB = iB || 0;
    iE = iE || strD.length;

    var iEB, iEE, iRet, strRet = "";

    iEB = strD.indexOf("&", iB);   // start of the next reference
    iEE = iB;                      // end of the text copied so far

    while ((iEB != -1) && (iEB < iE)) {
        // literal text before the reference
        strRet += strD.substring(iEE, iEB);

        iEE = strD.indexOf(";", iEB) + 1;

        if ((iEE == 0) || (iEE > iE)) {
            return this._setErr(XMLP.ERR_CLOSE_ENTITY);
        }

        iRet = this._replaceEntity(strD, iEB + 1, iEE - 1);
        if (iRet == XMLP._ERROR) {
            return iRet;
        }

        // _replaceEntity leaves the replacement text in m_cAlt
        strRet += this.m_cAlt;

        iEB = strD.indexOf("&", iEE);
    }

    // trailing literal text
    if (iEE != iE) {
        strRet += strD.substring(iEE, iE);
    }

    this._setContent(XMLP._CONT_ALT, strRet);

    return XMLP._ENTITY;
};
661
/**
 * Resolves a single entity reference and stores its replacement text as the
 * current (alternate) content.
 *
 * Fixes:
 *  - `strEnt` was an implicit global.
 *  - duplicate `case` labels for lt/gt/amp removed.
 *  - the yen literal was mojibake and nbsp expanded to an empty string; both
 *    are now written as Unicode escapes (U+00A5, U+00A0), as are the other
 *    non-ASCII replacements, so the source encoding no longer matters.
 *  - numeric references are parsed with an explicit radix, hexadecimal
 *    references (&#x41;) are supported, code points above U+FFFF are emitted
 *    as a surrogate pair, and an invalid number yields the same single space
 *    as an unknown entity instead of a NUL character.
 *
 * @param {string} strD String containing the reference.
 * @param {number} [iB] Offset of the first character after "&"; defaults to 0.
 * @param {number} [iE] Offset of the terminating ";"; defaults to strD.length.
 * @returns {number} XMLP._ENTITY, or -1 when strD is empty.
 */
XMLP.prototype._replaceEntity = function(strD, iB, iE) {
    if (SAXStrings.isEmpty(strD)) return -1;
    iB = iB || 0;
    iE = iE || strD.length;

    var NAMED_ENTITIES = {
        "amp": "&",
        "lt": "<",
        "gt": ">",
        "apos": "'",
        "quot": "\"",
        "nbsp": "\u00A0",
        "cent": "\u00A2",
        "pound": "\u00A3",
        "yen": "\u00A5",
        "euro": "\u20AC",
        "sect": "\u00A7",
        "copy": "\u00A9",
        "reg": "\u00AE"
    };

    var strName = strD.substring(iB, iE);
    var strEnt = ' ';   // unknown or malformed references expand to one space
    var cRadix, iCode;

    if (Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, strName)) {
        strEnt = NAMED_ENTITIES[strName];
    }
    else if (strD.charAt(iB) == "#") {
        cRadix = strD.charAt(iB + 1);
        if ((cRadix == "x") || (cRadix == "X")) {
            iCode = parseInt(strD.substring(iB + 2, iE), 16);
        } else {
            iCode = parseInt(strD.substring(iB + 1, iE), 10);
        }

        if (!isNaN(iCode) && (iCode >= 0) && (iCode <= 0x10FFFF)) {
            if (iCode > 0xFFFF) {
                // outside the BMP: encode as a UTF-16 surrogate pair
                iCode -= 0x10000;
                strEnt = String.fromCharCode(0xD800 + (iCode >> 10), 0xDC00 + (iCode & 0x3FF));
            } else {
                strEnt = String.fromCharCode(iCode);
            }
        }
    }

    this._setContent(XMLP._CONT_ALT, strEnt);

    return XMLP._ENTITY;
};
697
/**
 * Records where the current content lives.
 *
 * Called as _setContent(XMLP._CONT_XML, begin, end) to reference a range of the
 * XML buffer, or as _setContent(XMLP._CONT_ALT, text) to use a separate string.
 *
 * @param {number} iSrc XMLP._CONT_XML or XMLP._CONT_ALT.
 */
XMLP.prototype._setContent = function(iSrc) {
    var fromBuffer = (XMLP._CONT_XML == iSrc);

    if (fromBuffer) {
        this.m_cAlt = null;
        this.m_cB = arguments[1];
        this.m_cE = arguments[2];
    } else {
        var text = arguments[1];
        this.m_cAlt = text;
        this.m_cB = 0;
        this.m_cE = text.length;
    }

    this.m_cSrc = iSrc;
};
713
/**
 * Makes the message for an error code the current (alternate) content.
 *
 * @param {number} iErr One of the XMLP.ERR_* codes.
 * @returns {number} XMLP._ERROR.
 */
XMLP.prototype._setErr = function(iErr) {
    // Same effect as assigning m_cAlt / m_cB / m_cE / m_cSrc by hand.
    this._setContent(XMLP._CONT_ALT, XMLP._errs[iErr]);
    return XMLP._ERROR;
}; // end function _setErr
725
726
/**
 * SaxParser wraps an XMLP instance and provides an event-based interface for
 * parsing. This is the object users interact with.
 *
 * @param {function(Object)} eventhandlerfactory Called once with a registrar
 *     object; each registrar method (onStartDocument, onEndDocument,
 *     onStartElementNS, onEndElementNS, onCharacters, onCdata, onComment,
 *     onWarning, onError) takes the callback to store for that event.
 */
var SaxParser = function(eventhandlerfactory) {

    // Receives the callbacks registered through the registrar below.
    var eventhandler = new function() {

    }

    var thehandler = function() {};
    var registrarNames = [
        "onStartDocument",
        "onEndDocument",
        "onStartElementNS",
        "onEndElementNS",
        "onCharacters",
        "onCdata",
        "onComment",
        "onWarning",
        "onError"
    ];

    // One registrar method per event: it stores the callback under the same name.
    registrarNames.forEach(function(eventName) {
        thehandler.prototype[eventName] = function(funct) {
            eventhandler[eventName] = funct;
        };
    });

    eventhandlerfactory(new thehandler());

    // The same object acts as document, error and lexical handler.
    this.m_hndDoc = eventhandler;
    this.m_hndErr = eventhandler;
    this.m_hndLex = eventhandler;
    this.m_interrupted = false;
};


// CONSTANTS (these must be below the constructor)
// Event identifiers understood by _fireEvent
SaxParser.DOC_B = 1;
SaxParser.DOC_E = 2;
SaxParser.ELM_B = 3;
SaxParser.ELM_E = 4;
SaxParser.CHARS = 5;
SaxParser.PI = 6;
SaxParser.CD_B = 7;
SaxParser.CD_E = 8;
SaxParser.CMNT = 9;
SaxParser.DTD_B = 10;
SaxParser.DTD_E = 11;
788
/**
 * Reads a file and parses its contents. This function only works in the
 * node.js environment.
 *
 * Fix: `fs.cat(filename).addCallback(...)` belongs to a promise-style API that
 * was removed from Node.js; the file is now read with fs.readFile, and a read
 * failure is reported through the error handler instead of being lost.
 *
 * @param {string} filename Path of the XML file to parse.
 */
SaxParser.prototype.parseFile = function(filename) {
    var fs = require('fs');
    var that = this;

    fs.readFile(filename, 'utf8', function(err, content) {
        if (err) {
            that._fireError(err.message);
            return;
        }
        that.parseString(content);
    });
};
796
797
/**
 * Parses a string of XML asynchronously (the work is deferred with
 * setTimeout). The first call creates the underlying XMLP parser and fires the
 * start-document event; later calls feed the string to the existing parser as
 * the next chunk.
 *
 * Fixes: removed the unused `sys = require('sys')` (an implicit global), and
 * replaced the `startnew` flag - whose value was the opposite of its name -
 * with `isFirstChunk`. The redundant test of m_bErr immediately after it was
 * reset has been dropped.
 *
 * @param {string} strD XML text, or the next chunk of it.
 */
SaxParser.prototype.parseString = function(strD) {
    var that = this;

    setTimeout(function() {
        var isFirstChunk = !that.m_parser;

        if (isFirstChunk) {
            that.m_parser = new XMLP(strD);
        }
        else {
            that.m_parser.continueParsing(strD);
        }

        that.m_bErr = false;

        if (isFirstChunk) {
            that._fireEvent(SaxParser.DOC_B);
        }
        that._parseLoop();
        // No end-of-document event after an error or while waiting for more input.
        if (!that.m_bErr && !that.m_interrupted) {
            that._fireEvent(SaxParser.DOC_E);
        }

        that.m_xml = null;
        that.m_iP = 0;
        that.m_interrupted = false;
    }, 0);
};
833
/** Asks the underlying parser to pause. */
SaxParser.prototype.pause = function() {
    var parser = this.m_parser;
    parser.pause();
};

/** Resets the underlying parser's state and restarts the parse loop asynchronously. */
SaxParser.prototype.resume = function() {
    var self = this;

    // reset the state
    self.m_parser.resume();

    // now start up the parse loop
    setTimeout(function() {
        self._parseLoop();
    }, 0);
};
847
/**
 * Replaces the handler that receives document events.
 * @param {Object} hnd Handler object.
 */
SaxParser.prototype.setDocumentHandler = function(hnd) {
    this.m_hndDoc = hnd;
};

/**
 * Replaces the handler that receives errors.
 * @param {Object} hnd Handler object.
 */
SaxParser.prototype.setErrorHandler = function(hnd) {
    this.m_hndErr = hnd;
};

/**
 * Replaces the handler that receives lexical events (CDATA, comments).
 * @param {Object} hnd Handler object.
 */
SaxParser.prototype.setLexicalHandler = function(hnd) {
    this.m_hndLex = hnd;
};
859
/** @returns {number} Column reported by the underlying parser. */
SaxParser.prototype.getColumnNumber = function() {
    var parser = this.m_parser;
    return parser.getColumnNumber();
};

/** @returns {number} Line reported by the underlying parser. */
SaxParser.prototype.getLineNumber = function() {
    var parser = this.m_parser;
    return parser.getLineNumber();
};

/** @returns {string} Message of the last error passed to _fireError. */
SaxParser.prototype.getMessage = function() {
    return this.m_strErrMsg;
};

/** @returns {null} Always null. */
SaxParser.prototype.getPublicId = function() {
    return null;
};

/** @returns {null} Always null. */
SaxParser.prototype.getSystemId = function() {
    return null;
};

/** @returns {number} Number of attributes on the current element. */
SaxParser.prototype.getLength = function() {
    var parser = this.m_parser;
    return parser.getAttributeCount();
};

/**
 * @param {number} index Attribute position.
 * @returns {?string} Attribute name as returned by the underlying parser.
 */
SaxParser.prototype.getName = function(index) {
    var parser = this.m_parser;
    return parser.getAttributeName(index);
};

/**
 * @param {number} index Attribute position.
 * @returns {?string} Attribute value as returned by the underlying parser.
 */
SaxParser.prototype.getValue = function(index) {
    var parser = this.m_parser;
    return parser.getAttributeValue(index);
};

/**
 * @param {string} name Attribute name.
 * @returns {?string} Attribute value as returned by the underlying parser.
 */
SaxParser.prototype.getValueByName = function(name) {
    var parser = this.m_parser;
    return parser.getAttributeValueByName(name);
};
895
/**
 * Records an error, marks the parse as failed and notifies the error handler
 * when it has an onError callback.
 *
 * @param {string} strMsg Error message.
 */
SaxParser.prototype._fireError = function(strMsg) {
    this.m_strErrMsg = strMsg;
    this.m_bErr = true;

    var hnd = this.m_hndErr;
    if (!hnd || !hnd.onError) {
        return;
    }
    hnd.onError(this.m_strErrMsg);
};
904
905
906
/**
 * Dispatches an event to the callback registered for it. Every argument after
 * iEvt is passed on to the callback. Nothing happens once an error has been
 * recorded, for an event without a mapping, or when no callback is registered.
 *
 * Change: the chain of `if (n == iLen)` branches, which silently dropped calls
 * carrying more than five arguments, is replaced by Function.prototype.apply.
 *
 * @param {number} iEvt One of the SaxParser event constants.
 */
SaxParser.prototype._fireEvent = function(iEvt) {
    var hnd, func;

    if (this.m_bErr) return;

    switch (iEvt) {
        case SaxParser.DOC_B: func = "onStartDocument";       hnd = this.m_hndDoc; break;
        case SaxParser.DOC_E: func = "onEndDocument";         hnd = this.m_hndDoc; break;
        case SaxParser.ELM_B: func = "onStartElementNS";      hnd = this.m_hndDoc; break;
        case SaxParser.ELM_E: func = "onEndElementNS";        hnd = this.m_hndDoc; break;
        case SaxParser.CHARS: func = "onCharacters";          hnd = this.m_hndDoc; break;
        case SaxParser.PI:    func = "processingInstruction"; hnd = this.m_hndDoc; break;
        case SaxParser.CD_B:  func = "onCdata";               hnd = this.m_hndLex; break;
        case SaxParser.CD_E:  func = "onEndCDATA";            hnd = this.m_hndLex; break;
        case SaxParser.CMNT:  func = "onComment";             hnd = this.m_hndLex; break;
        default: return;   // no callback is mapped to this event
    }

    if (hnd && hnd[func]) {
        hnd[func].apply(hnd, Array.prototype.slice.call(arguments, 1));
    }
};
963
964
965
966
/**
 * Pulls events from the underlying XMLP parser and turns them into SAX
 * callbacks until the buffer is exhausted, the parser is interrupted or an
 * error occurs.
 *
 * Fixes: `theatts`, `nameobject` and `theattsandnamespace` were implicit
 * globals (a ReferenceError in strict mode, and shared between parser
 * instances otherwise). The duplicated start/empty element handling is merged.
 *
 * @param {XMLP} [parser] Ignored; the instance's own parser is always used.
 */
SaxParser.prototype._parseLoop = function(parser) {
    var iEvent, isStart, isEnd;
    var nameobject, nsInfo, prefix, uri;

    parser = this.m_parser;
    while (!this.m_bErr) {
        iEvent = parser.next();

        // An empty-element tag is reported as a start immediately followed by an end.
        isStart = (iEvent == XMLP._ELM_B) || (iEvent == XMLP._ELM_EMP);
        isEnd = (iEvent == XMLP._ELM_E) || (iEvent == XMLP._ELM_EMP);

        if (isStart || isEnd) {
            nameobject = parser._parsePrefixAndElementName(parser.getName());
            if (isStart) {
                // Registers the element's namespace declarations before resolving its own URI.
                nsInfo = parser._parseNamespacesAndAtts(parser.m_atts);
            }
            uri = parser._getContextualNamespace(nameobject.prefix);

            // Callbacks receive null rather than '' for "no prefix" / "no namespace".
            prefix = (nameobject.prefix === '') ? null : nameobject.prefix;
            uri = (uri === '') ? null : uri;

            if (isStart) {
                this._fireEvent(SaxParser.ELM_B, nameobject.name, nsInfo[0], prefix, uri, nsInfo[1]);
            }
            if (isEnd) {
                parser._removeExpiredNamesapces(parser.getName());
                this._fireEvent(SaxParser.ELM_E, nameobject.name, prefix, uri);
            }
        }
        else if (iEvent == XMLP._TEXT) {
            this._fireEvent(SaxParser.CHARS, parser.getContent().slice(parser.getContentBegin(), parser.getContentEnd()));
        }
        else if (iEvent == XMLP._ENTITY) {
            this._fireEvent(SaxParser.CHARS, parser.getContent(), parser.getContentBegin(), parser.getContentEnd() - parser.getContentBegin());
        }
        else if (iEvent == XMLP._PI) {
            this._fireEvent(SaxParser.PI, parser.getName(), parser.getContent().substring(parser.getContentBegin(), parser.getContentEnd()));
        }
        else if (iEvent == XMLP._CDATA) {
            this._fireEvent(SaxParser.CD_B, parser.getContent().slice(parser.getContentBegin(), parser.getContentEnd()));
        }
        else if (iEvent == XMLP._COMMENT) {
            this._fireEvent(SaxParser.CMNT, parser.getContent().slice(parser.getContentBegin(), parser.getContentEnd()));
        }
        else if (iEvent == XMLP._DTD) {
            // DOCTYPE declarations are skipped without an event.
        }
        else if (iEvent == XMLP._ERROR) {
            this._fireError(parser.getContent());
        }
        else if (iEvent == XMLP._INTERRUPT) {
            this.m_interrupted = true;
            return; // just return and wait to be restarted
        }
        else if (iEvent == XMLP._NONE) {
            return;
        }
    }
};
1032
/**
 * SAXStrings: a holder for string manipulation functions used by the parser.
 * All functions are static; the constructor does nothing.
 *
 * Fix (all functions taking offsets): defaults were applied with
 * `x = x || default`, so an explicit 0 was replaced by the default. As a
 * result getLineNumber/getColumnNumber at offset 0 described the END of the
 * string, and an empty range ending at 0 was scanned as the whole string.
 * Defaults are now applied only when the argument is null or undefined.
 */
var SAXStrings = function() {
    //This is the constructor of the SAXStrings object
};


// CONSTANTS (these must be below the constructor)
SAXStrings.WHITESPACE = " \t\n\r";
SAXStrings.QUOTES = "\"'";


/**
 * @param {string} strD Text.
 * @param {number} [iP] Offset into strD; defaults to strD.length.
 * @returns {number} Column of offset iP, or -1 when strD is empty.
 *     NOTE(review): as in the original, the result is the plain offset on the
 *     first line but (offset - index of the preceding line break) on later
 *     lines, i.e. 0-based vs 1-based - kept unchanged for compatibility.
 */
SAXStrings.getColumnNumber = function(strD, iP) {
    if (SAXStrings.isEmpty(strD)) {
        return -1;
    }
    if (iP == null) {
        iP = strD.length;
    }

    var iLastBreak = strD.substring(0, iP).lastIndexOf("\n");

    return (iLastBreak == -1) ? iP : iP - iLastBreak;
};

/**
 * @param {string} strD Text.
 * @param {number} [iP] Offset into strD; defaults to strD.length.
 * @returns {number} 1-based line number of offset iP, or -1 when strD is empty.
 */
SAXStrings.getLineNumber = function(strD, iP) {
    if (SAXStrings.isEmpty(strD)) {
        return -1;
    }
    if (iP == null) {
        iP = strD.length;
    }

    return strD.substring(0, iP).split("\n").length;
};

/**
 * @param {string} strD Text.
 * @param {number} [iB] First offset examined; defaults to 0.
 * @param {number} [iE] Offset at which to stop (exclusive); defaults to strD.length.
 * @returns {number} Offset of the first non-whitespace character in [iB, iE), or -1.
 */
SAXStrings.indexOfNonWhitespace = function(strD, iB, iE) {
    if (SAXStrings.isEmpty(strD)) {
        return -1;
    }
    if (iB == null) { iB = 0; }
    if (iE == null) { iE = strD.length; }

    for (var i = iB; i < iE; i++) {
        if (SAXStrings.WHITESPACE.indexOf(strD.charAt(i)) == -1) {
            return i;
        }
    }
    return -1;
};

/**
 * @param {string} strD Text.
 * @param {number} [iB] First offset examined; defaults to 0.
 * @param {number} [iE] Offset at which to stop (exclusive); defaults to strD.length.
 * @returns {number} Offset of the first whitespace character in [iB, iE), or -1.
 */
SAXStrings.indexOfWhitespace = function(strD, iB, iE) {
    if (SAXStrings.isEmpty(strD)) {
        return -1;
    }
    if (iB == null) { iB = 0; }
    if (iE == null) { iE = strD.length; }

    for (var i = iB; i < iE; i++) {
        if (SAXStrings.WHITESPACE.indexOf(strD.charAt(i)) != -1) {
            return i;
        }
    }
    return -1;
};

/**
 * @param {?string} strD Value to test.
 * @returns {boolean} True when strD is null, undefined or has length 0.
 */
SAXStrings.isEmpty = function(strD) {
    return (strD == null) || (strD.length == 0);
};

/**
 * @param {string} strD Text.
 * @param {number} [iB] Lowest offset examined; defaults to 0.
 * @param {number} [iE] Offset just past the last one examined; defaults to strD.length.
 * @returns {number} Offset of the last non-whitespace character in [iB, iE), or -1.
 */
SAXStrings.lastIndexOfNonWhitespace = function(strD, iB, iE) {
    if (SAXStrings.isEmpty(strD)) {
        return -1;
    }
    if (iB == null) { iB = 0; }
    if (iE == null) { iE = strD.length; }

    for (var i = iE - 1; i >= iB; i--) {
        if (SAXStrings.WHITESPACE.indexOf(strD.charAt(i)) == -1) {
            return i;
        }
    }
    return -1;
};

/**
 * Replaces every occurrence of strF with strR inside strD[iB, iE).
 *
 * @param {string} strD Text.
 * @param {?number} iB  Start offset; defaults to 0.
 * @param {?number} iE  End offset (exclusive); defaults to strD.length.
 * @param {string} strF Text to find.
 * @param {string} strR Replacement text.
 * @returns {string} The substring with replacements applied; "" when strD is empty.
 */
SAXStrings.replace = function(strD, iB, iE, strF, strR) {
    if (SAXStrings.isEmpty(strD)) {
        return "";
    }
    if (iB == null) { iB = 0; }
    if (iE == null) { iE = strD.length; }

    return strD.substring(iB, iE).split(strF).join(strR);
};
1127
/** Minimal LIFO stack backed by the array in m_arr. */
var Stack = function() {
    this.m_arr = [];
};

/** Removes every item. */
Stack.prototype.clear = function() {
    this.m_arr = [];
};

/** @returns {number} Number of items on the stack. */
Stack.prototype.count = function() {
    return this.m_arr.length;
};

/** Releases the backing array; the stack must not be used afterwards. */
Stack.prototype.destroy = function() {
    this.m_arr = null;
};

/** @returns {*} The top item without removing it, or null when the stack is empty. */
Stack.prototype.peek = function() {
    var size = this.m_arr.length;
    return (size == 0) ? null : this.m_arr[size - 1];
};

/** @returns {*} The top item after removing it, or null when the stack is empty. */
Stack.prototype.pop = function() {
    return (this.m_arr.length == 0) ? null : this.m_arr.pop();
};

/** @param {*} o Item to place on top of the stack. */
Stack.prototype.push = function(o) {
    this.m_arr.push(o);
};
1167
1168 // CONVENIENCE FUNCTIONS
/**
 * @param {?string} str Value to test.
 * @returns {boolean} True when str is null, undefined or has length 0.
 */
function isEmpty(str) {
    if (str == null) {
        return true;
    }
    return str.length == 0;
}
1172
1173
/**
 * Strips whitespace (the characters in `whitespace`) from one or both ends of
 * a string using a single substring call.
 *
 * @param {string} trimString  Text to trim.
 * @param {boolean} [leftTrim]  Trim the start; treated as true when null/undefined.
 * @param {boolean} [rightTrim] Trim the end; treated as true when null/undefined.
 * @returns {string} The trimmed text; "" when trimString is empty or all whitespace.
 */
function trim(trimString, leftTrim, rightTrim) {
    if (isEmpty(trimString)) {
        return "";
    }

    if (leftTrim == null) {
        leftTrim = true;
    }
    if (rightTrim == null) {
        rightTrim = true;
    }

    var len = trimString.length;
    var start = 0;   // first character kept
    var end = len;   // one past the last character kept

    if (leftTrim == true) {
        while ((start < len) && (whitespace.indexOf(trimString.charAt(start)) != -1)) {
            start++;
        }
    }

    // Never walk back past `start`, so an all-whitespace string yields "".
    if (rightTrim == true) {
        while ((end - 1 >= start) && (whitespace.indexOf(trimString.charAt(end - 1)) != -1)) {
            end--;
        }
    }

    return trimString.substring(start, end);
}
1210
/**
 * Replaces the five XML special characters with their predefined entities.
 *
 * @param {string} str Raw text.
 * @returns {string} Text with & < > " ' escaped.
 */
function __escapeString(str) {
    // "&" must come first so the ampersands of the other entities are not re-escaped.
    var rules = [
        [/&/g, "&amp;"],
        [/</g, "&lt;"],
        [/>/g, "&gt;"],
        [/"/g, "&quot;"],
        [/'/g, "&apos;"]
    ];

    for (var i = 0; i < rules.length; i++) {
        str = str.replace(rules[i][0], rules[i][1]);
    }

    return str;
}
1227
/**
 * Replaces the five predefined XML entities with the characters they stand for.
 *
 * Fix: "&amp;" was decoded first, so text such as "&amp;lt;" was decoded twice
 * and came out as "<" instead of "&lt;". "&amp;" is now decoded last.
 *
 * @param {string} str Escaped text.
 * @returns {string} Text with &lt; &gt; &quot; &apos; &amp; decoded once.
 */
function __unescapeString(str) {

    var escLtRegEx = /&lt;/g;
    var escGtRegEx = /&gt;/g;
    var quotRegEx = /&quot;/g;
    var aposRegEx = /&apos;/g;
    var escAmpRegEx = /&amp;/g;

    str = str.replace(escLtRegEx, "<");
    str = str.replace(escGtRegEx, ">");
    str = str.replace(quotRegEx, "\"");
    str = str.replace(aposRegEx, "'");
    // Last, so an ampersand produced here cannot start another entity.
    str = str.replace(escAmpRegEx, "&");

    return str;
}
1244
// Public API. `process.mixin` was removed from Node.js, so the parser is
// attached to `exports` directly.
exports.SaxParser = SaxParser;
1248
1249 })()

mercurial