1    // Copyright (C) 2003 Adam Megacz <adam@xwt.org> all rights reserved.
2    //
3    // You may modify, copy, and redistribute this code under the terms of
4    // the GNU Library Public License version 2.1, with the exception of
5    // the portion of clause 6a after the semicolon (aka the "obnoxious
6    // relink clause")
7    
8    package org.xwt.util;
9    
10   import java.io.Reader;
11   import java.io.IOException;
12   import java.io.EOFException;
13   
14   /**
15    * An Event-Driven, Non-Validating XML Parser with Namespace support.
16    *
17    * A subclass can implement the abstract functions for receiving details
18    * about an xml file as it is parsed. To initiate a parse, use the parse()
19    * function. 
20    *
21    * <h3>Implementation Notes</h3>
22    * <p>As the parser traverses into an element, it adds it to the linked list
23    * called <tt>elements</tt>. However, <tt>elements</tt> has been pre-filled
24    * with instances of the Element inner class. So in the vast majority of
25    * cases, the pointer current is moved along one, and the values for the
26    * new element are filled into the current object.</p>
27    *
28    * <p>This parser supports all the unicode ranges required by the XML
29    * Specification. However, it is optimised for well-formed ASCII documents.
30    * Documents containing unicode Names and Attributes will take much longer
31    * to process, and invalid documents (badly formed Names or invalid attributes)
32    * will be run through a test on every single unicode character range before 
33    * being declared invalid.</p> 
34    *
35    * <ul>
36    *  <li>Each time the buffer offset <tt>off</tt> is moved, the length
37    *   <tt>len</tt> must be decreased.</li>
38    *  <li>Each time the buffer length is decreased, it must be checked to make
39    *   sure it is >0.</li>
40    *  <li><i>error</i> is defined as a Validity Constraint Violation and
41    *   is recoverable</li>
42    *  <li><i>fatal error</i> is defined as a Well-formedness Constraint
43    *   Violation and is not recoverable</li>
44    * </ul> 
45    *
46    * @author David Crawshaw 
47    * @see <a href="http://w3.org/TR/REC-xml">XML Specification</a> 
48    * @see <a href="http://w3.org/TR/REC-xml-names">XML Namespaces</a>
49    */
50   public abstract class XML
51   {
52       /////////////////////////////////////////////////////////////////////////////////////////////
53       // XML Parser
54       /////////////////////////////////////////////////////////////////////////////////////////////
55   
56       public static final int BUFFER_SIZE = 255;
57   
58       /** static pool of XML.Element instances shared by all XML Parsers. */
59       private static final Queue elements = new Queue(30);
60   
61       private static final char[] single_amp  = new char[] { '&'  };
62       private static final char[] single_apos = new char[] { '\'' };
63       private static final char[] single_gt   = new char[] { '>'  };
64       private static final char[] single_lt   = new char[] { '<'  };
65       private static final char[] single_quot = new char[] { '"'  };
66   
67       private int line;
68       private int col;
69   
70       private Reader in;
71       private char[] buf;
72       private int    off;
73       private int    len;
74   
75       private Element current;
76   
77       // used in readEntity() to process a single character without creating a new array
78       private char[] singlechar = new char[1];
79   
80   
81       public XML() { this(BUFFER_SIZE); }
82   
83       public XML(int bSize) {
84           buf = new char[bSize];
85   
86           current = (Element)elements.remove(false);
87           if (current == null) current = new Element();
88       }
89   
90   
91       /** Returns the line number at the beginning of the last process call. */
92       public int getLine() { return line; }
93   
94       /** Returns the column number at the beginning of the last process call. */
95       public int getCol()  { return col; }
96   
97       /**
98        * Parse given input and call the abstract event functions.
99        *
100       * Careful with threading, as this function is not synchronized.
101       */ 
102      public final void parse(Reader reader) throws IOException, Exn {
103          in  = reader;
104          off = len = 0;
105          line = col = 1;
106  
107          clear(); // clean up possible mid-way linked-list element
108  
109          try {
110              // process the stream
111              while (true) {
112                  if (!buffer(1)) {
113                      if (current.qName == null) break;
114                      throw new Exn("reached eof without closing <"+current.qName+"> element", Exn.WFC, getLine(), getCol());
115                  }
116  
117                  if (buf[off] == '<') readTag();
118                  readChars(current.qName != null);
119              }
120          } finally { clear(); } // clean up elements
121      }
122  
123      /** remove any leftover elements from the linked list and queue them */
124      private final void clear() {
125          for (Element last = current; current.parent != null; ) {
126              current = current.parent;
127              last.clear();
128              elements.append(last);
129          }
130          current.clear();
131      }
132  
133      /** reads in a tag. expects <tt>buf[off] == '<'</tt> */
134      private final void readTag() throws IOException, Exn {
135          // Start Tag    '<' Name (S Attribute)* S? '>'
136          boolean starttag  = true;
137  
138          // End Tag     '</' Name S? '>'
139          boolean endtag    = false;
140  
141          // if (starttag & endtag) then: EmptyElemTag '<' Name (S Attribute)* S? '/>'
142  
143          // Position in the name of the ':' namespace prefix
144          int prefix = -1;
145  
146          int namelen   = 0;
147  
148          col++; off++; len--;
149          if (!buffer(1)) throw new EOFException("Unexpected EOF processing element tag");
150  
151          // work out what we can from the beginning of the tag
152          char s = buf[off]; 
153          if (s == '!') {
154              // definitions here don't necessarily conform to xml spec (as DTDs not yet implemented)
155              col++; off++; len--; 
156              if (!buffer(4)) throw new EOFException("Unexpected EOF processing <! element");
157  
158              boolean bad = false;
159              switch (buf[off]) {
160                  case '-':
161                      if (buf[off+1] != '-') { bad = true; break; }
162                      col += 2; off += 2; len -= 2;
163  
164                      // Comment        '<!--'      ((Char - '-') | ('-' (Char - '-')))* '-->'
165                      readChars(false, "-->", false); 
166                      col += 3; off += 3; len -= 3;
167                      break;
168  
169                  // we don't care about the following definitions
170  
171                  case 'A':
172                      if (!buffer(7)
173                              || buf[off+1] != 'T' || buf[off+2] != 'T' || buf[off+3] != 'L'
174                              || buf[off+4] != 'I' || buf[off+5] != 'S' || buf[off+6] != 'T') {
175                          bad = true; break;
176                      } 
177                      col += 7; off += 7; len -= 7; 
178  
179                      // ATTLIST        '<!ATTLIST'   (Char* - '>') '>'
180                      readChars(false, ">", true); 
181                      col++; off++; len--;
182                      break;
183                  case 'D':
184                      if (!buffer(7)
185                              || buf[off+1] != 'O' || buf[off+2] != 'C' || buf[off+3] != 'T'
186                              || buf[off+4] != 'Y' || buf[off+5] != 'P' || buf[off+6] != 'E') {
187                          bad = true; break;
188                      }
189                      col += 7; off += 7; len -= 7;
190  
191                      // DTD            '<!DOCTYPE'   (Char* - '>') '>'
192                      readChars(false, ">", true); 
193                      col++; off++; len--;
194                      break; 
195                  case 'E':
196                      if (!buffer(7)) {
197                          bad = true;
198                      } else if (buf[off+1] == 'L' && buf[off+2] == 'E' && buf[off+3] == 'M'
199                              && buf[off+4] == 'E' && buf[off+5] == 'N' && buf[off+6] == 'T') {
200                          // ELEMENT        '<!ELEMENT'   (Char* - '>') '>'
201                          readChars(false, ">", true); 
202                          col++; off++; len--;
203  
204                      } else if (buf[off+1] == 'N' && buf[off+2] == 'T' && buf[off+3] == 'I'
205                              && buf[off+4] == 'T' && buf[off+5] == 'Y') {
206                          // ENTITY         '<!ENTITY'    (Char* - '>') '>'
207                          readChars(false, ">", true); 
208                          col++; off++; len--;
209  
210                      } else {
211                          bad = true;
212                      }
213                      break;
214  
215                  case 'N':
216                      if (!buffer(8)
217                              || buf[off+1] != 'O' || buf[off+2] != 'T' || buf[off+3] != 'A' || buf[off+4] != 'T'
218                              || buf[off+5] != 'I' || buf[off+6] != 'O' || buf[off+7] != 'N') {
219                          bad = true; break;
220                      }
221                      col += 8; off += 8; len -= 8;
222                      // NOTATION       '<!NOTATION'  (Char* - '>') '>'
223                      readChars(false, ">", true); 
224                      col++; off++; len--;
225  
226                      break;
227                  default: bad = true;
228              }
229  
230              if (bad) throw new Exn("element tag start character is invalid", Exn.MARKUP, getLine(), getCol());
231  
232          } else if (s == '?') {
233              // PI (Ignored)   '<?'  (Char* - (Char* '?>' Char*))  '?>'
234              col++; off++; len--;
235              readChars(false, "?>", true);
236              if (!buffer(2)) throw new EOFException("Unexpected EOF at end of Processing Instruction");
237              col += 2; off += 2; len -= 2;
238  
239          } else if (s == '[') {
240              if (!buffer(7)
241                      || buf[off+1] != 'C' || buf[off+2] != 'D' || buf[off+3] != 'A'
242                      || buf[off+4] != 'T' || buf[off+5] != 'A' || buf[off+6] != '[') {
243                  col++; off--; len++; 
244                  // Conditional    '<![' (Char* - (Char* ']]>' Char*)) ']]>'
245                  readChars(false, "]]>", false); 
246              } else {
247                  col += 7; off += 7; len -=7;
248                  // CDATA          '<![CDATA[' (Char* - (Char* ']]>' Char*))        ']]>'
249                  readChars(true, "]]>", false);
250              } 
251              col += 3; off += 3; len -= 3;
252          } else {
253              if (s == '/') {
254                  // End Tag        '</' Name S? '>'
255                  starttag = false; 
256                  endtag = true;
257  
258                  col++; off++; len--;
259                  if (!buffer(1)) throw new EOFException("Unexpected EOF processing end tag");
260                  s = buf[off];
261              }
262  
263              if (!Name(s)) throw new Exn("invalid starting character in element name", Exn.MARKUP, getLine(), getCol()); 
264  
265              // find the element name (defined in XML Spec: section 2.3)
266              for (namelen = 0; ; namelen++) {
267                  if (!buffer(namelen+1)) throw new EOFException("Unexpected EOF in element tag name");
268  
269                  s = buf[off+namelen];
270  
271                  if (S(s) || s == '>') {
272                      break;
273                  } else if (s == '/') {
274                      endtag = true;
275                      break;
276                  } else if (s == ':' && namelen > 0 && prefix < 1) {
277                      // we have a definition of the prefix range available
278                      prefix = namelen; 
279                  } else if (!NameChar(s)) {
280                      throw new Exn("element name contains invalid character", Exn.MARKUP, getLine(), getCol());
281                  }
282              }
283  
284              // process name (based on calculated region)
285              if (namelen < 1) throw new Exn("element name is null", Exn.MARKUP, getLine(), getCol()); 
286  
287              // we have marked out the name region, so turn it into a string and move on
288              String qName = new String(buf, off, namelen);
289  
290              col += namelen; off += namelen; len -= namelen;
291  
292              if (starttag) {
293                  // create the in-memory element representation of this beast
294                  // if current.qName == null then this is the root element we're dealing with
295                  if (current.qName != null) {
296                      Element next = (Element)elements.remove(false);
297                      if (next == null) next = new Element();
298                      //next.clear(); // TODO: remove as elements now checked as they're added to the queue
299                      next.parent = current;
300                      current = next;
301                  }
302  
303                  current.qName = qName;
304  
305                  if (prefix > 0) {
306                      current.prefix = current.qName.substring(0, prefix);
307                      current.localName = current.qName.substring(prefix+1);
308                  } else {
309                      current.prefix = null;
310                      current.localName = current.qName;
311                  }
312  
313                  // process attributes
314                  readWhitespace(); 
315                  if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 1");
316                  while (buf[off] != '/' && buf[off] != '>') {
317                      readAttribute();
318                      if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 2");
319                      readWhitespace();
320                  }
321  
322                  // work out the uri of this element
323                  current.uri = current.getUri(current.getPrefix()); 
324                  if (current.getUri().equals("") && current.getPrefix() != null)
325                      current.addError(new Exn("undefined prefix '"+current.getPrefix()+"'", Exn.NC, getLine(), getCol()));
326  
327              } else {
328                  // this is an end-of-element tag
329                  if (!qName.equals(current.getQName())) throw new Exn(
330                      "end tag </"+qName+"> does not line up with start tag <"+current.getQName()+">", Exn.WFC, getLine(), getCol()
331                  );
332              }
333  
334              // deal with whitespace
335              readWhitespace(); 
336  
337              // process tag close
338              if (!buffer(1)) throw new EOFException("Unexpected EOF before end of tag"); 
339              if (buf[off] == '/') {
340                  endtag = true;
341                  off++; len--; col++;
342              }
343              if (!buffer(1)) throw new EOFException("Unexpected EOF before end of endtag"); 
344              if (buf[off] == '>') {
345                  off++; len--; col++;
346              } else {
347                  throw new Exn("missing '>' character from element '"+qName+"'", Exn.MARKUP, getLine(), getCol());
348              }
349  
350              // send element signals
351              if (starttag) startElement(current);
352              if (endtag) {
353                  endElement(current);
354  
355                  // we just closed an element, so remove it from the element 'stack'
356                  if (current.getParent() == null) {
357                      // we just finished the root element
358                      current.clear(); 
359                  } else {
360                      Element last = current;
361                      current = current.parent;
362                      last.clear();
363                      elements.append(last);
364                  }
365              }
366          }
367      }
368  
369      /** reads in an attribute of an element. expects Name(buf[off]) */
370      private final void readAttribute() throws IOException, Exn {
371          int ref = 0;
372          int prefix = 0;
373          String n, v, p, u; // attribute name, value, prefix and uri respectively
374          n = v = p = u = null;
375          char s;
376  
377          // find the element name (defined in XML Spec: section 2.3)
378          for (ref= 0; ; ref++) {
379              if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in read attribute loop part 1");
380  
381              s = buf[off+ref];
382  
383              if (s == '=' || S(s)) {
384                  break;
385              } else if (s == ':' && ref > 0 && prefix < 1) {
386                  // we have a definition of the prefix range available
387                  prefix = ref+1;
388              } else if (!NameChar(s)) {
389                  throw new Exn("attribute name contains invalid characters", Exn.MARKUP, getLine(), getCol());
390              }
391          }
392  
393          // determine prefix and key name
394          if (prefix > 0) {
395              p = new String(buf, off, prefix-1);
396              col += prefix; off += prefix; len -= prefix; ref -= prefix;
397          }
398          n = new String(buf, off, ref);
399          col += ref; off += ref; len -= ref;
400  
401          // find name/value divider ('=')
402          readWhitespace();
403          if (!buffer(1)) throw new EOFException("Unexpected EOF before attribute '=' divider");
404          if (buf[off] != '=') throw new Exn("attribute name not followed by '=' sign", Exn.MARKUP, getLine(), getCol());
405  
406          col++; off++; len--;
407          readWhitespace();
408  
409          if (!buffer(1)) throw new EOFException("Unexpected EOF after attribute '=' divider");
410  
411          char wrap;
412          if (buf[off] == '\'' || buf[off] == '"') {
413              wrap = buf[off];
414          } else {
415              throw new Exn("attribute '"+n+"' must have attribute wrapped in ' or \"", Exn.MARKUP, getLine(), getCol());
416          }
417          col++; off++; len--;
418  
419          // find the attribute value
420          attval: for (ref = 0; ; ref++) {
421              if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in attribute value");
422  
423              if (buf[off+ref] == wrap) {
424                  break attval;
425              } else if (buf[off+ref] == '<') {
426                  throw new Exn("attribute value for '"+n+"' must not contain '<'", Exn.WFC, getLine(), getCol());
427              } 
428          }
429  
430          v = new String(buf, off, ref);
431          col += ref; off += ref; len -= ref;
432  
433          // remove end wrapper character
434          col++; off++; len--;
435  
436          // process attribute
437          if (p != null && p.equals("xmlns")) {
438              current.addUri(n, v);
439          } else if (n.equals("xmlns")) {
440              if (current.getUri().equals("")) {
441                  current.addUri("", v);
442              } else {
443                  current.addError(new Exn("default namespace definition repeated", Exn.NC, getLine(), getCol()));
444              }
445          } else {
446              // find attribute uri
447              u = current.getUri(p); 
448              if (p != null && u.equals("")) current.addError(new Exn("undefined attribute prefix '"+p+"'", Exn.NC, getLine(), getCol()));
449  
450              // check to see if attribute is a repeat
451              for (int i=0; current.len > i; i++) if (n.equals(current.getAttrKey(i)) && u.equals(current.getAttrUri(i))) throw new Exn(
452                  "attribute name '"+n+"' may not appear more than once in the same element tag", Exn.WFC, getLine(), getCol()
453              );
454  
455              current.addAttr(n, v, u); 
456          }
457      }
458  
459      /** reads an entity and processes out its value. expects buf[off] == '&' */
460      private final void readEntity() throws IOException, Exn {
461          off++; len--;
462          if (!buffer(2)) throw new EOFException("Unexpected EOF reading entity");
463  
464          boolean unknown = false;
465          switch (buf[off]) {
466              case '#':
467                  off++; len--;
468  
469                  int radix;
470                  if (buf[off] == 'x') { off++; len--; radix = 16; } else { radix = 10; }
471                  int c = 0;
472  
473                  // read in each char, then shift total value to the left and add the extra
474                  // style of loop is slightly different from all the others, as this should run a limited number of times 
475                  findchar: while (true) {
476                      if (!buffer(1)) throw new EOFException("Unexpected EOF reading entity");
477                      int d = Character.digit(buf[off], radix);
478                      if (d == -1) {
479                          if (buf[off] != ';') throw new Exn("illegal characters in entity reference", Exn.WFC, getLine(), getCol());
480                          off++; len--; col++;
481                          break findchar;
482                      }
483                      c = (c * radix) + d;
484  
485                      off++; len--;
486                  }
487  
488                  singlechar[0] = Character.forDigit(c, radix);
489                  characters(singlechar, 0, 1);
490                  break;
491  
492              case 'a':
493                  if (buffer(4) && buf[off+1] == 'm' && buf[off+2] == 'p' && buf[off+3] == ';') {
494                      characters(single_amp, 0, 1); // &
495                      off += 4; len -= 4; col++;
496                  } else if (buffer(5) && buf[off+1] == 'p' && buf[off+2] == 'o' && buf[off+3] == 's' && buf[off+4] == ';') {
497                      characters(single_apos, 0, 1); // '
498                      off += 5; len -= 5; col++;
499                  } else {
500                      unknown = true;
501                  }
502                  break;
503  
504              case 'g':
505                  if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
506                      characters(single_gt, 0, 1); // >
507                      off += 3; len -= 3; col++;
508                  } else {
509                      unknown = true;
510                  }
511                  break;
512  
513              case 'l':
514                  if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
515                      characters(single_lt, 0, 1); // <
516                      off += 3; len -= 3; col++;
517                  } else {
518                      unknown = true;
519                  }
520                  break;
521  
522              case 'q':
523                  if (buffer(5) && buf[off+1] == 'u' && buf[off+2] == 'o' && buf[off+3] == 't' && buf[off+4] == ';') {
524                      characters(single_quot, 0, 1); // "
525                      off += 5; len -= 5; col++;
526                  } else {
527                      unknown = true;
528                  }
529                  break;
530  
531              // TODO: check a parser-level Hash of defined entities
532          }
533  
534          if (unknown) throw new Exn("unknown entity (<!ENTITY> not supported)", Exn.WFC, getLine(), getCol());
535      }
536  
537      /** reads until the passed string is encountered. */
538      private final void readChars(boolean p, String match, boolean entities) throws IOException, Exn {
539          int ref;
540          char[] end = match.toCharArray();
541  
542          for (boolean more = true; more;) {
543              if (!buffer(1)) return;
544  
545              buf: for (ref = 0; ref < len; ref++) {
546                  switch (buf[off+ref]) {
547                      case '\r': // windows or macos9 newline
548                          // normalise and process
549                          buf[off+ref] = '\n'; ref++;
550                          if (p) characters(buf, off, ref);
551                          off += ref; len -= ref; ref = -1;
552                          line++; col = 1;
553  
554                          // windows double-char newline; skip the next char
555                          if (!buffer(1)) return;
556                          if (buf[off] == '\n') { off++; len--; }
557                          break;
558  
559                      case '\n': // unix newline
560                          ref++;
561                          if (p) characters(buf, off, ref);
562                          off += ref; len -= ref; ref = -1;
563                          line++; col = 1;
564                          break;
565  
566                      case '&':  // entity
567                          if (entities) {
568                              if (p) {
569                                  if (ref > 0) characters(buf, off, ref);
570                                  off += ref; len -= ref; ref = -1;
571                                  readEntity();
572                              }
573                              break;
574                          }
575  
576                      default:
577                          if (!buffer(ref+end.length)) continue buf;
578                          for (int i=0; end.length > i; i++) if (end[i] != buf[off+ref+i]) continue buf;
579                          more = false;
580                          break buf;
581                  }
582              }
583  
584              if (p && ref > 0) characters(buf, off, ref);
585              off += ref; len -= ref; col += ref;
586          }
587      }
588  
589      /**
590       * reads until a <tt><</tt> symbol is encountered
591       * @param p If true call the characters(char[],int,int) funciton for the processed characters 
592       */
593      private final void readChars(boolean p) throws IOException, Exn {
594          int ref;
595  
596          for (boolean more = true; more;) {
597              if (!buffer(1)) return;
598  
599              buf: for (ref = 0; ref < len; ref++) {
600                  switch (buf[off+ref]) {
601                      case '\r': // windows or macos9 newline
602                          // normalise and process
603                          buf[off+ref] = '\n'; ref++;
604                          if (p) characters(buf, off, ref);
605                          off += ref; len -= ref; ref = -1;
606                          line++; col = 1;
607  
608                          // windows double-char newline; skip the next char
609                          if (!buffer(1)) return;
610                          if (buf[off] == '\n') { off++; len--; }
611                          break;
612  
613                      case '\n': // unix newline
614                          ref++;
615                          if (p) characters(buf, off, ref);
616                          off += ref; len -= ref; ref = -1;
617                          line++; col = 1;
618                          break;
619  
620                      case '&':  // entity
621                          if (p) {
622                              if (ref > 0) characters(buf, off, ref);
623                              off += ref; len -= ref; ref = -1;
624                              readEntity();
625                          }
626                          break;
627  
628                      case '<':  // end of chars section
629                          more = false;
630                          break buf;
631                  }
632              }
633  
634              if (p && ref > 0) characters(buf, off, ref);
635              off += ref; len -= ref; col += ref;
636          }
637      }
638  
639      /** reads until a non-whitespace symbol is encountered */
640      private final void readWhitespace() throws IOException, Exn {
641          int ref;
642  
643          for (boolean more = true; more;) {
644              if (!buffer(1)) return;
645  
646              buf: for (ref = 0; ref < len; ref++) {
647                  switch (buf[off+ref]) {
648                      case '\r': // windows or macos9 newline
649                          // normalise and process
650                          buf[off+ref] = '\n';
651                          whitespace(buf, off, ++ref);
652                          off += ref; len -= ref; ref = -1;
653                          line++; col = 1;
654  
655                          // windows double-char newline; skip the next char
656                          if (!buffer(1)) return;
657                          if (buf[off] == '\n') { off++; len--; }
658                          break;
659  
660                      case '\n': // unix newline
661                          whitespace(buf, off, ++ref);
662                          off += ref; len -= ref; ref = -1;
663                          line++; col = 1;
664                          break;
665  
666                      case ' ':  // space
667                      case '\t': // tab
668                          break;
669  
670                      default:   // end of whitespace
671                          more = false;
672                          break buf;
673                  }
674              }
675  
676              off += ref; len -= ref; col += ref;
677          }
678      }
679  
680      /**
681       * attempt to fill the buffer.
682       *
683       * @param min Minimum number of characters to read (even if we have to block to do it).
684       * @return return false if min can't be reached.
685       */
686      private final boolean buffer(int min) throws IOException {
687          if (len > min) return true;
688  
689          if (buf.length - (off+len) >= min) {
690              // plenty of space left on the end of the buffer
691          } else if (off >= min) {
692              // moving offset data to start will leave enough free space on the end
693              System.arraycopy(buf, off, buf, 0, len); 
694              off = 0;
695          } else {
696              // buffer size will have to be increased
697              char[] newbuf = new char[buf.length * 2];
698              System.arraycopy(buf, off, newbuf, 0, len);
699              buf = newbuf;
700              off = 0;
701          }
702  
703          while (min > len) {
704              int newlen = in.read(buf, off+len, buf.length-(off+len));
705              if (newlen < 0) return false; 
706              len += newlen;
707          }
708  
709          return true;
710      }
711  
712  
713      /////////////////////////////////////////////////////////////////////////////////////////////
714      // Abstract SAX-Like Interface
715      /////////////////////////////////////////////////////////////////////////////////////////////
716  
    /**
     * Called when the start of an element is processed.
     *
     * <p>readTag() makes this call once the element's name and attributes
     * have been read and the closing <tt>&gt;</tt> of the tag has been
     * consumed. For an empty-element tag it is followed immediately by
     * endElement() for the same element.</p>
     *
     * <p><b>DO NOT</b> store a reference to the Element object, as
     * they are reused by XML Parser.</p>
     *
     * @param e the element that has just been opened
     * @throws Exn declared so that implementations can report a problem with the element
     */ 
    public abstract void startElement(Element e) throws Exn;
724  
    /**
     * Represents up to a line of character data. 
     *
     * <p>Newlines are all normalised to the Unix \n as per the XML Spec,
     * and a newline will only appear as the last character in the passed
     * array segment.</p>
     *
     * <p>XML.getLine() and XML.getCol() report the position at the
     * beginning of this character segment, which can be processed in a
     * line-by-line fashion due to the above newline restriction.</p>
     *
     * <p>The array is either the parser's own read buffer or a small
     * array holding the replacement for an entity reference.</p>
     *
     * @param ch     array holding the characters
     * @param start  index in <tt>ch</tt> of the first character of the segment
     * @param length number of characters in the segment
     * @throws Exn declared so that implementations can report a problem with the data
     * @throws IOException declared so that implementations can perform I/O
     */
    public abstract void characters(char[] ch, int start, int length) throws Exn, IOException;
737  
738      /**
          * Represents up to a line of ignorable whitespace.
          *
          * @param ch     array holding the whitespace segment
          * @param start  presumably the index in ch of the segment's first character (confirm against the caller)
          * @param length presumably the number of characters in the segment (confirm against the caller)
          * @throws Exn may be thrown by the implementation
          * @throws IOException may be thrown by the implementation
          */
739      public abstract void whitespace(char[] ch, int start, int length) throws Exn, IOException;
740  
741      /**
          * Represents the end of an Element.
          *
          * <p>Element objects are reused by the parser, so do not keep a
          * reference to <tt>e</tt> after this method returns.</p>
          *
          * @param e the element that has ended
          * @throws Exn may be thrown by the implementation
          * @throws IOException may be thrown by the implementation
          */
742      public abstract void endElement(Element e) throws Exn, IOException;
743  
744  
745      /////////////////////////////////////////////////////////////////////////////////////////////
746      // Inner Classes for Parser Support
747      /////////////////////////////////////////////////////////////////////////////////////////////
748  
749      /**
750       * Represents an element in an XML document. Stores a reference to its
751       * parent, forming a one-way linked list.
752       *
753       * Element objects are reused, so client code making use of them must
754       * drop their references after the specific element process function
755       * has returned.
756       */
757      public static final class Element {
758  
759          private static final int DEFAULT_ATTR_SIZE = 10; // initial length of the keys/vals/uris arrays
760  
              // Enclosing element, returned by getParent(); getUri(String) walks this chain.
761          protected Element parent = null;
762  
              // Name parts of the element. qName, localName and prefix are exposed through
              // their getters; uri is not read by the accessors in this part of the class
              // (getUri() resolves the prefix through urimap instead).
763          protected String uri = null;
764          protected String localName = null;
765          protected String qName = null;
766          protected String prefix = null;
767  
              // Namespace prefix -> URI mappings added to this element via addUri();
              // getUri(String) looks up the key "" when asked for a null prefix.
768          protected Hash urimap = new Hash(3,3);
769  
              // Attribute storage as parallel arrays indexed by attribute position
              // (name, value, uri); len is the number of attributes reported by
              // getAttrLen() and the upper bound used by the getAttr* accessors.
770          protected String[] keys = new String[DEFAULT_ATTR_SIZE];
771          protected String[] vals = new String[DEFAULT_ATTR_SIZE];
772          protected String[] uris = new String[DEFAULT_ATTR_SIZE];
773          protected int len = 0;
774  
              // Non-fatal errors related to this element, returned as-is by getErrors().
775          protected Exn[] errors = new Exn[] {};
776  
777  
778          /**
               * Parent of current element.
               *
               * @return the value of the parent field, which is null until one is assigned
               */
779          public Element getParent() { return parent; }
780  
781          /**
               * Qualified Name of current element.  XML Namespace Spec 14-Jan-1999 [6]
               *
               * @return the stored qName, which is null until one is assigned
               */
782          public String getQName() { return qName; }
783  
784          /**
               * LocalPart of current element. XML Namespace Spec 14-Jan-1999 [8]
               *
               * @return the stored localName, which is null until one is assigned
               */
785          public String getLocalName() { return localName; }
786  
787          /**
               * Prefix of current element. Substring of qName. XML Namespace Spec 14-Jan-1999 [7]
               *
               * @return the stored prefix, which is null until one is assigned
               */
788          public String getPrefix() { return prefix; }
789  
790          /**
               * URI of current tag. XML Namespace Spec 14-Jan-1999 section 1
               *
               * <p>Resolved by looking up this element's prefix with
               * getUri(String), so the result is never null.</p>
               *
               * @return the URI mapped to this element's prefix, or "" if there is none
               */
791          public String getUri() { return getUri(prefix); }
792  
793          /** URI of a given prefix. Never returns null, instead gives "". */
794          public String getUri(String p) {
795              String ret = null;
796              for (Element e = this; e != null && ret == null; e = e.getParent()) {
797                  ret = (String)e.urimap.get(p == null ? "" : p);
798              }
799              return ret == null ? "" : ret;
800          }
801  
802          /** An array of attribute names. */
803          public String getAttrKey(int pos) { return len > pos ? keys[pos] : null; }
804  
805          /** An array of attribute values. */
806          public String getAttrVal(int pos) { return len > pos ? vals[pos] : null; }
807  
808          /** An array of attribute uris. */
809          public String getAttrUri(int pos) { return len > pos ? uris[pos] : null; }
810  
811          /**
               * Current number of attributes in the element.
               *
               * @return the attribute count; positions below it are the ones the
               *         getAttrKey/getAttrVal/getAttrUri accessors will answer
               */
812          public int getAttrLen() { return len; }
813  
814          /**
               * An array of non-fatal errors related to this element.
               *
               * @return the element's own errors array (not a copy); it is
               *         initialised to an empty array
               */
815          public Exn[] getErrors() { return errors; }
816  
817  
818          protected Element() { } // creates an empty element; all state comes from the field initialisers
819  
820          /**
               * Add (replace if exists in current element) a Namespace prefix/uri map.
               *
               * @param name  namespace prefix used as the key; getUri(String) looks up
               *              "" for a null prefix, so presumably the default namespace
               *              is stored under "" (confirm against the caller)
               * @param value URI the prefix maps to
               */
821          protected void addUri(String name, String value) {
822              urimap.put(name, value);
823          }
824  
825          /** Add