Source for gnu.xml.stream.XMLParser

   1: /* XMLParser.java -- 
   2:    Copyright (C) 2005  Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version.
  37: 
  38: Partly derived from code which carried the following notice:
  39: 
  40:   Copyright (c) 1997, 1998 by Microstar Software Ltd.
  41: 
  42:   AElfred is free for both commercial and non-commercial use and
  43:   redistribution, provided that Microstar's copyright and disclaimer are
  44:   retained intact.  You are free to modify AElfred for your own use and
  45:   to redistribute AElfred with your modifications, provided that the
  46:   modifications are clearly documented.
  47: 
  48:   This program is distributed in the hope that it will be useful, but
  49:   WITHOUT ANY WARRANTY; without even the implied warranty of
  50:   merchantability or fitness for a particular purpose.  Please use it AT
  51:   YOUR OWN RISK.
  52: */
  53: 
  54: package gnu.xml.stream;
  55: 
  56: import java.io.BufferedInputStream;
  57: import java.io.EOFException;
  58: import java.io.File;
  59: import java.io.FileOutputStream;
  60: import java.io.FileWriter;
  61: import java.io.InputStream;
  62: import java.io.InputStreamReader;
  63: import java.io.IOException;
  64: import java.io.Reader;
  65: import java.io.StringReader;
  66: import java.io.UnsupportedEncodingException;
  67: import java.net.MalformedURLException;
  68: import java.net.URL;
  69: import java.util.ArrayList;
  70: import java.util.Collections;
  71: import java.util.HashSet;
  72: import java.util.Iterator;
  73: import java.util.LinkedHashMap;
  74: import java.util.LinkedList;
  75: import java.util.Map;
  76: import java.util.NoSuchElementException;
  77: import java.util.StringTokenizer;
  78: 
  79: import javax.xml.XMLConstants;
  80: import javax.xml.namespace.NamespaceContext;
  81: import javax.xml.namespace.QName;
  82: import javax.xml.stream.Location;
  83: import javax.xml.stream.XMLInputFactory;
  84: import javax.xml.stream.XMLReporter;
  85: import javax.xml.stream.XMLResolver;
  86: import javax.xml.stream.XMLStreamConstants;
  87: import javax.xml.stream.XMLStreamException;
  88: import javax.xml.stream.XMLStreamReader;
  89: 
  90: import gnu.java.net.CRLFInputStream;
  91: import gnu.classpath.debug.TeeInputStream;
  92: import gnu.classpath.debug.TeeReader;
  93: 
  94: /**
  95:  * An XML parser.
  96:  * This parser supports the following additional StAX properties:
  97:  * <table>
  98:  * <tr><td>gnu.xml.stream.stringInterning</td>
  99:  * <td>Boolean</td>
 100:  * <td>Indicates whether markup strings will be interned</td></tr>
 101:  * <tr><td>gnu.xml.stream.xmlBase</td>
 102:  * <td>Boolean</td>
 103:  * <td>Indicates whether XML Base processing will be performed</td></tr>
 104:  * <tr><td>gnu.xml.stream.baseURI</td>
 105:  * <td>String</td>
 106:  * <td>Returns the base URI of the current event</td></tr>
 107:  * </table>
 108:  *
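 * @see <a href="http://www.w3.org/TR/REC-xml/">Extensible Markup Language (XML) 1.0</a>
 * @see <a href="http://www.w3.org/TR/xml11/">Extensible Markup Language (XML) 1.1</a>
 * @see <a href="http://www.w3.org/TR/REC-xml-names">Namespaces in XML 1.0</a>
 * @see <a href="http://www.w3.org/TR/xml-names11">Namespaces in XML 1.1</a>
 * @see <a href="http://www.w3.org/TR/xmlbase/">XML Base</a>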
 114:  * 
 115:  * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
 116:  */
 117: public class XMLParser
 118:   implements XMLStreamReader, NamespaceContext
 119: {
 120: 
  // -- parser state machine states --
  // Values held by the 'state' field, which starts out as INIT.
  private static final int INIT = 0; // start state
  private static final int PROLOG = 1; // in prolog
  private static final int CONTENT = 2; // in content
  private static final int EMPTY_ELEMENT = 3; // empty element state
  private static final int MISC = 4; // in Misc (after root element)

  // -- parameters for parsing literals --
  // Each value is a distinct single bit.
  // NOTE(review): presumably these are OR-ed together into one flags
  // argument by the literal-reading code, which is not visible in this
  // part of the file -- confirm against the code that uses them.
  private final static int LIT_ENTITY_REF = 2;
  private final static int LIT_NORMALIZE = 4;
  private final static int LIT_ATTRIBUTE = 8;
  private final static int LIT_DISABLE_PE = 16;
  private final static int LIT_DISABLE_CREF = 32;
  private final static int LIT_DISABLE_EREF = 64;
  private final static int LIT_PUBID = 256;

  // -- types of attribute values --
  // Declared without an access modifier, so these are visible to the other
  // classes of this package.
  final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
  final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
  final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
  final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
  final static int ATTRIBUTE_DEFAULT_FIXED = 34;

  // -- additional event types --
  // Event codes beyond the standard event types; see the
  // extendedEventTypes field, which controls whether they are reported.
  final static int START_ENTITY = 50;
  final static int END_ENTITY = 51;
 147: 
 148:   /**
 149:    * The current input.
 150:    */
 151:   private Input input;
 152: 
 153:   /**
 154:    * Stack of inputs representing XML general entities.
 155:    * The input representing the XML input stream or reader is always the
 156:    * first element in this stack.
 157:    */
 158:   private LinkedList inputStack = new LinkedList();
 159: 
 160:   /**
 161:    * Stack of start-entity events to be reported.
 162:    */
 163:   private LinkedList startEntityStack = new LinkedList();
 164: 
 165:   /**
 166:    * Stack of end-entity events to be reported.
 167:    */
 168:   private LinkedList endEntityStack = new LinkedList();
 169:   
 170:   /**
 171:    * Current parser state within the main state machine.
 172:    */
 173:   private int state = INIT;
 174: 
 175:   /**
 176:    * The (type of the) current event.
 177:    */
 178:   private int event;
 179: 
 180:   /**
 181:    * Whether we are looking ahead. Used by hasNext.
 182:    */
 183:   private boolean lookahead;
 184: 
 185:   /**
 186:    * The element name stack. The first element in this stack will be the
 187:    * root element.
 188:    */
 189:   private LinkedList stack = new LinkedList();
 190: 
 191:   /**
 192:    * Stack of namespace contexts. These are maps specifying prefix-to-URI
 193:    * mappings. The first element in this stack is the most recent namespace
 194:    * context (i.e. the other way around from the element name stack).
 195:    */
 196:   private LinkedList namespaces = new LinkedList();
 197: 
 198:   /**
 199:    * The base-URI stack. This holds the base URI context for each element.
 200:    * The first element in this stack is the most recent context (i.e. the
 201:    * other way around from the element name stack).
 202:    */
 203:   private LinkedList bases = new LinkedList();
 204: 
 205:   /**
 206:    * The list of attributes for the current element, in the order defined in
 207:    * the XML stream.
 208:    */
 209:   private ArrayList attrs = new ArrayList();
 210: 
 211:   /**
 212:    * Buffer for text and character data.
 213:    */
 214:   private StringBuffer buf = new StringBuffer();
 215: 
 216:   /**
 217:    * Buffer for NMTOKEN strings (markup).
 218:    */
 219:   private StringBuffer nmtokenBuf = new StringBuffer();
 220: 
 221:   /**
 222:    * Buffer for string literals. (e.g. attribute values)
 223:    */
 224:   private StringBuffer literalBuf = new StringBuffer();
 225: 
 226:   /**
 227:    * Temporary Unicode character buffer used during character data reads.
 228:    */
 229:   private int[] tmpBuf = new int[1024];
 230:   
 231:   /**
 232:    * The element content model for the current element.
 233:    */
 234:   private ContentModel currentContentModel;
 235: 
 236:   /**
 237:    * The validation stack. This holds lists of the elements seen for each
 238:    * element, in order to determine whether the names and order of these
 239:    * elements match the content model for the element. The last entry in
 240:    * this stack represents the current element.
 241:    */
 242:   private LinkedList validationStack;
 243: 
 244:   /**
 245:    * These sets contain the IDs and the IDREFs seen in the document, to
 246:    * ensure that IDs are unique and that each IDREF refers to an ID in the
 247:    * document.
 248:    */
 249:   private HashSet ids, idrefs;
 250: 
 251:   /**
 252:    * The target and data associated with the current processing instruction
 253:    * event.
 254:    */
 255:   private String piTarget, piData;
 256: 
 257:   /**
 258:    * The XML version declared in the XML declaration.
 259:    */
 260:   private String xmlVersion;
 261: 
 262:   /**
 263:    * The encoding declared in the XML declaration.
 264:    */
 265:   private String xmlEncoding;
 266: 
 267:   /**
 268:    * The standalone value declared in the XML declaration.
 269:    */
 270:   private Boolean xmlStandalone;
 271: 
 272:   /**
 273:    * The document type definition.
 274:    */
 275:   Doctype doctype;
 276: 
 277:   /**
 278:    * State variables for determining parameter-entity expansion.
 279:    */
 280:   private boolean expandPE, peIsError;
 281: 
 282:   /**
 283:    * Whether this is a validating parser.
 284:    */
 285:   private final boolean validating;
 286: 
 287:   /**
 288:    * Whether strings representing markup will be interned.
 289:    */
 290:   private final boolean stringInterning;
 291: 
 292:   /**
 293:    * If true, CDATA sections will be merged with adjacent text nodes into a
 294:    * single event.
 295:    */
 296:   private final boolean coalescing;
 297: 
 298:   /**
 299:    * Whether to replace general entity references with their replacement
 300:    * text automatically during parsing.
 301:    * Otherwise entity-reference events will be issued.
 302:    */
 303:   private final boolean replaceERefs;
 304: 
 305:   /**
 306:    * Whether to support external entities.
 307:    */
 308:   private final boolean externalEntities;
 309: 
 310:   /**
 311:    * Whether to support DTDs.
 312:    */
 313:   private final boolean supportDTD;
 314: 
 315:   /**
 316:    * Whether to support XML namespaces. If true, namespace information will
 317:    * be available. Otherwise namespaces will simply be reported as ordinary
 318:    * attributes.
 319:    */
 320:   private final boolean namespaceAware;
 321: 
 322:   /**
 323:    * Whether to support XML Base. If true, URIs specified in xml:base
 324:    * attributes will be honoured when resolving external entities.
 325:    */
 326:   private final boolean baseAware;
 327: 
 328:   /**
 329:    * Whether to report extended event types (START_ENTITY and END_ENTITY)
 330:    * in addition to the standard event types. Used by the SAX parser.
 331:    */
 332:   private final boolean extendedEventTypes;
 333: 
 334:   /**
 335:    * The reporter to receive parsing warnings.
 336:    */
 337:   final XMLReporter reporter;
 338: 
 339:   /**
 340:    * Callback interface for resolving external entities.
 341:    */
 342:   final XMLResolver resolver;
 343: 
  // -- Constants for testing the next kind of markup event --
  // Literal character sequences that open or close the various kinds of
  // XML markup.
  // NOTE(review): the code that compares these against the input is not
  // visible in this part of the file; presumably longer sequences have to
  // be tested before their shorter prefixes (e.g. "<!--" before "<") --
  // confirm against the callers.
  private static final String TEST_START_ELEMENT = "<";
  private static final String TEST_END_ELEMENT = "</";
  private static final String TEST_COMMENT = "<!--";
  private static final String TEST_PI = "<?";
  private static final String TEST_CDATA = "<![CDATA[";
  private static final String TEST_XML_DECL = "<?xml";
  private static final String TEST_DOCTYPE_DECL = "<!DOCTYPE";
  private static final String TEST_ELEMENT_DECL = "<!ELEMENT";
  private static final String TEST_ATTLIST_DECL = "<!ATTLIST";
  private static final String TEST_ENTITY_DECL = "<!ENTITY";
  private static final String TEST_NOTATION_DECL = "<!NOTATION";
  private static final String TEST_KET = ">";
  private static final String TEST_END_COMMENT = "--";
  private static final String TEST_END_PI = "?>";
  private static final String TEST_END_CDATA = "]]>";
 360: 
  /**
   * The general entities predefined by the XML specification.
   * Keys are the entity names without the surrounding <code>&amp;</code>
   * and <code>;</code>; values are the replacement text, a single
   * character in each case. Being a LinkedHashMap, the map iterates in the
   * order the entries are inserted below.
   */
  private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap();
  static
  {
    // Populated once, when the class is initialised.
    PREDEFINED_ENTITIES.put("amp", "&");
    PREDEFINED_ENTITIES.put("lt", "<");
    PREDEFINED_ENTITIES.put("gt", ">");
    PREDEFINED_ENTITIES.put("apos", "'");
    PREDEFINED_ENTITIES.put("quot", "\"");
  }
 373: 
 374:   /**
 375:    * Creates a new XML parser for the given input stream.
 376:    * This constructor should be used where possible, as it allows the
 377:    * encoding of the XML data to be correctly determined from the stream.
 378:    * @param in the input stream
 379:    * @param systemId the URL from which the input stream was retrieved
 380:    * (necessary if there are external entities to be resolved)
 381:    * @param validating if the parser is to be a validating parser
 382:    * @param namespaceAware if the parser should support XML Namespaces
 383:    * @param coalescing if CDATA sections should be merged into adjacent text
 384:    * nodes
 385:    * @param replaceERefs if entity references should be automatically
 386:    * replaced by their replacement text (otherwise they will be reported as
 387:    * entity-reference events)
 388:    * @param externalEntities if external entities should be loaded
 389:    * @param supportDTD if support for the XML DTD should be enabled
 390:    * @param baseAware if the parser should support XML Base to resolve
 391:    * external entities
 392:    * @param stringInterning whether strings will be interned during parsing
 393:    * @param reporter the reporter to receive warnings during processing
 394:    * @param resolver the callback interface used to resolve external
 395:    * entities
 396:    */
 397:   public XMLParser(InputStream in, String systemId,
 398:                    boolean validating,
 399:                    boolean namespaceAware,
 400:                    boolean coalescing,
 401:                    boolean replaceERefs,
 402:                    boolean externalEntities,
 403:                    boolean supportDTD,
 404:                    boolean baseAware,
 405:                    boolean stringInterning,
 406:                    boolean extendedEventTypes,
 407:                    XMLReporter reporter,
 408:                    XMLResolver resolver)
 409:   {
 410:     this.validating = validating;
 411:     this.namespaceAware = namespaceAware;
 412:     this.coalescing = coalescing;
 413:     this.replaceERefs = replaceERefs;
 414:     this.externalEntities = externalEntities;
 415:     this.supportDTD = supportDTD;
 416:     this.baseAware = baseAware;
 417:     this.stringInterning = stringInterning;
 418:     this.extendedEventTypes = extendedEventTypes;
 419:     this.reporter = reporter;
 420:     this.resolver = resolver;
 421:     if (validating)
 422:       {
 423:         validationStack = new LinkedList();
 424:         ids = new HashSet();
 425:         idrefs = new HashSet();
 426:       }
 427:     String debug = System.getProperty("gnu.xml.debug.input");
 428:     if (debug != null)
 429:       {
 430:         try
 431:           {
 432:             File file = File.createTempFile(debug, ".xml");
 433:             in = new TeeInputStream(in, new FileOutputStream(file));
 434:           }
 435:         catch (IOException e)
 436:           {
 437:             RuntimeException e2 = new RuntimeException();
 438:             e2.initCause(e);
 439:             throw e2;
 440:           }
 441:       }
 442:     systemId = canonicalize(systemId);
 443:     pushInput(new Input(in, null, null, systemId, null, null, false, true));
 444:   }
 445: 
 446:   /**
 447:    * Creates a new XML parser for the given character stream.
 448:    * This constructor is only available for compatibility with the JAXP
 449:    * APIs, which permit XML to be parsed from a character stream. Because
 450:    * the encoding specified by the character stream may conflict with that
 451:    * specified in the XML declaration, this method should be avoided where
 452:    * possible.
 453:    * @param in the input stream
 454:    * @param systemId the URL from which the input stream was retrieved
 455:    * (necessary if there are external entities to be resolved)
 456:    * @param validating if the parser is to be a validating parser
 457:    * @param namespaceAware if the parser should support XML Namespaces
 458:    * @param coalescing if CDATA sections should be merged into adjacent text
 459:    * nodes
 460:    * @param replaceERefs if entity references should be automatically
 461:    * replaced by their replacement text (otherwise they will be reported as
 462:    * entity-reference events)
 463:    * @param externalEntities if external entities should be loaded
 464:    * @param supportDTD if support for the XML DTD should be enabled
 465:    * @param baseAware if the parser should support XML Base to resolve
 466:    * external entities
 467:    * @param stringInterning whether strings will be interned during parsing
 468:    * @param reporter the reporter to receive warnings during processing
 469:    * @param resolver the callback interface used to resolve external
 470:    * entities
 471:    */
 472:   public XMLParser(Reader reader, String systemId,
 473:                    boolean validating,
 474:                    boolean namespaceAware,
 475:                    boolean coalescing,
 476:                    boolean replaceERefs,
 477:                    boolean externalEntities,
 478:                    boolean supportDTD,
 479:                    boolean baseAware,
 480:                    boolean stringInterning,
 481:                    boolean extendedEventTypes,
 482:                    XMLReporter reporter,
 483:                    XMLResolver resolver)
 484:   {
 485:     this.validating = validating;
 486:     this.namespaceAware = namespaceAware;
 487:     this.coalescing = coalescing;
 488:     this.replaceERefs = replaceERefs;
 489:     this.externalEntities = externalEntities;
 490:     this.supportDTD = supportDTD;
 491:     this.baseAware = baseAware;
 492:     this.stringInterning = stringInterning;
 493:     this.extendedEventTypes = extendedEventTypes;
 494:     this.reporter = reporter;
 495:     this.resolver = resolver;
 496:     if (validating)
 497:       {
 498:         validationStack = new LinkedList();
 499:         ids = new HashSet();
 500:         idrefs = new HashSet();
 501:       }
 502:     String debug = System.getProperty("gnu.xml.debug.input");
 503:     if (debug != null)
 504:       {
 505:         try
 506:           {
 507:             File file = File.createTempFile(debug, ".xml");
 508:             reader = new TeeReader(reader, new FileWriter(file));
 509:           }
 510:         catch (IOException e)
 511:           {
 512:             RuntimeException e2 = new RuntimeException();
 513:             e2.initCause(e);
 514:             throw e2;
 515:           }
 516:       }
 517:     systemId = canonicalize(systemId);
 518:     pushInput(new Input(null, reader, null, systemId, null, null, false, true));
 519:   }
 520: 
 521:   // -- NamespaceContext --
 522: 
 523:   public String getNamespaceURI(String prefix)
 524:   {
 525:     if (XMLConstants.XML_NS_PREFIX.equals(prefix))
 526:       return XMLConstants.XML_NS_URI;
 527:     if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix))
 528:       return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
 529:     for (Iterator i = namespaces.iterator(); i.hasNext(); )
 530:       {
 531:         LinkedHashMap ctx = (LinkedHashMap) i.next();
 532:         String namespaceURI = (String) ctx.get(prefix);
 533:         if (namespaceURI != null)
 534:           return namespaceURI;
 535:       }
 536:     return null;
 537:   }
 538: 
 539:   public String getPrefix(String namespaceURI)
 540:   {
 541:     if (XMLConstants.XML_NS_URI.equals(namespaceURI))
 542:       return XMLConstants.XML_NS_PREFIX;
 543:     if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
 544:       return XMLConstants.XMLNS_ATTRIBUTE;
 545:     for (Iterator i = namespaces.iterator(); i.hasNext(); )
 546:       {
 547:         LinkedHashMap ctx = (LinkedHashMap) i.next();
 548:         if (ctx.containsValue(namespaceURI))
 549:           {
 550:             for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
 551:               {
 552:                 Map.Entry entry = (Map.Entry) i.next();
 553:                 String uri = (String) entry.getValue();
 554:                 if (uri.equals(namespaceURI))
 555:                   return (String) entry.getKey();
 556:               }
 557:           }
 558:       }
 559:     return null;
 560:   }
 561: 
 562:   public Iterator getPrefixes(String namespaceURI)
 563:   {
 564:     if (XMLConstants.XML_NS_URI.equals(namespaceURI))
 565:       return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator();
 566:     if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
 567:       return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator();
 568:     LinkedList acc = new LinkedList();
 569:     for (Iterator i = namespaces.iterator(); i.hasNext(); )
 570:       {
 571:         LinkedHashMap ctx = (LinkedHashMap) i.next();
 572:         if (ctx.containsValue(namespaceURI))
 573:           {
 574:             for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
 575:               {
 576:                 Map.Entry entry = (Map.Entry) i.next();
 577:                 String uri = (String) entry.getValue();
 578:                 if (uri.equals(namespaceURI))
 579:                   acc.add(entry.getKey());
 580:               }
 581:           }
 582:       }
 583:     return acc.iterator();
 584:   }
 585: 
 586:   // -- XMLStreamReader --
 587: 
 588:   public void close()
 589:     throws XMLStreamException
 590:   {
 591:     stack = null;
 592:     namespaces = null;
 593:     bases = null;
 594:     buf = null;
 595:     attrs = null;
 596:     doctype = null;
 597: 
 598:     inputStack = null;
 599:     validationStack = null;
 600:     ids = null;
 601:     idrefs = null;
 602:   }
 603: 
  /**
   * Returns the namespace context, which is this parser itself.
   * The result is therefore a live view: its answers follow the parser's
   * current position rather than being a snapshot.
   * @return this parser
   */
  public NamespaceContext getNamespaceContext()
  {
    return this;
  }
 608: 
  /**
   * Returns the number of entries in the current attribute list.
   * @return the size of the attribute list
   */
  public int getAttributeCount()
  {
    return attrs.size();
  }
 613: 
 614:   public String getAttributeLocalName(int index)
 615:   {
 616:     Attribute a = (Attribute) attrs.get(index);
 617:     return a.localName;
 618:   }
 619: 
 620:   public String getAttributeNamespace(int index)
 621:   {
 622:     String prefix = getAttributePrefix(index);
 623:     return getNamespaceURI(prefix);
 624:   }
 625: 
 626:   public String getAttributePrefix(int index)
 627:   {
 628:     Attribute a = (Attribute) attrs.get(index);
 629:     return a.prefix;
 630:   }
 631: 
 632:   public QName getAttributeName(int index)
 633:   {
 634:     Attribute a = (Attribute) attrs.get(index);
 635:     String namespaceURI = getNamespaceURI(a.prefix);
 636:     return new QName(namespaceURI, a.localName, a.prefix);
 637:   }
 638: 
 639:   public String getAttributeType(int index)
 640:   {
 641:     Attribute a = (Attribute) attrs.get(index);
 642:     return a.type;
 643:   }
 644: 
 645:   private String getAttributeType(String elementName, String attName)
 646:   {
 647:     if (doctype != null)
 648:       {
 649:         AttributeDecl att = doctype.getAttributeDecl(elementName, attName);
 650:         if (att != null)
 651:           return att.type;
 652:       }
 653:     return "CDATA";
 654:   }
 655: 
 656:   public String getAttributeValue(int index)
 657:   {
 658:     Attribute a = (Attribute) attrs.get(index);
 659:     return a.value;
 660:   }
 661: 
 662:   public String getAttributeValue(String namespaceURI, String localName)
 663:   {
 664:     for (Iterator i = attrs.iterator(); i.hasNext(); )
 665:       {
 666:         Attribute a = (Attribute) i.next();
 667:         if (a.localName.equals(localName))
 668:           {
 669:             String uri = getNamespaceURI(a.prefix);
 670:             if ((uri == null && namespaceURI == null) ||
 671:                 (uri != null && uri.equals(namespaceURI)))
 672:               return a.value;
 673:           }
 674:       }
 675:     return null;
 676:   }
 677: 
 678:   boolean isAttributeDeclared(int index)
 679:   {
 680:     if (doctype == null)
 681:       return false;
 682:     Attribute a = (Attribute) attrs.get(index);
 683:     String qn = ("".equals(a.prefix)) ? a.localName :
 684:       a.prefix + ":" + a.localName;
 685:     String elementName = buf.toString();
 686:     return doctype.isAttributeDeclared(elementName, qn);
 687:   }
 688:   
  /**
   * Returns the value of the xmlEncoding field, i.e. the encoding declared
   * in the XML declaration.
   * NOTE(review): presumably null when no encoding has been declared; the
   * code that assigns the field is not visible here -- confirm.
   * @return the declared encoding
   */
  public String getCharacterEncodingScheme()
  {
    return xmlEncoding;
  }
 693: 
 694:   public String getElementText()
 695:     throws XMLStreamException
 696:   {
 697:     if (event != XMLStreamConstants.START_ELEMENT)
 698:       throw new XMLStreamException("current event must be START_ELEMENT");
 699:     StringBuffer elementText = new StringBuffer();
 700:     int depth = stack.size();
 701:     while (event != XMLStreamConstants.END_ELEMENT || stack.size() > depth)
 702:       {
 703:         switch (next())
 704:           {
 705:           case XMLStreamConstants.CHARACTERS:
 706:           case XMLStreamConstants.SPACE:
 707:             elementText.append(buf.toString());
 708:           }
 709:       }
 710:     return elementText.toString();
 711:   }
 712: 
 713:   public String getEncoding()
 714:   {
 715:     return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding;
 716:   }
 717: 
  /**
   * Returns the type of the current event, as held in the event field.
   * @return the current event type
   */
  public int getEventType()
  {
    return event;
  }
 722: 
 723:   public String getLocalName()
 724:   {
 725:     switch (event)
 726:       {
 727:       case XMLStreamConstants.START_ELEMENT:
 728:       case XMLStreamConstants.END_ELEMENT:
 729:         String qName = buf.toString();
 730:         int ci = qName.indexOf(':');
 731:         return (ci == -1) ? qName : qName.substring(ci + 1);
 732:       default:
 733:         return null;
 734:       }
 735:   }
 736: 
  /**
   * Returns the current input, which doubles as the location object.
   * The same object is handed out while that input stays current; it is
   * not a copy.
   * @return the current input
   */
  public Location getLocation()
  {
    return input;
  }
 741: 
 742:   public QName getName()
 743:   {
 744:     switch (event)
 745:       {
 746:       case XMLStreamConstants.START_ELEMENT:
 747:       case XMLStreamConstants.END_ELEMENT:
 748:         String qName = buf.toString();
 749:         int ci = qName.indexOf(':');
 750:         String localName = (ci == -1) ? qName : qName.substring(ci + 1);
 751:         String prefix = (ci == -1) ?
 752:           (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
 753:           qName.substring(0, ci);
 754:         String namespaceURI = getNamespaceURI(prefix);
 755:         return new QName(namespaceURI, localName, prefix);
 756:       default:
 757:         return null;
 758:       }
 759:   }
 760: 
 761:   public int getNamespaceCount()
 762:   {
 763:     if (!namespaceAware || namespaces.isEmpty())
 764:       return 0;
 765:     switch (event)
 766:       {
 767:       case XMLStreamConstants.START_ELEMENT:
 768:       case XMLStreamConstants.END_ELEMENT:
 769:         LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
 770:         return ctx.size();
 771:       default:
 772:         return 0;
 773:       }
 774:   }
 775: 
 776:   public String getNamespacePrefix(int index)
 777:   {
 778:     LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
 779:     int count = 0;
 780:     for (Iterator i = ctx.keySet().iterator(); i.hasNext(); )
 781:       {
 782:         String prefix = (String) i.next();
 783:         if (count++ == index)
 784:           return prefix;
 785:       }
 786:     return null;
 787:   }
 788: 
 789:   public String getNamespaceURI()
 790:   {
 791:     switch (event)
 792:       {
 793:       case XMLStreamConstants.START_ELEMENT:
 794:       case XMLStreamConstants.END_ELEMENT:
 795:         String qName = buf.toString();
 796:         int ci = qName.indexOf(':');
 797:         if (ci == -1)
 798:           return null;
 799:         String prefix = qName.substring(0, ci);
 800:         return getNamespaceURI(prefix);
 801:       default:
 802:         return null;
 803:       }
 804:   }
 805: 
 806:   public String getNamespaceURI(int index)
 807:   {
 808:     LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
 809:     int count = 0;
 810:     for (Iterator i = ctx.values().iterator(); i.hasNext(); )
 811:       {
 812:         String uri = (String) i.next();
 813:         if (count++ == index)
 814:           return uri;
 815:       }
 816:     return null;
 817:   }
 818: 
  /**
   * Returns the data recorded for the current processing instruction
   * event, as held in the piData field.
   * The value is returned whatever the current event type is; no check is
   * made here.
   * @return the processing instruction data
   */
  public String getPIData()
  {
    return piData;
  }
 823: 
  /**
   * Returns the target recorded for the current processing instruction
   * event, as held in the piTarget field.
   * The value is returned whatever the current event type is; no check is
   * made here.
   * @return the processing instruction target
   */
  public String getPITarget()
  {
    return piTarget;
  }
 828: 
 829:   public String getPrefix()
 830:   {
 831:     switch (event)
 832:       {
 833:       case XMLStreamConstants.START_ELEMENT:
 834:       case XMLStreamConstants.END_ELEMENT:
 835:         String qName = buf.toString();
 836:         int ci = qName.indexOf(':');
 837:         return (ci == -1) ?
 838:           (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
 839:           qName.substring(0, ci);
 840:       default:
 841:         return null;
 842:       }
 843:   }
 844: 
 845:   public Object getProperty(String name)
 846:     throws IllegalArgumentException
 847:   {
 848:     if (name == null)
 849:       throw new IllegalArgumentException("name is null");
 850:     if (XMLInputFactory.ALLOCATOR.equals(name))
 851:       return null;
 852:     if (XMLInputFactory.IS_COALESCING.equals(name))
 853:       return coalescing ? Boolean.TRUE : Boolean.FALSE;
 854:     if (XMLInputFactory.IS_NAMESPACE_AWARE.equals(name))
 855:       return namespaceAware ? Boolean.TRUE : Boolean.FALSE;
 856:     if (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES.equals(name))
 857:       return replaceERefs ? Boolean.TRUE : Boolean.FALSE;
 858:     if (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES.equals(name))
 859:       return externalEntities ? Boolean.TRUE : Boolean.FALSE;
 860:     if (XMLInputFactory.IS_VALIDATING.equals(name))
 861:       return Boolean.FALSE;
 862:     if (XMLInputFactory.REPORTER.equals(name))
 863:       return reporter;
 864:     if (XMLInputFactory.RESOLVER.equals(name))
 865:       return resolver;
 866:     if (XMLInputFactory.SUPPORT_DTD.equals(name))
 867:       return supportDTD ? Boolean.TRUE : Boolean.FALSE;
 868:     if ("gnu.xml.stream.stringInterning".equals(name))
 869:       return stringInterning ? Boolean.TRUE : Boolean.FALSE;
 870:     if ("gnu.xml.stream.xmlBase".equals(name))
 871:       return baseAware ? Boolean.TRUE : Boolean.FALSE;
 872:     if ("gnu.xml.stream.baseURI".equals(name))
 873:       return getXMLBase();
 874:     return null;
 875:   }
 876: 
 877:   public String getText()
 878:   {
 879:     return buf.toString();
 880:   }
 881: 
 882:   public char[] getTextCharacters()
 883:   {
 884:     return buf.toString().toCharArray();
 885:   }
 886: 
 887:   public int getTextCharacters(int sourceStart, char[] target,
 888:                                int targetStart, int length)
 889:     throws XMLStreamException
 890:   {
 891:     length = Math.min(sourceStart + buf.length(), length);
 892:     int sourceEnd = sourceStart + length;
 893:     buf.getChars(sourceStart, sourceEnd, target, targetStart);
 894:     return length;
 895:   }
 896: 
 897:   public int getTextLength()
 898:   {
 899:     return buf.length();
 900:   }
 901: 
 902:   public int getTextStart()
 903:   {
 904:     return 0;
 905:   }
 906: 
 907:   public String getVersion()
 908:   {
 909:     return (xmlVersion == null) ? "1.0" : xmlVersion;
 910:   }
 911: 
 912:   public boolean hasName()
 913:   {
 914:     switch (event)
 915:       {
 916:       case XMLStreamConstants.START_ELEMENT:
 917:       case XMLStreamConstants.END_ELEMENT:
 918:         return true;
 919:       default:
 920:         return false;
 921:       }
 922:   }
 923: 
 924:   public boolean hasText()
 925:   {
 926:     switch (event)
 927:       {
 928:       case XMLStreamConstants.CHARACTERS:
 929:       case XMLStreamConstants.SPACE:
 930:         return true;
 931:       default:
 932:         return false;
 933:       }
 934:   }
 935: 
 936:   public boolean isAttributeSpecified(int index)
 937:   {
 938:     Attribute a = (Attribute) attrs.get(index);
 939:     return a.specified;
 940:   }
 941: 
 942:   public boolean isCharacters()
 943:   {
 944:     return (event == XMLStreamConstants.CHARACTERS);
 945:   }
 946: 
 947:   public boolean isEndElement()
 948:   {
 949:     return (event == XMLStreamConstants.END_ELEMENT);
 950:   }
 951: 
 952:   public boolean isStandalone()
 953:   {
 954:     return Boolean.TRUE.equals(xmlStandalone);
 955:   }
 956: 
 957:   public boolean isStartElement()
 958:   {
 959:     return (event == XMLStreamConstants.START_ELEMENT);
 960:   }
 961: 
 962:   public boolean isWhiteSpace()
 963:   {
 964:     return (event == XMLStreamConstants.SPACE);
 965:   }
 966: 
 967:   public int nextTag()
 968:     throws XMLStreamException
 969:   {
 970:     do
 971:       {
 972:         switch (next())
 973:           {
 974:           case XMLStreamConstants.START_ELEMENT:
 975:           case XMLStreamConstants.END_ELEMENT:
 976:           case XMLStreamConstants.CHARACTERS:
 977:           case XMLStreamConstants.SPACE:
 978:           case XMLStreamConstants.COMMENT:
 979:           case XMLStreamConstants.PROCESSING_INSTRUCTION:
 980:             break;
 981:           default:
 982:             throw new XMLStreamException("Unexpected event type: " + event);
 983:           }
 984:       }
 985:     while (event != XMLStreamConstants.START_ELEMENT &&
 986:            event != XMLStreamConstants.END_ELEMENT);
 987:     return event;
 988:   }
 989: 
 990:   public void require(int type, String namespaceURI, String localName)
 991:     throws XMLStreamException
 992:   {
 993:     if (event != type)
 994:       throw new XMLStreamException("Current event type is " + event);
 995:     if (event == XMLStreamConstants.START_ELEMENT ||
 996:         event == XMLStreamConstants.END_ELEMENT)
 997:       {
 998:         String ln = getLocalName();
 999:         if (!ln.equals(localName))
1000:           throw new XMLStreamException("Current local-name is " + ln);
1001:         String uri = getNamespaceURI();
1002:         if ((uri == null && namespaceURI != null) ||
1003:             (uri != null && !uri.equals(namespaceURI)))
1004:           throw new XMLStreamException("Current namespace URI is " + uri);
1005:       }
1006:   }
1007: 
1008:   public boolean standaloneSet()
1009:   {
1010:     return (xmlStandalone != null);
1011:   }
1012: 
1013:   public boolean hasNext()
1014:     throws XMLStreamException
1015:   {
1016:     if (event == XMLStreamConstants.END_DOCUMENT)
1017:       return false;
1018:     if (!lookahead)
1019:       {
1020:         next();
1021:         lookahead = true;
1022:       }
1023:     return event != -1;
1024:   }
1025:   
  /**
   * Advances the parser by one event and returns the new event type.
   * <p>
   * If an event was already read ahead (the <code>lookahead</code> flag is
   * set), the flag is cleared and that event is returned without reading
   * any input. Otherwise, queued entity boundaries are turned into
   * <code>START_ENTITY</code> / <code>END_ENTITY</code> events, and after
   * that input is consumed according to the current parser
   * <code>state</code>.
   * @return the type of the new current event, or -1 if the parser state
   * is not one handled here
   * @exception XMLStreamException if reading the input fails with an
   * I/O error (the <code>IOException</code> is attached as the cause), or
   * if one of the reading routines reports an error
   * @exception NoSuchElementException if called in the MISC state after
   * the end-document event has already been delivered
   */
  public int next()
    throws XMLStreamException
  {
    // An event was already fetched by the lookahead mechanism: hand it out
    if (lookahead)
      {
        lookahead = false;
        return event;
      }
    // Leaving an end-element event: discard the contexts of that element
    if (event == XMLStreamConstants.END_ELEMENT)
      {
        // Pop namespace context
        if (namespaceAware && !namespaces.isEmpty())
          namespaces.removeFirst();
        // Pop base context
        if (baseAware && !bases.isEmpty())
          bases.removeFirst();
      }
    // Pending entity boundaries take precedence over reading more input.
    // They are only surfaced to the caller when extended event types are
    // enabled; otherwise the boundary is consumed and parsing continues.
    if (!startEntityStack.isEmpty())
      {
        String entityName = (String) startEntityStack.removeFirst();
        buf.setLength(0);
        buf.append(entityName);
        event = START_ENTITY;
        return extendedEventTypes ? event : next();
      }
    else if (!endEntityStack.isEmpty())
      {
        String entityName = (String) endEntityStack.removeFirst();
        buf.setLength(0);
        buf.append(entityName);
        event = END_ENTITY;
        return extendedEventTypes ? event : next();
      }
    try
      {
        if (!input.initialized)
          input.init();
        switch (state)
          {
          case CONTENT:
            // Element content: try each markup form in turn, falling back
            // to references and character data
            if (tryRead(TEST_END_ELEMENT))
              {
                readEndElement();
                // Closing the outermost element ends the content section
                if (stack.isEmpty())
                  state = MISC;
                event = XMLStreamConstants.END_ELEMENT;
              }
            else if (tryRead(TEST_COMMENT))
              {
                readComment(false);
                event = XMLStreamConstants.COMMENT;
              }
            else if (tryRead(TEST_PI))
              {
                readPI(false);
                event = XMLStreamConstants.PROCESSING_INSTRUCTION;
              }
            else if (tryRead(TEST_CDATA))
              {
                readCDSect();
                event = XMLStreamConstants.CDATA;
              }
            else if (tryRead(TEST_START_ELEMENT))
              {
                // readStartElement supplies the state to continue in
                state = readStartElement();
                event = XMLStreamConstants.START_ELEMENT;
              }
            else
              {
                // Check for character reference or predefined entity
                mark(8);
                int c = readCh();
                if (c == 0x26) // '&'
                  {
                    c = readCh();
                    if (c == 0x23) // '#'
                      {
                        // character reference: rewind and let the
                        // character data reader deal with it
                        reset();
                        event = readCharData(null);
                      }
                    else
                      {
                        // entity reference
                        reset();
                        readCh(); // &
                        readReference();
                        // readReference leaves the entity name in buf
                        String ref = buf.toString();
                        String text = (String) PREDEFINED_ENTITIES.get(ref);
                        if (text != null)
                          {
                            // predefined entity: continue reading character
                            // data, passing its replacement text along
                            event = readCharData(text);
                          }
                        else if (replaceERefs && !isUnparsedEntity(ref))
                          {
                            // this will report a start-entity event
                            boolean external = false;
                            if (doctype != null)
                              {
                                Object entity = doctype.getEntity(ref);
                                if (entity instanceof ExternalIds)
                                  external = true;
                              }
                            expandEntity(ref, false, external);
                            event = next();
                          }
                        else
                          {
                            // not expanded: report the reference itself
                            event = XMLStreamConstants.ENTITY_REFERENCE;
                          }
                      }
                  }
                else
                  {
                    // ordinary character data
                    reset();
                    event = readCharData(null);
                    if (validating && doctype != null)
                      validatePCData(buf.toString());
                  }
              }
            break;
          case EMPTY_ELEMENT:
            // The previous start-element was an empty element: report the
            // matching end-element without reading any input
            String elementName = (String) stack.removeLast();
            buf.setLength(0);
            buf.append(elementName);
            state = stack.isEmpty() ? MISC : CONTENT;
            event = XMLStreamConstants.END_ELEMENT;
            if (validating && doctype != null)
              endElementValidationHook();
            break;
          case INIT: // XMLDecl?
            if (tryRead(TEST_XML_DECL))
              readXMLDecl();
            input.finalizeEncoding();
            event = XMLStreamConstants.START_DOCUMENT;
            state = PROLOG;
            break;
          case PROLOG: // Misc* (doctypedecl Misc*)?
            skipWhitespace();
            // At most one doctype declaration is accepted
            if (doctype == null && tryRead(TEST_DOCTYPE_DECL))
              {
                readDoctypeDecl();
                event = XMLStreamConstants.DTD;
              }
            else if (tryRead(TEST_COMMENT))
              {
                readComment(false);
                event = XMLStreamConstants.COMMENT;
              }
            else if (tryRead(TEST_PI))
              {
                readPI(false);
                event = XMLStreamConstants.PROCESSING_INSTRUCTION;
              }
            else if (tryRead(TEST_START_ELEMENT))
              {
                state = readStartElement();
                event = XMLStreamConstants.START_ELEMENT;
              }
            else
              {
                // Anything else before the root element is an error
                int c = readCh();
                error("no root element: U+" + Integer.toHexString(c));
              }
            break;
          case MISC: // Comment | PI | S
            skipWhitespace();
            if (tryRead(TEST_COMMENT))
              {
                readComment(false);
                event = XMLStreamConstants.COMMENT;
              }
            else if (tryRead(TEST_PI))
              {
                readPI(false);
                event = XMLStreamConstants.PROCESSING_INSTRUCTION;
              }
            else
              {
                // The end of the document was already reported
                if (event == XMLStreamConstants.END_DOCUMENT)
                  throw new NoSuchElementException();
                // Only end of input (-1) is acceptable here
                int c = readCh();
                if (c != -1)
                  error("Only comments and PIs may appear after " +
                        "the root element");
                event = XMLStreamConstants.END_DOCUMENT;
              }
            break;
          default:
            event = -1;
          }
        return event;
      }
    catch (IOException e)
      {
        // Wrap I/O failures, keeping the original exception as the cause
        XMLStreamException e2 = new XMLStreamException();
        e2.initCause(e);
        throw e2;
      }
  }
1225: 
1226:   // package private
1227: 
1228:   /**
1229:    * Returns the current element name.
1230:    */
1231:   String getCurrentElement()
1232:   {
1233:     return (String) stack.getLast();
1234:   }
1235: 
1236:   // private
1237: 
1238:   private void mark(int limit)
1239:     throws IOException
1240:   {
1241:     input.mark(limit);
1242:   }
1243: 
1244:   private void reset()
1245:     throws IOException
1246:   {
1247:     input.reset();
1248:   }
1249: 
1250:   private int read()
1251:     throws IOException
1252:   {
1253:     return input.read();
1254:   }
1255: 
1256:   private int read(int[] b, int off, int len)
1257:     throws IOException
1258:   {
1259:     return input.read(b, off, len);
1260:   }
1261:   
1262:   /**
1263:    * Parsed character read.
1264:    */
1265:   private int readCh()
1266:     throws IOException, XMLStreamException
1267:   {
1268:     int c = read();
1269:     if (expandPE && c == 0x25) // '%'
1270:       {
1271:         if (peIsError)
1272:           error("PE reference within decl in internal subset.");
1273:         expandPEReference();
1274:         return readCh();
1275:       }
1276:     return c;
1277:   }
1278: 
1279:   /**
1280:    * Reads the next character, ensuring it is the character specified.
1281:    * @param delim the character to match
1282:    * @exception XMLStreamException if the next character is not the
1283:    * specified one
1284:    */
1285:   private void require(char delim)
1286:     throws IOException, XMLStreamException
1287:   {
1288:     mark(1);
1289:     int c = readCh();
1290:     if (delim != c)
1291:       {
1292:         reset();
1293:         error("required character (got U+" + Integer.toHexString(c) + ")",
1294:               new Character(delim));
1295:       }
1296:   }
1297: 
1298:   /**
1299:    * Reads the next few characters, ensuring they match the string specified.
1300:    * @param delim the string to match
1301:    * @exception XMLStreamException if the next characters do not match the
1302:    * specified string
1303:    */
1304:   private void require(String delim)
1305:     throws IOException, XMLStreamException
1306:   {
1307:     char[] chars = delim.toCharArray();
1308:     int len = chars.length;
1309:     mark(len);
1310:     int off = 0;
1311:     do
1312:       {
1313:         int l2 = read(tmpBuf, off, len - off);
1314:         if (l2 == -1)
1315:           {
1316:             reset();
1317:             error("EOF before required string", delim);
1318:           }
1319:         off += l2;
1320:       }
1321:     while (off < len);
1322:     for (int i = 0; i < chars.length; i++)
1323:       {
1324:         if (chars[i] != tmpBuf[i])
1325:           {
1326:             reset();
1327:             error("required string", delim);
1328:           }
1329:       }
1330:   }
1331: 
1332:   /**
1333:    * Try to read a single character. On failure, reset the stream.
1334:    * @param delim the character to test
1335:    * @return true if the character matched delim, false otherwise.
1336:    */
1337:   private boolean tryRead(char delim)
1338:     throws IOException, XMLStreamException
1339:   {
1340:     mark(1);
1341:     int c = readCh();
1342:     if (delim != c)
1343:       {
1344:         reset();
1345:         return false;
1346:       }
1347:     return true;
1348:   }
1349: 
1350:   /**
1351:    * Tries to read the specified characters.
1352:    * If successful, the stream is positioned after the last character,
1353:    * otherwise it is reset.
1354:    * @param test the string to test
1355:    * @return true if the characters matched the test string, false otherwise.
1356:    */
1357:   private boolean tryRead(String test)
1358:     throws IOException
1359:   {
1360:     char[] chars = test.toCharArray();
1361:     int len = chars.length;
1362:     mark(len);
1363:     int count = 0;
1364:     int l2 = read(tmpBuf, 0, len);
1365:     if (l2 == -1)
1366:       {
1367:         reset();
1368:         return false;
1369:       }
1370:     count += l2;
1371:     // check the characters we received first before doing additional reads
1372:     for (int i = 0; i < count; i++)
1373:       {
1374:         if (chars[i] != tmpBuf[i])
1375:           {
1376:             reset();
1377:             return false;
1378:           }
1379:       }
1380:     while (count < len)
1381:       {
1382:         // force read
1383:         int c = read();
1384:         if (c == -1)
1385:           {
1386:             reset();
1387:             return false;
1388:           }
1389:         tmpBuf[count] = (char) c;
1390:         // check each character as it is read
1391:         if (chars[count] != tmpBuf[count])
1392:           {
1393:             reset();
1394:             return false;
1395:           }
1396:         count++;
1397:       }
1398:     return true;
1399:   }
1400: 
1401:   /**
1402:    * Reads characters until the specified test string is encountered.
1403:    * @param delim the string delimiting the end of the characters
1404:    */
1405:   private void readUntil(String delim)
1406:     throws IOException, XMLStreamException
1407:   {
1408:     int startLine = input.line;
1409:     try
1410:       {
1411:         while (!tryRead(delim))
1412:           {
1413:             int c = readCh();
1414:             if (c == -1)
1415:               throw new EOFException();
1416:             else if (input.xml11)
1417:               {
1418:                 if (!isXML11Char(c) || isXML11RestrictedChar(c))
1419:                   error("illegal XML 1.1 character",
1420:                         "U+" + Integer.toHexString(c));
1421:               }
1422:             else if (!isChar(c))
1423:               error("illegal XML character", 
1424:                     "U+" + Integer.toHexString(c));
1425:             buf.append(Character.toChars(c));
1426:           }
1427:       }
1428:     catch (EOFException e)
1429:       {
1430:         error("end of input while looking for delimiter "+
1431:               "(started on line " + startLine + ')', delim);
1432:       }
1433:   }
1434: 
1435:   /**
1436:    * Reads any whitespace characters.
1437:    * @return true if whitespace characters were read, false otherwise
1438:    */
1439:   private boolean tryWhitespace()
1440:     throws IOException, XMLStreamException
1441:   {
1442:     boolean white;
1443:     boolean ret = false;
1444:     do
1445:       {
1446:         mark(1);
1447:         int c = readCh();
1448:         while (c == -1 && inputStack.size() > 1)
1449:           {
1450:             popInput();
1451:             c = readCh();
1452:           }
1453:         white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1454:         if (white)
1455:           ret = true;
1456:       }
1457:     while (white);
1458:     reset();
1459:     return ret;
1460:   }
1461: 
1462:   /**
1463:    * Skip over any whitespace characters.
1464:    */
1465:   private void skipWhitespace()
1466:     throws IOException, XMLStreamException
1467:   {
1468:     boolean white;
1469:     do
1470:       {
1471:         mark(1);
1472:         int c = readCh();
1473:         while (c == -1 && inputStack.size() > 1)
1474:           {
1475:             popInput();
1476:             c = readCh();
1477:           }
1478:         white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1479:       }
1480:     while (white);
1481:     reset();
1482:   }
1483: 
1484:   /**
1485:    * Try to read as many whitespace characters as are available.
1486:    * @exception XMLStreamException if no whitespace characters were seen
1487:    */
1488:   private void requireWhitespace()
1489:     throws IOException, XMLStreamException
1490:   {
1491:     if (!tryWhitespace())
1492:       error("whitespace required");
1493:   }
1494: 
1495:   /**
1496:    * Returns the current base URI for resolving external entities.
1497:    */
1498:   String getXMLBase()
1499:   {
1500:     if (baseAware)
1501:       {
1502:         for (Iterator i = bases.iterator(); i.hasNext(); )
1503:           {
1504:             String base = (String) i.next();
1505:             if (base != null)
1506:               return base;
1507:           }
1508:       }
1509:     return input.systemId;
1510:   }
1511: 
1512:   /**
1513:    * Push the specified text input source.
1514:    */
1515:   private void pushInput(String name, String text, boolean report,
1516:                          boolean normalize)
1517:     throws IOException, XMLStreamException
1518:   {
1519:     // Check for recursion
1520:     if (name != null && !"".equals(name))
1521:       {
1522:         for (Iterator i = inputStack.iterator(); i.hasNext(); )
1523:           {
1524:             Input ctx = (Input) i.next();
1525:             if (name.equals(ctx.name))
1526:               error("entities may not be self-recursive", name);
1527:           }
1528:       }
1529:     else
1530:       report = false;
1531:     pushInput(new Input(null, new StringReader(text), input.publicId,
1532:                         input.systemId, name, input.inputEncoding, report,
1533:                         normalize));
1534:   }
1535: 
1536:   /**
1537:    * Push the specified external input source.
1538:    */
1539:   private void pushInput(String name, ExternalIds ids, boolean report,
1540:                          boolean normalize)
1541:     throws IOException, XMLStreamException
1542:   {
1543:     if (!externalEntities)
1544:       return;
1545:     String url = canonicalize(absolutize(input.systemId, ids.systemId));
1546:     // Check for recursion
1547:     for (Iterator i = inputStack.iterator(); i.hasNext(); )
1548:       {
1549:         Input ctx = (Input) i.next();
1550:         if (url.equals(ctx.systemId))
1551:           error("entities may not be self-recursive", url);
1552:         if (name != null && !"".equals(name) && name.equals(ctx.name))
1553:           error("entities may not be self-recursive", name);
1554:       }
1555:     if (name == null || "".equals(name))
1556:       report = false;
1557:     InputStream in = null;
1558:     if (resolver != null)
1559:       {
1560:         Object obj = resolver.resolveEntity(ids.publicId, url, getXMLBase(),
1561:                                             null);
1562:         if (obj instanceof InputStream)
1563:           in = (InputStream) obj;
1564:       }
1565:     if (in == null)
1566:       in = resolve(url);
1567:     if (in == null)
1568:       error("unable to resolve external entity",
1569:             (ids.systemId != null) ? ids.systemId : ids.publicId);
1570:     pushInput(new Input(in, null, ids.publicId, url, name, null, report,
1571:                         normalize));
1572:     input.init();
1573:     if (tryRead(TEST_XML_DECL))
1574:       readTextDecl();
1575:     input.finalizeEncoding();
1576:   }
1577: 
1578:   /**
1579:    * Push the specified input source (general entity) onto the input stack.
1580:    */
1581:   private void pushInput(Input input)
1582:   {
1583:     if (input.report)
1584:       startEntityStack.addFirst(input.name);
1585:     inputStack.addLast(input);
1586:     if (this.input != null)
1587:       input.xml11 = this.input.xml11;
1588:     this.input = input;
1589:   }
1590: 
1591:   /**
1592:    * Returns a canonicalized version of the specified URL.
1593:    * This is largely to work around a problem with the specification of
1594:    * file URLs.
1595:    */
1596:   static String canonicalize(String url)
1597:   {
1598:     if (url == null)
1599:       return null;
1600:     if (url.startsWith("file:") && !url.startsWith("file://"))
1601:       url = "file://" + url.substring(5);
1602:     return url;
1603:   }
1604: 
  /**
   * "Absolutize" a URL. This resolves a relative URL into an absolute one.
   * <p>
   * An href that already starts with a URL scheme of more than one
   * character is returned unchanged. Otherwise it is resolved against the
   * directory part of <code>base</code>; when the base is null or has no
   * directory part, the <code>user.dir</code> system property is used as
   * a <code>file:</code> base instead. Leading <code>./</code> and
   * <code>../</code> segments of the href are applied to the base path.
   * @param base the current base URL
   * @param href the (absolute or relative) URL to resolve
   * @return the resolved URL, <code>href</code> itself if the base cannot
   * be interpreted, or null if <code>href</code> is null
   */
  public static String absolutize(String base, String href)
  {
    if (href == null)
      return null;
    int ci = href.indexOf(':');
    // ci > 1 means the candidate scheme has at least two characters
    if (ci > 1 && isURLScheme(href.substring(0, ci)))
      {
        // href is absolute already
        return href;
      }
    // Reduce base to its directory part (everything up to the last '/')
    if (base == null)
      base = "";
    else
      {
        int i = base.lastIndexOf('/');
        if (i != -1)
          base = base.substring(0, i + 1);
        else
          base = "";
      }
    if ("".equals(base))
      {
        // assume file URL relative to current directory
        base = System.getProperty("user.dir");
        // drop a leading slash so the "file:///" prefix does not double it
        if (base.charAt(0) == '/')
          base = base.substring(1);
        base = "file:///" + base.replace(File.separatorChar, '/');
        if (!base.endsWith("/"))
          base += "/";
      }
    // We can't use java.net.URL here to do the parsing, as it searches for
    // a protocol handler. A protocol handler may not be registered for the
    // URL scheme here. Do it manually.
    //
    // Set aside scheme and host portion of base URL
    String basePrefix = null;
    ci = base.indexOf(':');
    if (ci > 1 && isURLScheme(base.substring(0, ci)))
      {
          // expect "scheme://" followed by at least one more character
          if (base.length() > (ci + 3)  &&
              base.charAt(ci + 1) == '/' &&
              base.charAt(ci + 2) == '/')
            {
              // first slash after "scheme://" separates host from path
              int si = base.indexOf('/', ci + 3);
              if (si == -1)
                base = null;
              else
                {
                  basePrefix = base.substring(0, si);
                  base = base.substring(si);
                }
            }
          else
            base = null;
      }
    if (base == null) // unknown or malformed base URL, use href
      return href;
    if (href.startsWith("/")) // absolute href pathname
      return (basePrefix == null) ? href : basePrefix + href;
    // relative href pathname
    if (!base.endsWith("/"))
      {
        int lsi = base.lastIndexOf('/');
        if (lsi == -1)
          base = "/";
        else
          base = base.substring(0, lsi + 1);
      }
    while (href.startsWith("../") || href.startsWith("./"))
      {
        if (href.startsWith("../"))
          {
            // strip last path component from base
            // (search starts before the trailing slash of base)
            int lsi = base.lastIndexOf('/', base.length() - 2);
            if (lsi > -1)
              base = base.substring(0, lsi + 1);
            href = href.substring(3); // strip ../ prefix
          }
        else
          {
            href = href.substring(2); // strip ./ prefix
          }
      }
    return (basePrefix == null) ? base + href : basePrefix + base + href;
  }
1695: 
1696:   /**
1697:    * Indicates whether the specified characters match the scheme portion of
1698:    * a URL.
1699:    * @see RFC 1738 section 2.1
1700:    */
1701:   private static boolean isURLScheme(String text)
1702:   {
1703:     int len = text.length();
1704:     for (int i = 0; i < len; i++)
1705:       {
1706:         char c = text.charAt(i);
1707:         if (c == '+' || c == '.' || c == '-')
1708:           continue;
1709:         if (c < 65 || (c > 90 && c < 97) || c > 122)
1710:           return false;
1711:       }
1712:     return true;
1713:   }
1714: 
1715:   /**
1716:    * Returns an input stream for the given URL.
1717:    */
1718:   static InputStream resolve(String url)
1719:     throws IOException
1720:   {
1721:     try
1722:       {
1723:         return new URL(url).openStream();
1724:       }
1725:     catch (MalformedURLException e)
1726:       {
1727:         return null;
1728:       }
1729:     catch (IOException e)
1730:       {
1731:         IOException e2 = new IOException("error resolving " + url);
1732:         e2.initCause(e);
1733:         throw e2;
1734:       }
1735:   }
1736: 
1737:   /**
1738:    * Pops the current input source (general entity) off the stack.
1739:    */
1740:   private void popInput()
1741:   {
1742:     Input old = (Input) inputStack.removeLast();
1743:     if (old.report)
1744:       endEntityStack.addFirst(old.name);
1745:     input = (Input) inputStack.getLast();
1746:   }
1747: 
1748:   /**
1749:    * Parse an entity text declaration.
1750:    */
1751:   private void readTextDecl()
1752:     throws IOException, XMLStreamException
1753:   {
1754:     final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1755:     requireWhitespace();
1756:     if (tryRead("version"))
1757:       {
1758:         readEq();
1759:         String v = readLiteral(flags, false);
1760:         if ("1.0".equals(v))
1761:           input.xml11 = false;
1762:         else if ("1.1".equals(v))
1763:           {
1764:             Input i1 = (Input) inputStack.getFirst();
1765:             if (!i1.xml11)
1766:               error("external entity specifies later version number");
1767:             input.xml11 = true;
1768:           }
1769:         else
1770:           throw new XMLStreamException("illegal XML version: " + v);
1771:         requireWhitespace();
1772:       }
1773:     require("encoding");
1774:     readEq();
1775:     String enc = readLiteral(flags, false);
1776:     skipWhitespace();
1777:     require("?>");
1778:     input.setInputEncoding(enc);
1779:   }
1780: 
1781:   /**
1782:    * Parse the XML declaration.
1783:    */
1784:   private void readXMLDecl()
1785:     throws IOException, XMLStreamException
1786:   {
1787:     final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1788:     
1789:     requireWhitespace();
1790:     require("version");
1791:     readEq();
1792:     xmlVersion = readLiteral(flags, false);
1793:     if ("1.0".equals(xmlVersion))
1794:       input.xml11 = false;
1795:     else if ("1.1".equals(xmlVersion))
1796:       input.xml11 = true;
1797:     else
1798:       throw new XMLStreamException("illegal XML version: " + xmlVersion);
1799:     
1800:     boolean white = tryWhitespace();
1801:     
1802:     if (tryRead("encoding"))
1803:       {
1804:         if (!white)
1805:           error("whitespace required before 'encoding='");
1806:         readEq();
1807:         xmlEncoding = readLiteral(flags, false);
1808:         white = tryWhitespace();
1809:       }
1810:     
1811:     if (tryRead("standalone"))
1812:       {
1813:         if (!white)
1814:           error("whitespace required before 'standalone='");
1815:         readEq();
1816:         String standalone = readLiteral(flags, false);
1817:         if ("yes".equals(standalone))
1818:           xmlStandalone = Boolean.TRUE;
1819:         else if ("no".equals(standalone))
1820:           xmlStandalone = Boolean.FALSE;
1821:         else
1822:           error("standalone flag must be 'yes' or 'no'", standalone);
1823:       }
1824: 
1825:     skipWhitespace();
1826:     require("?>");
1827:     if (xmlEncoding != null)
1828:       input.setInputEncoding(xmlEncoding);
1829:   }
1830: 
1831:   /**
1832:    * Parse the DOCTYPE declaration.
1833:    */
1834:   private void readDoctypeDecl()
1835:     throws IOException, XMLStreamException
1836:   {
1837:     if (!supportDTD)
1838:       error("parser was configured not to support DTDs");
1839:     requireWhitespace();
1840:     String rootName = readNmtoken(true);
1841:     skipWhitespace();
1842:     ExternalIds ids = readExternalIds(false, true);
1843:     doctype =
1844:       this.new Doctype(rootName, ids.publicId, ids.systemId);
1845:     
1846:     // Parse internal subset first
1847:     skipWhitespace();
1848:     if (tryRead('['))
1849:       {
1850:         while (true)
1851:           {
1852:             expandPE = true;
1853:             skipWhitespace();
1854:             expandPE = false;
1855:             if (tryRead(']'))
1856:               break;
1857:             else
1858:               readMarkupdecl(false);
1859:           }
1860:       }
1861:     skipWhitespace();
1862:     require('>');
1863: 
1864:     // Parse external subset
1865:     if (ids.systemId != null && externalEntities)
1866:       {
1867:         pushInput("", ">", false, false);
1868:         pushInput("[dtd]", ids, true, true);
1869:         // loop until we get back to ">"
1870:         while (true)
1871:           {
1872:             expandPE = true;
1873:             skipWhitespace();
1874:             expandPE = false;
1875:             mark(1);
1876:             int c = readCh();
1877:             if (c == 0x3e) // '>'
1878:               break;
1879:             else if (c == -1)
1880:               popInput();
1881:             else
1882:               {
1883:                 reset();
1884:                 expandPE = true;
1885:                 readMarkupdecl(true);
1886:                 expandPE = true;
1887:               }
1888:           }
1889:         if (inputStack.size() != 2)
1890:           error("external subset has unmatched '>'");
1891:         popInput();
1892:       }
1893:     checkDoctype();
1894:     if (validating)
1895:       validateDoctype();
1896: 
1897:     // Make rootName available for reading
1898:     buf.setLength(0);
1899:     buf.append(rootName);
1900:   }
1901: 
  /**
   * Checks the well-formedness of the DTD.
   * <p>
   * This is currently a placeholder: the body performs no checks.
   *
   * @throws XMLStreamException declared for the checks that are still to
   * be written; the present empty body never throws it
   */
  private void checkDoctype()
    throws XMLStreamException
  {
    // TODO check entity recursion
  }
1910: 
1911:   /**
1912:    * Parse the markupdecl production.
1913:    */
1914:   private void readMarkupdecl(boolean inExternalSubset)
1915:     throws IOException, XMLStreamException
1916:   {
1917:     boolean saved = expandPE;
1918:     mark(1);
1919:     require('<');
1920:     reset();
1921:     expandPE = false;
1922:     if (tryRead(TEST_ELEMENT_DECL))
1923:       {
1924:         expandPE = saved;
1925:         readElementDecl();
1926:       }
1927:     else if (tryRead(TEST_ATTLIST_DECL))
1928:       {
1929:         expandPE = saved;
1930:         readAttlistDecl();
1931:       }
1932:     else if (tryRead(TEST_ENTITY_DECL))
1933:       {
1934:         expandPE = saved;
1935:         readEntityDecl(inExternalSubset);
1936:       }
1937:     else if (tryRead(TEST_NOTATION_DECL))
1938:       {
1939:         expandPE = saved;
1940:         readNotationDecl(inExternalSubset);
1941:       }
1942:     else if (tryRead(TEST_PI))
1943:       {
1944:         readPI(true);
1945:         expandPE = saved;
1946:       }
1947:     else if (tryRead(TEST_COMMENT))
1948:       {
1949:         readComment(true);
1950:         expandPE = saved;
1951:       }
1952:     else if (tryRead("<!["))
1953:       {
1954:         // conditional section
1955:         expandPE = saved;
1956:         if (inputStack.size() < 2)
1957:           error("conditional sections illegal in internal subset");
1958:         skipWhitespace();
1959:         if (tryRead("INCLUDE"))
1960:           {
1961:             skipWhitespace();
1962:             require('[');
1963:             skipWhitespace();
1964:             while (!tryRead("]]>"))
1965:               {
1966:                 readMarkupdecl(inExternalSubset);
1967:                 skipWhitespace();
1968:               }
1969:           }
1970:         else if (tryRead("IGNORE"))
1971:           {
1972:             skipWhitespace();
1973:             require('[');
1974:             expandPE = false;
1975:             for (int nesting = 1; nesting > 0; )
1976:               {
1977:                 int c = readCh();
1978:                 switch (c)
1979:                   {
1980:                   case 0x3c: // '<'
1981:                     if (tryRead("!["))
1982:                       nesting++;
1983:                     break;
1984:                   case 0x5d: // ']'
1985:                     if (tryRead("]>"))
1986:                       nesting--;
1987:                     break;
1988:                   case -1:
1989:                     throw new EOFException();
1990:                   }
1991:               }
1992:             expandPE = saved;
1993:           }
1994:         else
1995:           error("conditional section must begin with INCLUDE or IGNORE");
1996:       }
1997:     else
1998:       error("expected markup declaration");
1999:   }
2000: 
2001:   /**
2002:    * Parse the elementdecl production.
2003:    */
2004:   private void readElementDecl()
2005:     throws IOException, XMLStreamException
2006:   {
2007:     requireWhitespace();
2008:     boolean saved = expandPE;
2009:     expandPE = (inputStack.size() > 1);
2010:     String name = readNmtoken(true);
2011:     expandPE = saved;
2012:     requireWhitespace();
2013:     readContentspec(name);
2014:     skipWhitespace();
2015:     require('>');
2016:   }
2017: 
2018:   /**
2019:    * Parse the contentspec production.
2020:    */
2021:   private void readContentspec(String elementName)
2022:     throws IOException, XMLStreamException
2023:   {
2024:     if (tryRead("EMPTY"))
2025:       doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel());
2026:     else if (tryRead("ANY"))
2027:       doctype.addElementDecl(elementName, "ANY", new AnyContentModel());
2028:     else
2029:       {
2030:         ContentModel model;
2031:         StringBuffer acc = new StringBuffer();
2032:         require('(');
2033:         acc.append('(');
2034:         skipWhitespace();
2035:         if (tryRead("#PCDATA"))
2036:           {
2037:             // mixed content
2038:             acc.append("#PCDATA");
2039:             MixedContentModel mm = new MixedContentModel();
2040:             model = mm;
2041:             skipWhitespace();
2042:             if (tryRead(')'))
2043:               {
2044:                 acc.append(")");
2045:                 if (tryRead('*'))
2046:                   {
2047:                     mm.min = 0;
2048:                     mm.max = -1;
2049:                   }
2050:               }
2051:             else
2052:               {
2053:                 while (!tryRead(")"))
2054:                   {
2055:                     require('|');
2056:                     acc.append('|');
2057:                     skipWhitespace();
2058:                     String name = readNmtoken(true);
2059:                     acc.append(name);
2060:                     mm.addName(name);
2061:                     skipWhitespace();
2062:                   }
2063:                 require('*');
2064:                 acc.append(")*");
2065:                 mm.min = 0;
2066:                 mm.max = -1;
2067:               }
2068:           }
2069:         else
2070:           model = readElements(acc);
2071:         doctype.addElementDecl(elementName, acc.toString(), model);
2072:       }
2073:   }
2074: 
2075:   /**
2076:    * Parses an element content model.
2077:    */
2078:   private ElementContentModel readElements(StringBuffer acc)
2079:     throws IOException, XMLStreamException
2080:   {
2081:     int separator;
2082:     ElementContentModel model = new ElementContentModel();
2083:     
2084:     // Parse first content particle
2085:     skipWhitespace();
2086:     model.addContentParticle(readContentParticle(acc));
2087:     // End or separator
2088:     skipWhitespace();
2089:     int c = readCh();
2090:     switch (c)
2091:       {
2092:       case 0x29: // ')'
2093:         acc.append(')');
2094:         mark(1);
2095:         c = readCh();
2096:         switch (c)
2097:           {
2098:           case 0x3f: // '?'
2099:             acc.append('?');
2100:             model.min = 0;
2101:             model.max = 1;
2102:             break;
2103:           case 0x2a: // '*'
2104:             acc.append('*');
2105:             model.min = 0;
2106:             model.max = -1;
2107:             break;
2108:           case 0x2b: // '+'
2109:             acc.append('+');
2110:             model.min = 1;
2111:             model.max = -1;
2112:             break;
2113:           default:
2114:             reset();
2115:           }
2116:         return model; // done
2117:       case 0x7c: // '|'
2118:         model.or = true;
2119:         // fall through
2120:       case 0x2c: // ','
2121:         separator = c;
2122:         acc.append(Character.toChars(c));
2123:         break;
2124:       default:
2125:         error("bad separator in content model",
2126:               "U+" + Integer.toHexString(c));
2127:         return model;
2128:       }
2129:     // Parse subsequent content particles
2130:     while (true)
2131:       {
2132:         skipWhitespace();
2133:         model.addContentParticle(readContentParticle(acc));
2134:         skipWhitespace();
2135:         c = readCh();
2136:         if (c == 0x29) // ')'
2137:           {
2138:             acc.append(')');
2139:             break;
2140:           }
2141:         else if (c != separator)
2142:           {
2143:             error("bad separator in content model",
2144:                   "U+" + Integer.toHexString(c));
2145:             return model;
2146:           }
2147:         else
2148:           acc.append(c);
2149:       }
2150:     // Check for occurrence indicator
2151:     mark(1);
2152:     c = readCh();
2153:     switch (c)
2154:       {
2155:       case 0x3f: // '?'
2156:         acc.append('?');
2157:         model.min = 0;
2158:         model.max = 1;
2159:         break;
2160:       case 0x2a: // '*'
2161:         acc.append('*');
2162:         model.min = 0;
2163:         model.max = -1;
2164:         break;
2165:       case 0x2b: // '+'
2166:         acc.append('+');
2167:         model.min = 1;
2168:         model.max = -1;
2169:         break;
2170:       default:
2171:         reset();
2172:       }
2173:     return model;
2174:   }
2175: 
2176:   /**
2177:    * Parse a cp production.
2178:    */
2179:   private ContentParticle readContentParticle(StringBuffer acc)
2180:     throws IOException, XMLStreamException
2181:   {
2182:     ContentParticle cp = new ContentParticle();
2183:     if (tryRead('('))
2184:       {
2185:         acc.append('(');
2186:         cp.content = readElements(acc);
2187:       }
2188:     else
2189:       {
2190:         String name = readNmtoken(true);
2191:         acc.append(name);
2192:         cp.content = name;
2193:         mark(1);
2194:         int c = readCh();
2195:         switch (c)
2196:           {
2197:           case 0x3f: // '?'
2198:             acc.append('?');
2199:             cp.min = 0;
2200:             cp.max = 1;
2201:             break;
2202:           case 0x2a: // '*'
2203:             acc.append('*');
2204:             cp.min = 0;
2205:             cp.max = -1;
2206:             break;
2207:           case 0x2b: // '+'
2208:             acc.append('+');
2209:             cp.min = 1;
2210:             cp.max = -1;
2211:             break;
2212:           default:
2213:             reset();
2214:           }
2215:       }
2216:     return cp;
2217:   }
2218: 
2219:   /**
2220:    * Parse an attribute-list definition.
2221:    */
2222:   private void readAttlistDecl()
2223:     throws IOException, XMLStreamException
2224:   {
2225:     requireWhitespace();
2226:     boolean saved = expandPE;
2227:     expandPE = (inputStack.size() > 1);
2228:     String elementName = readNmtoken(true);
2229:     expandPE = saved;
2230:     boolean white = tryWhitespace();
2231:     while (!tryRead('>'))
2232:       {
2233:         if (!white)
2234:           error("whitespace required before attribute definition");
2235:         readAttDef(elementName);
2236:         white = tryWhitespace();
2237:       }
2238:   }
2239: 
2240:   /**
2241:    * Parse a single attribute definition.
2242:    */
2243:   private void readAttDef(String elementName)
2244:     throws IOException, XMLStreamException
2245:   {
2246:     String name = readNmtoken(true);
2247:     requireWhitespace();
2248:     StringBuffer acc = new StringBuffer();
2249:     HashSet values = new HashSet();
2250:     String type = readAttType(acc, values);
2251:     if (validating)
2252:       {
2253:         if ("ID".equals(type))
2254:           {
2255:             // VC: One ID per Element Type
2256:             for (Iterator i = doctype.attlistIterator(elementName);
2257:                  i.hasNext(); )
2258:               {
2259:                 Map.Entry entry = (Map.Entry) i.next();
2260:                 AttributeDecl decl = (AttributeDecl) entry.getValue();
2261:                 if ("ID".equals(decl.type))
2262:                   error("element types must not have more than one ID " +
2263:                         "attribute");
2264:               }
2265:           }
2266:         else if ("NOTATION".equals(type))
2267:           {
2268:             // VC: One Notation Per Element Type
2269:             for (Iterator i = doctype.attlistIterator(elementName);
2270:                  i.hasNext(); )
2271:               {
2272:                 Map.Entry entry = (Map.Entry) i.next();
2273:                 AttributeDecl decl = (AttributeDecl) entry.getValue();
2274:                 if ("NOTATION".equals(decl.type))
2275:                   error("element types must not have more than one NOTATION " +
2276:                         "attribute");
2277:               }
2278:             // VC: No Notation on Empty Element
2279:             ContentModel model = doctype.getElementModel(elementName);
2280:             if (model != null && model.type == ContentModel.EMPTY)
2281:               error("attributes of type NOTATION must not be declared on an " +
2282:                     "element declared EMPTY");
2283:           }
2284:       }
2285:     String enumer = null;
2286:     if ("ENUMERATION".equals(type) || "NOTATION".equals(type))
2287:       enumer = acc.toString();
2288:     else
2289:       values = null;
2290:     requireWhitespace();
2291:     readDefault(elementName, name, type, enumer, values);
2292:   }
2293: 
2294:   /**
2295:    * Parse an attribute type.
2296:    */
2297:   private String readAttType(StringBuffer acc, HashSet values)
2298:     throws IOException, XMLStreamException
2299:   {
2300:     if (tryRead('('))
2301:       {
2302:         readEnumeration(false, acc, values);
2303:         return "ENUMERATION";
2304:       }
2305:     else
2306:       {
2307:         String typeString = readNmtoken(true);
2308:         if ("NOTATION".equals(typeString))
2309:           {
2310:             readNotationType(acc, values);
2311:             return typeString;
2312:           }
2313:         else if ("CDATA".equals(typeString) ||
2314:                  "ID".equals(typeString) ||
2315:                  "IDREF".equals(typeString) ||
2316:                  "IDREFS".equals(typeString) ||
2317:                  "ENTITY".equals(typeString) ||
2318:                  "ENTITIES".equals(typeString) ||
2319:                  "NMTOKEN".equals(typeString) ||
2320:                  "NMTOKENS".equals(typeString))
2321:           return typeString;
2322:         else
2323:           {
2324:             error("illegal attribute type", typeString);
2325:             return null;
2326:           }
2327:       }
2328:   }
2329: 
2330:   /**
2331:    * Parse an enumeration.
2332:    */
2333:   private void readEnumeration(boolean isNames, StringBuffer acc,
2334:                                HashSet values)
2335:     throws IOException, XMLStreamException
2336:   {
2337:     acc.append('(');
2338:     // first token
2339:     skipWhitespace();
2340:     String token = readNmtoken(isNames);
2341:     acc.append(token);
2342:     values.add(token);
2343:     // subsequent tokens
2344:     skipWhitespace();
2345:     while (!tryRead(')'))
2346:       {
2347:         require('|');
2348:         acc.append('|');
2349:         skipWhitespace();
2350:         token = readNmtoken(isNames);
2351:         // VC: No Duplicate Tokens
2352:         if (validating && values.contains(token))
2353:           error("duplicate token", token);
2354:         acc.append(token);
2355:         values.add(token);
2356:         skipWhitespace();
2357:       }
2358:     acc.append(')');
2359:   }
2360: 
  /**
   * Parse a notation type for an attribute: mandatory whitespace, an
   * opening parenthesis, then the enumeration of notation names.
   *
   * @param acc receives the textual form of the enumeration
   * @param values receives each notation name
   * @throws IOException if the underlying read fails
   * @throws XMLStreamException if the notation type is malformed
   */
  private void readNotationType(StringBuffer acc, HashSet values)
    throws IOException, XMLStreamException
  {
    requireWhitespace();
    require('(');
    // true: the enumerated tokens are read as names
    readEnumeration(true, acc, values);
  }
2371: 
2372:   /**
2373:    * Parse the default value for an attribute.
2374:    */
2375:   private void readDefault(String elementName, String name,
2376:                            String type, String enumeration, HashSet values)
2377:     throws IOException, XMLStreamException
2378:   {
2379:     int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
2380:     int flags = LIT_ATTRIBUTE;
2381:     String value = null, defaultType = null;
2382:     boolean saved = expandPE;
2383:     
2384:     if (!"CDATA".equals(type))
2385:       flags |= LIT_NORMALIZE;
2386: 
2387:     expandPE = false;
2388:     if (tryRead('#'))
2389:       {
2390:         if (tryRead("FIXED"))
2391:           {
2392:             defaultType = "#FIXED";
2393:             valueType = ATTRIBUTE_DEFAULT_FIXED;
2394:             requireWhitespace();
2395:             value = readLiteral(flags, false);
2396:           }
2397:         else if (tryRead("REQUIRED"))
2398:           {
2399:             defaultType = "#REQUIRED";
2400:             valueType = ATTRIBUTE_DEFAULT_REQUIRED;
2401:           }
2402:         else if (tryRead("IMPLIED"))
2403:           {
2404:             defaultType = "#IMPLIED";
2405:             valueType = ATTRIBUTE_DEFAULT_IMPLIED;
2406:           }
2407:         else
2408:           error("illegal keyword for attribute default value");
2409:       }
2410:     else
2411:       value = readLiteral(flags, false);
2412:     expandPE = saved;
2413:     if (validating)
2414:       {
2415:         if ("ID".equals(type))
2416:           {
2417:             // VC: Attribute Default Value Syntactically Correct
2418:             if (value != null && !isNmtoken(value, true))
2419:               error("default value must match Name production", value);
2420:             // VC: ID Attribute Default
2421:             if (valueType != ATTRIBUTE_DEFAULT_REQUIRED &&
2422:                 valueType != ATTRIBUTE_DEFAULT_IMPLIED)
2423:               error("ID attributes must have a declared default of " +
2424:                     "#IMPLIED or #REQUIRED");
2425:           }
2426:         else if (value != null)
2427:           {
2428:             // VC: Attribute Default Value Syntactically Correct
2429:             if ("IDREF".equals(type) || "ENTITY".equals(type))
2430:               {
2431:                 if (!isNmtoken(value, true))
2432:                   error("default value must match Name production", value);
2433:               }
2434:             else if ("IDREFS".equals(type) || "ENTITIES".equals(type))
2435:               {
2436:                 StringTokenizer st = new StringTokenizer(value);
2437:                 while (st.hasMoreTokens())
2438:                   {
2439:                     String token = st.nextToken();
2440:                     if (!isNmtoken(token, true))
2441:                       error("default value must match Name production", token);
2442:                   }
2443:               }
2444:             else if ("NMTOKEN".equals(type) || "ENUMERATION".equals(type))
2445:               {
2446:                 if (!isNmtoken(value, false))
2447:                   error("default value must match Nmtoken production", value);
2448:               }
2449:             else if ("NMTOKENS".equals(type))
2450:               {
2451:                 StringTokenizer st = new StringTokenizer(value);
2452:                 while (st.hasMoreTokens())
2453:                   {
2454:                     String token = st.nextToken();
2455:                     if (!isNmtoken(token, false))
2456:                       error("default value must match Nmtoken production",
2457:                             token);
2458:                   }
2459:               }
2460:           }
2461:       }
2462:     // Register attribute def
2463:     AttributeDecl attribute =
2464:       new AttributeDecl(type, value, valueType, enumeration, values,
2465:                         inputStack.size() != 1);
2466:     doctype.addAttributeDecl(elementName, name, attribute);
2467:   }
2468: 
2469:   /**
2470:    * Parse the EntityDecl production.
2471:    */
2472:   private void readEntityDecl(boolean inExternalSubset)
2473:     throws IOException, XMLStreamException
2474:   {
2475:     int flags = 0;
2476:     // Check if parameter entity
2477:     boolean peFlag = false;
2478:     expandPE = false;
2479:     requireWhitespace();
2480:     if (tryRead('%'))
2481:       {
2482:         peFlag = true;
2483:         requireWhitespace();
2484:       }
2485:     expandPE = true;
2486:     // Read entity name
2487:     String name = readNmtoken(true);
2488:     if (name.indexOf(':') != -1)
2489:       error("illegal character ':' in entity name", name);
2490:     if (peFlag)
2491:       name = "%" + name;
2492:     requireWhitespace();
2493:     mark(1);
2494:     int c = readCh();
2495:     reset();
2496:     if (c == 0x22 || c == 0x27) // " | '
2497:       {
2498:         // Internal entity replacement text
2499:         String value = readLiteral(flags | LIT_DISABLE_EREF, true);
2500:         int ai = value.indexOf('&');
2501:         while (ai != -1)
2502:           {
2503:             int sci = value.indexOf(';', ai);
2504:             if (sci == -1)
2505:               error("malformed reference in entity value", value);
2506:             String ref = value.substring(ai + 1, sci);
2507:             int[] cp = UnicodeReader.toCodePointArray(ref);
2508:             if (cp.length == 0)
2509:               error("malformed reference in entity value", value);
2510:             if (cp[0] == 0x23) // #
2511:               {
2512:                 if (cp.length == 1)
2513:                   error("malformed reference in entity value", value);
2514:                 if (cp[1] == 0x78) // 'x'
2515:                   {
2516:                     if (cp.length == 2)
2517:                       error("malformed reference in entity value", value);
2518:                     for (int i = 2; i < cp.length; i++)
2519:                       {
2520:                         int x = cp[i];
2521:                         if (x < 0x30 ||
2522:                             (x > 0x39 && x < 0x41) ||
2523:                             (x > 0x46 && x < 0x61) ||
2524:                             x > 0x66)
2525:                           error("malformed character reference in entity value",
2526:                                 value);
2527:                       }
2528:                   }
2529:                 else
2530:                   {
2531:                     for (int i = 1; i < cp.length; i++)
2532:                       {
2533:                         int x = cp[i];
2534:                         if (x < 0x30 || x > 0x39)
2535:                           error("malformed character reference in entity value",
2536:                                 value);
2537:                       }
2538:                   }
2539:               }
2540:             else
2541:               {
2542:                 if (!isNameStartCharacter(cp[0], input.xml11))
2543:                   error("malformed reference in entity value", value);
2544:                 for (int i = 1; i < cp.length; i++)
2545:                   {
2546:                     if (!isNameCharacter(cp[i], input.xml11))
2547:                       error("malformed reference in entity value", value);
2548:                   }
2549:               }
2550:             ai = value.indexOf('&', sci);
2551:           }
2552:         doctype.addEntityDecl(name, value, inExternalSubset);
2553:       }
2554:     else
2555:       {
2556:         ExternalIds ids = readExternalIds(false, false);
2557:         // Check for NDATA
2558:         boolean white = tryWhitespace();
2559:         if (!peFlag && tryRead("NDATA"))
2560:           {
2561:             if (!white)
2562:               error("whitespace required before NDATA");
2563:             requireWhitespace();
2564:             ids.notationName = readNmtoken(true);
2565:           }
2566:         doctype.addEntityDecl(name, ids, inExternalSubset);
2567:       }
2568:     // finish
2569:     skipWhitespace();
2570:     require('>');
2571:   }
2572: 
2573:   /**
2574:    * Parse the NotationDecl production.
2575:    */
2576:   private void readNotationDecl(boolean inExternalSubset)
2577:     throws IOException, XMLStreamException
2578:   {
2579:     requireWhitespace();
2580:     String notationName = readNmtoken(true);
2581:     if (notationName.indexOf(':') != -1)
2582:       error("illegal character ':' in notation name", notationName);
2583:     if (validating)
2584:       {
2585:         // VC: Unique Notation Name
2586:         ExternalIds notation = doctype.getNotation(notationName);
2587:         if (notation != null)
2588:           error("duplicate notation name", notationName);
2589:       }
2590:     requireWhitespace();
2591:     ExternalIds ids = readExternalIds(true, false);
2592:     ids.notationName = notationName;
2593:     doctype.addNotationDecl(notationName, ids, inExternalSubset);
2594:     skipWhitespace();
2595:     require('>');
2596:   }
2597: 
2598:   /**
2599:    * Returns a tuple {publicId, systemId}.
2600:    */
2601:   private ExternalIds readExternalIds(boolean inNotation, boolean isSubset)
2602:     throws IOException, XMLStreamException
2603:   {
2604:     int c;
2605:     int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
2606:     ExternalIds ids = new ExternalIds();
2607:     
2608:     if (tryRead("PUBLIC"))
2609:       {
2610:         requireWhitespace();
2611:         ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags, false);
2612:         if (inNotation)
2613:           {
2614:             skipWhitespace();
2615:             mark(1);
2616:             c = readCh();
2617:             reset();
2618:             if (c == 0x22 || c == 0x27) // " | '
2619:               {
2620:                 String href = readLiteral(flags, false);
2621:                 ids.systemId = absolutize(input.systemId, href);
2622:               }
2623:           }
2624:         else
2625:           {
2626:             requireWhitespace();
2627:             String href = readLiteral(flags, false);
2628:             ids.systemId = absolutize(input.systemId, href);
2629:           }
2630:         // Check valid URI characters
2631:         for (int i = 0; i < ids.publicId.length(); i++)
2632:           {
2633:             char d = ids.publicId.charAt(i);
2634:             if (d >= 'a' && d <= 'z')
2635:               continue;
2636:             if (d >= 'A' && d <= 'Z')
2637:               continue;
2638:             if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1)
2639:               continue;
2640:             error("illegal PUBLIC id character",
2641:                   "U+" + Integer.toHexString(d));
2642:           }
2643:       }
2644:     else if (tryRead("SYSTEM"))
2645:       {
2646:         requireWhitespace();
2647:         String href = readLiteral(flags, false);
2648:         ids.systemId = absolutize(input.systemId, href);
2649:       }
2650:     else if (!isSubset)
2651:       {
2652:         error("missing SYSTEM or PUBLIC keyword");
2653:       }
2654:     if (ids.systemId != null && !inNotation)
2655:       {
2656:         if (ids.systemId.indexOf('#') != -1)
2657:           error("SYSTEM id has a URI fragment", ids.systemId);
2658:       }
2659:     return ids;
2660:   }
2661: 
2662:   /**
2663:    * Parse the start of an element.
2664:    * @return the state of the parser afterwards (EMPTY_ELEMENT or CONTENT)
2665:    */
2666:   private int readStartElement()
2667:     throws IOException, XMLStreamException
2668:   {
2669:     // Read element name
2670:     String elementName = readNmtoken(true);
2671:     attrs.clear();
2672:     // Push namespace context
2673:     if (namespaceAware)
2674:       {
2675:         if (elementName.charAt(0) == ':' ||
2676:             elementName.charAt(elementName.length() - 1) == ':')
2677:           error("not a QName", elementName);
2678:         namespaces.addFirst(new LinkedHashMap());
2679:       }
2680:     // Read element content
2681:     boolean white = tryWhitespace();
2682:     mark(1);
2683:     int c = readCh();
2684:     while (c != 0x2f && c != 0x3e) // '/' | '>'
2685:       {
2686:         // Read attribute
2687:         reset();
2688:         if (!white)
2689:           error("need whitespace between attributes");
2690:         readAttribute(elementName);
2691:         white = tryWhitespace();
2692:         mark(1);
2693:         c = readCh();
2694:       }
2695:     // supply defaulted attributes
2696:     if (doctype != null)
2697:       {
2698:         for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); )
2699:           {
2700:             Map.Entry entry = (Map.Entry) i.next();
2701:             String attName = (String) entry.getKey();
2702:             AttributeDecl decl = (AttributeDecl) entry.getValue();
2703:             if (validating)
2704:               {
2705:                 switch (decl.valueType)
2706:                   {
2707:                   case ATTRIBUTE_DEFAULT_REQUIRED:
2708:                     // VC: Required Attribute
2709:                     if (decl.value == null && !attributeSpecified(attName))
2710:                       error("value for " + attName + " attribute is required");
2711:                     break;
2712:                   case ATTRIBUTE_DEFAULT_FIXED:
2713:                     // VC: Fixed Attribute Default
2714:                     for (Iterator j = attrs.iterator(); j.hasNext(); )
2715:                       {
2716:                         Attribute a = (Attribute) j.next();
2717:                         if (attName.equals(a.name) &&
2718:                             !decl.value.equals(a.value))
2719:                           error("value for " + attName + " attribute must be " +
2720:                                 decl.value);
2721:                       }
2722:                     break;
2723:                   }
2724:               }
2725:             if (namespaceAware && attName.equals("xmlns"))
2726:               {
2727:                 LinkedHashMap ctx =
2728:                   (LinkedHashMap) namespaces.getFirst();
2729:                 if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2730:                   continue; // namespace was specified
2731:               }
2732:             else if (namespaceAware && attName.startsWith("xmlns:"))
2733:               {
2734:                 LinkedHashMap ctx =
2735:                   (LinkedHashMap) namespaces.getFirst();
2736:                 if (ctx.containsKey(attName.substring(6)))
2737:                   continue; // namespace was specified
2738:               }
2739:             else if (attributeSpecified(attName))
2740:               continue;
2741:             if (decl.value == null)
2742:               continue;
2743:             // VC: Standalone Document Declaration
2744:             if (validating && decl.external && xmlStandalone == Boolean.TRUE)
2745:               error("standalone must be 'no' if attributes inherit values " +
2746:                     "from externally declared markup declarations");
2747:             Attribute attr =
2748:               new Attribute(attName, decl.type, false, decl.value);
2749:             if (namespaceAware)
2750:               {
2751:                 if (!addNamespace(attr))
2752:                   attrs.add(attr);
2753:               }
2754:             else
2755:               attrs.add(attr);
2756:           }
2757:       }
2758:     if (baseAware)
2759:       {
2760:         String uri = getAttributeValue(XMLConstants.XML_NS_URI, "base");
2761:         String base = getXMLBase();
2762:         bases.addFirst(absolutize(base, uri));
2763:       }
2764:     if (namespaceAware)
2765:       {
2766:         // check prefix bindings
2767:         int ci = elementName.indexOf(':');
2768:         if (ci != -1)
2769:           {
2770:             String prefix = elementName.substring(0, ci);
2771:             String uri = getNamespaceURI(prefix);
2772:             if (uri == null)
2773:               error("unbound element prefix", prefix);
2774:             else if (input.xml11 && "".equals(uri))
2775:               error("XML 1.1 unbound element prefix", prefix);
2776:           }
2777:         for (Iterator i = attrs.iterator(); i.hasNext(); )
2778:           {
2779:             Attribute attr = (Attribute) i.next();
2780:             if (attr.prefix != null &&
2781:                 !XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix))
2782:               {
2783:                 String uri = getNamespaceURI(attr.prefix);
2784:                 if (uri == null)
2785:                   error("unbound attribute prefix", attr.prefix);
2786:                 else if (input.xml11 && "".equals(uri))
2787:                   error("XML 1.1 unbound attribute prefix", attr.prefix);
2788:               }
2789:           }
2790:       }
2791:     if (validating && doctype != null)
2792:       {
2793:         validateStartElement(elementName);
2794:         currentContentModel = doctype.getElementModel(elementName);
2795:         if (currentContentModel == null)
2796:           error("no element declaration", elementName);
2797:         validationStack.add(new LinkedList());
2798:       }
2799:     // make element name available for read
2800:     buf.setLength(0);
2801:     buf.append(elementName);
2802:     // push element onto stack
2803:     stack.addLast(elementName);
2804:     switch (c)
2805:       {
2806:       case 0x3e: // '>'
2807:         return CONTENT;
2808:       case 0x2f: // '/'
2809:         require('>');
2810:         return EMPTY_ELEMENT;
2811:       }
2812:     return -1; // to satisfy compiler
2813:   }
2814: 
2815:   /**
2816:    * Indicates whether the specified attribute name was specified for the
2817:    * current element.
2818:    */
2819:   private boolean attributeSpecified(String attName)
2820:   {
2821:     for (Iterator j = attrs.iterator(); j.hasNext(); )
2822:       {
2823:         Attribute a = (Attribute) j.next();
2824:         if (attName.equals(a.name))
2825:           return true;
2826:       }
2827:     return false;
2828:   }
2829: 
2830:   /**
2831:    * Parse an attribute.
2832:    */
2833:   private void readAttribute(String elementName)
2834:     throws IOException, XMLStreamException
2835:   {
2836:     // Read attribute name
2837:     String attributeName = readNmtoken(true);
2838:     String type = getAttributeType(elementName, attributeName);
2839:     readEq();
2840:     // Read literal
2841:     final int flags = LIT_ATTRIBUTE |  LIT_ENTITY_REF;
2842:     String value = (type == null || "CDATA".equals(type)) ?
2843:       readLiteral(flags, false) : readLiteral(flags | LIT_NORMALIZE, false);
2844:     // add attribute event
2845:     Attribute attr = this.new Attribute(attributeName, type, true, value);
2846:     if (namespaceAware)
2847:       {
2848:         if (attributeName.charAt(0) == ':' ||
2849:             attributeName.charAt(attributeName.length() - 1) == ':')
2850:           error("not a QName", attributeName);
2851:         else if (attributeName.equals("xmlns"))
2852:           {
2853:             LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2854:             if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2855:               error("duplicate default namespace");
2856:           }
2857:         else if (attributeName.startsWith("xmlns:"))
2858:           {
2859:             LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2860:             if (ctx.containsKey(attributeName.substring(6)))
2861:               error("duplicate namespace", attributeName.substring(6));
2862:           }
2863:         else if (attrs.contains(attr))
2864:           error("duplicate attribute", attributeName);
2865:       }
2866:     else if (attrs.contains(attr))
2867:       error("duplicate attribute", attributeName);
2868:     if (validating && doctype != null)
2869:       {
2870:         // VC: Attribute Value Type
2871:         AttributeDecl decl =
2872:           doctype.getAttributeDecl(elementName, attributeName);
2873:         if (decl == null)
2874:           error("attribute must be declared", attributeName);
2875:         if ("ENUMERATION".equals(decl.type))
2876:           {
2877:             // VC: Enumeration
2878:             if (!decl.values.contains(value))
2879:               error("value does not match enumeration " + decl.enumeration,
2880:                     value);
2881:           }
2882:         else if ("ID".equals(decl.type))
2883:           {
2884:             // VC: ID
2885:             if (!isNmtoken(value, true))
2886:               error("ID values must match the Name production");
2887:             if (ids.contains(value))
2888:               error("Duplicate ID", value);
2889:             ids.add(value);
2890:           }
2891:         else if ("IDREF".equals(decl.type) || "IDREFS".equals(decl.type))
2892:           {
2893:             StringTokenizer st = new StringTokenizer(value);
2894:             while (st.hasMoreTokens())
2895:               {
2896:                 String token = st.nextToken();
2897:                 // VC: IDREF
2898:                 if (!isNmtoken(token, true))
2899:                   error("IDREF values must match the Name production");
2900:                 idrefs.add(token);
2901:               }
2902:           }
2903:         else if ("NMTOKEN".equals(decl.type) || "NMTOKENS".equals(decl.type))
2904:           {
2905:             StringTokenizer st = new StringTokenizer(value);
2906:             while (st.hasMoreTokens())
2907:               {
2908:                 String token = st.nextToken();
2909:                 // VC: Name Token
2910:                 if (!isNmtoken(token, false))
2911:                   error("NMTOKEN values must match the Nmtoken production");
2912:               }
2913:           }
2914:         else if ("ENTITY".equals(decl.type))
2915:           {
2916:             // VC: Entity Name
2917:             if (!isNmtoken(value, true))
2918:               error("ENTITY values must match the Name production");
2919:             Object entity = doctype.getEntity(value);
2920:             if (entity == null || !(entity instanceof ExternalIds) ||
2921:                 ((ExternalIds) entity).notationName == null)
2922:               error("ENTITY values must match the name of an unparsed " +
2923:                     "entity declared in the DTD");
2924:           }
2925:         else if ("NOTATION".equals(decl.type))
2926:           {
2927:             if (!decl.values.contains(value))
2928:               error("NOTATION values must match a declared notation name",
2929:                     value);
2930:             // VC: Notation Attributes
2931:             ExternalIds notation = doctype.getNotation(value);
2932:             if (notation == null)
2933:               error("NOTATION values must match the name of a notation " +
2934:                     "declared in the DTD", value);
2935:           }
2936:       }
2937:     if (namespaceAware)
2938:       {
2939:         if (!addNamespace(attr))
2940:           attrs.add(attr);
2941:       }
2942:     else
2943:       attrs.add(attr);
2944:   }
2945: 
2946:   /**
2947:    * Determines whether the specified attribute is a namespace declaration,
2948:    * and adds it to the current namespace context if so. Returns false if
2949:    * the attribute is an ordinary attribute.
2950:    */
2951:   private boolean addNamespace(Attribute attr)
2952:     throws XMLStreamException
2953:   {
2954:     if ("xmlns".equals(attr.name))
2955:       {
2956:         LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2957:         if (ctx.get(XMLConstants.DEFAULT_NS_PREFIX) != null)
2958:           error("Duplicate default namespace declaration");
2959:         if (XMLConstants.XML_NS_URI.equals(attr.value))
2960:           error("can't bind XML namespace");
2961:         ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value);
2962:         return true;
2963:       }
2964:     else if ("xmlns".equals(attr.prefix))
2965:       {
2966:         LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2967:         if (ctx.get(attr.localName) != null)
2968:           error("Duplicate namespace declaration for prefix",
2969:                 attr.localName);
2970:         if (XMLConstants.XML_NS_PREFIX.equals(attr.localName))
2971:           {
2972:             if (!XMLConstants.XML_NS_URI.equals(attr.value))
2973:               error("can't redeclare xml prefix");
2974:             else
2975:               return false; // treat as attribute
2976:           }
2977:         if (XMLConstants.XML_NS_URI.equals(attr.value))
2978:           error("can't bind non-xml prefix to XML namespace");
2979:         if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.localName))
2980:           error("can't redeclare xmlns prefix");
2981:         if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value))
2982:           error("can't bind non-xmlns prefix to XML Namespace namespace");
2983:         if ("".equals(attr.value) && !input.xml11)
2984:           error("illegal use of 1.1-style prefix unbinding in 1.0 document");
2985:         ctx.put(attr.localName, attr.value);
2986:         return true;
2987:       }
2988:     return false;
2989:   }
2990: 
2991:   /**
2992:    * Parse a closing tag.
2993:    */
2994:   private void readEndElement()
2995:     throws IOException, XMLStreamException
2996:   {
2997:     // pop element off stack
2998:     String expected = (String) stack.removeLast();
2999:     require(expected);
3000:     skipWhitespace();
3001:     require('>');
3002:     // Make element name available
3003:     buf.setLength(0);
3004:     buf.append(expected);
3005:     if (validating && doctype != null)
3006:       endElementValidationHook();
3007:   }
3008: 
3009:   /**
3010:    * Validate the end of an element.
3011:    * Called on an end-element or empty element if validating.
3012:    */
3013:   private void endElementValidationHook()
3014:     throws XMLStreamException
3015:   {
3016:     validateEndElement();
3017:     validationStack.removeLast();
3018:     if (stack.isEmpty())
3019:       currentContentModel = null;
3020:     else
3021:       {
3022:         String parent = (String) stack.getLast();
3023:         currentContentModel = doctype.getElementModel(parent);
3024:       }
3025:   }
3026: 
3027:   /**
3028:    * Parse a comment.
3029:    */
3030:   private void readComment(boolean inDTD)
3031:     throws IOException, XMLStreamException
3032:   {
3033:     boolean saved = expandPE;
3034:     expandPE = false;
3035:     buf.setLength(0);
3036:     readUntil(TEST_END_COMMENT);
3037:     require('>');
3038:     expandPE = saved;
3039:     if (inDTD)
3040:       doctype.addComment(buf.toString());
3041:   }
3042: 
3043:   /**
3044:    * Parse a processing instruction.
3045:    */
3046:   private void readPI(boolean inDTD)
3047:     throws IOException, XMLStreamException
3048:   {
3049:     boolean saved = expandPE;
3050:     expandPE = false;
3051:     piTarget = readNmtoken(true);
3052:     if (piTarget.indexOf(':') != -1)
3053:       error("illegal character in PI target", new Character(':'));
3054:     if ("xml".equalsIgnoreCase(piTarget))
3055:       error("illegal PI target", piTarget);
3056:     if (tryRead(TEST_END_PI))
3057:       piData = null;
3058:     else
3059:       {
3060:         if (!tryWhitespace())
3061:           error("whitespace required between PI target and data");
3062:         buf.setLength(0);
3063:         readUntil(TEST_END_PI);
3064:         piData = buf.toString();
3065:       }
3066:     expandPE = saved;
3067:     if (inDTD)
3068:       doctype.addPI(piTarget, piData);
3069:   }
3070: 
3071:   /**
3072:    * Parse an entity reference.
3073:    */
3074:   private void readReference()
3075:     throws IOException, XMLStreamException
3076:   {
3077:     buf.setLength(0);
3078:     String entityName = readNmtoken(true);
3079:     require(';');
3080:     buf.setLength(0);
3081:     buf.append(entityName);
3082:   }
3083: 
3084:   /**
3085:    * Read an CDATA section.
3086:    */
3087:   private void readCDSect()
3088:     throws IOException, XMLStreamException
3089:   {
3090:     buf.setLength(0);
3091:     readUntil(TEST_END_CDATA);
3092:   }
3093: 
  /**
   * Read character data into <code>buf</code>.
   * Input is consumed in blocks through <code>tmpBuf</code>; character
   * references and predefined entity references are resolved inline, and
   * reading stops at a '&lt;' (unless coalescing and it starts a CDATA
   * section), at a reference to a non-predefined entity, at the end of a
   * nested input, or once the buffer reaches 2MB.
   * @param prefix text placed at the start of the buffer, or null; it is
   * not examined when deciding between CHARACTERS and SPACE
   * @return the type of text read (CHARACTERS or SPACE)
   */
  private int readCharData(String prefix)
    throws IOException, XMLStreamException
  {
    // stays true while only whitespace has been seen
    boolean white = true;
    buf.setLength(0);
    if (prefix != null)
      buf.append(prefix);
    boolean done = false;
    // set once any reference has been processed; triggers CR/LF cleanup
    boolean entities = false;
    while (!done)
      {
        // Block read
        // (marked first so that reset() below can rewind to the block start)
        mark(tmpBuf.length);
        int len = read(tmpBuf, 0, tmpBuf.length);
        if (len == -1)
          {
            if (inputStack.size() > 1)
              {
                popInput();
                // report end-entity
                done = true;
              }
            else
              throw new EOFException();
          }
        for (int i = 0; i < len && !done; i++)
          {
            int c = tmpBuf[i];
            switch (c)
              {
              case 0x20:
              case 0x09:
              case 0x0a:
              case 0x0d:
                buf.append(Character.toChars(c));
                break; // whitespace
              case 0x26: // '&'
                // rewind to the block start and re-consume the i characters
                // preceding the '&' (they are already in buf)
                // NOTE(review): single read() call whose result is not
                // checked, unlike the loop used in the '<' case below
                reset();
                read(tmpBuf, 0, i);
                // character reference?
                mark(3);
                c = readCh(); // &
                c = readCh();
                if (c == 0x23) // '#'
                  {
                    mark(1);
                    c = readCh();
                    boolean hex = (c == 0x78); // 'x'
                    if (!hex)
                      reset(); // first digit was consumed; put it back
                    char[] ch = readCharacterRef(hex ? 16 : 10);
                    buf.append(ch, 0, ch.length);
                    // a referenced non-whitespace character makes this text
                    for (int j = 0; j < ch.length; j++)
                      {
                        switch (ch[j])
                          {
                          case 0x20:
                          case 0x09:
                          case 0x0a:
                          case 0x0d:
                            break; // whitespace
                          default:
                            white = false;
                          }
                      }
                  }
                else
                  {
                    // entity reference
                    // (rewind to just before the '&' marked above)
                    reset();
                    c = readCh(); // &
                    String entityName = readNmtoken(true);
                    require(';');
                    String text =
                      (String) PREDEFINED_ENTITIES.get(entityName);
                    if (text != null)
                      buf.append(text);
                    else
                      {
                        // not predefined: push the reference text back as a
                        // new input and end this text run (this break leaves
                        // the switch, skipping the refill below)
                        // presumably the caller then sees the reference --
                        // confirm against caller
                        pushInput("", "&" + entityName + ";", false, false);
                        done = true;
                        break;
                      }
                  }
                // continue processing
                // (i = -1 makes the for loop restart at index 0 of the
                // freshly read block)
                i = -1;
                mark(tmpBuf.length);
                len = read(tmpBuf, 0, tmpBuf.length);
                if (len == -1)
                  {
                    if (inputStack.size() > 1)
                      {
                        popInput();
                        done = true;
                      }
                    else
                      throw new EOFException();
                  }
                entities = true;
                break; // end of text sequence
              case 0x3e: // '>'
                // reject a literal "]]>" in character data
                int l = buf.length();
                if (l > 1 &&
                    buf.charAt(l - 1) == ']' &&
                    buf.charAt(l - 2) == ']')
                  error("Character data may not contain unescaped ']]>'");
                buf.append(Character.toChars(c));
                break;
              case 0x3c: // '<'
                // rewind to the block start, then consume exactly the i
                // characters before the '<'
                reset();
                // read i characters
                int count = 0, remaining = i;
                do
                  {
                    int r = read(tmpBuf, 0, remaining);
                    count += r;
                    remaining -= r;
                  }
                while (count < i);
                // terminate the for loop over this block
                i = len;
                if (coalescing && tryRead(TEST_CDATA))
                  readUntil(TEST_END_CDATA); // read CDATA section into buf
                else
                  done = true; // end of text sequence
                break;
              default:
                if (input.xml11)
                  {
                    if (!isXML11Char(c) || isXML11RestrictedChar(c))
                      error("illegal XML 1.1 character",
                            "U+" + Integer.toHexString(c));
                  }
                else if (!isChar(c))
                  error("illegal XML character",
                        "U+" + Integer.toHexString(c));
                white = false;
                buf.append(Character.toChars(c));
              }
          }
        // if text buffer >= 2MB, return it as a chunk
        // to avoid excessive memory use
        if (buf.length() >= 2097152)
          done = true;
      }
    if (entities)
      normalizeCRLF(buf);
    return white ? XMLStreamConstants.SPACE : XMLStreamConstants.CHARACTERS;
  }
3246: 
3247:   /**
3248:    * Expands the specified entity.
3249:    */
3250:   private void expandEntity(String name, boolean inAttr, boolean normalize)
3251:     throws IOException, XMLStreamException
3252:   {
3253:     if (doctype != null)
3254:       {
3255:         Object value = doctype.getEntity(name);
3256:         if (value != null)
3257:           {
3258:             if (xmlStandalone == Boolean.TRUE)
3259:               {
3260:                 // VC: Standalone Document Declaration
3261:                 if (doctype.isEntityExternal(name))
3262:                   error("reference to external entity in standalone document");
3263:                 else if (value instanceof ExternalIds)
3264:                   {
3265:                     ExternalIds ids = (ExternalIds) value;
3266:                     if (ids.notationName != null &&
3267:                         doctype.isNotationExternal(ids.notationName))
3268:                       error("reference to external notation in " +
3269:                             "standalone document");
3270:                   }
3271:               }
3272:             if (value instanceof String)
3273:               {
3274:                 String text = (String) value;
3275:                 if (inAttr && text.indexOf('<') != -1)
3276:                   error("< in attribute value");
3277:                 pushInput(name, text, !inAttr, normalize);
3278:               }
3279:             else if (inAttr)
3280:               error("reference to external entity in attribute value", name);
3281:             else
3282:               pushInput(name, (ExternalIds) value, !inAttr, normalize);
3283:             return;
3284:           }
3285:       }
3286:     error("reference to undeclared entity", name);
3287:   }
3288: 
3289:   /**
3290:    * Indicates whether the specified entity is unparsed.
3291:    */
3292:   private boolean isUnparsedEntity(String name)
3293:   {
3294:     if (doctype != null)
3295:       {
3296:         Object value = doctype.getEntity(name);
3297:         if (value != null && value instanceof ExternalIds)
3298:           return ((ExternalIds) value).notationName != null;
3299:       }
3300:     return false;
3301:   }
3302: 
3303:   /**
3304:    * Read an equals sign.
3305:    */
3306:   private void readEq()
3307:     throws IOException, XMLStreamException
3308:   { 
3309:     skipWhitespace();
3310:     require('=');
3311:     skipWhitespace();
3312:   }
3313: 
3314:   /**
3315:    * Character read for reading literals.
3316:    * @param recognizePEs whether to recognize parameter-entity references
3317:    */
3318:   private int literalReadCh(boolean recognizePEs)
3319:     throws IOException, XMLStreamException
3320:   {
3321:     int c = recognizePEs ? readCh() : read();
3322:     while (c == -1)
3323:       {
3324:         if (inputStack.size() > 1)
3325:           {
3326:             inputStack.removeLast();
3327:             input = (Input) inputStack.getLast();
3328:             // Don't issue end-entity
3329:             c = recognizePEs ? readCh() : read();
3330:           }
3331:         else
3332:           throw new EOFException();
3333:       }
3334:     return c;
3335:   }
3336: 
  /**
   * Read a string literal delimited by single or double quotes.
   * The text is accumulated in <code>literalBuf</code>. Depending on
   * <code>flags</code>, whitespace characters are replaced by spaces,
   * character and entity references are expanded or left as literal text,
   * and the result is whitespace-normalized.
   * @param flags a combination of the LIT_* constants
   * @param recognizePEs whether to recognize parameter-entity references
   * @return the literal's value
   */
  private String readLiteral(int flags, boolean recognizePEs)
    throws IOException, XMLStreamException
  {
    boolean saved = expandPE;
    int delim = readCh();
    if (delim != 0x27 && delim != 0x22)
      error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim));
    literalBuf.setLength(0);
    if ((flags & LIT_DISABLE_PE) != 0)
      expandPE = false;
    // set once any reference has been expanded; triggers CR/LF cleanup
    boolean entities = false;
    // depth of the input stack at the opening delimiter
    int inputStackSize = inputStack.size();
    do
      {
        int c = literalReadCh(recognizePEs);
        // the closing delimiter only counts at the original input depth,
        // i.e. not inside the replacement text of an expanded entity
        if (c == delim && inputStackSize == inputStack.size())
          break;
        switch (c)
          {
          case 0x0a:
          case 0x0d:
            if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
              c = 0x20; // normalize to space
            break;
          case 0x09:
            if ((flags & LIT_ATTRIBUTE) != 0)
              c = 0x20; // normalize to space
            break;
          case 0x26: // '&'
            // mark so that the lookahead character can be pushed back
            mark(2);
            c = readCh();
            if (c == 0x23) // '#'
              {
                if ((flags & LIT_DISABLE_CREF) != 0)
                  {
                    // keep the reference as literal text: push back '#'
                    // and append the '&' below
                    reset();
                    c = 0x26; // '&'
                  }
                else
                  {
                    mark(1);
                    c = readCh();
                    boolean hex = (c == 0x78); // 'x'
                    if (!hex)
                      reset(); // first digit was consumed; put it back
                    char[] ref = readCharacterRef(hex ? 16 : 10);
                    for (int i = 0; i < ref.length; i++)
                      literalBuf.append(ref[i]);
                    entities = true;
                    continue;
                  }
              }
            else
              {
                if ((flags & LIT_DISABLE_EREF) != 0)
                  {
                    // keep the reference as literal text
                    reset();
                    c = 0x26; // '&'
                  }
                else
                  {
                    // push back the first name character, then read the
                    // whole entity name
                    reset();
                    String entityName = readNmtoken(true);
                    require(';');
                    String text =
                      (String) PREDEFINED_ENTITIES.get(entityName);
                    if (text != null)
                      literalBuf.append(text);
                    else
                      expandEntity(entityName,
                                   (flags & LIT_ATTRIBUTE) != 0,
                                   true);
                    entities = true;
                    continue;
                  }
              }
            break;
          case 0x3c: // '<'
            if ((flags & LIT_ATTRIBUTE) != 0)
              error("attribute values may not contain '<'");
            break;
          case -1:
            // NOTE(review): literalReadCh loops until it has a character
            // other than -1 or throws, so this case looks unreachable
            if (inputStack.size() > 1)
              {
                popInput();
                continue;
              }
            throw new EOFException();
          default:
            if ((c < 0x0020 || c > 0xfffd) ||
                (c >= 0xd800 && c < 0xdc00) ||
                (input.xml11 && (c >= 0x007f) &&
                 (c <= 0x009f) && (c != 0x0085)))
              error("illegal character", "U+" + Integer.toHexString(c));
          }
        literalBuf.append(Character.toChars(c));
      }
    while (true);
    // NOTE(review): expandPE is not restored if an exception is thrown above
    expandPE = saved;
    if (entities)
      normalizeCRLF(literalBuf);
    if ((flags & LIT_NORMALIZE) > 0)
      literalBuf = normalize(literalBuf);
    return literalBuf.toString();
  }
3445: 
3446:   /**
3447:    * Performs attribute-value normalization of the text buffer.
3448:    * This discards leading and trailing whitespace, and replaces sequences
3449:    * of whitespace with a single space.
3450:    */
3451:   private StringBuffer normalize(StringBuffer buf)
3452:   {
3453:     StringBuffer acc = new StringBuffer();
3454:     int len = buf.length();
3455:     int avState = 0;
3456:     for (int i = 0; i < len; i++)
3457:       {
3458:         char c = buf.charAt(i);
3459:         if (c == ' ')
3460:           avState = (avState == 0) ? 0 : 1;
3461:         else
3462:           {
3463:             if (avState == 1)
3464:               acc.append(' ');
3465:             acc.append(c);
3466:             avState = 2;
3467:           }
3468:       }
3469:     return acc;
3470:   }
3471: 
3472:   /**
3473:    * Replace any CR/LF pairs in the buffer with LF.
3474:    * This may be necessary if combinations of CR or LF were declared as
3475:    * (character) entity references in the input.
3476:    */
3477:   private void normalizeCRLF(StringBuffer buf)
3478:   {
3479:     int len = buf.length() - 1;
3480:     for (int i = 0; i < len; i++)
3481:       {
3482:         char c = buf.charAt(i);
3483:         if (c == '\r' && buf.charAt(i + 1) == '\n')
3484:           {
3485:             buf.deleteCharAt(i--);
3486:             len--;
3487:           }
3488:       }
3489:   }
3490: 
3491:   /**
3492:    * Parse and expand a parameter entity reference.
3493:    */
3494:   private void expandPEReference()
3495:     throws IOException, XMLStreamException
3496:   {
3497:     String name = readNmtoken(true, new StringBuffer());
3498:     require(';');
3499:     mark(1); // ensure we don't reset to before the semicolon
3500:     if (doctype != null)
3501:       {
3502:         String entityName = "%" + name;
3503:         Object entity = doctype.getEntity(entityName);
3504:         if (entity != null)
3505:           {
3506:             if (xmlStandalone == Boolean.TRUE)
3507:               {
3508:                 if (doctype.isEntityExternal(entityName))
3509:                   error("reference to external parameter entity in " +
3510:                         "standalone document");
3511:               }
3512:             if (entity instanceof String)
3513:               {
3514:                 pushInput(name, (String) entity, false, input.normalize);
3515:                 //pushInput(name, " " + (String) entity + " ");
3516:               }
3517:             else
3518:               {
3519:                 //pushInput("", " ");
3520:                 pushInput(name, (ExternalIds) entity, false, input.normalize);
3521:                 //pushInput("", " ");
3522:               }
3523:           }
3524:         else
3525:           error("reference to undeclared parameter entity", name);
3526:       }
3527:     else
3528:       error("reference to parameter entity without doctype", name);
3529:   }
3530: 
3531:   /**
3532:    * Parse the digits in a character reference.
3533:    * @param base the base of the digits (10 or 16)
3534:    */
3535:   private char[] readCharacterRef(int base)
3536:     throws IOException, XMLStreamException
3537:   {
3538:     StringBuffer b = new StringBuffer();
3539:     for (int c = readCh(); c != 0x3b && c != -1; c = readCh())
3540:       b.append(Character.toChars(c));
3541:     try
3542:       {
3543:         int ord = Integer.parseInt(b.toString(), base);
3544:         if (input.xml11)
3545:           {
3546:             if (!isXML11Char(ord))
3547:               error("illegal XML 1.1 character reference " +
3548:                     "U+" + Integer.toHexString(ord));
3549:           }
3550:         else
3551:           {
3552:             if ((ord < 0x20 && !(ord == 0x0a || ord == 0x09 || ord == 0x0d))
3553:                 || (ord >= 0xd800 && ord <= 0xdfff)
3554:                 || ord == 0xfffe || ord == 0xffff
3555:                 || ord > 0x0010ffff)
3556:               error("illegal XML character reference " +
3557:                     "U+" + Integer.toHexString(ord));
3558:           }
3559:         return Character.toChars(ord);
3560:       }
3561:     catch (NumberFormatException e)
3562:       {
3563:         error("illegal characters in character reference", b.toString());
3564:         return null;
3565:       }
3566:   }
3567: 
3568:   /**
3569:    * Parses an NMTOKEN or Name production.
3570:    * @param isName if a Name, otherwise an NMTOKEN
3571:    */
3572:   private String readNmtoken(boolean isName)
3573:     throws IOException, XMLStreamException
3574:   {
3575:     return readNmtoken(isName, nmtokenBuf);
3576:   }
3577:   
3578:   /**
3579:    * Parses an NMTOKEN or Name production using the specified buffer.
3580:    * @param isName if a Name, otherwise an NMTOKEN
3581:    * @param buf the character buffer to use
3582:    */
3583:   private String readNmtoken(boolean isName, StringBuffer buf)
3584:     throws IOException, XMLStreamException
3585:   {
3586:     buf.setLength(0);
3587:     int c = readCh();
3588:     if (isName)
3589:       {
3590:         if (!isNameStartCharacter(c, input.xml11))
3591:           error("not a name start character",
3592:                 "U+" + Integer.toHexString(c));
3593:       }
3594:     else
3595:       {
3596:         if (!isNameCharacter(c, input.xml11))
3597:           error("not a name character",
3598:                 "U+" + Integer.toHexString(c));
3599:       }
3600:     buf.append(Character.toChars(c));
3601:     do
3602:       {
3603:         mark(1);
3604:         c = readCh();
3605:         switch (c)
3606:           {
3607:           case 0x25: // '%'
3608:           case 0x3c: // '<'
3609:           case 0x3e: // '>'
3610:           case 0x26: // '&'
3611:           case 0x2c: // ','
3612:           case 0x7c: // '|'
3613:           case 0x2a: // '*'
3614:           case 0x2b: // '+'
3615:           case 0x3f: // '?'
3616:           case 0x29: // ')'
3617:           case 0x3d: // '='
3618:           case 0x27: // '\''
3619:           case 0x22: // '"'
3620:           case 0x5b: // '['
3621:           case 0x20: // ' '
3622:           case 0x09: // '\t'
3623:           case 0x0a: // '\n'
3624:           case 0x0d: // '\r'
3625:           case 0x3b: // ';'
3626:           case 0x2f: // '/'
3627:           case -1:
3628:             reset();
3629:             return intern(buf.toString());
3630:           default:
3631:             if (!isNameCharacter(c, input.xml11))
3632:               error("not a name character",
3633:                     "U+" + Integer.toHexString(c));
3634:             else
3635:               buf.append(Character.toChars(c));
3636:           }
3637:       }
3638:     while (true);
3639:   }
3640: 
3641:   /**
3642:    * Indicates whether the specified Unicode character is an XML 1.1 Char.
3643:    */
3644:   public static boolean isXML11Char(int c)
3645:   {
3646:     return ((c >= 0x0001 && c <= 0xD7FF) ||
3647:             (c >= 0xE000 && c < 0xFFFE) ||
3648:             (c >= 0x10000 && c <= 0x10FFFF));
3649:   }
3650: 
3651:   /**
3652:    * Indicates whether the specified Unicode character is an XML 1.1
3653:    * RestrictedChar.
3654:    */
3655:   public static boolean isXML11RestrictedChar(int c)
3656:   {
3657:     return ((c >= 0x0001 && c <= 0x0008) ||
3658:             (c >= 0x000B && c <= 0x000C) ||
3659:             (c >= 0x000E && c <= 0x001F) ||
3660:             (c >= 0x007F && c <= 0x0084) ||
3661:             (c >= 0x0086 && c <= 0x009F));
3662:   }
3663: 
3664:   /**
3665:    * Indicates whether the specified text matches the Name or Nmtoken
3666:    * production.
3667:    */
3668:   private boolean isNmtoken(String text, boolean isName)
3669:   {
3670:     try
3671:       {
3672:         int[] cp = UnicodeReader.toCodePointArray(text);
3673:         if (cp.length == 0)
3674:           return false;
3675:         if (isName)
3676:           {
3677:             if (!isNameStartCharacter(cp[0], input.xml11))
3678:               return false;
3679:           }
3680:         else
3681:           {
3682:             if (!isNameCharacter(cp[0], input.xml11))
3683:               return false;
3684:           }
3685:         for (int i = 1; i < cp.length; i++)
3686:           {
3687:             if (!isNameCharacter(cp[i], input.xml11))
3688:               return false;
3689:           }
3690:         return true;
3691:       }
3692:     catch (IOException e)
3693:       {
3694:         return false;
3695:       }
3696:   }
3697: 
3698:   /**
3699:    * Indicates whether the specified Unicode character is a Name start
3700:    * character.
3701:    */
3702:   public static boolean isNameStartCharacter(int c, boolean xml11)
3703:   {
3704:     if (xml11)
3705:       return ((c >= 0x0041 && c <= 0x005a) ||
3706:               (c >= 0x0061 && c <= 0x007a) ||
3707:               c == 0x3a |
3708:               c == 0x5f |
3709:               (c >= 0xC0 && c <= 0xD6) ||
3710:               (c >= 0xD8 && c <= 0xF6) ||
3711:               (c >= 0xF8 && c <= 0x2FF) ||
3712:               (c >= 0x370 && c <= 0x37D) ||
3713:               (c >= 0x37F && c <= 0x1FFF) ||
3714:               (c >= 0x200C && c <= 0x200D) ||
3715:               (c >= 0x2070 && c <= 0x218F) ||
3716:               (c >= 0x2C00 && c <= 0x2FEF) ||
3717:               (c >= 0x3001 && c <= 0xD7FF) ||
3718:               (c >= 0xF900 && c <= 0xFDCF) ||
3719:               (c >= 0xFDF0 && c <= 0xFFFD) ||
3720:               (c >= 0x10000 && c <= 0xEFFFF));
3721:     else
3722:       return (c == 0x5f || c == 0x3a || isLetter(c));
3723:   }
3724: 
3725:   /**
3726:    * Indicates whether the specified Unicode character is a Name non-initial
3727:    * character.
3728:    */
3729:   public static boolean isNameCharacter(int c, boolean xml11)
3730:   {
3731:     if (xml11)
3732:       return ((c >= 0x0041 && c <= 0x005a) ||
3733:               (c >= 0x0061 && c <= 0x007a) ||
3734:               (c >= 0x0030 && c <= 0x0039) ||
3735:               c == 0x3a |
3736:               c == 0x5f |
3737:               c == 0x2d |
3738:               c == 0x2e |
3739:               c == 0xB7 |
3740:               (c >= 0xC0 && c <= 0xD6) ||
3741:               (c >= 0xD8 && c <= 0xF6) ||
3742:               (c >= 0xF8 && c <= 0x2FF) ||
3743:               (c >= 0x300 && c <= 0x37D) ||
3744:               (c >= 0x37F && c <= 0x1FFF) ||
3745:               (c >= 0x200C && c <= 0x200D) ||
3746:               (c >= 0x203F && c <= 0x2040) ||
3747:               (c >= 0x2070 && c <= 0x218F) ||
3748:               (c >= 0x2C00 && c <= 0x2FEF) ||
3749:               (c >= 0x3001 && c <= 0xD7FF) ||
3750:               (c >= 0xF900 && c <= 0xFDCF) ||
3751:               (c >= 0xFDF0 && c <= 0xFFFD) ||
3752:               (c >= 0x10000 && c <= 0xEFFFF));
3753:     else
3754:       return (c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a ||
3755:               isLetter(c) || isDigit(c) ||
3756:               isCombiningChar(c) || isExtender(c));
3757:   }
3758: 
3759:   /**
3760:    * Indicates whether the specified Unicode character matches the Letter
3761:    * production.
3762:    */
3763:   public static boolean isLetter(int c)
3764:   {
3765:     if ((c >= 0x0041 && c <= 0x005A) ||
3766:         (c >= 0x0061 && c <= 0x007A) ||
3767:         (c >= 0x00C0 && c <= 0x00D6) ||
3768:         (c >= 0x00D8 && c <= 0x00F6) ||
3769:         (c >= 0x00F8 && c <= 0x00FF) ||
3770:         (c >= 0x0100 && c <= 0x0131) ||
3771:         (c >= 0x0134 && c <= 0x013E) ||
3772:         (c >= 0x0141 && c <= 0x0148) ||
3773:         (c >= 0x014A && c <= 0x017E) ||
3774:         (c >= 0x0180 && c <= 0x01C3) ||
3775:         (c >= 0x01CD && c <= 0x01F0) ||
3776:         (c >= 0x01F4 && c <= 0x01F5) ||
3777:         (c >= 0x01FA && c <= 0x0217) ||
3778:         (c >= 0x0250 && c <= 0x02A8) ||
3779:         (c >= 0x02BB && c <= 0x02C1) ||
3780:         c == 0x0386 ||
3781:         (c >= 0x0388 && c <= 0x038A) ||
3782:         c == 0x038C ||
3783:         (c >= 0x038E && c <= 0x03A1) ||
3784:         (c >= 0x03A3 && c <= 0x03CE) ||
3785:         (c >= 0x03D0 && c <= 0x03D6) ||
3786:         c == 0x03DA ||
3787:       c == 0x03DC ||
3788:         c == 0x03DE ||
3789:         c == 0x03E0 ||
3790:         (c >= 0x03E2 && c <= 0x03F3) ||
3791:         (c >= 0x0401 && c <= 0x040C) ||
3792:         (c >= 0x040E && c <= 0x044F) ||
3793:         (c >= 0x0451 && c <= 0x045C) ||
3794:         (c >= 0x045E && c <= 0x0481) ||
3795:         (c >= 0x0490 && c <= 0x04C4) ||
3796:         (c >= 0x04C7 && c <= 0x04C8) ||
3797:         (c >= 0x04CB && c <= 0x04CC) ||
3798:         (c >= 0x04D0 && c <= 0x04EB) ||
3799:         (c >= 0x04EE && c <= 0x04F5) ||
3800:         (c >= 0x04F8 && c <= 0x04F9) ||
3801:         (c >= 0x0531 && c <= 0x0556) ||
3802:         c == 0x0559 ||
3803:         (c >= 0x0561 && c <= 0x0586) ||
3804:         (c >= 0x05D0 && c <= 0x05EA) ||
3805:         (c >= 0x05F0 && c <= 0x05F2) ||
3806:         (c >= 0x0621 && c <= 0x063A) ||
3807:         (c >= 0x0641 && c <= 0x064A) ||
3808:         (c >= 0x0671 && c <= 0x06B7) ||
3809:         (c >= 0x06BA && c <= 0x06BE) ||
3810:         (c >= 0x06C0 && c <= 0x06CE) ||
3811:         (c >= 0x06D0 && c <= 0x06D3) ||
3812:         c == 0x06D5 ||
3813:         (c >= 0x06E5 && c <= 0x06E6) ||
3814:         (c >= 0x0905 && c <= 0x0939) ||
3815:         c == 0x093D ||
3816:         (c >= 0x0958 && c <= 0x0961) ||
3817:         (c >= 0x0985 && c <= 0x098C) ||
3818:         (c >= 0x098F && c <= 0x0990) ||
3819:         (c >= 0x0993 && c <= 0x09A8) ||
3820:         (c >= 0x09AA && c <= 0x09B0) ||
3821:         c == 0x09B2 ||
3822:         (c >= 0x09B6 && c <= 0x09B9) ||
3823:         (c >= 0x09DC && c <= 0x09DD) ||
3824:         (c >= 0x09DF && c <= 0x09E1) ||
3825:         (c >= 0x09F0 && c <= 0x09F1) ||
3826:         (c >= 0x0A05 && c <= 0x0A0A) ||
3827:         (c >= 0x0A0F && c <= 0x0A10) ||
3828:         (c >= 0x0A13 && c <= 0x0A28) ||
3829:         (c >= 0x0A2A && c <= 0x0A30) ||
3830:         (c >= 0x0A32 && c <= 0x0A33) ||
3831:         (c >= 0x0A35 && c <= 0x0A36) ||
3832:         (c >= 0x0A38 && c <= 0x0A39) ||
3833:         (c >= 0x0A59 && c <= 0x0A5C) ||
3834:         c == 0x0A5E ||
3835:         (c >= 0x0A72 && c <= 0x0A74) ||
3836:         (c >= 0x0A85 && c <= 0x0A8B) ||
3837:         c == 0x0A8D ||
3838:         (c >= 0x0A8F && c <= 0x0A91) ||
3839:         (c >= 0x0A93 && c <= 0x0AA8) ||
3840:         (c >= 0x0AAA && c <= 0x0AB0) ||
3841:         (c >= 0x0AB2 && c <= 0x0AB3) ||
3842:         (c >= 0x0AB5 && c <= 0x0AB9) ||
3843:         c == 0x0ABD ||
3844:         c == 0x0AE0 ||
3845:         (c >= 0x0B05 && c <= 0x0B0C) ||
3846:         (c >= 0x0B0F && c <= 0x0B10) ||
3847:         (c >= 0x0B13 && c <= 0x0B28) ||
3848:         (c >= 0x0B2A && c <= 0x0B30) ||
3849:         (c >= 0x0B32 && c <= 0x0B33) ||
3850:         (c >= 0x0B36 && c <= 0x0B39) ||
3851:         c == 0x0B3D ||
3852:         (c >= 0x0B5C && c <= 0x0B5D) ||
3853:         (c >= 0x0B5F && c <= 0x0B61) ||
3854:         (c >= 0x0B85 && c <= 0x0B8A) ||
3855:         (c >= 0x0B8E && c <= 0x0B90) ||
3856:         (c >= 0x0B92 && c <= 0x0B95) ||
3857:         (c >= 0x0B99 && c <= 0x0B9A) ||
3858:         c == 0x0B9C ||
3859:         (c >= 0x0B9E && c <= 0x0B9F) ||
3860:         (c >= 0x0BA3 && c <= 0x0BA4) ||
3861:         (c >= 0x0BA8 && c <= 0x0BAA) ||
3862:         (c >= 0x0BAE && c <= 0x0BB5) ||
3863:         (c >= 0x0BB7 && c <= 0x0BB9) ||
3864:         (c >= 0x0C05 && c <= 0x0C0C) ||
3865:         (c >= 0x0C0E && c <= 0x0C10) ||
3866:         (c >= 0x0C12 && c <= 0x0C28) ||
3867:         (c >= 0x0C2A && c <= 0x0C33) ||
3868:         (c >= 0x0C35 && c <= 0x0C39) ||
3869:         (c >= 0x0C60 && c <= 0x0C61) ||
3870:         (c >= 0x0C85 && c <= 0x0C8C) ||
3871:         (c >= 0x0C8E && c <= 0x0C90) ||
3872:         (c >= 0x0C92 && c <= 0x0CA8) ||
3873:         (c >= 0x0CAA && c <= 0x0CB3) ||
3874:         (c >= 0x0CB5 && c <= 0x0CB9) ||
3875:         c == 0x0CDE ||
3876:         (c >= 0x0CE0 && c <= 0x0CE1) ||
3877:         (c >= 0x0D05 && c <= 0x0D0C) ||
3878:         (c >= 0x0D0E && c <= 0x0D10) ||
3879:         (c >= 0x0D12 && c <= 0x0D28) ||
3880:         (c >= 0x0D2A && c <= 0x0D39) ||
3881:         (c >= 0x0D60 && c <= 0x0D61) ||
3882:         (c >= 0x0E01 && c <= 0x0E2E) ||
3883:         c == 0x0E30 ||
3884:         (c >= 0x0E32 && c <= 0x0E33) ||
3885:         (c >= 0x0E40 && c <= 0x0E45) ||
3886:         (c >= 0x0E81 && c <= 0x0E82) ||
3887:         c == 0x0E84 ||
3888:         (c >= 0x0E87 && c <= 0x0E88) ||
3889:         c == 0x0E8A ||
3890:         c == 0x0E8D ||
3891:         (c >= 0x0E94 && c <= 0x0E97) ||
3892:         (c >= 0x0E99 && c <= 0x0E9F) ||
3893:         (c >= 0x0EA1 && c <= 0x0EA3) ||
3894:         c == 0x0EA5 ||
3895:         c == 0x0EA7 ||
3896:         (c >= 0x0EAA && c <= 0x0EAB) ||
3897:         (c >= 0x0EAD && c <= 0x0EAE) ||
3898:         c == 0x0EB0 ||
3899:         (c >= 0x0EB2 && c <= 0x0EB3) ||
3900:         c == 0x0EBD ||
3901:         (c >= 0x0EC0 && c <= 0x0EC4) ||
3902:         (c >= 0x0F40 && c <= 0x0F47) ||
3903:         (c >= 0x0F49 && c <= 0x0F69) ||
3904:         (c >= 0x10A0 && c <= 0x10C5) ||
3905:         (c >= 0x10D0 && c <= 0x10F6) ||
3906:         c == 0x1100 ||
3907:         (c >= 0x1102 && c <= 0x1103) ||
3908:         (c >= 0x1105 && c <= 0x1107) ||
3909:         c == 0x1109 ||
3910:         (c >= 0x110B && c <= 0x110C) ||
3911:         (c >= 0x110E && c <= 0x1112) ||
3912:         c == 0x113C ||
3913:         c == 0x113E ||
3914:         c == 0x1140 ||
3915:         c == 0x114C ||
3916:         c == 0x114E ||
3917:         c == 0x1150 ||
3918:         (c >= 0x1154 && c <= 0x1155) ||
3919:         c == 0x1159 ||
3920:         (c >= 0x115F && c <= 0x1161) ||
3921:         c == 0x1163 ||
3922:         c == 0x1165 ||
3923:         c == 0x1167 ||
3924:         c == 0x1169 ||
3925:         (c >= 0x116D && c <= 0x116E) ||
3926:         (c >= 0x1172 && c <= 0x1173) ||
3927:         c == 0x1175 ||
3928:         c == 0x119E ||
3929:         c == 0x11A8 ||
3930:         c == 0x11AB ||
3931:         (c >= 0x11AE && c <= 0x11AF) ||
3932:         (c >= 0x11B7 && c <= 0x11B8) ||
3933:         c == 0x11BA ||
3934:         (c >= 0x11BC && c <= 0x11C2) ||
3935:         c == 0x11EB ||
3936:         c == 0x11F0 ||
3937:         c == 0x11F9 ||
3938:         (c >= 0x1E00 && c <= 0x1E9B) ||
3939:         (c >= 0x1EA0 && c <= 0x1EF9) ||
3940:         (c >= 0x1F00 && c <= 0x1F15) ||
3941:         (c >= 0x1F18 && c <= 0x1F1D) ||
3942:         (c >= 0x1F20 && c <= 0x1F45) ||
3943:         (c >= 0x1F48 && c <= 0x1F4D) ||
3944:         (c >= 0x1F50 && c <= 0x1F57) ||
3945:         c == 0x1F59 ||
3946:         c == 0x1F5B ||
3947:         c == 0x1F5D ||
3948:         (c >= 0x1F5F && c <= 0x1F7D) ||
3949:         (c >= 0x1F80 && c <= 0x1FB4) ||
3950:         (c >= 0x1FB6 && c <= 0x1FBC) ||
3951:         c == 0x1FBE ||
3952:         (c >= 0x1FC2 && c <= 0x1FC4) ||
3953:         (c >= 0x1FC6 && c <= 0x1FCC) ||
3954:         (c >= 0x1FD0 && c <= 0x1FD3) ||
3955:         (c >= 0x1FD6 && c <= 0x1FDB) ||
3956:         (c >= 0x1FE0 && c <= 0x1FEC) ||
3957:         (c >= 0x1FF2 && c <= 0x1FF4) ||
3958:         (c >= 0x1FF6 && c <= 0x1FFC) ||
3959:         c == 0x2126 ||
3960:         (c >= 0x212A && c <= 0x212B) ||
3961:         c == 0x212E ||
3962:         (c >= 0x2180 && c <= 0x2182) ||
3963:         (c >= 0x3041 && c <= 0x3094) ||
3964:         (c >= 0x30A1 && c <= 0x30FA) ||
3965:         (c >= 0x3105 && c <= 0x312C) ||
3966:         (c >= 0xAC00 && c <= 0xD7A3))
3967:         return true; // BaseChar
3968:     if ((c >= 0x4e00 && c <= 0x9fa5) ||
3969:         c == 0x3007 ||
3970:         (c >= 0x3021 && c <= 0x3029))
3971:       return true; // Ideographic
3972:     return false;
3973:   }
3974: 
3975:   /**
3976:    * Indicates whether the specified Unicode character matches the Digit
3977:    * production.
3978:    */
3979:   public static boolean isDigit(int c)
3980:   {
3981:     return ((c >= 0x0030 && c <= 0x0039) ||
3982:             (c >= 0x0660 && c <= 0x0669) ||
3983:             (c >= 0x06F0 && c <= 0x06F9) ||
3984:             (c >= 0x0966 && c <= 0x096F) ||
3985:             (c >= 0x09E6 && c <= 0x09EF) ||
3986:             (c >= 0x0A66 && c <= 0x0A6F) ||
3987:             (c >= 0x0AE6 && c <= 0x0AEF) ||
3988:             (c >= 0x0B66 && c <= 0x0B6F) ||
3989:             (c >= 0x0BE7 && c <= 0x0BEF) ||
3990:             (c >= 0x0C66 && c <= 0x0C6F) ||
3991:             (c >= 0x0CE6 && c <= 0x0CEF) ||
3992:             (c >= 0x0D66 && c <= 0x0D6F) ||
3993:             (c >= 0x0E50 && c <= 0x0E59) ||
3994:             (c >= 0x0ED0 && c <= 0x0ED9) ||
3995:             (c >= 0x0F20 && c <= 0x0F29));
3996:   }
3997: 
3998:   /**
3999:    * Indicates whether the specified Unicode character matches the
4000:    * CombiningChar production.
4001:    */
4002:   public static boolean isCombiningChar(int c)
4003:   {
4004:     return ((c >= 0x0300 && c <= 0x0345) ||
4005:             (c >= 0x0360 && c <= 0x0361) ||
4006:             (c >= 0x0483 && c <= 0x0486) ||
4007:             (c >= 0x0591 && c <= 0x05A1) ||
4008:             (c >= 0x05A3 && c <= 0x05B9) ||
4009:             (c >= 0x05BB && c <= 0x05BD) ||
4010:             c == 0x05BF ||
4011:             (c >= 0x05C1 && c <= 0x05C2) ||
4012:             c == 0x05C4 ||
4013:             (c >= 0x064B && c <= 0x0652) ||
4014:             c == 0x0670 ||
4015:             (c >= 0x06D6 && c <= 0x06DC) ||
4016:             (c >= 0x06DD && c <= 0x06DF) ||
4017:             (c >= 0x06E0 && c <= 0x06E4) ||
4018:             (c >= 0x06E7 && c <= 0x06E8) ||
4019:             (c >= 0x06EA && c <= 0x06ED) ||
4020:             (c >= 0x0901 && c <= 0x0903) ||
4021:             c == 0x093C ||
4022:             (c >= 0x093E && c <= 0x094C) ||
4023:             c == 0x094D ||
4024:             (c >= 0x0951 && c <= 0x0954) ||
4025:             (c >= 0x0962 && c <= 0x0963) ||
4026:             (c >= 0x0981 && c <= 0x0983) ||
4027:             c == 0x09BC ||
4028:             c == 0x09BE ||
4029:             c == 0x09BF ||
4030:             (c >= 0x09C0 && c <= 0x09C4) ||
4031:             (c >= 0x09C7 && c <= 0x09C8) ||
4032:             (c >= 0x09CB && c <= 0x09CD) ||
4033:             c == 0x09D7 ||
4034:             (c >= 0x09E2 && c <= 0x09E3) ||
4035:             c == 0x0A02 ||
4036:             c == 0x0A3C ||
4037:             c == 0x0A3E ||
4038:             c == 0x0A3F ||
4039:             (c >= 0x0A40 && c <= 0x0A42) ||
4040:             (c >= 0x0A47 && c <= 0x0A48) ||
4041:             (c >= 0x0A4B && c <= 0x0A4D) ||
4042:             (c >= 0x0A70 && c <= 0x0A71) ||
4043:             (c >= 0x0A81 && c <= 0x0A83) ||
4044:             c == 0x0ABC ||
4045:             (c >= 0x0ABE && c <= 0x0AC5) ||
4046:             (c >= 0x0AC7 && c <= 0x0AC9) ||
4047:             (c >= 0x0ACB && c <= 0x0ACD) ||
4048:             (c >= 0x0B01 && c <= 0x0B03) ||
4049:             c == 0x0B3C ||
4050:             (c >= 0x0B3E && c <= 0x0B43) ||
4051:             (c >= 0x0B47 && c <= 0x0B48) ||
4052:             (c >= 0x0B4B && c <= 0x0B4D) ||
4053:             (c >= 0x0B56 && c <= 0x0B57) ||
4054:             (c >= 0x0B82 && c <= 0x0B83) ||
4055:             (c >= 0x0BBE && c <= 0x0BC2) ||
4056:             (c >= 0x0BC6 && c <= 0x0BC8) ||
4057:             (c >= 0x0BCA && c <= 0x0BCD) ||
4058:             c == 0x0BD7 ||
4059:             (c >= 0x0C01 && c <= 0x0C03) ||
4060:             (c >= 0x0C3E && c <= 0x0C44) ||
4061:             (c >= 0x0C46 && c <= 0x0C48) ||
4062:             (c >= 0x0C4A && c <= 0x0C4D) ||
4063:             (c >= 0x0C55 && c <= 0x0C56) ||
4064:             (c >= 0x0C82 && c <= 0x0C83) ||
4065:             (c >= 0x0CBE && c <= 0x0CC4) ||
4066:             (c >= 0x0CC6 && c <= 0x0CC8) ||
4067:             (c >= 0x0CCA && c <= 0x0CCD) ||
4068:             (c >= 0x0CD5 && c <= 0x0CD6) ||
4069:             (c >= 0x0D02 && c <= 0x0D03) ||
4070:             (c >= 0x0D3E && c <= 0x0D43) ||
4071:             (c >= 0x0D46 && c <= 0x0D48) ||
4072:             (c >= 0x0D4A && c <= 0x0D4D) ||
4073:             c == 0x0D57 ||
4074:             c == 0x0E31 ||
4075:             (c >= 0x0E34 && c <= 0x0E3A) ||
4076:             (c >= 0x0E47 && c <= 0x0E4E) ||
4077:             c == 0x0EB1 ||
4078:             (c >= 0x0EB4 && c <= 0x0EB9) ||
4079:             (c >= 0x0EBB && c <= 0x0EBC) ||
4080:             (c >= 0x0EC8 && c <= 0x0ECD) ||
4081:             (c >= 0x0F18 && c <= 0x0F19) ||
4082:             c == 0x0F35 ||
4083:             c == 0x0F37 ||
4084:             c == 0x0F39 ||
4085:             c == 0x0F3E ||
4086:             c == 0x0F3F ||
4087:             (c >= 0x0F71 && c <= 0x0F84) ||
4088:             (c >= 0x0F86 && c <= 0x0F8B) ||
4089:             (c >= 0x0F90 && c <= 0x0F95) ||
4090:             c == 0x0F97 ||
4091:             (c >= 0x0F99 && c <= 0x0FAD) ||
4092:             (c >= 0x0FB1 && c <= 0x0FB7) ||
4093:             c == 0x0FB9 ||
4094:             (c >= 0x20D0 && c <= 0x20DC) ||
4095:             c == 0x20E1 ||
4096:             (c >= 0x302A && c <= 0x302F) ||
4097:             c == 0x3099 ||
4098:             c == 0x309A);
4099:   }
4100: 
4101:   /**
4102:    * Indicates whether the specified Unicode character matches the Extender
4103:    * production.
4104:    */
4105:   public static boolean isExtender(int c)
4106:   {
4107:     return (c == 0x00B7 ||
4108:             c == 0x02D0 ||
4109:             c == 0x02D1 ||
4110:             c == 0x0387 ||
4111:             c == 0x0640 ||
4112:             c == 0x0E46 ||
4113:             c == 0x0EC6 ||
4114:             c == 0x3005 ||
4115:             (c >= 0x3031 && c <= 0x3035) ||
4116:             (c >= 0x309D && c <= 0x309E) ||
4117:             (c >= 0x30FC && c <= 0x30FE));
4118:   }
4119: 
4120:   /**
4121:    * Indicates whether the specified Unicode character matches the Char
4122:    * production.
4123:    */
4124:   public static boolean isChar(int c)
4125:   {
4126:     return (c >= 0x20 && c < 0xd800) ||
4127:       (c >= 0xe00 && c < 0xfffe) ||
4128:       (c >= 0x10000 && c < 0x110000) ||
4129:       c == 0xa || c == 0x9 || c == 0xd;
4130:   }
4131:   
4132:   /**
4133:    * Interns the specified text or not, depending on the value of
4134:    * stringInterning.
4135:    */
4136:   private String intern(String text)
4137:   {
4138:     return stringInterning ? text.intern() : text;
4139:   }
4140: 
4141:   /**
4142:    * Report a parsing error.
4143:    */
4144:   private void error(String message)
4145:     throws XMLStreamException
4146:   {
4147:     error(message, null);
4148:   }
4149:   
4150:   /**
4151:    * Report a parsing error.
4152:    */
4153:   private void error(String message, Object info)
4154:     throws XMLStreamException
4155:   {
4156:     if (info != null)
4157:       {
4158:         if (info instanceof String)
4159:           message += ": \"" + ((String) info) + "\"";
4160:         else if (info instanceof Character)
4161:           message += ": '" + ((Character) info) + "'";
4162:       }
4163:     throw new XMLStreamException(message);
4164:   }
4165: 
4166:   /**
4167:    * Perform validation of a start-element event.
4168:    */
4169:   private void validateStartElement(String elementName)
4170:     throws XMLStreamException
4171:   {
4172:     if (currentContentModel == null)
4173:       {
4174:         // root element
4175:         // VC: Root Element Type
4176:         if (!elementName.equals(doctype.rootName))
4177:           error("root element name must match name in DTD");
4178:         return;
4179:       }
4180:     // VC: Element Valid
4181:     switch (currentContentModel.type)
4182:       {
4183:       case ContentModel.EMPTY:
4184:         error("child element found in empty element", elementName);
4185:         break;
4186:       case ContentModel.ELEMENT:
4187:         LinkedList ctx = (LinkedList) validationStack.getLast();
4188:         ctx.add(elementName);
4189:         break;
4190:       case ContentModel.MIXED:
4191:         MixedContentModel mm = (MixedContentModel) currentContentModel;
4192:         if (!mm.containsName(elementName))
4193:           error("illegal element for content model", elementName);
4194:         break;
4195:       }
4196:   }
4197: 
4198:   /**
4199:    * Perform validation of an end-element event.
4200:    */
4201:   private void validateEndElement()
4202:     throws XMLStreamException
4203:   {
4204:     if (currentContentModel == null)
4205:       {
4206:         // root element
4207:         // VC: IDREF
4208:         if (!idrefs.containsAll(ids))
4209:           error("IDREF values must match the value of some ID attribute");
4210:         return;
4211:       }
4212:     // VC: Element Valid
4213:     switch (currentContentModel.type)
4214:       {
4215:       case ContentModel.ELEMENT:
4216:         LinkedList ctx = (LinkedList) validationStack.getLast();
4217:         ElementContentModel ecm = (ElementContentModel) currentContentModel;
4218:         validateElementContent(ecm, ctx);
4219:         break;
4220:       }
4221:   }
4222: 
4223:   /**
4224:    * Perform validation of character data.
4225:    */
4226:   private void validatePCData(String text)
4227:     throws XMLStreamException
4228:   {
4229:     // VC: Element Valid
4230:     switch (currentContentModel.type)
4231:       {
4232:       case ContentModel.EMPTY:
4233:         error("character data found in empty element", text);
4234:         break;
4235:       case ContentModel.ELEMENT:
4236:         boolean white = true;
4237:         int len = text.length();
4238:         for (int i = 0; i < len; i++)
4239:           {
4240:             char c = text.charAt(i);
4241:             if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
4242:               {
4243:                 white = false;
4244:                 break;
4245:               }
4246:           }
4247:         if (!white)
4248:           error("character data found in element with element content", text);
4249:         else if (xmlStandalone == Boolean.TRUE && currentContentModel.external)
4250:           // VC: Standalone Document Declaration
4251:           error("whitespace in element content of externally declared " +
4252:                 "element in standalone document");
4253:         break;
4254:       }
4255:   }
4256: 
4257:   /**
4258:    * Validates the specified validation context (list of child elements)
4259:    * against the element content model for the current element.
4260:    */
4261:   private void validateElementContent(ElementContentModel model,
4262:                                       LinkedList children)
4263:     throws XMLStreamException
4264:   {
4265:     // Use regular expression
4266:     StringBuffer buf = new StringBuffer();
4267:     for (Iterator i = children.iterator(); i.hasNext(); )
4268:       {
4269:         buf.append((String) i.next());
4270:         buf.append(' ');
4271:       }
4272:     String c = buf.toString();
4273:     String regex = createRegularExpression(model);
4274:     if (!c.matches(regex))
4275:       error("element content "+model.text+" does not match expression "+regex, c);
4276:   }
4277: 
4278:   /**
4279:    * Creates the regular expression used to validate an element content
4280:    * model.
4281:    */
4282:   private String createRegularExpression(ElementContentModel model)
4283:   {
4284:     if (model.regex == null)
4285:       {
4286:         StringBuffer buf = new StringBuffer();
4287:         buf.append('(');
4288:         for (Iterator i = model.contentParticles.iterator(); i.hasNext(); )
4289:           {
4290:             ContentParticle cp = (ContentParticle) i.next();
4291:             if (cp.content instanceof String)
4292:               {
4293:                 buf.append('(');
4294:                 buf.append((String) cp.content);
4295:                 buf.append(' ');
4296:                 buf.append(')');
4297:                 if (cp.max == -1)
4298:                   {
4299:                     if (cp.min == 0)
4300:                       buf.append('*');
4301:                     else
4302:                       buf.append('+');
4303:                   }
4304:                 else if (cp.min == 0)
4305:                   buf.append('?');
4306:               }
4307:             else
4308:               {
4309:                 ElementContentModel ecm = (ElementContentModel) cp.content;
4310:                 buf.append(createRegularExpression(ecm));
4311:               }
4312:             if (model.or && i.hasNext())
4313:               buf.append('|');
4314:           }
4315:         buf.append(')');
4316:         if (model.max == -1)
4317:           {
4318:             if (model.min == 0)
4319:               buf.append('*');
4320:             else
4321:               buf.append('+');
4322:           }
4323:         else if (model.min == 0)
4324:           buf.append('?');
4325:         model.regex = buf.toString();
4326:       }
4327:     return model.regex;
4328:   }
4329: 
4330:   /**
4331:    * Performs validation of a document type declaration event.
4332:    */
4333:   void validateDoctype()
4334:     throws XMLStreamException
4335:   {
4336:     for (Iterator i = doctype.entityIterator(); i.hasNext(); )
4337:       {
4338:         Map.Entry entry = (Map.Entry) i.next();
4339:         Object entity = entry.getValue();
4340:         if (entity instanceof ExternalIds)
4341:           {
4342:             ExternalIds ids = (ExternalIds) entity;
4343:             if (ids.notationName != null)
4344:               {
4345:                 // VC: Notation Declared
4346:                 ExternalIds notation = doctype.getNotation(ids.notationName);
4347:                 if (notation == null)
4348:                   error("Notation name must match the declared name of a " +
4349:                         "notation", ids.notationName);
4350:               }
4351:           }
4352:       }
4353:   }
4354: 
4355:   /**
4356:    * Simple test harness for reading an XML file.
4357:    * args[0] is the filename of the XML file
4358:    * If args[1] is "-x", enable XInclude processing
4359:    */
4360:   public static void main(String[] args)
4361:     throws Exception
4362:   {
4363:     boolean validating = false;
4364:     boolean namespaceAware = false;
4365:     boolean xIncludeAware = false;
4366:     int pos = 0;
4367:     while (pos < args.length && args[pos].startsWith("-"))
4368:       {
4369:         if ("-x".equals(args[pos]))
4370:           xIncludeAware = true;
4371:         else if ("-v".equals(args[pos]))
4372:           validating = true;
4373:         else if ("-n".equals(args[pos]))
4374:           namespaceAware = true;
4375:         pos++;
4376:       }
4377:     if (pos >= args.length)
4378:       {
4379:         System.out.println("Syntax: XMLParser [-n] [-v] [-x] <file> [<file2> [...]]");
4380:         System.out.println("\t-n: use namespace aware mode");
4381:         System.out.println("\t-v: use validating parser");
4382:         System.out.println("\t-x: use XInclude aware mode");
4383:         System.exit(2);
4384:       }
4385:     while (pos < args.length)
4386:       {
4387:         XMLParser p = new XMLParser(new java.io.FileInputStream(args[pos]),
4388:                                     absolutize(null, args[pos]),
4389:                                     validating, // validating
4390:                                     namespaceAware, // namespaceAware
4391:                                     true, // coalescing,
4392:                                     true, // replaceERefs
4393:                                     true, // externalEntities
4394:                                     true, // supportDTD
4395:                                     true, // baseAware
4396:                                     true, // stringInterning
4397:                                     true, // extendedEventTypes
4398:                                     null,
4399:                                     null);
4400:         XMLStreamReader reader = p;
4401:         if (xIncludeAware)
4402:           reader = new XIncludeFilter(p, args[pos], true, true, true);
4403:         try
4404:           {
4405:             int event;
4406:             //do
4407:             while (reader.hasNext())
4408:               {
4409:                 event = reader.next();
4410:                 Location loc = reader.getLocation();
4411:                 System.out.print(loc.getLineNumber() + ":" + 
4412:                                  loc.getColumnNumber() + " ");
4413:                 switch (event)
4414:                   {
4415:                   case XMLStreamConstants.START_DOCUMENT:
4416:                     System.out.println("START_DOCUMENT version=" +
4417:                                        reader.getVersion() +
4418:                                        " encoding=" +
4419:                                        reader.getEncoding());
4420:                     break;
4421:                   case XMLStreamConstants.END_DOCUMENT:
4422:                     System.out.println("END_DOCUMENT");
4423:                     break;
4424:                   case XMLStreamConstants.START_ELEMENT:
4425:                     System.out.println("START_ELEMENT " +
4426:                                        reader.getName());
4427:                     int l = reader.getNamespaceCount();
4428:                     for (int i = 0; i < l; i++)
4429:                       System.out.println("\tnamespace " +
4430:                                          reader.getNamespacePrefix(i) + "='" +
4431:                                          reader.getNamespaceURI(i)+"'");
4432:                     l = reader.getAttributeCount();
4433:                     for (int i = 0; i < l; i++)
4434:                       System.out.println("\tattribute " +
4435:                                          reader.getAttributeName(i) + "='" +
4436:                                          reader.getAttributeValue(i) + "'");
4437:                     break;
4438:                   case XMLStreamConstants.END_ELEMENT:
4439:                     System.out.println("END_ELEMENT " + reader.getName());
4440:                     break;
4441:                   case XMLStreamConstants.CHARACTERS:
4442:                     System.out.println("CHARACTERS '" +
4443:                                        encodeText(reader.getText()) + "'");
4444:                     break;
4445:                   case XMLStreamConstants.CDATA:
4446:                     System.out.println("CDATA '" +
4447:                                        encodeText(reader.getText()) + "'");
4448:                     break;
4449:                   case XMLStreamConstants.SPACE:
4450:                     System.out.println("SPACE '" +
4451:                                        encodeText(reader.getText()) + "'");
4452:                     break;
4453:                   case XMLStreamConstants.DTD:
4454:                     System.out.println("DTD " + reader.getText());
4455:                     break;
4456:                   case XMLStreamConstants.ENTITY_REFERENCE:
4457:                     System.out.println("ENTITY_REFERENCE " + reader.getText());
4458:                     break;
4459:                   case XMLStreamConstants.COMMENT:
4460:                     System.out.println("COMMENT '" +
4461:                                        encodeText(reader.getText()) + "'");
4462:                     break;
4463:                   case XMLStreamConstants.PROCESSING_INSTRUCTION:
4464:                     System.out.println("PROCESSING_INSTRUCTION " +
4465:                                        reader.getPITarget() + " " +
4466:                                        reader.getPIData());
4467:                     break;
4468:                   case START_ENTITY:
4469:                     System.out.println("START_ENTITY " + reader.getText());
4470:                     break;
4471:                   case END_ENTITY:
4472:                     System.out.println("END_ENTITY " + reader.getText());
4473:                     break;
4474:                   default:
4475:                     System.out.println("Unknown event: " + event);
4476:                   }
4477:               }
4478:           }
4479:         catch (XMLStreamException e)
4480:           {
4481:             Location l = reader.getLocation();
4482:             System.out.println("At line "+l.getLineNumber()+
4483:                                ", column "+l.getColumnNumber()+
4484:                                " of "+l.getSystemId());
4485:             throw e;
4486:           }
4487:         pos++;
4488:       }
4489:   }
4490: 
4491:   /**
4492:    * Escapes control characters in the specified text. For debugging.
4493:    */
4494:   private static String encodeText(String text)
4495:   {
4496:     StringBuffer b = new StringBuffer();
4497:     int len = text.length();
4498:     for (int i = 0; i < len; i++)
4499:       {
4500:         char c = text.charAt(i);
4501:         switch (c)
4502:           {
4503:           case '\t':
4504:             b.append("\\t");
4505:             break;
4506:           case '\n':
4507:             b.append("\\n");
4508:             break;
4509:           case '\r':
4510:             b.append("\\r");
4511:             break;
4512:           default:
4513:             b.append(c);
4514:           }
4515:       }
4516:     return b.toString();
4517:   }
4518: 
4519:   /**
4520:    * An attribute instance.
4521:    */
4522:   class Attribute
4523:   {
4524: 
4525:     /**
4526:      * Attribute name.
4527:      */
4528:     final String name;
4529: 
4530:     /**
4531:      * Attribute type as declared in the DTD, or CDATA otherwise.
4532:      */
4533:     final String type;
4534: 
4535:     /**
4536:      * Whether the attribute was specified or defaulted.
4537:      */
4538:     final boolean specified;
4539: 
4540:     /**
4541:      * The attribute value.
4542:      */
4543:     final String value;
4544: 
4545:     /**
4546:      * The namespace prefix.
4547:      */
4548:     final String prefix;
4549: 
4550:     /**
4551:      * The namespace local-name.
4552:      */
4553:     final String localName;
4554: 
4555:     Attribute(String name, String type, boolean specified, String value)
4556:     {
4557:       this.name = name;
4558:       this.type = type;
4559:       this.specified = specified;
4560:       this.value = value;
4561:       int ci = name.indexOf(':');
4562:       if (ci == -1)
4563:         {
4564:           prefix = null;
4565:           localName = intern(name);
4566:         }
4567:       else
4568:         {
4569:           prefix = intern(name.substring(0, ci));
4570:           localName = intern(name.substring(ci + 1));
4571:         }
4572:     }
4573: 
4574:     public boolean equals(Object other)
4575:     {
4576:       if (other instanceof Attribute)
4577:         {
4578:           Attribute a = (Attribute) other;
4579:           if (namespaceAware)
4580:             {
4581:               if (!a.localName.equals(localName))
4582:                 return false;
4583:               String auri = getNamespaceURI(a.prefix);
4584:               String uri = getNamespaceURI(prefix);
4585:               if (uri == null && (auri == null ||
4586:                                   (input.xml11 && "".equals(auri))))
4587:                return true; 
4588:               if (uri != null)
4589:                 {
4590:                   if ("".equals(uri) && input.xml11 && "".equals(auri))
4591:                     return true;
4592:                   return uri.equals(auri);
4593:                 }
4594:               return false;
4595:             }
4596:           else
4597:             return a.name.equals(name);
4598:         }
4599:       return false;
4600:     }
4601: 
4602:     public String toString()
4603:     {
4604:       StringBuffer buf = new StringBuffer(getClass().getName());
4605:       buf.append('[');
4606:       buf.append("name=");
4607:       buf.append(name);
4608:       if (value != null)
4609:         {
4610:           buf.append(",value=");
4611:           buf.append(value);
4612:         }
4613:       if (type != null)
4614:         {
4615:           buf.append(",type=");
4616:           buf.append(type);
4617:         }
4618:       if (specified)
4619:         buf.append(",specified");
4620:       buf.append(']');
4621:       return buf.toString();
4622:     }
4623:     
4624:   }
4625: 
4626:   /**
4627:    * Representation of a DTD.
4628:    */
4629:   class Doctype
4630:   {
4631: 
4632:     /**
4633:      * Name of the root element.
4634:      */
4635:     final String rootName;
4636: 
4637:     /**
4638:      * Public ID, if any, of external subset.
4639:      */
4640:     final String publicId;
4641: 
4642:     /**
4643:      * System ID (URL), if any, of external subset.
4644:      */
4645:     final String systemId;
4646: 
4647:     /**
4648:      * Map of element names to content models.
4649:      */
4650:     private final LinkedHashMap elements = new LinkedHashMap();
4651: 
4652:     /**
4653:      * Map of element names to maps of attribute declarations.
4654:      */
4655:     private final LinkedHashMap attlists = new LinkedHashMap();
4656: 
4657:     /**
4658:      * Map of entity names to entities (String or ExternalIds).
4659:      */
4660:     private final LinkedHashMap entities = new LinkedHashMap();
4661: 
4662:     /**
4663:      * Map of notation names to ExternalIds.
4664:      */
4665:     private final LinkedHashMap notations = new LinkedHashMap();
4666: 
4667:     /**
4668:      * Map of anonymous keys to comments.
4669:      */    
4670:     private final LinkedHashMap comments = new LinkedHashMap();
4671: 
4672:     /**
4673:      * Map of anonymous keys to processing instructions (String[2]
4674:      * containing {target, data}).
4675:      */
4676:     private final LinkedHashMap pis = new LinkedHashMap();
4677: 
4678:     /**
4679:      * List of keys to all markup entries in the DTD.
4680:      */
4681:     private final LinkedList entries = new LinkedList();
4682: 
4683:     /**
4684:      * Set of the entities defined in the external subset.
4685:      */
4686:     private final HashSet externalEntities = new HashSet();
4687: 
4688:     /**
4689:      * Set of the notations defined in the external subset.
4690:      */
4691:     private final HashSet externalNotations = new HashSet();
4692: 
4693:     /**
4694:      * Counter for making anonymous keys.
4695:      */
4696:     private int anon = 1;
4697: 
4698:     /**
4699:      * Constructor.
4700:      */
4701:     Doctype(String rootName, String publicId, String systemId)
4702:     {
4703:       this.rootName = rootName;
4704:       this.publicId = publicId;
4705:       this.systemId = systemId;
4706:     }
4707: 
4708:     /**
4709:      * Adds an element declaration.
4710:      * @param name the element name
4711:      * @param text the content model text
4712:      * @param model the parsed content model
4713:      */
4714:     void addElementDecl(String name, String text, ContentModel model)
4715:     {
4716:       if (elements.containsKey(name))
4717:         return;
4718:       model.text = text;
4719:       model.external = (inputStack.size() != 1);
4720:       elements.put(name, model);
4721:       entries.add("E" + name);
4722:     }
4723: 
4724:     /**
4725:      * Adds an attribute declaration.
4726:      * @param ename the element name
4727:      * @param aname the attribute name
4728:      * @param decl the attribute declaration details
4729:      */
4730:     void addAttributeDecl(String ename, String aname, AttributeDecl decl)
4731:     {
4732:       LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4733:       if (attlist == null)
4734:         {
4735:           attlist = new LinkedHashMap();
4736:           attlists.put(ename, attlist);
4737:         }
4738:       else if (attlist.containsKey(aname))
4739:         return;
4740:       attlist.put(aname, decl);
4741:       String key = "A" + ename;
4742:       if (!entries.contains(key))
4743:         entries.add(key);
4744:     }
4745: 
4746:     /**
4747:      * Adds an entity declaration.
4748:      * @param name the entity name
4749:      * @param text the entity replacement text
4750:      * @param inExternalSubset if we are in the exernal subset
4751:      */
4752:     void addEntityDecl(String name, String text, boolean inExternalSubset)
4753:     {
4754:       if (entities.containsKey(name))
4755:         return;
4756:       entities.put(name, text);
4757:       entries.add("e" + name);
4758:       if (inExternalSubset)
4759:         externalEntities.add(name);
4760:     }
4761:     
4762:     /**
4763:      * Adds an entity declaration.
4764:      * @param name the entity name
4765:      * @param ids the external IDs
4766:      * @param inExternalSubset if we are in the exernal subset
4767:      */
4768:     void addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset)
4769:     {
4770:       if (entities.containsKey(name))
4771:         return;
4772:       entities.put(name, ids);
4773:       entries.add("e" + name);
4774:       if (inExternalSubset)
4775:         externalEntities.add(name);
4776:     }
4777: 
4778:     /**
4779:      * Adds a notation declaration.
4780:      * @param name the notation name
4781:      * @param ids the external IDs
4782:      * @param inExternalSubset if we are in the exernal subset
4783:      */
4784:     void addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset)
4785:     {
4786:       if (notations.containsKey(name))
4787:         return;
4788:       notations.put(name, ids);
4789:       entries.add("n" + name);
4790:       if (inExternalSubset)
4791:         externalNotations.add(name);
4792:     }
4793: 
4794:     /**
4795:      * Adds a comment.
4796:      */
4797:     void addComment(String text)
4798:     {
4799:       String key = Integer.toString(anon++);
4800:       comments.put(key, text);
4801:       entries.add("c" + key);
4802:     }
4803: 
4804:     /**
4805:      * Adds a processing instruction.
4806:      */
4807:     void addPI(String target, String data)
4808:     {
4809:       String key = Integer.toString(anon++);
4810:       pis.put(key, new String[] {target, data});
4811:       entries.add("p" + key);
4812:     }
4813: 
4814:     /**
4815:      * Returns the content model for the specified element.
4816:      * @param name the element name
4817:      */
4818:     ContentModel getElementModel(String name)
4819:     {
4820:       return (ContentModel) elements.get(name);
4821:     }
4822: 
4823:     /**
4824:      * Returns the attribute definition for the given attribute
4825:      * @param ename the element name
4826:      * @param aname the attribute name
4827:      */
4828:     AttributeDecl getAttributeDecl(String ename, String aname)
4829:     {
4830:       LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4831:       return (attlist == null) ? null : (AttributeDecl) attlist.get(aname);
4832:     }
4833: 
4834:     /**
4835:      * Indicates whether the specified attribute was declared in the DTD.
4836:      * @param ename the element name
4837:      * @param aname the attribute name
4838:      */
4839:     boolean isAttributeDeclared(String ename, String aname)
4840:     {
4841:       LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4842:       return (attlist == null) ? false : attlist.containsKey(aname);
4843:     }
4844: 
4845:     /**
4846:      * Returns an iterator over the entries in the attribute list for the
4847:      * given element.
4848:      * @param ename the element name
4849:      */
4850:     Iterator attlistIterator(String ename)
4851:     {
4852:       LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4853:       return (attlist == null) ? Collections.EMPTY_LIST.iterator() :
4854:         attlist.entrySet().iterator();
4855:     }
4856: 
4857:     /**
4858:      * Returns the entity (String or ExternalIds) for the given entity name.
4859:      */
4860:     Object getEntity(String name)
4861:     {
4862:       return entities.get(name);
4863:     }
4864: 
4865:     /**
4866:      * Indicates whether the specified entity was declared in the external
4867:      * subset.
4868:      */
4869:     boolean isEntityExternal(String name)
4870:     {
4871:       return externalEntities.contains(name);
4872:     }
4873: 
4874:     /**
4875:      * Returns an iterator over the entity map entries.
4876:      */
4877:     Iterator entityIterator()
4878:     {
4879:       return entities.entrySet().iterator();
4880:     }
4881: 
4882:     /**
4883:      * Returns the notation IDs for the given notation name.
4884:      */
4885:     ExternalIds getNotation(String name)
4886:     {
4887:       return (ExternalIds) notations.get(name);
4888:     }
4889: 
4890:     /**
4891:      * Indicates whether the specified notation was declared in the external
4892:      * subset.
4893:      */
4894:     boolean isNotationExternal(String name)
4895:     {
4896:       return externalNotations.contains(name);
4897:     }
4898: 
4899:     /**
4900:      * Returns the comment associated with the specified (anonymous) key.
4901:      */
4902:     String getComment(String key)
4903:     {
4904:       return (String) comments.get(key);
4905:     }
4906: 
4907:     /**
4908:      * Returns the processing instruction associated with the specified
4909:      * (anonymous) key.
4910:      */
4911:     String[] getPI(String key)
4912:     {
4913:       return (String[]) pis.get(key);
4914:     }
4915: 
4916:     /**
4917:      * Returns an iterator over the keys of the markup entries in this DTD,
4918:      * in the order declared.
4919:      */
4920:     Iterator entryIterator()
4921:     {
4922:       return entries.iterator();
4923:     }
4924:     
4925:   }
4926: 
4927:   /**
4928:    * Combination of an ExternalID and an optional NDataDecl.
4929:    */
4930:   class ExternalIds
4931:   {
4932: 
4933:     /**
4934:      * The public ID.
4935:      */
4936:     String publicId;
4937: 
4938:     /**
4939:      * The system ID.
4940:      */
4941:     String systemId;
4942: 
4943:     /**
4944:      * The notation name declared with the NDATA keyword.
4945:      */
4946:     String notationName;
4947:   }
4948: 
4949:   /**
4950:    * A content model.
4951:    */
4952:   abstract class ContentModel
4953:   {
4954:     static final int EMPTY = 0;
4955:     static final int ANY = 1;
4956:     static final int ELEMENT = 2;
4957:     static final int MIXED = 3;
4958:     
4959:     int min;
4960:     int max;
4961:     final int type;
4962:     String text;
4963:     boolean external;
4964: 
4965:     ContentModel(int type)
4966:     {
4967:       this.type = type;
4968:       min = 1;
4969:       max = 1;
4970:     }
4971:     
4972:   }
4973: 
4974:   /**
4975:    * The EMPTY content model.
4976:    */
4977:   class EmptyContentModel
4978:     extends ContentModel
4979:   {
4980:     
4981:     EmptyContentModel()
4982:     {
4983:       super(ContentModel.EMPTY);
4984:       min = 0;
4985:       max = 0;
4986:     }
4987:     
4988:   }
4989: 
4990:   /**
4991:    * The ANY content model.
4992:    */
4993:   class AnyContentModel
4994:     extends ContentModel
4995:   {
4996:     
4997:     AnyContentModel()
4998:     {
4999:       super(ContentModel.ANY);
5000:       min = 0;
5001:       max = -1;
5002:     }
5003:     
5004:   }
5005: 
5006:   /**
5007:    * An element content model.
5008:    */
5009:   class ElementContentModel
5010:     extends ContentModel
5011:   {
5012: 
5013:     LinkedList contentParticles;
5014:     boolean or;
5015:     String regex; // regular expression cache
5016:     
5017:     ElementContentModel()
5018:     {
5019:       super(ContentModel.ELEMENT);
5020:       contentParticles = new LinkedList();
5021:     }
5022: 
5023:     void addContentParticle(ContentParticle cp)
5024:     {
5025:       contentParticles.add(cp);
5026:     }
5027:     
5028:   }
5029: 
5030:   class ContentParticle
5031:   {
5032: 
5033:     int min = 1;
5034:     int max = 1;
5035:     Object content; // Name (String) or ElementContentModel
5036:     
5037:   }
5038: 
5039:   /**
5040:    * A mixed content model.
5041:    */
5042:   class MixedContentModel
5043:     extends ContentModel
5044:   {
5045: 
5046:     private HashSet names;
5047:     
5048:     MixedContentModel()
5049:     {
5050:       super(ContentModel.MIXED);
5051:       names = new HashSet();
5052:     }
5053: 
5054:     void addName(String name)
5055:     {
5056:       names.add(name);
5057:     }
5058: 
5059:     boolean containsName(String name)
5060:     {
5061:       return names.contains(name);
5062:     }
5063:     
5064:   }
5065: 
5066:   /**
5067:    * An attribute definition.
5068:    */
5069:   class AttributeDecl
5070:   {
5071:     
5072:     /**
5073:      * The attribute type (CDATA, ID, etc).
5074:      */
5075:     final String type;
5076: 
5077:     /**
5078:      * The default value.
5079:      */
5080:     final String value;
5081: 
5082:     /**
5083:      * The value type (#FIXED, #IMPLIED, etc).
5084:      */
5085:     final int valueType;
5086: 
5087:     /**
5088:      * The enumeration text.
5089:      */
5090:     final String enumeration;
5091: 
5092:     /**
5093:      * The enumeration tokens.
5094:      */
5095:     final HashSet values;
5096: 
5097:     /**
5098:      * Whether this attribute declaration occurred in the external subset.
5099:      */
5100:     final boolean external;
5101: 
5102:     AttributeDecl(String type, String value,
5103:                   int valueType, String enumeration,
5104:                   HashSet values, boolean external)
5105:     {
5106:       this.type = type;
5107:       this.value = value;
5108:       this.valueType = valueType;
5109:       this.enumeration = enumeration;
5110:       this.values = values;
5111:       this.external = external;
5112:     }
5113:     
5114:   }
5115: 
5116:   /**
5117:    * An XML input source.
5118:    */
5119:   static class Input
5120:     implements Location
5121:   {
5122:     
5123:     int line = 1, markLine;
5124:     int column, markColumn;
5125:     int offset, markOffset;
5126:     final String publicId, systemId, name;
5127:     final boolean report; // report start- and end-entity
5128:     final boolean normalize; // normalize CR, etc to LF
5129:     
5130:     InputStream in;
5131:     Reader reader;
5132:     UnicodeReader unicodeReader;
5133:     boolean initialized;
5134:     boolean encodingDetected;
5135:     String inputEncoding;
5136:     boolean xml11;
5137: 
5138:     Input(InputStream in, Reader reader, String publicId, String systemId,
5139:           String name, String inputEncoding, boolean report,
5140:           boolean normalize)
5141:     {
5142:       if (inputEncoding == null)
5143:         inputEncoding = "UTF-8";
5144:       this.inputEncoding = inputEncoding;
5145:       this.publicId = publicId;
5146:       this.systemId = systemId;
5147:       this.name = name;
5148:       this.report = report;
5149:       this.normalize = normalize;
5150:       if (in != null)
5151:         {
5152:           if (reader != null)
5153:             throw new IllegalStateException("both byte and char streams "+
5154:                                             "specified");
5155:           if (normalize)
5156:             in = new CRLFInputStream(in);
5157:           in = new BufferedInputStream(in);
5158:           this.in = in;
5159:         }
5160:       else
5161:         {
5162:           this.reader = normalize ? new CRLFReader(reader) : reader;
5163:           unicodeReader = new UnicodeReader(this.reader);
5164:         }
5165:       initialized = false;
5166:     }
5167: 
5168:     // -- Location --
5169:     
5170:     public int getCharacterOffset()
5171:     {
5172:       return offset;
5173:     }
5174:     
5175:     public int getColumnNumber()
5176:     {
5177:       return column;
5178:     }
5179: 
5180:     public int getLineNumber()
5181:     {
5182:       return line;
5183:     }
5184: 
5185:     public String getPublicId()
5186:     {
5187:       return publicId;
5188:     }
5189: 
5190:     public String getSystemId()
5191:     {
5192:       return systemId;
5193:     }
5194: 
5195:     void init()
5196:       throws IOException
5197:     {
5198:       if (initialized)
5199:         return;
5200:       if (in != null)
5201:         detectEncoding();
5202:       initialized = true;
5203:     }
5204: 
5205:     void mark(int len)
5206:       throws IOException
5207:     {
5208:       markOffset = offset;
5209:       markLine = line;
5210:       markColumn = column;
5211:       if (unicodeReader != null)
5212:         unicodeReader.mark(len);
5213:       else
5214:         in.mark(len);
5215:     }
5216: 
5217:     /**
5218:      * Character read.
5219:      */
5220:     int read()
5221:       throws IOException
5222:     {
5223:       offset++;
5224:       int ret = (unicodeReader != null) ? unicodeReader.read() : in.read();
5225:       if (normalize &&
5226:           (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028))))
5227:         {
5228:           // Normalize CR etc to LF
5229:           ret = 0x0a;
5230:         }
5231:       // Locator handling
5232:       if (ret == 0x0a)
5233:         {
5234:           line++;
5235:           column = 0;
5236:         }
5237:       else
5238:         column++;
5239:       return ret;
5240:     }
5241: 
5242:     /**
5243:      * Block read.
5244:      */
5245:     int read(int[] b, int off, int len)
5246:       throws IOException
5247:     {
5248:       int ret;
5249:       if (unicodeReader != null)
5250:         {
5251:           ret = unicodeReader.read(b, off, len);
5252:         }
5253:       else
5254:         {
5255:           byte[] b2 = new byte[len];
5256:           ret = in.read(b2, 0, len);
5257:           if (ret != -1)
5258:             {
5259:               String s = new String(b2, 0, ret, inputEncoding);
5260:               int[] c = UnicodeReader.toCodePointArray(s);
5261:               ret = c.length;
5262:               System.arraycopy(c, 0, b, off, ret);
5263:             }
5264:         }
5265:       if (ret != -1)
5266:         {
5267:           // Locator handling
5268:           for (int i = 0; i < ret; i++)
5269:             {
5270:               int c = b[off + i];
5271:               if (normalize &&
5272:                   (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028))))
5273:                 {
5274:                   // Normalize CR etc to LF
5275:                   c = 0x0a;
5276:                   b[off + i] = c;
5277:                 }
5278:               if (c == 0x0a)
5279:                 {
5280:                   line++;
5281:                   column = 0;
5282:                 }
5283:               else
5284:                 column++;
5285:             }
5286:         }
5287:       return ret;
5288:     }
5289: 
5290:     void reset()
5291:       throws IOException
5292:     {
5293:       if (unicodeReader != null)
5294:         unicodeReader.reset();
5295:       else
5296:         in.reset();
5297:       offset = markOffset;
5298:       line = markLine;
5299:       column = markColumn;
5300:     }
5301: 
5302:     // Detection of input encoding
5303:     
5304:     private static final int[] SIGNATURE_UCS_4_1234 =
5305:       new int[] { 0x00, 0x00, 0x00, 0x3c };
5306:     private static final int[] SIGNATURE_UCS_4_4321 =
5307:       new int[] { 0x3c, 0x00, 0x00, 0x00 };
5308:     private static final int[] SIGNATURE_UCS_4_2143 =
5309:       new int[] { 0x00, 0x00, 0x3c, 0x00 };
5310:     private static final int[] SIGNATURE_UCS_4_3412 =
5311:       new int[] { 0x00, 0x3c, 0x00, 0x00 };
5312:     private static final int[] SIGNATURE_UCS_2_12 =
5313:       new int[] { 0xfe, 0xff };
5314:     private static final int[] SIGNATURE_UCS_2_21 =
5315:       new int[] { 0xff, 0xfe };
5316:     private static final int[] SIGNATURE_UCS_2_12_NOBOM =
5317:       new int[] { 0x00, 0x3c, 0x00, 0x3f };
5318:     private static final int[] SIGNATURE_UCS_2_21_NOBOM =
5319:       new int[] { 0x3c, 0x00, 0x3f, 0x00 };
5320:     private static final int[] SIGNATURE_UTF_8 =
5321:       new int[] { 0x3c, 0x3f, 0x78, 0x6d };
5322:     private static final int[] SIGNATURE_UTF_8_BOM =
5323:       new int[] { 0xef, 0xbb, 0xbf };
5324:     
5325:     /**
5326:      * Detect the input encoding.
5327:      */
5328:     private void detectEncoding()
5329:       throws IOException
5330:     {
5331:       int[] signature = new int[4];
5332:       in.mark(4);
5333:       for (int i = 0; i < 4; i++)
5334:         signature[i] = in.read();
5335:       in.reset();
5336: 
5337:       // 4-byte encodings
5338:       if (equals(SIGNATURE_UCS_4_1234, signature))
5339:         {
5340:           in.read();
5341:           in.read();
5342:           in.read();
5343:           in.read();
5344:           setInputEncoding("UTF-32BE");
5345:           encodingDetected = true;
5346:         }
5347:       else if (equals(SIGNATURE_UCS_4_4321, signature))
5348:         {
5349:           in.read();
5350:           in.read();
5351:           in.read();
5352:           in.read();
5353:           setInputEncoding("UTF-32LE");
5354:           encodingDetected = true;
5355:         }
5356:       else if (equals(SIGNATURE_UCS_4_2143, signature) ||
5357:                equals(SIGNATURE_UCS_4_3412, signature))
5358:         throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");
5359:       
5360:       // 2-byte encodings
5361:       else if (equals(SIGNATURE_UCS_2_12, signature))
5362:         {
5363:           in.read();
5364:           in.read();
5365:           setInputEncoding("UTF-16BE");
5366:           encodingDetected = true;
5367:         }
5368:       else if (equals(SIGNATURE_UCS_2_21, signature))
5369:         {
5370:           in.read();
5371:           in.read();
5372:           setInputEncoding("UTF-16LE");
5373:           encodingDetected = true;
5374:         }
5375:       else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
5376:         {
5377:           //setInputEncoding("UTF-16BE");
5378:           throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5379:         }
5380:       else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
5381:         {
5382:           //setInputEncoding("UTF-16LE");
5383:           throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5384:         }
5385:       // ASCII-derived encodings
5386:       else if (equals(SIGNATURE_UTF_8, signature))
5387:         {
5388:           // UTF-8 input encoding implied, TextDecl
5389:         }
5390:       else if (equals(SIGNATURE_UTF_8_BOM, signature))
5391:         {
5392:           in.read();
5393:           in.read();
5394:           in.read();
5395:           setInputEncoding("UTF-8");
5396:           encodingDetected = true;
5397:         }
5398:     }
5399: 
5400:     private static boolean equals(int[] b1, int[] b2)
5401:     {
5402:       for (int i = 0; i < b1.length; i++)
5403:         {
5404:           if (b1[i] != b2[i])
5405:             return false;
5406:         }
5407:       return true;
5408:     }
5409:     
5410:     void setInputEncoding(String encoding)
5411:       throws IOException
5412:     {
5413:       if (encoding.equals(inputEncoding))
5414:         return;
5415:       if ("UTF-16".equalsIgnoreCase(encoding) &&
5416:           inputEncoding.startsWith("UTF-16"))
5417:         return;
5418:       if (encodingDetected)
5419:         throw new UnsupportedEncodingException("document is not in its " +
5420:                                                "declared encoding " +
5421:                                                inputEncoding +
5422:                                                ": " + encoding);
5423:       inputEncoding = encoding;
5424:       finalizeEncoding();
5425:     }
5426: 
5427:     void finalizeEncoding()
5428:       throws IOException
5429:     {
5430:       if (reader != null)
5431:         return;
5432:       reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
5433:       unicodeReader = new UnicodeReader(reader);
5434:       mark(1);
5435:     }
5436: 
5437:   }
5438: 
5439: }