Source for java.lang.Character

   1: /* java.lang.Character -- Wrapper class for char, and Unicode subsets
   2:    Copyright (C) 1998, 1999, 2001, 2002, 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.lang;
  40: 
  41: import gnu.java.lang.CharData;
  42: 
  43: import java.io.Serializable;
  44: import java.text.Collator;
  45: import java.util.Locale;
  46: 
  47: /**
  48:  * Wrapper class for the primitive char data type.  In addition, this class
  49:  * allows one to retrieve property information and perform transformations
  50:  * on the defined characters in the Unicode Standard, Version 4.0.0.
  51:  * java.lang.Character is designed to be very dynamic, and as such, it
  52:  * retrieves information on the Unicode character set from a separate
  53:  * database, gnu.java.lang.CharData, which can be easily upgraded.
  54:  *
  55:  * <p>For predicates, boundaries are used to describe
  56:  * the set of characters for which the method will return true.
  57:  * This syntax uses fairly normal regular expression notation.
  58:  * See 5.13 of the Unicode Standard, Version 4.0, for the
  59:  * boundary specification.
  60:  *
  61:  * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
  62:  * for more information on the Unicode Standard.
  63:  *
  64:  * @author Tom Tromey (tromey@cygnus.com)
  65:  * @author Paul N. Fisher
  66:  * @author Jochen Hoenicke
  67:  * @author Eric Blake (ebb9@email.byu.edu)
  68:  * @see CharData
  69:  * @since 1.0
  70:  * @status updated to 1.4
  71:  */
  72: public final class Character implements Serializable, Comparable
  73: {
  74:   /**
  75:    * A named subset of the Unicode character set.
  76:    *
  77:    * @author Paul N. Fisher
  78:    * @author Eric Blake (ebb9@email.byu.edu)
  79:    * @since 1.2
  80:    */
  81:   public static class Subset
  82:   {
  83:     /** The name of the subset, as returned by <code>toString()</code>. */
  84:     private final String name;
  85: 
  86:     /**
  87:      * Construct a new subset of characters with the given name.
  88:      *
  89:      * @param name the name of the subset
  90:      * @throws NullPointerException if name is null
  91:      */
  92:     protected Subset(String name)
  93:     {
  94:       // name.toString() is name itself; the call only forces an NPE on null.
  95:       this.name = name.toString();
  96:     }
  97: 
  98:     /**
  99:      * Compares two Subsets for equality. This is <code>final</code>, so
 100:      * subclasses cannot redefine it, and it uses the <code>==</code>
 101:      * operator, so it returns true only for the same object.
 102:      *
 103:      * @param o the object to compare
 104:      * @return true if o is this
 105:      */
 106:     public final boolean equals(Object o)
 107:     {
 108:       return o == this;
 109:     }
 110: 
 111:     /**
 112:      * Returns the hash code inherited from Object, and makes the method
 113:      * <code>final</code> so that it stays consistent with equals.
 114:      *
 115:      * @return the hash code for this object
 116:      */
 117:     public final int hashCode()
 118:     {
 119:       return super.hashCode();
 120:     }
 121: 
 122:     /**
 123:      * Returns the name of the subset, exactly as given to the constructor.
 124:      *
 125:      * @return the name
 126:      */
 127:     public final String toString()
 128:     {
 129:       return name;
 130:     }
 131:   } // class Subset
 132: 
 133:   /**
 134:    * A family of character subsets in the Unicode specification. A character
 135:    * is in at most one of these blocks.
 136:    *
 137:    * This nested class was generated automatically from
 138:    * <code>doc/unicode/Blocks-4.0.0.txt</code>, by some perl scripts.
 139:    * This Unicode definition file can be found on the
 140:    * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
 141:    * JDK 1.5 uses Unicode version 4.0.0.
 142:    *
 143:    * @author scripts/unicode-blocks.pl (written by Eric Blake)
 144:    * @since 1.2
 145:    */
 146:   public static final class UnicodeBlock extends Subset
 147:   {
 148:     /** The first code point of the block (inclusive). */
 149:     private final int start;
 150: 
 151:     /** The last code point of the block (inclusive). */
 152:     private final int end;
 153: 
 154:     /** The canonical name of the block according to the Unicode standard. */
 155:     private final String canonicalName;
 156: 
 157:     /** Name styles distinguished by the <code>forName()</code> method. */
 158:     private static final int CANONICAL_NAME = 0; // e.g. `Basic Latin'
 159:     private static final int NO_SPACES_NAME = 1; // e.g. `BasicLatin'
 160:     private static final int CONSTANT_NAME = 2; // e.g. `BASIC_LATIN'
 161: 
 162:     /**
 163:      * Constructor for strictly defined blocks.
 164:      *
 165:      * @param start the first code point of the range (inclusive)
 166:      * @param end the last code point of the range (inclusive)
 167:      * @param name the constant-style block name, returned by toString()
 168:      * @param canonicalName the name of the block as defined in the Unicode
 169:      *        standard; this is what forName() matches canonical names on.
 170:      */
 171:     private UnicodeBlock(int start, int end, String name,
 172:              String canonicalName)
 173:     {
 174:       super(name); // Subset rejects a null name with NullPointerException
 175:       this.start = start;
 176:       this.end = end;
 177:       this.canonicalName = canonicalName;
 178:     }
 179: 
 180:     /**
 181:      * Returns the Unicode character block which a character belongs to.
 182:      * <strong>Note</strong>: a <code>char</code> cannot hold a
 183:      * supplementary character, so for those <code>of(int)</code> should
 184:      * be used instead.  This method simply delegates to it.
 185:      *
 186:      * @param ch the character to look up
 187:      * @return the block it belongs to, or null if it is not in one
 188:      */
 189:     public static UnicodeBlock of(char ch)
 190:     {
 191:       return of((int) ch); // widen to an int code point
 192:     }
 193: 
 194:     /**
 195:      * Returns the Unicode character block which a code point belongs to.
 196:      *
 197:      * @param codePoint the code point to look up
 198:      * @return the block containing it, or null if it is not in any block.
 199:      * @throws IllegalArgumentException if the specified code point is
 200:      *         negative or greater than <code>MAX_CODE_POINT</code>.
 201:      * @since 1.5
 202:      */
 203:     public static UnicodeBlock of(int codePoint)
 204:     {
 205:       if (codePoint < 0 || codePoint > MAX_CODE_POINT)
 206:         throw new IllegalArgumentException("The supplied integer value is " +
 207:                                            "not a valid codepoint.");
 208:       // Binary search over sets, which must be sorted by ascending range.
 209:       int low = 0;
 210:       int hi = sets.length - 1;
 211:       while (low <= hi)
 212:         {
 213:           int mid = (low + hi) >>> 1;
 214:           UnicodeBlock b = sets[mid];
 215:           if (codePoint < b.start)
 216:             hi = mid - 1;
 217:           else if (codePoint > b.end)
 218:             low = mid + 1;
 219:           else
 220:             return b;
 221:         }
 222:       return null;
 223:     }
 224: 
 225:     /**
 226:      * <p>
 227:      * Returns the <code>UnicodeBlock</code> with the given name, as defined
 228:      * by the Unicode standard.  The version of Unicode in use is defined by
 229:      * the <code>Character</code> class, and the names are given in the
 230:      * <code>Blocks-&lt;version&gt;.txt</code> file corresponding to that version.
 231:      * The name may be specified in one of three ways:
 232:      * </p>
 233:      * <ol>
 234:      * <li>The canonical, human-readable name used by the Unicode standard.
 235:      * This is the name with all spaces and hyphens retained.  For example,
 236:      * `Basic Latin' retrieves the block, UnicodeBlock.BASIC_LATIN.</li>
 237:      * <li>The canonical name with all spaces removed e.g. `BasicLatin'.</li>
 238:      * <li>The name used for the constants specified by this class, which
 239:      * is the canonical name with all spaces and hyphens replaced with
 240:      * underscores e.g. `BASIC_LATIN'.</li>
 241:      * </ol>
 242:      * <p>
 243:      * The names are compared case-insensitively using the case comparison
 244:      * associated with the U.S. English locale.  The method recognises the
 245:      * previous names used for blocks as well as the current ones.  At
 246:      * present, this simply means that the deprecated `SURROGATES_AREA'
 247:      * will be recognised by this method (the <code>of()</code> methods
 248:      * only return one of the three new surrogate blocks).
 249:      * </p>
 250:      *
 251:      * @param blockName the name of the block to look up.
 252:      * @return the specified block.
 253:      * @throws NullPointerException if the <code>blockName</code> is
 254:      *         <code>null</code>.
 255:      * @throws IllegalArgumentException if the name does not match any Unicode
 256:      *         block.
 257:      * @since 1.5
 258:      */
 259:     public static final UnicodeBlock forName(String blockName)
 260:     {
 261:       int type; // which of the three name styles blockName is written in
 262:       if (blockName.indexOf(' ') != -1) // any space: canonical style
 263:         type = CANONICAL_NAME;
 264:       else if (blockName.indexOf('_') != -1) // else any underscore: constant
 265:         type = CONSTANT_NAME;
 266:       else
 267:         type = NO_SPACES_NAME;
 268:       Collator usCollator = Collator.getInstance(Locale.US);
 269:       usCollator.setStrength(Collator.PRIMARY); // case is not significant
 270:       /* Special case: the deprecated SURROGATES_AREA block, not in sets */
 271:       switch (type)
 272:       {
 273:         case CANONICAL_NAME:
 274:           if (usCollator.compare(blockName, "Surrogates Area") == 0)
 275:             return SURROGATES_AREA;
 276:           break;
 277:         case NO_SPACES_NAME:
 278:           if (usCollator.compare(blockName, "SurrogatesArea") == 0)
 279:             return SURROGATES_AREA;
 280:           break;
 281:         case CONSTANT_NAME:
 282:           if (usCollator.compare(blockName, "SURROGATES_AREA") == 0) 
 283:             return SURROGATES_AREA;
 284:           break;
 285:       }
 286:       /* Other cases: linear scan of sets, comparing in the detected style */
 287:       int setLength = sets.length;
 288:       switch (type)
 289:       {
 290:         case CANONICAL_NAME:
 291:           for (int i = 0; i < setLength; i++)
 292:             {
 293:               UnicodeBlock block = sets[i];
 294:               if (usCollator.compare(blockName, block.canonicalName) == 0)
 295:                 return block;
 296:             }
 297:           break;
 298:         case NO_SPACES_NAME:
 299:           for (int i = 0; i < setLength; i++)
 300:             {
 301:               UnicodeBlock block = sets[i];
 302:               String nsName = block.canonicalName.replaceAll(" ",""); // hyphens stay
 303:               if (usCollator.compare(blockName, nsName) == 0)
 304:                 return block;
 305:             }        
 306:           break;
 307:         case CONSTANT_NAME:
 308:           for (int i = 0; i < setLength; i++)
 309:             {
 310:               UnicodeBlock block = sets[i];
 311:               if (usCollator.compare(blockName, block.toString()) == 0) // constant name
 312:                 return block;
 313:             }
 314:           break;
 315:       }
 316:       throw new IllegalArgumentException("No Unicode block found for " +
 317:                                          blockName + ".");
 318:     }
 319: 
 320:     /**
 321:      * Basic Latin.
 322:      * 0x0000 - 0x007F.
 323:      */
 324:     public static final UnicodeBlock BASIC_LATIN
 325:       = new UnicodeBlock(0x0000, 0x007F,
 326:                          "BASIC_LATIN", 
 327:                          "Basic Latin");
 328: 
 329:     /**
 330:      * Latin-1 Supplement.
 331:      * 0x0080 - 0x00FF.
 332:      */
 333:     public static final UnicodeBlock LATIN_1_SUPPLEMENT
 334:       = new UnicodeBlock(0x0080, 0x00FF,
 335:                          "LATIN_1_SUPPLEMENT", 
 336:                          "Latin-1 Supplement");
 337: 
 338:     /**
 339:      * Latin Extended-A.
 340:      * 0x0100 - 0x017F.
 341:      */
 342:     public static final UnicodeBlock LATIN_EXTENDED_A
 343:       = new UnicodeBlock(0x0100, 0x017F,
 344:                          "LATIN_EXTENDED_A", 
 345:                          "Latin Extended-A");
 346: 
 347:     /**
 348:      * Latin Extended-B.
 349:      * 0x0180 - 0x024F.
 350:      */
 351:     public static final UnicodeBlock LATIN_EXTENDED_B
 352:       = new UnicodeBlock(0x0180, 0x024F,
 353:                          "LATIN_EXTENDED_B", 
 354:                          "Latin Extended-B");
 355: 
 356:     /**
 357:      * IPA Extensions.
 358:      * 0x0250 - 0x02AF.
 359:      */
 360:     public static final UnicodeBlock IPA_EXTENSIONS
 361:       = new UnicodeBlock(0x0250, 0x02AF,
 362:                          "IPA_EXTENSIONS", 
 363:                          "IPA Extensions");
 364: 
 365:     /**
 366:      * Spacing Modifier Letters.
 367:      * 0x02B0 - 0x02FF.
 368:      */
 369:     public static final UnicodeBlock SPACING_MODIFIER_LETTERS
 370:       = new UnicodeBlock(0x02B0, 0x02FF,
 371:                          "SPACING_MODIFIER_LETTERS", 
 372:                          "Spacing Modifier Letters");
 373: 
 374:     /**
 375:      * Combining Diacritical Marks.
 376:      * 0x0300 - 0x036F.
 377:      */
 378:     public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
 379:       = new UnicodeBlock(0x0300, 0x036F,
 380:                          "COMBINING_DIACRITICAL_MARKS", 
 381:                          "Combining Diacritical Marks");
 382: 
 383:     /**
 384:      * Greek.
 385:      * 0x0370 - 0x03FF.
 386:      */
 387:     public static final UnicodeBlock GREEK
 388:       = new UnicodeBlock(0x0370, 0x03FF,
 389:                          "GREEK", 
 390:                          "Greek");
 391: 
 392:     /**
 393:      * Cyrillic.
 394:      * 0x0400 - 0x04FF.
 395:      */
 396:     public static final UnicodeBlock CYRILLIC
 397:       = new UnicodeBlock(0x0400, 0x04FF,
 398:                          "CYRILLIC", 
 399:                          "Cyrillic");
 400: 
 401:     /**
 402:      * Cyrillic Supplementary.
 403:      * 0x0500 - 0x052F.
 404:      * @since 1.5
 405:      */
 406:     public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
 407:       = new UnicodeBlock(0x0500, 0x052F,
 408:                          "CYRILLIC_SUPPLEMENTARY", 
 409:                          "Cyrillic Supplementary");
 410: 
 411:     /**
 412:      * Armenian.
 413:      * 0x0530 - 0x058F.
 414:      */
 415:     public static final UnicodeBlock ARMENIAN
 416:       = new UnicodeBlock(0x0530, 0x058F,
 417:                          "ARMENIAN", 
 418:                          "Armenian");
 419: 
 420:     /**
 421:      * Hebrew.
 422:      * 0x0590 - 0x05FF.
 423:      */
 424:     public static final UnicodeBlock HEBREW
 425:       = new UnicodeBlock(0x0590, 0x05FF,
 426:                          "HEBREW", 
 427:                          "Hebrew");
 428: 
 429:     /**
 430:      * Arabic.
 431:      * 0x0600 - 0x06FF.
 432:      */
 433:     public static final UnicodeBlock ARABIC
 434:       = new UnicodeBlock(0x0600, 0x06FF,
 435:                          "ARABIC", 
 436:                          "Arabic");
 437: 
 438:     /**
 439:      * Syriac.
 440:      * 0x0700 - 0x074F.
 441:      * @since 1.4
 442:      */
 443:     public static final UnicodeBlock SYRIAC
 444:       = new UnicodeBlock(0x0700, 0x074F,
 445:                          "SYRIAC", 
 446:                          "Syriac");
 447: 
 448:     /**
 449:      * Thaana.
 450:      * 0x0780 - 0x07BF.
 451:      * @since 1.4
 452:      */
 453:     public static final UnicodeBlock THAANA
 454:       = new UnicodeBlock(0x0780, 0x07BF,
 455:                          "THAANA", 
 456:                          "Thaana");
 457: 
 458:     /**
 459:      * Devanagari.
 460:      * 0x0900 - 0x097F.
 461:      */
 462:     public static final UnicodeBlock DEVANAGARI
 463:       = new UnicodeBlock(0x0900, 0x097F,
 464:                          "DEVANAGARI", 
 465:                          "Devanagari");
 466: 
 467:     /**
 468:      * Bengali.
 469:      * 0x0980 - 0x09FF.
 470:      */
 471:     public static final UnicodeBlock BENGALI
 472:       = new UnicodeBlock(0x0980, 0x09FF,
 473:                          "BENGALI", 
 474:                          "Bengali");
 475: 
 476:     /**
 477:      * Gurmukhi.
 478:      * 0x0A00 - 0x0A7F.
 479:      */
 480:     public static final UnicodeBlock GURMUKHI
 481:       = new UnicodeBlock(0x0A00, 0x0A7F,
 482:                          "GURMUKHI", 
 483:                          "Gurmukhi");
 484: 
 485:     /**
 486:      * Gujarati.
 487:      * 0x0A80 - 0x0AFF.
 488:      */
 489:     public static final UnicodeBlock GUJARATI
 490:       = new UnicodeBlock(0x0A80, 0x0AFF,
 491:                          "GUJARATI", 
 492:                          "Gujarati");
 493: 
 494:     /**
 495:      * Oriya.
 496:      * 0x0B00 - 0x0B7F.
 497:      */
 498:     public static final UnicodeBlock ORIYA
 499:       = new UnicodeBlock(0x0B00, 0x0B7F,
 500:                          "ORIYA", 
 501:                          "Oriya");
 502: 
 503:     /**
 504:      * Tamil.
 505:      * 0x0B80 - 0x0BFF.
 506:      */
 507:     public static final UnicodeBlock TAMIL
 508:       = new UnicodeBlock(0x0B80, 0x0BFF,
 509:                          "TAMIL", 
 510:                          "Tamil");
 511: 
 512:     /**
 513:      * Telugu.
 514:      * 0x0C00 - 0x0C7F.
 515:      */
 516:     public static final UnicodeBlock TELUGU
 517:       = new UnicodeBlock(0x0C00, 0x0C7F,
 518:                          "TELUGU", 
 519:                          "Telugu");
 520: 
 521:     /**
 522:      * Kannada.
 523:      * 0x0C80 - 0x0CFF.
 524:      */
 525:     public static final UnicodeBlock KANNADA
 526:       = new UnicodeBlock(0x0C80, 0x0CFF,
 527:                          "KANNADA", 
 528:                          "Kannada");
 529: 
 530:     /**
 531:      * Malayalam.
 532:      * 0x0D00 - 0x0D7F.
 533:      */
 534:     public static final UnicodeBlock MALAYALAM
 535:       = new UnicodeBlock(0x0D00, 0x0D7F,
 536:                          "MALAYALAM", 
 537:                          "Malayalam");
 538: 
 539:     /**
 540:      * Sinhala.
 541:      * 0x0D80 - 0x0DFF.
 542:      * @since 1.4
 543:      */
 544:     public static final UnicodeBlock SINHALA
 545:       = new UnicodeBlock(0x0D80, 0x0DFF,
 546:                          "SINHALA", 
 547:                          "Sinhala");
 548: 
 549:     /**
 550:      * Thai.
 551:      * 0x0E00 - 0x0E7F.
 552:      */
 553:     public static final UnicodeBlock THAI
 554:       = new UnicodeBlock(0x0E00, 0x0E7F,
 555:                          "THAI", 
 556:                          "Thai");
 557: 
 558:     /**
 559:      * Lao.
 560:      * 0x0E80 - 0x0EFF.
 561:      */
 562:     public static final UnicodeBlock LAO
 563:       = new UnicodeBlock(0x0E80, 0x0EFF,
 564:                          "LAO", 
 565:                          "Lao");
 566: 
 567:     /**
 568:      * Tibetan.
 569:      * 0x0F00 - 0x0FFF.
 570:      */
 571:     public static final UnicodeBlock TIBETAN
 572:       = new UnicodeBlock(0x0F00, 0x0FFF,
 573:                          "TIBETAN", 
 574:                          "Tibetan");
 575: 
 576:     /**
 577:      * Myanmar.
 578:      * 0x1000 - 0x109F.
 579:      * @since 1.4
 580:      */
 581:     public static final UnicodeBlock MYANMAR
 582:       = new UnicodeBlock(0x1000, 0x109F,
 583:                          "MYANMAR", 
 584:                          "Myanmar");
 585: 
 586:     /**
 587:      * Georgian.
 588:      * 0x10A0 - 0x10FF.
 589:      */
 590:     public static final UnicodeBlock GEORGIAN
 591:       = new UnicodeBlock(0x10A0, 0x10FF,
 592:                          "GEORGIAN", 
 593:                          "Georgian");
 594: 
 595:     /**
 596:      * Hangul Jamo.
 597:      * 0x1100 - 0x11FF.
 598:      */
 599:     public static final UnicodeBlock HANGUL_JAMO
 600:       = new UnicodeBlock(0x1100, 0x11FF,
 601:                          "HANGUL_JAMO", 
 602:                          "Hangul Jamo");
 603: 
 604:     /**
 605:      * Ethiopic.
 606:      * 0x1200 - 0x137F.
 607:      * @since 1.4
 608:      */
 609:     public static final UnicodeBlock ETHIOPIC
 610:       = new UnicodeBlock(0x1200, 0x137F,
 611:                          "ETHIOPIC", 
 612:                          "Ethiopic");
 613: 
 614:     /**
 615:      * Cherokee.
 616:      * 0x13A0 - 0x13FF.
 617:      * @since 1.4
 618:      */
 619:     public static final UnicodeBlock CHEROKEE
 620:       = new UnicodeBlock(0x13A0, 0x13FF,
 621:                          "CHEROKEE", 
 622:                          "Cherokee");
 623: 
 624:     /**
 625:      * Unified Canadian Aboriginal Syllabics.
 626:      * 0x1400 - 0x167F.
 627:      * @since 1.4
 628:      */
 629:     public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
 630:       = new UnicodeBlock(0x1400, 0x167F,
 631:                          "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 
 632:                          "Unified Canadian Aboriginal Syllabics");
 633: 
 634:     /**
 635:      * Ogham.
 636:      * 0x1680 - 0x169F.
 637:      * @since 1.4
 638:      */
 639:     public static final UnicodeBlock OGHAM
 640:       = new UnicodeBlock(0x1680, 0x169F,
 641:                          "OGHAM", 
 642:                          "Ogham");
 643: 
 644:     /**
 645:      * Runic.
 646:      * 0x16A0 - 0x16FF.
 647:      * @since 1.4
 648:      */
 649:     public static final UnicodeBlock RUNIC
 650:       = new UnicodeBlock(0x16A0, 0x16FF,
 651:                          "RUNIC", 
 652:                          "Runic");
 653: 
 654:     /**
 655:      * Tagalog.
 656:      * 0x1700 - 0x171F.
 657:      * @since 1.5
 658:      */
 659:     public static final UnicodeBlock TAGALOG
 660:       = new UnicodeBlock(0x1700, 0x171F,
 661:                          "TAGALOG", 
 662:                          "Tagalog");
 663: 
 664:     /**
 665:      * Hanunoo.
 666:      * 0x1720 - 0x173F.
 667:      * @since 1.5
 668:      */
 669:     public static final UnicodeBlock HANUNOO
 670:       = new UnicodeBlock(0x1720, 0x173F,
 671:                          "HANUNOO", 
 672:                          "Hanunoo");
 673: 
 674:     /**
 675:      * Buhid.
 676:      * 0x1740 - 0x175F.
 677:      * @since 1.5
 678:      */
 679:     public static final UnicodeBlock BUHID
 680:       = new UnicodeBlock(0x1740, 0x175F,
 681:                          "BUHID", 
 682:                          "Buhid");
 683: 
 684:     /**
 685:      * Tagbanwa.
 686:      * 0x1760 - 0x177F.
 687:      * @since 1.5
 688:      */
 689:     public static final UnicodeBlock TAGBANWA
 690:       = new UnicodeBlock(0x1760, 0x177F,
 691:                          "TAGBANWA", 
 692:                          "Tagbanwa");
 693: 
 694:     /**
 695:      * Khmer.
 696:      * 0x1780 - 0x17FF.
 697:      * @since 1.4
 698:      */
 699:     public static final UnicodeBlock KHMER
 700:       = new UnicodeBlock(0x1780, 0x17FF,
 701:                          "KHMER", 
 702:                          "Khmer");
 703: 
 704:     /**
 705:      * Mongolian.
 706:      * 0x1800 - 0x18AF.
 707:      * @since 1.4
 708:      */
 709:     public static final UnicodeBlock MONGOLIAN
 710:       = new UnicodeBlock(0x1800, 0x18AF,
 711:                          "MONGOLIAN", 
 712:                          "Mongolian");
 713: 
 714:     /**
 715:      * Limbu.
 716:      * 0x1900 - 0x194F.
 717:      * @since 1.5
 718:      */
 719:     public static final UnicodeBlock LIMBU
 720:       = new UnicodeBlock(0x1900, 0x194F,
 721:                          "LIMBU", 
 722:                          "Limbu");
 723: 
 724:     /**
 725:      * Tai Le.
 726:      * 0x1950 - 0x197F.
 727:      * @since 1.5
 728:      */
 729:     public static final UnicodeBlock TAI_LE
 730:       = new UnicodeBlock(0x1950, 0x197F,
 731:                          "TAI_LE", 
 732:                          "Tai Le");
 733: 
 734:     /**
 735:      * Khmer Symbols.
 736:      * 0x19E0 - 0x19FF.
 737:      * @since 1.5
 738:      */
 739:     public static final UnicodeBlock KHMER_SYMBOLS
 740:       = new UnicodeBlock(0x19E0, 0x19FF,
 741:                          "KHMER_SYMBOLS", 
 742:                          "Khmer Symbols");
 743: 
 744:     /**
 745:      * Phonetic Extensions.
 746:      * 0x1D00 - 0x1D7F.
 747:      * @since 1.5
 748:      */
 749:     public static final UnicodeBlock PHONETIC_EXTENSIONS
 750:       = new UnicodeBlock(0x1D00, 0x1D7F,
 751:                          "PHONETIC_EXTENSIONS", 
 752:                          "Phonetic Extensions");
 753: 
 754:     /**
 755:      * Latin Extended Additional.
 756:      * 0x1E00 - 0x1EFF.
 757:      */
 758:     public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
 759:       = new UnicodeBlock(0x1E00, 0x1EFF,
 760:                          "LATIN_EXTENDED_ADDITIONAL", 
 761:                          "Latin Extended Additional");
 762: 
 763:     /**
 764:      * Greek Extended.
 765:      * 0x1F00 - 0x1FFF.
 766:      */
 767:     public static final UnicodeBlock GREEK_EXTENDED
 768:       = new UnicodeBlock(0x1F00, 0x1FFF,
 769:                          "GREEK_EXTENDED", 
 770:                          "Greek Extended");
 771: 
 772:     /**
 773:      * General Punctuation.
 774:      * 0x2000 - 0x206F.
 775:      */
 776:     public static final UnicodeBlock GENERAL_PUNCTUATION
 777:       = new UnicodeBlock(0x2000, 0x206F,
 778:                          "GENERAL_PUNCTUATION", 
 779:                          "General Punctuation");
 780: 
 781:     /**
 782:      * Superscripts and Subscripts.
 783:      * 0x2070 - 0x209F.
 784:      */
 785:     public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
 786:       = new UnicodeBlock(0x2070, 0x209F,
 787:                          "SUPERSCRIPTS_AND_SUBSCRIPTS", 
 788:                          "Superscripts and Subscripts");
 789: 
 790:     /**
 791:      * Currency Symbols.
 792:      * 0x20A0 - 0x20CF.
 793:      */
 794:     public static final UnicodeBlock CURRENCY_SYMBOLS
 795:       = new UnicodeBlock(0x20A0, 0x20CF,
 796:                          "CURRENCY_SYMBOLS", 
 797:                          "Currency Symbols");
 798: 
 799:     /**
 800:      * Combining Marks for Symbols.
 801:      * 0x20D0 - 0x20FF.
 802:      */
 803:     public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
 804:       = new UnicodeBlock(0x20D0, 0x20FF,
 805:                          "COMBINING_MARKS_FOR_SYMBOLS", 
 806:                          "Combining Marks for Symbols");
 807: 
 808:     /**
 809:      * Letterlike Symbols.
 810:      * 0x2100 - 0x214F.
 811:      */
 812:     public static final UnicodeBlock LETTERLIKE_SYMBOLS
 813:       = new UnicodeBlock(0x2100, 0x214F,
 814:                          "LETTERLIKE_SYMBOLS", 
 815:                          "Letterlike Symbols");
 816: 
 817:     /**
 818:      * Number Forms.
 819:      * 0x2150 - 0x218F.
 820:      */
 821:     public static final UnicodeBlock NUMBER_FORMS
 822:       = new UnicodeBlock(0x2150, 0x218F,
 823:                          "NUMBER_FORMS", 
 824:                          "Number Forms");
 825: 
 826:     /**
 827:      * Arrows.
 828:      * 0x2190 - 0x21FF.
 829:      */
 830:     public static final UnicodeBlock ARROWS
 831:       = new UnicodeBlock(0x2190, 0x21FF,
 832:                          "ARROWS", 
 833:                          "Arrows");
 834: 
 835:     /**
 836:      * Mathematical Operators.
 837:      * 0x2200 - 0x22FF.
 838:      */
 839:     public static final UnicodeBlock MATHEMATICAL_OPERATORS
 840:       = new UnicodeBlock(0x2200, 0x22FF,
 841:                          "MATHEMATICAL_OPERATORS", 
 842:                          "Mathematical Operators");
 843: 
 844:     /**
 845:      * Miscellaneous Technical.
 846:      * 0x2300 - 0x23FF.
 847:      */
 848:     public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
 849:       = new UnicodeBlock(0x2300, 0x23FF,
 850:                          "MISCELLANEOUS_TECHNICAL", 
 851:                          "Miscellaneous Technical");
 852: 
 853:     /**
 854:      * Control Pictures.
 855:      * 0x2400 - 0x243F.
 856:      */
 857:     public static final UnicodeBlock CONTROL_PICTURES
 858:       = new UnicodeBlock(0x2400, 0x243F,
 859:                          "CONTROL_PICTURES", 
 860:                          "Control Pictures");
 861: 
 862:     /**
 863:      * Optical Character Recognition.
 864:      * 0x2440 - 0x245F.
 865:      */
 866:     public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
 867:       = new UnicodeBlock(0x2440, 0x245F,
 868:                          "OPTICAL_CHARACTER_RECOGNITION", 
 869:                          "Optical Character Recognition");
 870: 
 871:     /**
 872:      * Enclosed Alphanumerics.
 873:      * 0x2460 - 0x24FF.
 874:      */
 875:     public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
 876:       = new UnicodeBlock(0x2460, 0x24FF,
 877:                          "ENCLOSED_ALPHANUMERICS", 
 878:                          "Enclosed Alphanumerics");
 879: 
 880:     /**
 881:      * Box Drawing.
 882:      * 0x2500 - 0x257F.
 883:      */
 884:     public static final UnicodeBlock BOX_DRAWING
 885:       = new UnicodeBlock(0x2500, 0x257F,
 886:                          "BOX_DRAWING", 
 887:                          "Box Drawing");
 888: 
 889:     /**
 890:      * Block Elements.
 891:      * 0x2580 - 0x259F.
 892:      */
 893:     public static final UnicodeBlock BLOCK_ELEMENTS
 894:       = new UnicodeBlock(0x2580, 0x259F,
 895:                          "BLOCK_ELEMENTS", 
 896:                          "Block Elements");
 897: 
 898:     /**
 899:      * Geometric Shapes.
 900:      * 0x25A0 - 0x25FF.
 901:      */
 902:     public static final UnicodeBlock GEOMETRIC_SHAPES
 903:       = new UnicodeBlock(0x25A0, 0x25FF,
 904:                          "GEOMETRIC_SHAPES", 
 905:                          "Geometric Shapes");
 906: 
 907:     /**
 908:      * Miscellaneous Symbols.
 909:      * 0x2600 - 0x26FF.
 910:      */
 911:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
 912:       = new UnicodeBlock(0x2600, 0x26FF,
 913:                          "MISCELLANEOUS_SYMBOLS", 
 914:                          "Miscellaneous Symbols");
 915: 
 916:     /**
 917:      * Dingbats.
 918:      * 0x2700 - 0x27BF.
 919:      */
 920:     public static final UnicodeBlock DINGBATS
 921:       = new UnicodeBlock(0x2700, 0x27BF,
 922:                          "DINGBATS", 
 923:                          "Dingbats");
 924: 
 925:     /**
 926:      * Miscellaneous Mathematical Symbols-A.
 927:      * 0x27C0 - 0x27EF.
 928:      * @since 1.5
 929:      */
 930:     public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
 931:       = new UnicodeBlock(0x27C0, 0x27EF,
 932:                          "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 
 933:                          "Miscellaneous Mathematical Symbols-A");
 934: 
 935:     /**
 936:      * Supplemental Arrows-A.
 937:      * 0x27F0 - 0x27FF.
 938:      * @since 1.5
 939:      */
 940:     public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
 941:       = new UnicodeBlock(0x27F0, 0x27FF,
 942:                          "SUPPLEMENTAL_ARROWS_A", 
 943:                          "Supplemental Arrows-A");
 944: 
 945:     /**
 946:      * Braille Patterns.
 947:      * 0x2800 - 0x28FF.
 948:      * @since 1.4
 949:      */
 950:     public static final UnicodeBlock BRAILLE_PATTERNS
 951:       = new UnicodeBlock(0x2800, 0x28FF,
 952:                          "BRAILLE_PATTERNS", 
 953:                          "Braille Patterns");
 954: 
 955:     /**
 956:      * Supplemental Arrows-B.
 957:      * 0x2900 - 0x297F.
 958:      * @since 1.5
 959:      */
 960:     public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
 961:       = new UnicodeBlock(0x2900, 0x297F,
 962:                          "SUPPLEMENTAL_ARROWS_B", 
 963:                          "Supplemental Arrows-B");
 964: 
 965:     /**
 966:      * Miscellaneous Mathematical Symbols-B.
 967:      * 0x2980 - 0x29FF.
 968:      * @since 1.5
 969:      */
 970:     public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
 971:       = new UnicodeBlock(0x2980, 0x29FF,
 972:                          "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 
 973:                          "Miscellaneous Mathematical Symbols-B");
 974: 
 975:     /**
 976:      * Supplemental Mathematical Operators.
 977:      * 0x2A00 - 0x2AFF.
 978:      * @since 1.5
 979:      */
 980:     public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
 981:       = new UnicodeBlock(0x2A00, 0x2AFF,
 982:                          "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 
 983:                          "Supplemental Mathematical Operators");
 984: 
 985:     /**
 986:      * Miscellaneous Symbols and Arrows.
 987:      * 0x2B00 - 0x2BFF.
 988:      * @since 1.5
 989:      */
 990:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
 991:       = new UnicodeBlock(0x2B00, 0x2BFF,
 992:                          "MISCELLANEOUS_SYMBOLS_AND_ARROWS", 
 993:                          "Miscellaneous Symbols and Arrows");
 994: 
 995:     /**
 996:      * CJK Radicals Supplement.
 997:      * 0x2E80 - 0x2EFF.
 998:      * @since 1.4
 999:      */
1000:     public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1001:       = new UnicodeBlock(0x2E80, 0x2EFF,
1002:                          "CJK_RADICALS_SUPPLEMENT", 
1003:                          "CJK Radicals Supplement");
1004: 
1005:     /**
1006:      * Kangxi Radicals.
1007:      * 0x2F00 - 0x2FDF.
1008:      * @since 1.4
1009:      */
1010:     public static final UnicodeBlock KANGXI_RADICALS
1011:       = new UnicodeBlock(0x2F00, 0x2FDF,
1012:                          "KANGXI_RADICALS", 
1013:                          "Kangxi Radicals");
1014: 
1015:     /**
1016:      * Ideographic Description Characters.
1017:      * 0x2FF0 - 0x2FFF.
1018:      * @since 1.4
1019:      */
1020:     public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1021:       = new UnicodeBlock(0x2FF0, 0x2FFF,
1022:                          "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 
1023:                          "Ideographic Description Characters");
1024: 
1025:     /**
1026:      * CJK Symbols and Punctuation.
1027:      * 0x3000 - 0x303F.
1028:      */
1029:     public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1030:       = new UnicodeBlock(0x3000, 0x303F,
1031:                          "CJK_SYMBOLS_AND_PUNCTUATION", 
1032:                          "CJK Symbols and Punctuation");
1033: 
1034:     /**
1035:      * Hiragana.
1036:      * 0x3040 - 0x309F.
1037:      */
1038:     public static final UnicodeBlock HIRAGANA
1039:       = new UnicodeBlock(0x3040, 0x309F,
1040:                          "HIRAGANA", 
1041:                          "Hiragana");
1042: 
1043:     /**
1044:      * Katakana.
1045:      * 0x30A0 - 0x30FF.
1046:      */
1047:     public static final UnicodeBlock KATAKANA
1048:       = new UnicodeBlock(0x30A0, 0x30FF,
1049:                          "KATAKANA", 
1050:                          "Katakana");
1051: 
1052:     /**
1053:      * Bopomofo.
1054:      * 0x3100 - 0x312F.
1055:      */
1056:     public static final UnicodeBlock BOPOMOFO
1057:       = new UnicodeBlock(0x3100, 0x312F,
1058:                          "BOPOMOFO", 
1059:                          "Bopomofo");
1060: 
1061:     /**
1062:      * Hangul Compatibility Jamo.
1063:      * 0x3130 - 0x318F.
1064:      */
1065:     public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1066:       = new UnicodeBlock(0x3130, 0x318F,
1067:                          "HANGUL_COMPATIBILITY_JAMO", 
1068:                          "Hangul Compatibility Jamo");
1069: 
1070:     /**
1071:      * Kanbun.
1072:      * 0x3190 - 0x319F.
1073:      */
1074:     public static final UnicodeBlock KANBUN
1075:       = new UnicodeBlock(0x3190, 0x319F,
1076:                          "KANBUN", 
1077:                          "Kanbun");
1078: 
1079:     /**
1080:      * Bopomofo Extended.
1081:      * 0x31A0 - 0x31BF.
1082:      * @since 1.4
1083:      */
1084:     public static final UnicodeBlock BOPOMOFO_EXTENDED
1085:       = new UnicodeBlock(0x31A0, 0x31BF,
1086:                          "BOPOMOFO_EXTENDED", 
1087:                          "Bopomofo Extended");
1088: 
1089:     /**
1090:      * Katakana Phonetic Extensions.
1091:      * 0x31F0 - 0x31FF.
1092:      * @since 1.5
1093:      */
1094:     public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1095:       = new UnicodeBlock(0x31F0, 0x31FF,
1096:                          "KATAKANA_PHONETIC_EXTENSIONS", 
1097:                          "Katakana Phonetic Extensions");
1098: 
1099:     /**
1100:      * Enclosed CJK Letters and Months.
1101:      * 0x3200 - 0x32FF.
1102:      */
1103:     public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1104:       = new UnicodeBlock(0x3200, 0x32FF,
1105:                          "ENCLOSED_CJK_LETTERS_AND_MONTHS", 
1106:                          "Enclosed CJK Letters and Months");
1107: 
1108:     /**
1109:      * CJK Compatibility.
1110:      * 0x3300 - 0x33FF.
1111:      */
1112:     public static final UnicodeBlock CJK_COMPATIBILITY
1113:       = new UnicodeBlock(0x3300, 0x33FF,
1114:                          "CJK_COMPATIBILITY", 
1115:                          "CJK Compatibility");
1116: 
1117:     /**
1118:      * CJK Unified Ideographs Extension A.
1119:      * 0x3400 - 0x4DBF.
1120:      * @since 1.4
1121:      */
1122:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1123:       = new UnicodeBlock(0x3400, 0x4DBF,
1124:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 
1125:                          "CJK Unified Ideographs Extension A");
1126: 
1127:     /**
1128:      * Yijing Hexagram Symbols.
1129:      * 0x4DC0 - 0x4DFF.
1130:      * @since 1.5
1131:      */
1132:     public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1133:       = new UnicodeBlock(0x4DC0, 0x4DFF,
1134:                          "YIJING_HEXAGRAM_SYMBOLS", 
1135:                          "Yijing Hexagram Symbols");
1136: 
1137:     /**
1138:      * CJK Unified Ideographs.
1139:      * 0x4E00 - 0x9FFF.
1140:      */
1141:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1142:       = new UnicodeBlock(0x4E00, 0x9FFF,
1143:                          "CJK_UNIFIED_IDEOGRAPHS", 
1144:                          "CJK Unified Ideographs");
1145: 
1146:     /**
1147:      * Yi Syllables.
1148:      * 0xA000 - 0xA48F.
1149:      * @since 1.4
1150:      */
1151:     public static final UnicodeBlock YI_SYLLABLES
1152:       = new UnicodeBlock(0xA000, 0xA48F,
1153:                          "YI_SYLLABLES", 
1154:                          "Yi Syllables");
1155: 
1156:     /**
1157:      * Yi Radicals.
1158:      * 0xA490 - 0xA4CF.
1159:      * @since 1.4
1160:      */
1161:     public static final UnicodeBlock YI_RADICALS
1162:       = new UnicodeBlock(0xA490, 0xA4CF,
1163:                          "YI_RADICALS", 
1164:                          "Yi Radicals");
1165: 
1166:     /**
1167:      * Hangul Syllables.
1168:      * 0xAC00 - 0xD7AF.
1169:      */
1170:     public static final UnicodeBlock HANGUL_SYLLABLES
1171:       = new UnicodeBlock(0xAC00, 0xD7AF,
1172:                          "HANGUL_SYLLABLES", 
1173:                          "Hangul Syllables");
1174: 
1175:     /**
1176:      * High Surrogates.
1177:      * 0xD800 - 0xDB7F.
1178:      * @since 1.5
1179:      */
1180:     public static final UnicodeBlock HIGH_SURROGATES
1181:       = new UnicodeBlock(0xD800, 0xDB7F,
1182:                          "HIGH_SURROGATES", 
1183:                          "High Surrogates");
1184: 
1185:     /**
1186:      * High Private Use Surrogates.
1187:      * 0xDB80 - 0xDBFF.
1188:      * @since 1.5
1189:      */
1190:     public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1191:       = new UnicodeBlock(0xDB80, 0xDBFF,
1192:                          "HIGH_PRIVATE_USE_SURROGATES", 
1193:                          "High Private Use Surrogates");
1194: 
1195:     /**
1196:      * Low Surrogates.
1197:      * 0xDC00 - 0xDFFF.
1198:      * @since 1.5
1199:      */
1200:     public static final UnicodeBlock LOW_SURROGATES
1201:       = new UnicodeBlock(0xDC00, 0xDFFF,
1202:                          "LOW_SURROGATES", 
1203:                          "Low Surrogates");
1204: 
1205:     /**
1206:      * Private Use Area.
1207:      * 0xE000 - 0xF8FF.
1208:      */
1209:     public static final UnicodeBlock PRIVATE_USE_AREA
1210:       = new UnicodeBlock(0xE000, 0xF8FF,
1211:                          "PRIVATE_USE_AREA", 
1212:                          "Private Use Area");
1213: 
1214:     /**
1215:      * CJK Compatibility Ideographs.
1216:      * 0xF900 - 0xFAFF.
1217:      */
1218:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1219:       = new UnicodeBlock(0xF900, 0xFAFF,
1220:                          "CJK_COMPATIBILITY_IDEOGRAPHS", 
1221:                          "CJK Compatibility Ideographs");
1222: 
1223:     /**
1224:      * Alphabetic Presentation Forms.
1225:      * 0xFB00 - 0xFB4F.
1226:      */
1227:     public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1228:       = new UnicodeBlock(0xFB00, 0xFB4F,
1229:                          "ALPHABETIC_PRESENTATION_FORMS", 
1230:                          "Alphabetic Presentation Forms");
1231: 
1232:     /**
1233:      * Arabic Presentation Forms-A.
1234:      * 0xFB50 - 0xFDFF.
1235:      */
1236:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1237:       = new UnicodeBlock(0xFB50, 0xFDFF,
1238:                          "ARABIC_PRESENTATION_FORMS_A", 
1239:                          "Arabic Presentation Forms-A");
1240: 
1241:     /**
1242:      * Variation Selectors.
1243:      * 0xFE00 - 0xFE0F.
1244:      * @since 1.5
1245:      */
1246:     public static final UnicodeBlock VARIATION_SELECTORS
1247:       = new UnicodeBlock(0xFE00, 0xFE0F,
1248:                          "VARIATION_SELECTORS", 
1249:                          "Variation Selectors");
1250: 
1251:     /**
1252:      * Combining Half Marks.
1253:      * 0xFE20 - 0xFE2F.
1254:      */
1255:     public static final UnicodeBlock COMBINING_HALF_MARKS
1256:       = new UnicodeBlock(0xFE20, 0xFE2F,
1257:                          "COMBINING_HALF_MARKS", 
1258:                          "Combining Half Marks");
1259: 
1260:     /**
1261:      * CJK Compatibility Forms.
1262:      * 0xFE30 - 0xFE4F.
1263:      */
1264:     public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1265:       = new UnicodeBlock(0xFE30, 0xFE4F,
1266:                          "CJK_COMPATIBILITY_FORMS", 
1267:                          "CJK Compatibility Forms");
1268: 
1269:     /**
1270:      * Small Form Variants.
1271:      * 0xFE50 - 0xFE6F.
1272:      */
1273:     public static final UnicodeBlock SMALL_FORM_VARIANTS
1274:       = new UnicodeBlock(0xFE50, 0xFE6F,
1275:                          "SMALL_FORM_VARIANTS", 
1276:                          "Small Form Variants");
1277: 
1278:     /**
1279:      * Arabic Presentation Forms-B.
1280:      * 0xFE70 - 0xFEFF.
1281:      */
1282:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1283:       = new UnicodeBlock(0xFE70, 0xFEFF,
1284:                          "ARABIC_PRESENTATION_FORMS_B", 
1285:                          "Arabic Presentation Forms-B");
1286: 
1287:     /**
1288:      * Halfwidth and Fullwidth Forms.
1289:      * 0xFF00 - 0xFFEF.
1290:      */
1291:     public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1292:       = new UnicodeBlock(0xFF00, 0xFFEF,
1293:                          "HALFWIDTH_AND_FULLWIDTH_FORMS", 
1294:                          "Halfwidth and Fullwidth Forms");
1295: 
1296:     /**
1297:      * Specials.
1298:      * 0xFFF0 - 0xFFFF.
1299:      */
1300:     public static final UnicodeBlock SPECIALS
1301:       = new UnicodeBlock(0xFFF0, 0xFFFF,
1302:                          "SPECIALS", 
1303:                          "Specials");
1304: 
1305:     /**
1306:      * Linear B Syllabary.
1307:      * 0x10000 - 0x1007F.
1308:      * @since 1.5
1309:      */
1310:     public static final UnicodeBlock LINEAR_B_SYLLABARY
1311:       = new UnicodeBlock(0x10000, 0x1007F,
1312:                          "LINEAR_B_SYLLABARY", 
1313:                          "Linear B Syllabary");
1314: 
1315:     /**
1316:      * Linear B Ideograms.
1317:      * 0x10080 - 0x100FF.
1318:      * @since 1.5
1319:      */
1320:     public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1321:       = new UnicodeBlock(0x10080, 0x100FF,
1322:                          "LINEAR_B_IDEOGRAMS", 
1323:                          "Linear B Ideograms");
1324: 
1325:     /**
1326:      * Aegean Numbers.
1327:      * 0x10100 - 0x1013F.
1328:      * @since 1.5
1329:      */
1330:     public static final UnicodeBlock AEGEAN_NUMBERS
1331:       = new UnicodeBlock(0x10100, 0x1013F,
1332:                          "AEGEAN_NUMBERS", 
1333:                          "Aegean Numbers");
1334: 
1335:     /**
1336:      * Old Italic.
1337:      * 0x10300 - 0x1032F.
1338:      * @since 1.5
1339:      */
1340:     public static final UnicodeBlock OLD_ITALIC
1341:       = new UnicodeBlock(0x10300, 0x1032F,
1342:                          "OLD_ITALIC", 
1343:                          "Old Italic");
1344: 
1345:     /**
1346:      * Gothic.
1347:      * 0x10330 - 0x1034F.
1348:      * @since 1.5
1349:      */
1350:     public static final UnicodeBlock GOTHIC
1351:       = new UnicodeBlock(0x10330, 0x1034F,
1352:                          "GOTHIC", 
1353:                          "Gothic");
1354: 
1355:     /**
1356:      * Ugaritic.
1357:      * 0x10380 - 0x1039F.
1358:      * @since 1.5
1359:      */
1360:     public static final UnicodeBlock UGARITIC
1361:       = new UnicodeBlock(0x10380, 0x1039F,
1362:                          "UGARITIC", 
1363:                          "Ugaritic");
1364: 
1365:     /**
1366:      * Deseret.
1367:      * 0x10400 - 0x1044F.
1368:      * @since 1.5
1369:      */
1370:     public static final UnicodeBlock DESERET
1371:       = new UnicodeBlock(0x10400, 0x1044F,
1372:                          "DESERET", 
1373:                          "Deseret");
1374: 
1375:     /**
1376:      * Shavian.
1377:      * 0x10450 - 0x1047F.
1378:      * @since 1.5
1379:      */
1380:     public static final UnicodeBlock SHAVIAN
1381:       = new UnicodeBlock(0x10450, 0x1047F,
1382:                          "SHAVIAN", 
1383:                          "Shavian");
1384: 
1385:     /**
1386:      * Osmanya.
1387:      * 0x10480 - 0x104AF.
1388:      * @since 1.5
1389:      */
1390:     public static final UnicodeBlock OSMANYA
1391:       = new UnicodeBlock(0x10480, 0x104AF,
1392:                          "OSMANYA", 
1393:                          "Osmanya");
1394: 
1395:     /**
1396:      * Cypriot Syllabary.
1397:      * 0x10800 - 0x1083F.
1398:      * @since 1.5
1399:      */
1400:     public static final UnicodeBlock CYPRIOT_SYLLABARY
1401:       = new UnicodeBlock(0x10800, 0x1083F,
1402:                          "CYPRIOT_SYLLABARY", 
1403:                          "Cypriot Syllabary");
1404: 
1405:     /**
1406:      * Byzantine Musical Symbols.
1407:      * 0x1D000 - 0x1D0FF.
1408:      * @since 1.5
1409:      */
1410:     public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1411:       = new UnicodeBlock(0x1D000, 0x1D0FF,
1412:                          "BYZANTINE_MUSICAL_SYMBOLS", 
1413:                          "Byzantine Musical Symbols");
1414: 
1415:     /**
1416:      * Musical Symbols.
1417:      * 0x1D100 - 0x1D1FF.
1418:      * @since 1.5
1419:      */
1420:     public static final UnicodeBlock MUSICAL_SYMBOLS
1421:       = new UnicodeBlock(0x1D100, 0x1D1FF,
1422:                          "MUSICAL_SYMBOLS", 
1423:                          "Musical Symbols");
1424: 
1425:     /**
1426:      * Tai Xuan Jing Symbols.
1427:      * 0x1D300 - 0x1D35F.
1428:      * @since 1.5
1429:      */
1430:     public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1431:       = new UnicodeBlock(0x1D300, 0x1D35F,
1432:                          "TAI_XUAN_JING_SYMBOLS", 
1433:                          "Tai Xuan Jing Symbols");
1434: 
1435:     /**
1436:      * Mathematical Alphanumeric Symbols.
1437:      * 0x1D400 - 0x1D7FF.
1438:      * @since 1.5
1439:      */
1440:     public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1441:       = new UnicodeBlock(0x1D400, 0x1D7FF,
1442:                          "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 
1443:                          "Mathematical Alphanumeric Symbols");
1444: 
1445:     /**
1446:      * CJK Unified Ideographs Extension B.
1447:      * 0x20000 - 0x2A6DF.
1448:      * @since 1.5
1449:      */
1450:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1451:       = new UnicodeBlock(0x20000, 0x2A6DF,
1452:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 
1453:                          "CJK Unified Ideographs Extension B");
1454: 
1455:     /**
1456:      * CJK Compatibility Ideographs Supplement.
1457:      * 0x2F800 - 0x2FA1F.
1458:      * @since 1.5
1459:      */
1460:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1461:       = new UnicodeBlock(0x2F800, 0x2FA1F,
1462:                          "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 
1463:                          "CJK Compatibility Ideographs Supplement");
1464: 
1465:     /**
1466:      * Tags.
1467:      * 0xE0000 - 0xE007F.
1468:      * @since 1.5
1469:      */
1470:     public static final UnicodeBlock TAGS
1471:       = new UnicodeBlock(0xE0000, 0xE007F,
1472:                          "TAGS", 
1473:                          "Tags");
1474: 
1475:     /**
1476:      * Variation Selectors Supplement.
1477:      * 0xE0100 - 0xE01EF.
1478:      * @since 1.5
1479:      */
1480:     public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1481:       = new UnicodeBlock(0xE0100, 0xE01EF,
1482:                          "VARIATION_SELECTORS_SUPPLEMENT", 
1483:                          "Variation Selectors Supplement");
1484: 
1485:     /**
1486:      * Supplementary Private Use Area-A.
1487:      * 0xF0000 - 0xFFFFF.
1488:      * @since 1.5
1489:      */
1490:     public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1491:       = new UnicodeBlock(0xF0000, 0xFFFFF,
1492:                          "SUPPLEMENTARY_PRIVATE_USE_AREA_A", 
1493:                          "Supplementary Private Use Area-A");
1494: 
1495:     /**
1496:      * Supplementary Private Use Area-B.
1497:      * 0x100000 - 0x10FFFF.
1498:      * @since 1.5
1499:      */
1500:     public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1501:       = new UnicodeBlock(0x100000, 0x10FFFF,
1502:                          "SUPPLEMENTARY_PRIVATE_USE_AREA_B", 
1503:                          "Supplementary Private Use Area-B");
1504: 
1505:     /**
1506:      * Surrogates Area.
1507:      * 0xD800 - 0xDFFF.
1508:      * @deprecated As of 1.5, the three areas, 
1509:      * <a href="#HIGH_SURROGATES">HIGH_SURROGATES</a>,
1510:      * <a href="#HIGH_PRIVATE_USE_SURROGATES">HIGH_PRIVATE_USE_SURROGATES</a>
1511:      * and <a href="#LOW_SURROGATES">LOW_SURROGATES</a>, as defined
1512:      * by the Unicode standard, should be used in preference to
1513:      * this.  These are also returned from calls to <code>of(int)</code>
1514:      * and <code>of(char)</code>.
1515:      */
1516:     public static final UnicodeBlock SURROGATES_AREA
1517:       = new UnicodeBlock(0xD800, 0xDFFF,
1518:                          "SURROGATES_AREA",
1519:              "Surrogates Area");
1520: 
1521:     /**
1522:      * The defined subsets.
1523:      */
1524:     private static final UnicodeBlock sets[] = {
1525:       BASIC_LATIN,
1526:       LATIN_1_SUPPLEMENT,
1527:       LATIN_EXTENDED_A,
1528:       LATIN_EXTENDED_B,
1529:       IPA_EXTENSIONS,
1530:       SPACING_MODIFIER_LETTERS,
1531:       COMBINING_DIACRITICAL_MARKS,
1532:       GREEK,
1533:       CYRILLIC,
1534:       CYRILLIC_SUPPLEMENTARY,
1535:       ARMENIAN,
1536:       HEBREW,
1537:       ARABIC,
1538:       SYRIAC,
1539:       THAANA,
1540:       DEVANAGARI,
1541:       BENGALI,
1542:       GURMUKHI,
1543:       GUJARATI,
1544:       ORIYA,
1545:       TAMIL,
1546:       TELUGU,
1547:       KANNADA,
1548:       MALAYALAM,
1549:       SINHALA,
1550:       THAI,
1551:       LAO,
1552:       TIBETAN,
1553:       MYANMAR,
1554:       GEORGIAN,
1555:       HANGUL_JAMO,
1556:       ETHIOPIC,
1557:       CHEROKEE,
1558:       UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1559:       OGHAM,
1560:       RUNIC,
1561:       TAGALOG,
1562:       HANUNOO,
1563:       BUHID,
1564:       TAGBANWA,
1565:       KHMER,
1566:       MONGOLIAN,
1567:       LIMBU,
1568:       TAI_LE,
1569:       KHMER_SYMBOLS,
1570:       PHONETIC_EXTENSIONS,
1571:       LATIN_EXTENDED_ADDITIONAL,
1572:       GREEK_EXTENDED,
1573:       GENERAL_PUNCTUATION,
1574:       SUPERSCRIPTS_AND_SUBSCRIPTS,
1575:       CURRENCY_SYMBOLS,
1576:       COMBINING_MARKS_FOR_SYMBOLS,
1577:       LETTERLIKE_SYMBOLS,
1578:       NUMBER_FORMS,
1579:       ARROWS,
1580:       MATHEMATICAL_OPERATORS,
1581:       MISCELLANEOUS_TECHNICAL,
1582:       CONTROL_PICTURES,
1583:       OPTICAL_CHARACTER_RECOGNITION,
1584:       ENCLOSED_ALPHANUMERICS,
1585:       BOX_DRAWING,
1586:       BLOCK_ELEMENTS,
1587:       GEOMETRIC_SHAPES,
1588:       MISCELLANEOUS_SYMBOLS,
1589:       DINGBATS,
1590:       MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1591:       SUPPLEMENTAL_ARROWS_A,
1592:       BRAILLE_PATTERNS,
1593:       SUPPLEMENTAL_ARROWS_B,
1594:       MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1595:       SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1596:       MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1597:       CJK_RADICALS_SUPPLEMENT,
1598:       KANGXI_RADICALS,
1599:       IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1600:       CJK_SYMBOLS_AND_PUNCTUATION,
1601:       HIRAGANA,
1602:       KATAKANA,
1603:       BOPOMOFO,
1604:       HANGUL_COMPATIBILITY_JAMO,
1605:       KANBUN,
1606:       BOPOMOFO_EXTENDED,
1607:       KATAKANA_PHONETIC_EXTENSIONS,
1608:       ENCLOSED_CJK_LETTERS_AND_MONTHS,
1609:       CJK_COMPATIBILITY,
1610:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1611:       YIJING_HEXAGRAM_SYMBOLS,
1612:       CJK_UNIFIED_IDEOGRAPHS,
1613:       YI_SYLLABLES,
1614:       YI_RADICALS,
1615:       HANGUL_SYLLABLES,
1616:       HIGH_SURROGATES,
1617:       HIGH_PRIVATE_USE_SURROGATES,
1618:       LOW_SURROGATES,
1619:       PRIVATE_USE_AREA,
1620:       CJK_COMPATIBILITY_IDEOGRAPHS,
1621:       ALPHABETIC_PRESENTATION_FORMS,
1622:       ARABIC_PRESENTATION_FORMS_A,
1623:       VARIATION_SELECTORS,
1624:       COMBINING_HALF_MARKS,
1625:       CJK_COMPATIBILITY_FORMS,
1626:       SMALL_FORM_VARIANTS,
1627:       ARABIC_PRESENTATION_FORMS_B,
1628:       HALFWIDTH_AND_FULLWIDTH_FORMS,
1629:       SPECIALS,
1630:       LINEAR_B_SYLLABARY,
1631:       LINEAR_B_IDEOGRAMS,
1632:       AEGEAN_NUMBERS,
1633:       OLD_ITALIC,
1634:       GOTHIC,
1635:       UGARITIC,
1636:       DESERET,
1637:       SHAVIAN,
1638:       OSMANYA,
1639:       CYPRIOT_SYLLABARY,
1640:       BYZANTINE_MUSICAL_SYMBOLS,
1641:       MUSICAL_SYMBOLS,
1642:       TAI_XUAN_JING_SYMBOLS,
1643:       MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1644:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1645:       CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1646:       TAGS,
1647:       VARIATION_SELECTORS_SUPPLEMENT,
1648:       SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1649:       SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1650:     };
1651:   } // class UnicodeBlock
1652: 
1653:   /**
1654:    * Property lookups for code points inside the private use blocks
1655:    * of the Unicode standard.  The class extends UnassignedCharacters
1656:    * and replaces only the answers that differ for private use code
1657:    * points: getType(), isDefined() and getDirectionality().
1658:    * @author Anthony Balkissoon abalkiss at redhat dot com
1659:    *
1660:    */
1661:   private static class PrivateUseCharacters extends UnassignedCharacters
1662:   {
1663:     /**
1664:      * Returns the type of the code point cp.
1665:      */
1666:     static int getType(int cp)
1667:     {
1668:       // The last two code points of a plane (low 16 bits 0xfffe and
1669:       // 0xffff) are left to UnassignedCharacters.
1670:       if ((cp & 0xffff) < 0xfffe)
1671:         return PRIVATE_USE;
1672:       return UnassignedCharacters.getType(cp);
1673:     }
1674:     
1675:     /**
1676:      * Tells whether the code point cp is defined.
1677:      */
1678:     static boolean isDefined(int cp)
1679:     {
1680:       // The last two code points of a plane (low 16 bits 0xfffe and
1681:       // 0xffff) are answered by UnassignedCharacters; every other
1682:       // private use code point is defined.
1683:       boolean lastTwoOfPlane = (cp & 0xffff) >= 0xfffe;
1684:       return !lastTwoOfPlane || UnassignedCharacters.isDefined(cp);
1685:     }
1686:     
1687:     /**
1688:      * Gets the directionality of cp; the last two of a plane are unassigned.
1689:      */
1690:     static byte getDirectionality(int cp)
1691:     {
1692:       return ((cp & 0xffff) >= 0xfffe)
1693:         ? UnassignedCharacters.getDirectionality(cp)
1694:         : DIRECTIONALITY_LEFT_TO_RIGHT;
1695:     }
1696:   }
1697:   
1698:   /**
1699:    * A class to encompass all the properties of code points that are 
1700:    * currently undefined in the Unicode standard.
1701:    * @author Anthony Balkissoon abalkiss at redhat dot com
1702:    *
1703:    */
1704:   private static class UnassignedCharacters
1705:   {
1706:     /**
1707:      * Returns -1, the digit value for unassigned characters.
1708:      * @param cp the character
1709:      * @param radix the radix (not used)
1710:      * @return -1
1711:      */
1712:     static int digit(int cp, int radix)
1713:     {
1714:       return -1;
1715:     }
1716: 
1717:     /**
1718:      * Returns the Unicode directionality property for unassigned 
1719:      * characters.
1720:      * @param cp the character
1721:      * @return DIRECTIONALITY_UNDEFINED
1722:      */
1723:     static byte getDirectionality(int cp)
1724:     {
1725:       return DIRECTIONALITY_UNDEFINED;
1726:     }
1727: 
1728:     /**
1729:      * Returns -1, the numeric value for unassigned Unicode characters.
1730:      * @param cp the character
1731:      * @return -1
1732:      */
1733:     static int getNumericValue(int cp)
1734:     {
1735:       return -1;
1736:     }
1737: 
1738:     /**
1739:      * Returns UNASSIGNED, the type of unassigned Unicode characters.
1740:      * @param cp the character
1741:      * @return UNASSIGNED
1742:      */
1743:     static int getType(int cp)
1744:     {
1745:       return UNASSIGNED;
1746:     }
1747:     
1748:     /**
1749:      * Returns false to indicate that the character is not defined in the
1750:      * Unicode standard.
1751:      * @param cp the character
1752:      * @return false
1753:      */
1754:     static boolean isDefined(int cp)
1755:     {
1756:       return false;
1757:     }
1758: 
1759:     /**
1760:      * Returns false to indicate that the character is not a digit.
1761:      * @param cp the character
1762:      * @return false
1763:      */
1764:     static boolean isDigit(int cp)
1765:     {
1766:       return false;
1767:     }
1768: 
1769:     /**
1770:      * Returns false to indicate that the character cannot be ignored 
1771:      * within an identifier.
1772:      * @param cp the character
1773:      * @return false
1774:      */
1775:     static boolean isIdentifierIgnorable(int cp)
1776:     {
1777:       return false;
1778:     }
1779: 
1780:     /**
1781:      * Returns false to indicate that the character cannot be part of a 
1782:      * Java identifier.
1783:      * @param cp the character
1784:      * @return false
1785:      */
1786:     static boolean isJavaIdentifierPart(int cp)
1787:     {
1788:       return false;
1789:     }
1790: 
1791:     /**
1792:      * Returns false to indicate that the character cannot start a
1793:      * Java identifier.
1794:      * @param cp the character
1795:      * @return false
1796:      */
1797:     static boolean isJavaIdentiferStart(int cp)
1798:     {
1799:       return false;
1800:     }
1801: 
1802:     /**
1803:      * Returns false to indicate that the character is not a letter.
1804:      * @param cp the character
1805:      * @return false
1806:      */
1807:     static boolean isLetter(int cp)
1808:     {
1809:       return false;
1810:     }
1811: 
1812:     /**
1813:      * Returns false to indicate that the character is neither a letter
1814:      * nor a digit.
1815:      * @param cp the character
1816:      * @return false
1817:      */
1818:     static boolean isLetterOrDigit(int cp)
1819:     {
1820:       return false;
1821:     }
1822: 
1823:     /**
1824:      * Returns false to indicate that the character is not a lowercase letter.
1825:      * @param cp the character
1826:      * @return false
1827:      */
1828:     static boolean isLowerCase(int cp)
1829:     {
1830:       return false;
1831:     }
1832:     
1833:     /**
1834:      * Returns false to indicate that the character is not mirrored.
1835:      * @param cp the character
1836:      * @return false
1837:      */
1838:     static boolean isMirrored(int cp)
1839:     {
1840:       return false;
1841:     }
1842: 
1843:     /**
1844:      * Returns false to indicate that the character is not a space character.
1845:      * @param cp the character
1846:      * @return false
1847:      */
1848:     static boolean isSpaceChar(int cp)
1849:     {
1850:       return false;
1851:     }
1852:     
1853:     /**
1854:      * Returns false to indicate that the character is not a titlecase letter.
1855:      * @param cp the character
1856:      * @return false
1857:      */
1858:     static boolean isTitleCase(int cp)
1859:     {
1860:       return false;
1861:     }
1862:     
1863:     /**
1864:      * Returns false to indicate that the character cannot be part of a
1865:      * Unicode identifier.
1866:      * @param cp the character (unused)
1867:      * @return <code>false</code>, always
1868:      */
1869:     static boolean isUnicodeIdentifierPart(int cp)
1870:     {
1871:       return false;
1872:     }
1873: 
1874:     /**
1875:      * Returns false to indicate that the character cannot start a
1876:      * Unicode identifier.
1877:      * @param cp the character (unused)
1878:      * @return <code>false</code>, always
1879:      */
1880:     static boolean isUnicodeIdentifierStart(int cp)
1881:     {
1882:       return false;
1883:     }
1884: 
1885:     /**
1886:      * Returns false to indicate that the character is not an uppercase letter.
1887:      * @param cp the character (unused)
1888:      * @return <code>false</code>, always
1889:      */
1890:     static boolean isUpperCase(int cp)
1891:     {
1892:       return false;
1893:     }
1894: 
1895:     /**
1896:      * Returns false to indicate that the character is not a whitespace
1897:      * character.
1898:      * @param cp the character (unused)
1899:      * @return <code>false</code>, always
1900:      */
1901:     static boolean isWhiteSpace(int cp)
1902:     {
1903:       return false;
1904:     }
1905: 
1906:     /**
1907:      * Returns cp to indicate this character has no lowercase conversion.
1908:      * @param cp the character
1909:      * @return <code>cp</code>, unchanged
1910:      */
1911:     static int toLowerCase(int cp)
1912:     {
1913:       return cp;
1914:     }
1915:     
1916:     /**
1917:      * Returns cp to indicate this character has no titlecase conversion.
1918:      * @param cp the character
1919:      * @return <code>cp</code>, unchanged
1920:      */
1921:     static int toTitleCase(int cp)
1922:     {
1923:       return cp;
1924:     }
1925: 
1926:     /**
1927:      * Returns cp to indicate this character has no uppercase conversion.
1928:      * @param cp the character
1929:      * @return <code>cp</code>, unchanged
1930:      */
1931:     static int toUpperCase(int cp)
1932:     {
1933:       return cp;
1934:     }    
1935:   }
1936: 
1937:   /**
1938:    * The immutable char wrapped by this Character, set by the constructor.
1939:    *
1940:    * @serial the value of this Character
1941:    */
1942:   private final char value;
1943: 
1944:   /**
1945:    * Serialization version identifier. Compatible with JDK 1.0+.
1946:    */
1947:   private static final long serialVersionUID = 3786198910865385080L;
1948: 
1949:   /**
1950:    * Smallest value allowed for a radix argument; this is <code>2</code>.
1951:    *
1952:    * @see #digit(char, int)
1953:    * @see #forDigit(int, int)
1954:    * @see Integer#toString(int, int)
1955:    * @see Integer#valueOf(String)
1956:    */
1957:   public static final int MIN_RADIX = 2;
1958: 
1959:   /**
1960:    * Largest value allowed for a radix argument; this is <code>36</code>.
1961:    *
1962:    * @see #digit(char, int)
1963:    * @see #forDigit(int, int)
1964:    * @see Integer#toString(int, int)
1965:    * @see Integer#valueOf(String)
1966:    */
1967:   public static final int MAX_RADIX = 36;
1968: 
1969:   /**
1970:    * The minimum value the char data type can hold, i.e. the bottom of its
1971:    * 16-bit range: <code>'\\u0000'</code>.
1972:    */
1973:   public static final char MIN_VALUE = '\u0000';
1974: 
1975:   /**
1976:    * The maximum value the char data type can hold, i.e. the top of its
1977:    * 16-bit range: <code>'\\uFFFF'</code>.
1978:    */
1979:   public static final char MAX_VALUE = '\uFFFF';
1980: 
1981:   /**
1982:    * Class object for the primitive char type, fetched from VMClassLoader.
1983:    *
1984:    * @since 1.1
1985:    */
1986:   public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
1987: 
1988:   /**
1989:    * The number of bits needed to represent a <code>char</code>, namely 16.
1990:    * @since 1.5
1991:    */
1992:   public static final int SIZE = 16;
1993: 
1994:   // Cache of boxed Character values, used by boxing conversions via
1995:   // valueOf().  At least 0..127 must be cached; MAX_CACHE is the highest
1996:   // cached value, so charCache has MAX_CACHE + 1 slots (initially null).
1997:   private static final int MAX_CACHE = 127;
1998:   private static Character[] charCache = new Character[MAX_CACHE + 1];
1999: 
2000:   /**
2001:    * General category Lu = Letter, Uppercase (Informative).
2002:    *
2003:    * @since 1.1
2004:    */
2005:   public static final byte UPPERCASE_LETTER = 1;
2006: 
2007:   /**
2008:    * General category Ll = Letter, Lowercase (Informative).
2009:    *
2010:    * @since 1.1
2011:    */
2012:   public static final byte LOWERCASE_LETTER = 2;
2013: 
2014:   /**
2015:    * General category Lt = Letter, Titlecase (Informative).
2016:    *
2017:    * @since 1.1
2018:    */
2019:   public static final byte TITLECASE_LETTER = 3;
2020: 
2021:   /**
2022:    * General category Mn = Mark, Non-Spacing (Normative).
2023:    *
2024:    * @since 1.1
2025:    */
2026:   public static final byte NON_SPACING_MARK = 6;
2027: 
2028:   /**
2029:    * General category Mc = Mark, Spacing Combining (Normative).
2030:    *
2031:    * @since 1.1
2032:    */
2033:   public static final byte COMBINING_SPACING_MARK = 8;
2034: 
2035:   /**
2036:    * General category Me = Mark, Enclosing (Normative).
2037:    *
2038:    * @since 1.1
2039:    */
2040:   public static final byte ENCLOSING_MARK = 7;
2041: 
2042:   /**
2043:    * General category Nd = Number, Decimal Digit (Normative).
2044:    *
2045:    * @since 1.1
2046:    */
2047:   public static final byte DECIMAL_DIGIT_NUMBER = 9;
2048: 
2049:   /**
2050:    * General category Nl = Number, Letter (Normative).
2051:    *
2052:    * @since 1.1
2053:    */
2054:   public static final byte LETTER_NUMBER = 10;
2055: 
2056:   /**
2057:    * General category No = Number, Other (Normative).
2058:    *
2059:    * @since 1.1
2060:    */
2061:   public static final byte OTHER_NUMBER = 11;
2062: 
2063:   /**
2064:    * General category Zs = Separator, Space (Normative).
2065:    *
2066:    * @since 1.1
2067:    */
2068:   public static final byte SPACE_SEPARATOR = 12;
2069: 
2070:   /**
2071:    * General category Zl = Separator, Line (Normative).
2072:    *
2073:    * @since 1.1
2074:    */
2075:   public static final byte LINE_SEPARATOR = 13;
2076: 
2077:   /**
2078:    * General category Zp = Separator, Paragraph (Normative).
2079:    *
2080:    * @since 1.1
2081:    */
2082:   public static final byte PARAGRAPH_SEPARATOR = 14;
2083: 
2084:   /**
2085:    * General category Cc = Other, Control (Normative).
2086:    *
2087:    * @since 1.1
2088:    */
2089:   public static final byte CONTROL = 15;
2090: 
2091:   /**
2092:    * General category Cf = Other, Format (Normative).
2093:    *
2094:    * @since 1.1
2095:    */
2096:   public static final byte FORMAT = 16;
2097: 
2098:   /**
2099:    * General category Cs = Other, Surrogate (Normative).
2100:    *
2101:    * @since 1.1
2102:    */
2103:   public static final byte SURROGATE = 19;
2104: 
2105:   /**
2106:    * General category Co = Other, Private Use (Normative).
2107:    *
2108:    * @since 1.1
2109:    */
2110:   public static final byte PRIVATE_USE = 18;
2111: 
2112:   /**
2113:    * General category Cn = Other, Not Assigned (Normative).
2114:    *
2115:    * @since 1.1
2116:    */
2117:   public static final byte UNASSIGNED = 0;
2118: 
2119:   /**
2120:    * General category Lm = Letter, Modifier (Informative).
2121:    *
2122:    * @since 1.1
2123:    */
2124:   public static final byte MODIFIER_LETTER = 4;
2125: 
2126:   /**
2127:    * General category Lo = Letter, Other (Informative).
2128:    *
2129:    * @since 1.1
2130:    */
2131:   public static final byte OTHER_LETTER = 5;
2132: 
2133:   /**
2134:    * General category Pc = Punctuation, Connector (Informative).
2135:    *
2136:    * @since 1.1
2137:    */
2138:   public static final byte CONNECTOR_PUNCTUATION = 23;
2139: 
2140:   /**
2141:    * General category Pd = Punctuation, Dash (Informative).
2142:    *
2143:    * @since 1.1
2144:    */
2145:   public static final byte DASH_PUNCTUATION = 20;
2146: 
2147:   /**
2148:    * General category Ps = Punctuation, Open (Informative).
2149:    *
2150:    * @since 1.1
2151:    */
2152:   public static final byte START_PUNCTUATION = 21;
2153: 
2154:   /**
2155:    * General category Pe = Punctuation, Close (Informative).
2156:    *
2157:    * @since 1.1
2158:    */
2159:   public static final byte END_PUNCTUATION = 22;
2160: 
2161:   /**
2162:    * General category Pi = Punctuation, Initial Quote (Informative).
2163:    *
2164:    * @since 1.4
2165:    */
2166:   public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
2167: 
2168:   /**
2169:    * General category Pf = Punctuation, Final Quote (Informative).
2170:    *
2171:    * @since 1.4
2172:    */
2173:   public static final byte FINAL_QUOTE_PUNCTUATION = 30;
2174: 
2175:   /**
2176:    * General category Po = Punctuation, Other (Informative).
2177:    *
2178:    * @since 1.1
2179:    */
2180:   public static final byte OTHER_PUNCTUATION = 24;
2181: 
2182:   /**
2183:    * General category Sm = Symbol, Math (Informative).
2184:    *
2185:    * @since 1.1
2186:    */
2187:   public static final byte MATH_SYMBOL = 25;
2188: 
2189:   /**
2190:    * General category Sc = Symbol, Currency (Informative).
2191:    *
2192:    * @since 1.1
2193:    */
2194:   public static final byte CURRENCY_SYMBOL = 26;
2195: 
2196:   /**
2197:    * General category Sk = Symbol, Modifier (Informative).
2198:    *
2199:    * @since 1.1
2200:    */
2201:   public static final byte MODIFIER_SYMBOL = 27;
2202: 
2203:   /**
2204:    * General category So = Symbol, Other (Informative).
2205:    *
2206:    * @since 1.1
2207:    */
2208:   public static final byte OTHER_SYMBOL = 28;
2209: 
2210:   /**
2211:    * Undefined bidirectional character type. Undefined char values have
2212:    * undefined directionality in the Unicode specification. This value is -1.
2213:    *
2214:    * @since 1.4
2215:    */
2216:   public static final byte DIRECTIONALITY_UNDEFINED = -1;
2217: 
2218:   /**
2219:    * Strong bidirectional character type "L" (left-to-right).
2220:    *
2221:    * @since 1.4
2222:    */
2223:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
2224: 
2225:   /**
2226:    * Strong bidirectional character type "R" (right-to-left).
2227:    *
2228:    * @since 1.4
2229:    */
2230:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
2231: 
2232:   /**
2233:    * Strong bidirectional character type "AL" (right-to-left Arabic).
2234:    *
2235:    * @since 1.4
2236:    */
2237:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
2238: 
2239:   /**
2240:    * Weak bidirectional character type "EN" (European number).
2241:    *
2242:    * @since 1.4
2243:    */
2244:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
2245: 
2246:   /**
2247:    * Weak bidirectional character type "ES" (European number separator).
2248:    *
2249:    * @since 1.4
2250:    */
2251:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
2252: 
2253:   /**
2254:    * Weak bidirectional character type "ET" (European number terminator).
2255:    *
2256:    * @since 1.4
2257:    */
2258:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
2259: 
2260:   /**
2261:    * Weak bidirectional character type "AN" (Arabic number).
2262:    *
2263:    * @since 1.4
2264:    */
2265:   public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
2266: 
2267:   /**
2268:    * Weak bidirectional character type "CS" (common number separator).
2269:    *
2270:    * @since 1.4
2271:    */
2272:   public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
2273: 
2274:   /**
2275:    * Weak bidirectional character type "NSM" (nonspacing mark).
2276:    *
2277:    * @since 1.4
2278:    */
2279:   public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
2280: 
2281:   /**
2282:    * Weak bidirectional character type "BN" (boundary neutral).
2283:    *
2284:    * @since 1.4
2285:    */
2286:   public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
2287: 
2288:   /**
2289:    * Neutral bidirectional character type "B" (paragraph separator).
2290:    *
2291:    * @since 1.4
2292:    */
2293:   public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
2294: 
2295:   /**
2296:    * Neutral bidirectional character type "S" (segment separator).
2297:    *
2298:    * @since 1.4
2299:    */
2300:   public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
2301: 
2302:   /**
2303:    * Neutral bidirectional character type "WS" (whitespace).
2304:    *
2305:    * @since 1.4
2306:    */
2307:   public static final byte DIRECTIONALITY_WHITESPACE = 12;
2308: 
2309:   /**
2310:    * Neutral bidirectional character type "ON" (other neutrals).
2311:    *
2312:    * @since 1.4
2313:    */
2314:   public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
2315: 
2316:   /**
2317:    * Strong bidirectional character type "LRE" (left-to-right embedding).
2318:    *
2319:    * @since 1.4
2320:    */
2321:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
2322: 
2323:   /**
2324:    * Strong bidirectional character type "LRO" (left-to-right override).
2325:    *
2326:    * @since 1.4
2327:    */
2328:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
2329: 
2330:   /**
2331:    * Strong bidirectional character type "RLE" (right-to-left embedding).
2332:    *
2333:    * @since 1.4
2334:    */
2335:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
2336: 
2337:   /**
2338:    * Strong bidirectional character type "RLO" (right-to-left override).
2339:    *
2340:    * @since 1.4
2341:    */
2342:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
2343: 
2344:   /**
2345:    * Weak bidirectional character type "PDF" (pop directional format).
2346:    *
2347:    * @since 1.4
2348:    */
2349:   public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
2350: 
2351:   /**
2352:    * Unicode block offset lookup tables, one per plane (index 0-16, as used
2353:    * by readCodePoint). Built via String.zeroBasedStringValue to avoid copies.
2354:    * @see #readCodePoint(int)
2355:    * @see CharData#BLOCKS
2356:    */
2357:   private static final char[][] blocks = 
2358:     new char[][]{
2359:                  String.zeroBasedStringValue(CharData.BLOCKS[0]),
2360:                  String.zeroBasedStringValue(CharData.BLOCKS[1]),
2361:                  String.zeroBasedStringValue(CharData.BLOCKS[2]),
2362:                  String.zeroBasedStringValue(CharData.BLOCKS[3]),
2363:                  String.zeroBasedStringValue(CharData.BLOCKS[4]),
2364:                  String.zeroBasedStringValue(CharData.BLOCKS[5]),
2365:                  String.zeroBasedStringValue(CharData.BLOCKS[6]),
2366:                  String.zeroBasedStringValue(CharData.BLOCKS[7]),
2367:                  String.zeroBasedStringValue(CharData.BLOCKS[8]),
2368:                  String.zeroBasedStringValue(CharData.BLOCKS[9]),
2369:                  String.zeroBasedStringValue(CharData.BLOCKS[10]),
2370:                  String.zeroBasedStringValue(CharData.BLOCKS[11]),
2371:                  String.zeroBasedStringValue(CharData.BLOCKS[12]),
2372:                  String.zeroBasedStringValue(CharData.BLOCKS[13]),
2373:                  String.zeroBasedStringValue(CharData.BLOCKS[14]),
2374:                  String.zeroBasedStringValue(CharData.BLOCKS[15]),
2375:                  String.zeroBasedStringValue(CharData.BLOCKS[16])};
2376: 
2377:   /**
2378:    * Unicode attribute lookup tables, one per plane (index 0-16, as used by
2379:    * readCodePoint). Built via String.zeroBasedStringValue to avoid copies.
2380:    * @see CharData#DATA
2381:    */  
2382:   private static final char[][] data = 
2383:     new char[][]{
2384:                  String.zeroBasedStringValue(CharData.DATA[0]),
2385:                  String.zeroBasedStringValue(CharData.DATA[1]),
2386:                  String.zeroBasedStringValue(CharData.DATA[2]),
2387:                  String.zeroBasedStringValue(CharData.DATA[3]),
2388:                  String.zeroBasedStringValue(CharData.DATA[4]),
2389:                  String.zeroBasedStringValue(CharData.DATA[5]),
2390:                  String.zeroBasedStringValue(CharData.DATA[6]),
2391:                  String.zeroBasedStringValue(CharData.DATA[7]),
2392:                  String.zeroBasedStringValue(CharData.DATA[8]),
2393:                  String.zeroBasedStringValue(CharData.DATA[9]),
2394:                  String.zeroBasedStringValue(CharData.DATA[10]),
2395:                  String.zeroBasedStringValue(CharData.DATA[11]),
2396:                  String.zeroBasedStringValue(CharData.DATA[12]),
2397:                  String.zeroBasedStringValue(CharData.DATA[13]),
2398:                  String.zeroBasedStringValue(CharData.DATA[14]),
2399:                  String.zeroBasedStringValue(CharData.DATA[15]),
2400:                  String.zeroBasedStringValue(CharData.DATA[16])};
2401: 
2402:   /**
2403:    * Unicode numeric value attribute tables, 17 entries (0-16; presumably one
2404:    * per plane - confirm). Built via String.zeroBasedStringValue, no copies.
2405:    * @see CharData#NUM_VALUE
2406:    */
2407:   private static final char[][] numValue = 
2408:     new char[][]{
2409:                  String.zeroBasedStringValue(CharData.NUM_VALUE[0]),
2410:                  String.zeroBasedStringValue(CharData.NUM_VALUE[1]),
2411:                  String.zeroBasedStringValue(CharData.NUM_VALUE[2]),
2412:                  String.zeroBasedStringValue(CharData.NUM_VALUE[3]),
2413:                  String.zeroBasedStringValue(CharData.NUM_VALUE[4]),
2414:                  String.zeroBasedStringValue(CharData.NUM_VALUE[5]),
2415:                  String.zeroBasedStringValue(CharData.NUM_VALUE[6]),
2416:                  String.zeroBasedStringValue(CharData.NUM_VALUE[7]),
2417:                  String.zeroBasedStringValue(CharData.NUM_VALUE[8]),
2418:                  String.zeroBasedStringValue(CharData.NUM_VALUE[9]),
2419:                  String.zeroBasedStringValue(CharData.NUM_VALUE[10]),
2420:                  String.zeroBasedStringValue(CharData.NUM_VALUE[11]),
2421:                  String.zeroBasedStringValue(CharData.NUM_VALUE[12]),
2422:                  String.zeroBasedStringValue(CharData.NUM_VALUE[13]),
2423:                  String.zeroBasedStringValue(CharData.NUM_VALUE[14]),
2424:                  String.zeroBasedStringValue(CharData.NUM_VALUE[15]),
2425:                  String.zeroBasedStringValue(CharData.NUM_VALUE[16])};
2426: 
2427:   /**
2428:    * Unicode uppercase attribute tables, 17 entries (0-16; presumably one
2429:    * per plane - confirm). Built via String.zeroBasedStringValue, no copies.
2430:    * @see CharData#UPPER
2431:    */  
2432:   private static final char[][] upper = 
2433:     new char[][]{
2434:                  String.zeroBasedStringValue(CharData.UPPER[0]),
2435:                  String.zeroBasedStringValue(CharData.UPPER[1]),
2436:                  String.zeroBasedStringValue(CharData.UPPER[2]),
2437:                  String.zeroBasedStringValue(CharData.UPPER[3]),
2438:                  String.zeroBasedStringValue(CharData.UPPER[4]),
2439:                  String.zeroBasedStringValue(CharData.UPPER[5]),
2440:                  String.zeroBasedStringValue(CharData.UPPER[6]),
2441:                  String.zeroBasedStringValue(CharData.UPPER[7]),
2442:                  String.zeroBasedStringValue(CharData.UPPER[8]),
2443:                  String.zeroBasedStringValue(CharData.UPPER[9]),
2444:                  String.zeroBasedStringValue(CharData.UPPER[10]),
2445:                  String.zeroBasedStringValue(CharData.UPPER[11]),
2446:                  String.zeroBasedStringValue(CharData.UPPER[12]),
2447:                  String.zeroBasedStringValue(CharData.UPPER[13]),
2448:                  String.zeroBasedStringValue(CharData.UPPER[14]),
2449:                  String.zeroBasedStringValue(CharData.UPPER[15]),
2450:                  String.zeroBasedStringValue(CharData.UPPER[16])};
2451: 
2452:   /**
2453:    * Unicode lowercase attribute tables, 17 entries (0-16; presumably one
2454:    * per plane - confirm). Built via String.zeroBasedStringValue, no copies.
2455:    * @see CharData#LOWER
2456:    */
2457:   private static final char[][] lower = 
2458:     new char[][]{
2459:                  String.zeroBasedStringValue(CharData.LOWER[0]),
2460:                  String.zeroBasedStringValue(CharData.LOWER[1]),
2461:                  String.zeroBasedStringValue(CharData.LOWER[2]),
2462:                  String.zeroBasedStringValue(CharData.LOWER[3]),
2463:                  String.zeroBasedStringValue(CharData.LOWER[4]),
2464:                  String.zeroBasedStringValue(CharData.LOWER[5]),
2465:                  String.zeroBasedStringValue(CharData.LOWER[6]),
2466:                  String.zeroBasedStringValue(CharData.LOWER[7]),
2467:                  String.zeroBasedStringValue(CharData.LOWER[8]),
2468:                  String.zeroBasedStringValue(CharData.LOWER[9]),
2469:                  String.zeroBasedStringValue(CharData.LOWER[10]),
2470:                  String.zeroBasedStringValue(CharData.LOWER[11]),
2471:                  String.zeroBasedStringValue(CharData.LOWER[12]),
2472:                  String.zeroBasedStringValue(CharData.LOWER[13]),
2473:                  String.zeroBasedStringValue(CharData.LOWER[14]),
2474:                  String.zeroBasedStringValue(CharData.LOWER[15]),
2475:                  String.zeroBasedStringValue(CharData.LOWER[16])};
2476: 
2477:   /**
2478:    * Unicode direction attribute tables, 17 entries (0-16; presumably one
2479:    * per plane - confirm). Built via String.zeroBasedStringValue, no copies.
2480:    * @see CharData#DIRECTION
2481:    */
2482:   // Package visible (not private, unlike the other tables) for use by String.
2483:   static final char[][] direction = 
2484:     new char[][]{
2485:                  String.zeroBasedStringValue(CharData.DIRECTION[0]),
2486:                  String.zeroBasedStringValue(CharData.DIRECTION[1]),
2487:                  String.zeroBasedStringValue(CharData.DIRECTION[2]),
2488:                  String.zeroBasedStringValue(CharData.DIRECTION[3]),
2489:                  String.zeroBasedStringValue(CharData.DIRECTION[4]),
2490:                  String.zeroBasedStringValue(CharData.DIRECTION[5]),
2491:                  String.zeroBasedStringValue(CharData.DIRECTION[6]),
2492:                  String.zeroBasedStringValue(CharData.DIRECTION[7]),
2493:                  String.zeroBasedStringValue(CharData.DIRECTION[8]),
2494:                  String.zeroBasedStringValue(CharData.DIRECTION[9]),
2495:                  String.zeroBasedStringValue(CharData.DIRECTION[10]),
2496:                  String.zeroBasedStringValue(CharData.DIRECTION[11]),
2497:                  String.zeroBasedStringValue(CharData.DIRECTION[12]),
2498:                  String.zeroBasedStringValue(CharData.DIRECTION[13]),
2499:                  String.zeroBasedStringValue(CharData.DIRECTION[14]),
2500:                  String.zeroBasedStringValue(CharData.DIRECTION[15]),
2501:                  String.zeroBasedStringValue(CharData.DIRECTION[16])};
2502: 
2503:   /**
2504:    * Unicode titlecase table; a single flat array, unlike the tables above.
2505:    * Built via String.zeroBasedStringValue to avoid copying the array.
2506:    * @see CharData#TITLE
2507:    */
2508:   private static final char[] title = String.zeroBasedStringValue(CharData.TITLE);  
2509: 
2510:   /**
2511:    * Mask for grabbing the type (the low five bits) out of the contents of data.
2512:    * @see CharData#DATA
2513:    */
2514:   private static final int TYPE_MASK = 0x1F;
2515: 
2516:   /**
2517:    * Mask for grabbing the non-breaking space flag (bit 5, value 0x20) out of
2518:    * the contents of data.
2519:    * @see CharData#DATA
2520:    */
2521:   private static final int NO_BREAK_MASK = 0x20;
2522: 
2523:   /**
2524:    * Mask for grabbing the mirrored directionality flag (bit 6, value 0x40)
2525:    * out of the contents of data.
2526:    * @see CharData#DATA
2527:    */
2528:   private static final int MIRROR_MASK = 0x40;
2529: 
2530:   /**
2531:    * Minimum value of a supplementary code point, <code>0x10000</code>.
2532:    *
2533:    * @since 1.5
2534:    */
2535:   public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
2536: 
2537:   /**
2538:    * Minimum value of a code point, <code>0</code>.
2539:    *
2540:    * @since 1.5
2541:    */
2542:   public static final int MIN_CODE_POINT = 0; 
2543:  
2544:  
2545:   /**
2546:    * Maximum value of a code point, <code>0x10ffff</code>.
2547:    *
2548:    * @since 1.5
2549:    */
2550:   public static final int MAX_CODE_POINT = 0x010ffff;
2551: 
2552: 
2553:   /**
2554:    * Minimum high surrogate code in UTF-16 encoding.
2555:    *
2556:    * @since 1.5
2557:    */
2558:   public static final char MIN_HIGH_SURROGATE = '\ud800';
2559: 
2560:   /**
2561:    * Maximum high surrogate code in UTF-16 encoding.
2562:    *
2563:    * @since 1.5
2564:    */
2565:   public static final char MAX_HIGH_SURROGATE = '\udbff';
2566:  
2567:   /**
2568:    * Minimum low surrogate code in UTF-16 encoding.
2569:    *
2570:    * @since 1.5
2571:    */
2572:   public static final char MIN_LOW_SURROGATE = '\udc00';
2573: 
2574:   /**
2575:    * Maximum low surrogate code in UTF-16 encoding.
2576:    *
2577:    * @since 1.5
2578:    */
2579:   public static final char MAX_LOW_SURROGATE = '\udfff';
2580: 
2581:   /**
2582:    * Minimum surrogate code in UTF-16 encoding (same as MIN_HIGH_SURROGATE).
2583:    *
2584:    * @since 1.5
2585:    */
2586:   public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
2587: 
2588:   /**
2589:    * Maximum surrogate code in UTF-16 encoding (same as MAX_LOW_SURROGATE).
2590:    *
2591:    * @since 1.5
2592:    */
2593:   public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
2594: 
2595:   /**
2596:    * Grabs an attribute offset from the Unicode attribute database. The lower
2597:    * 5 bits are the character type, the next 2 bits are flags, and the top
2598:    * 9 bits are the offset into the attribute tables.
2599:    *
2600:    * @param codePoint the character to look up
2601:    * @return the character's attribute offset and type
2602:    * @see #TYPE_MASK
2603:    * @see #NO_BREAK_MASK
2604:    * @see #MIRROR_MASK
2605:    * @see CharData#DATA
2606:    * @see CharData#SHIFT
2607:    */
2608:   static char readCodePoint(int codePoint)
2609:   {
2610:     int plane = codePoint >>> 16;
2611:     char offset = (char) (codePoint & 0xffff);
2612:     return data[plane][(char) (blocks[plane][offset >> CharData.SHIFT[plane]] + offset)];
2613:   }
2614: 
2615:   /**
2616:    * Wraps up a character by storing it in the final <code>value</code> field.
2617:    *
2618:    * @param value the character to wrap
2619:    */
2620:   public Character(char value)
2621:   {
2622:     this.value = value;
2623:   }
2624: 
2625:   /**
2626:    * Unwraps this object, yielding the primitive char it holds.
2627:    *
2628:    * @return the character wrapped
2629:    */
2630:   public char charValue()
2631:   {
2632:     return this.value;
2633:   }
2634: 
2635:   /**
2636:    * Computes the hash code, which is the wrapped character widened to an
2637:    * int without sign extension, so results lie in 0x0000-0xFFFF.
2638:    *
2639:    * @return the value of the wrapped character
2640:    */
2641:   public int hashCode()
2642:   {
2643:     return (int) value;
2644:   }
2645: 
2646:   /**
2647:    * Compares this Character with another object. They are equal only when
2648:    * the other object is also a Character wrapping the same value.
2649:    *
2650:    * @param o object to compare
2651:    * @return true if o is a Character with the same value
2652:    */
2653:   public boolean equals(Object o)
2654:   {
2655:     return (o instanceof Character) ? ((Character) o).value == value : false;
2656:   }
2657: 
2658:   /**
2659:    * Produces a String holding just the wrapped character.
2660:    *
2661:    * @return a String containing one character -- the wrapped character
2662:    *         of this instance
2663:    */
2664:   public String toString()
2665:   {
2666:     char[] single = { value };  // package constructor avoids an array copy
2667:     return new String(single, 0, 1, true);
2668:   }
2669: 
2670:   /**
2671:    * Converts the specified character into a String of length 1.
2672:    *
2673:    * @param ch the character to convert
2674:    * @return a String containing the character
2675:    * @since 1.4
2676:    */
2677:   public static String toString(char ch)
2678:   {
2679:     char[] wrapped = { ch };  // package constructor avoids an array copy
2680:     return new String(wrapped, 0, 1, true);
2681:   }
2682: 
2683:   /**
2684:    * Tests whether a char is a Unicode lowercase letter; for instance,
2685:    * <code>'a'</code> is lowercase.  The answer is true exactly when
2686:    * getType() returns LOWERCASE_LETTER.<br>
2687:    * lowercase = [Ll]
2688:    *
2689:    * @param ch character to test
2690:    * @return true if ch is a Unicode lowercase letter, else false
2691:    * @see #isUpperCase(char)
2692:    * @see #isTitleCase(char)
2693:    * @see #toLowerCase(char)
2694:    * @see #getType(char)
2695:    */
2696:   public static boolean isLowerCase(char ch)
2697:   {
2698:     final int codePoint = ch;
2699:     return isLowerCase(codePoint);
2700:   }
2701:   
2702:   /**
2703:    * Tests whether a code point is a Unicode lowercase letter; for instance,
2704:    * <code>'a'</code> is lowercase.  The answer is true exactly when
2705:    * getType() returns LOWERCASE_LETTER.<br>
2706:    * lowercase = [Ll]
2707:    *
2708:    * @param codePoint character to test
2709:    * @return true if codePoint is a Unicode lowercase letter, else false
2710:    * @see #isUpperCase(char)
2711:    * @see #isTitleCase(char)
2712:    * @see #toLowerCase(char)
2713:    * @see #getType(char)
2714:    * 
2715:    * @since 1.5
2716:    */
2717:   public static boolean isLowerCase(int codePoint)
2718:   {
2719:     final int category = getType(codePoint);
2720:     return category == LOWERCASE_LETTER;
2721:   }
2722: 
2723:   /**
2724:    * Tests whether a char is a Unicode uppercase letter; for instance,
2725:    * <code>'A'</code> is uppercase.  The answer is true exactly when
2726:    * getType() returns UPPERCASE_LETTER.<br>
2727:    * uppercase = [Lu]
2728:    *
2729:    * @param ch character to test
2730:    * @return true if ch is a Unicode uppercase letter, else false
2731:    * @see #isLowerCase(char)
2732:    * @see #isTitleCase(char)
2733:    * @see #toUpperCase(char)
2734:    * @see #getType(char)
2735:    */
2736:   public static boolean isUpperCase(char ch)
2737:   {
2738:     final int codePoint = ch;
2739:     return isUpperCase(codePoint);
2740:   }
2741:   
2742:   /**
2743:    * Tests whether a code point is a Unicode uppercase letter; for instance,
2744:    * <code>'A'</code> is uppercase.  The answer is true exactly when
2745:    * getType() returns UPPERCASE_LETTER.<br>
2746:    * uppercase = [Lu]
2747:    *
2748:    * @param codePoint character to test
2749:    * @return true if codePoint is a Unicode uppercase letter, else false
2750:    * @see #isLowerCase(char)
2751:    * @see #isTitleCase(char)
2752:    * @see #toUpperCase(char)
2753:    * @see #getType(char)
2754:    * 
2755:    * @since 1.5
2756:    */
2757:   public static boolean isUpperCase(int codePoint)
2758:   {
2759:     final int category = getType(codePoint);
2760:     return category == UPPERCASE_LETTER;
2761:   }
2762: 
2763:   /**
2764:    * Tests whether a char is a Unicode titlecase letter; for instance, the
2765:    * character "Lj" (Latin capital L with small letter j) is titlecase.
2766:    * The answer is true exactly when getType() returns TITLECASE_LETTER.<br>
2767:    * titlecase = [Lt]
2768:    *
2769:    * @param ch character to test
2770:    * @return true if ch is a Unicode titlecase letter, else false
2771:    * @see #isLowerCase(char)
2772:    * @see #isUpperCase(char)
2773:    * @see #toTitleCase(char)
2774:    * @see #getType(char)
2775:    */
2776:   public static boolean isTitleCase(char ch)
2777:   {
2778:     final int codePoint = ch;
2779:     return isTitleCase(codePoint);
2780:   }
2781:   
2782:   /**
2783:    * Tests whether a code point is a Unicode titlecase letter; for instance,
2784:    * the character "Lj" (Latin capital L with small letter j) is titlecase.
2785:    * The answer is true exactly when getType() returns TITLECASE_LETTER.<br>
2786:    * titlecase = [Lt]
2787:    *
2788:    * @param codePoint character to test
2789:    * @return true if codePoint is a Unicode titlecase letter, else false
2790:    * @see #isLowerCase(char)
2791:    * @see #isUpperCase(char)
2792:    * @see #toTitleCase(char)
2793:    * @see #getType(char)
2794:    * 
2795:    * @since 1.5
2796:    */
2797:   public static boolean isTitleCase(int codePoint)
2798:   {
2799:     final int category = getType(codePoint);
2800:     return category == TITLECASE_LETTER;
2801:   }
2802:   
2803: 
2804:   /**
2805:    * Tests whether a char is a Unicode decimal digit; for instance,
2806:    * <code>'0'</code> is a digit.  The answer is true exactly when
2807:    * getType() returns DECIMAL_DIGIT_NUMBER.<br>
2808:    * Unicode decimal digit = [Nd]
2809:    *
2810:    * @param ch character to test
2811:    * @return true if ch is a Unicode decimal digit, else false
2812:    * @see #digit(char, int)
2813:    * @see #forDigit(int, int)
2814:    * @see #getType(char)
2815:    */
2816:   public static boolean isDigit(char ch)
2817:   {
2818:     final int codePoint = ch;
2819:     return isDigit(codePoint);
2820:   }
2821:   
2822:   /**
2823:    * Determines if a character is a Unicode decimal digit. For example,
2824:    * <code>'0'</code> is a digit. A character is a Unicode digit if
2825:    * getType() returns DECIMAL_DIGIT_NUMBER.
2826:    * <br>
2827:    * Unicode decimal digit = [Nd]
2828:    *
2829:    * @param codePoint character to test
2830:    * @return true if ch is a Unicode decimal digit, else false
2831:    * @see #digit(char, int)
2832:    * @see #forDigit(int, int)
2833:    * @see #getType(char)
2834:    * 
2835:    * @since 1.5
2836:    */
2837: 
2838:   public static boolean isDigit(int codePoint)
2839:   {
2840:     return getType(codePoint) == DECIMAL_DIGIT_NUMBER;
2841:   }
2842: 
2843:   /**
2844:    * Determines if a character is part of the Unicode Standard. This is an
2845:    * evolving standard, but covers every character in the data file.
2846:    * <br>
2847:    * defined = not [Cn]
2848:    *
2849:    * @param ch character to test
2850:    * @return true if ch is a Unicode character, else false
2851:    * @see #isDigit(char)
2852:    * @see #isLetter(char)
2853:    * @see #isLetterOrDigit(char)
2854:    * @see #isLowerCase(char)
2855:    * @see #isTitleCase(char)
2856:    * @see #isUpperCase(char)
2857:    */
2858:   public static boolean isDefined(char ch)
2859:   {
2860:     return isDefined((int)ch);
2861:   }
2862:   
2863:   /**
2864:    * Determines if a character is part of the Unicode Standard. This is an
2865:    * evolving standard, but covers every character in the data file.
2866:    * <br>
2867:    * defined = not [Cn]
2868:    *
2869:    * @param codePoint character to test
2870:    * @return true if ch is a Unicode character, else false
2871:    * @see #isDigit(char)
2872:    * @see #isLetter(char)
2873:    * @see #isLetterOrDigit(char)
2874:    * @see #isLowerCase(char)
2875:    * @see #isTitleCase(char)
2876:    * @see #isUpperCase(char)
2877:    * 
2878:    * @since 1.5
2879:    */
2880:   public static boolean isDefined(int codePoint)
2881:   {
2882:     return getType(codePoint) != UNASSIGNED;
2883:   }
2884: 
2885:   /**
2886:    * Determines if a character is a Unicode letter. Not all letters have case,
2887:    * so this may return true when isLowerCase and isUpperCase return false.
2888:    * A character is a Unicode letter if getType() returns one of 
2889:    * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER,
2890:    * or OTHER_LETTER.
2891:    * <br>
2892:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2893:    *
2894:    * @param ch character to test
2895:    * @return true if ch is a Unicode letter, else false
2896:    * @see #isDigit(char)
2897:    * @see #isJavaIdentifierStart(char)
2898:    * @see #isJavaLetter(char)
2899:    * @see #isJavaLetterOrDigit(char)
2900:    * @see #isLetterOrDigit(char)
2901:    * @see #isLowerCase(char)
2902:    * @see #isTitleCase(char)
2903:    * @see #isUnicodeIdentifierStart(char)
2904:    * @see #isUpperCase(char)
2905:    */
2906:   public static boolean isLetter(char ch)
2907:   {
2908:     return isLetter((int)ch);
2909:   }
2910:   
2911:   /**
2912:    * Determines if a character is a Unicode letter. Not all letters have case,
2913:    * so this may return true when isLowerCase and isUpperCase return false.
2914:    * A character is a Unicode letter if getType() returns one of 
2915:    * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER,
2916:    * or OTHER_LETTER.
2917:    * <br>
2918:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2919:    *
2920:    * @param codePoint character to test
2921:    * @return true if ch is a Unicode letter, else false
2922:    * @see #isDigit(char)
2923:    * @see #isJavaIdentifierStart(char)
2924:    * @see #isJavaLetter(char)
2925:    * @see #isJavaLetterOrDigit(char)
2926:    * @see #isLetterOrDigit(char)
2927:    * @see #isLowerCase(char)
2928:    * @see #isTitleCase(char)
2929:    * @see #isUnicodeIdentifierStart(char)
2930:    * @see #isUpperCase(char)
2931:    * 
2932:    * @since 1.5
2933:    */
2934:   public static boolean isLetter(int codePoint)
2935:   {
2936:     return ((1 << getType(codePoint))
2937:         & ((1 << UPPERCASE_LETTER)
2938:             | (1 << LOWERCASE_LETTER)
2939:             | (1 << TITLECASE_LETTER)
2940:             | (1 << MODIFIER_LETTER)
2941:             | (1 << OTHER_LETTER))) != 0;
2942:   }
2943:   /**
2944:    * Returns the index into the given CharSequence that is offset
2945:    * <code>codePointOffset</code> code points from <code>index</code>.
2946:    * @param seq the CharSequence
2947:    * @param index the start position in the CharSequence
2948:    * @param codePointOffset the number of code points offset from the start
2949:    * position
2950:    * @return the index into the CharSequence that is codePointOffset code 
2951:    * points offset from index
2952:    * 
2953:    * @throws NullPointerException if seq is null
2954:    * @throws IndexOutOfBoundsException if index is negative or greater than the
2955:    * length of the sequence.
2956:    * @throws IndexOutOfBoundsException if codePointOffset is positive and the 
2957:    * subsequence from index to the end of seq has fewer than codePointOffset
2958:    * code points
2959:    * @throws IndexOutOfBoundsException if codePointOffset is negative and the
2960:    * subsequence from the start of seq to index has fewer than 
2961:    * (-codePointOffset) code points
2962:    * @since 1.5
2963:    */
2964:   public static int offsetByCodePoints(CharSequence seq,
2965:                                        int index,
2966:                                        int codePointOffset)
2967:   {
2968:     int len = seq.length();
2969:     if (index < 0 || index > len)
2970:       throw new IndexOutOfBoundsException();
2971:     
2972:     int numToGo = codePointOffset;
2973:     int offset = index;
2974:     int adjust = 1;
2975:     if (numToGo >= 0)
2976:       {
2977:         for (; numToGo > 0; offset++)
2978:           {
2979:             numToGo--;
2980:             if (Character.isHighSurrogate(seq.charAt(offset))
2981:                 && (offset + 1) < len
2982:                 && Character.isLowSurrogate(seq.charAt(offset + 1)))
2983:               offset++;
2984:           }
2985:         return offset;
2986:       }
2987:     else
2988:       {
2989:         numToGo *= -1;
2990:         for (; numToGo > 0;)
2991:           {
2992:             numToGo--;
2993:             offset--;
2994:             if (Character.isLowSurrogate(seq.charAt(offset))
2995:                 && (offset - 1) >= 0
2996:                 && Character.isHighSurrogate(seq.charAt(offset - 1)))
2997:               offset--;
2998:           }
2999:         return offset;
3000:       }
3001:   }
3002:   
3003:   /**
3004:    * Returns the index into the given char subarray that is offset
3005:    * <code>codePointOffset</code> code points from <code>index</code>.
3006:    * @param a the char array
3007:    * @param start the start index of the subarray
3008:    * @param count the length of the subarray
3009:    * @param index the index to be offset
3010:    * @param codePointOffset the number of code points offset from <code>index
3011:    * </code>
3012:    * @return the index into the char array
3013:    * 
3014:    * @throws NullPointerException if a is null
3015:    * @throws IndexOutOfBoundsException if start or count is negative or if
3016:    * start + count is greater than the length of the array
3017:    * @throws IndexOutOfBoundsException if index is less than start or larger 
3018:    * than start + count
3019:    * @throws IndexOutOfBoundsException if codePointOffset is positive and the
3020:    * subarray from index to start + count - 1 has fewer than codePointOffset
3021:    * code points.
3022:    * @throws IndexOutOfBoundsException if codePointOffset is negative and the
3023:    * subarray from start to index - 1 has fewer than (-codePointOffset) code
3024:    * points
3025:    * 
3026:    * @since 1.5
3027:    */
3028:   public static int offsetByCodePoints(char[] a,
3029:                                        int start,
3030:                                        int count,
3031:                                        int index,
3032:                                        int codePointOffset)
3033:   {
3034:     int len = a.length;
3035:     int end = start + count;
3036:     if (start < 0 || count < 0 || end > len || index < start || index > end)
3037:       throw new IndexOutOfBoundsException();
3038:     
3039:     int numToGo = codePointOffset;
3040:     int offset = index;
3041:     int adjust = 1;
3042:     if (numToGo >= 0)
3043:       {
3044:         for (; numToGo > 0; offset++)
3045:           {
3046:             numToGo--;
3047:             if (Character.isHighSurrogate(a[offset])
3048:                 && (offset + 1) < len
3049:                 && Character.isLowSurrogate(a[offset + 1]))
3050:               offset++;
3051:           }
3052:         return offset;
3053:       }
3054:     else
3055:       {
3056:         numToGo *= -1;
3057:         for (; numToGo > 0;)
3058:           {
3059:             numToGo--;
3060:             offset--;
3061:             if (Character.isLowSurrogate(a[offset])
3062:                 && (offset - 1) >= 0
3063:                 && Character.isHighSurrogate(a[offset - 1]))
3064:               offset--;
3065:             if (offset < start)
3066:               throw new IndexOutOfBoundsException();
3067:           }
3068:         return offset;
3069:       }
3070: 
3071:   }
3072:   
3073:   /**
3074:    * Returns the number of Unicode code points in the specified range of the
3075:    * given CharSequence.  The first char in the range is at position
3076:    * beginIndex and the last one is at position endIndex - 1.  Paired 
3077:    * surrogates (supplementary characters are represented by a pair of chars - 
3078:    * one from the high surrogates and one from the low surrogates) 
3079:    * count as just one code point.
3080:    * @param seq the CharSequence to inspect
3081:    * @param beginIndex the beginning of the range
3082:    * @param endIndex the end of the range
3083:    * @return the number of Unicode code points in the given range of the 
3084:    * sequence
3085:    * @throws NullPointerException if seq is null
3086:    * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is
3087:    * larger than the length of seq, or if beginIndex is greater than endIndex.
3088:    * @since 1.5
3089:    */
3090:   public static int codePointCount(CharSequence seq, int beginIndex,
3091:                                    int endIndex)
3092:   {
3093:     int len = seq.length();
3094:     if (beginIndex < 0 || endIndex > len || beginIndex > endIndex)
3095:       throw new IndexOutOfBoundsException();
3096:         
3097:     int count = 0;
3098:     for (int i = beginIndex; i < endIndex; i++)
3099:       {
3100:         count++;
3101:         // If there is a pairing, count it only once.
3102:         if (isHighSurrogate(seq.charAt(i)) && (i + 1) < endIndex
3103:             && isLowSurrogate(seq.charAt(i + 1)))
3104:           i ++;
3105:       }    
3106:     return count;
3107:   }
3108:   
3109:   /**
3110:    * Returns the number of Unicode code points in the specified range of the
3111:    * given char array.  The first char in the range is at position
3112:    * offset and the length of the range is count.  Paired surrogates
3113:    * (supplementary characters are represented by a pair of chars - 
3114:    * one from the high surrogates and one from the low surrogates) 
3115:    * count as just one code point.
3116:    * @param a the char array to inspect
3117:    * @param offset the beginning of the range
3118:    * @param count the length of the range
3119:    * @return the number of Unicode code points in the given range of the 
3120:    * array
3121:    * @throws NullPointerException if a is null
3122:    * @throws IndexOutOfBoundsException if offset or count is negative or if 
3123:    * offset + countendIndex is larger than the length of a.
3124:    * @since 1.5
3125:    */
3126:   public static int codePointCount(char[] a, int offset,
3127:                                    int count)
3128:   {
3129:     int len = a.length;
3130:     int end = offset + count;
3131:     if (offset < 0 || count < 0 || end > len)
3132:       throw new IndexOutOfBoundsException();
3133:         
3134:     int counter = 0;
3135:     for (int i = offset; i < end; i++)
3136:       {
3137:         counter++;
3138:         // If there is a pairing, count it only once.
3139:         if (isHighSurrogate(a[i]) && (i + 1) < end
3140:             && isLowSurrogate(a[i + 1]))
3141:           i ++;
3142:       }    
3143:     return counter;
3144:   }
3145: 
3146:   /**
3147:    * Determines if a character is a Unicode letter or a Unicode digit. This
3148:    * is the combination of isLetter and isDigit.
3149:    * <br>
3150:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
3151:    *
3152:    * @param ch character to test
3153:    * @return true if ch is a Unicode letter or a Unicode digit, else false
3154:    * @see #isDigit(char)
3155:    * @see #isJavaIdentifierPart(char)
3156:    * @see #isJavaLetter(char)
3157:    * @see #isJavaLetterOrDigit(char)
3158:    * @see #isLetter(char)
3159:    * @see #isUnicodeIdentifierPart(char)
3160:    */
3161:   public static boolean isLetterOrDigit(char ch)
3162:   {
3163:     return isLetterOrDigit((int)ch);
3164:   }
3165: 
3166:   /**
3167:    * Determines if a character is a Unicode letter or a Unicode digit. This
3168:    * is the combination of isLetter and isDigit.
3169:    * <br>
3170:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
3171:    *
3172:    * @param codePoint character to test
3173:    * @return true if ch is a Unicode letter or a Unicode digit, else false
3174:    * @see #isDigit(char)
3175:    * @see #isJavaIdentifierPart(char)
3176:    * @see #isJavaLetter(char)
3177:    * @see #isJavaLetterOrDigit(char)
3178:    * @see #isLetter(char)
3179:    * @see #isUnicodeIdentifierPart(char)
3180:    * 
3181:    * @since 1.5
3182:    */
3183:   public static boolean isLetterOrDigit(int codePoint)
3184:   {
3185:     return ((1 << getType(codePoint))
3186:         & ((1 << UPPERCASE_LETTER)
3187:            | (1 << LOWERCASE_LETTER)
3188:            | (1 << TITLECASE_LETTER)
3189:            | (1 << MODIFIER_LETTER)
3190:            | (1 << OTHER_LETTER)
3191:            | (1 << DECIMAL_DIGIT_NUMBER))) != 0;
3192:   }
3193:   
3194:   /**
3195:    * Determines if a character can start a Java identifier. This is the
3196:    * combination of isLetter, any character where getType returns
3197:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3198:    * (like '_').
3199:    *
3200:    * @param ch character to test
3201:    * @return true if ch can start a Java identifier, else false
3202:    * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
3203:    * @see #isJavaLetterOrDigit(char)
3204:    * @see #isJavaIdentifierStart(char)
3205:    * @see #isJavaIdentifierPart(char)
3206:    * @see #isLetter(char)
3207:    * @see #isLetterOrDigit(char)
3208:    * @see #isUnicodeIdentifierStart(char)
3209:    */
3210:   public static boolean isJavaLetter(char ch)
3211:   {
3212:     return isJavaIdentifierStart(ch);
3213:   }
3214: 
3215:   /**
3216:    * Determines if a character can follow the first letter in
3217:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
3218:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3219:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3220:    * or isIdentifierIgnorable.
3221:    *
3222:    * @param ch character to test
3223:    * @return true if ch can follow the first letter in a Java identifier
3224:    * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
3225:    * @see #isJavaLetter(char)
3226:    * @see #isJavaIdentifierStart(char)
3227:    * @see #isJavaIdentifierPart(char)
3228:    * @see #isLetter(char)
3229:    * @see #isLetterOrDigit(char)
3230:    * @see #isUnicodeIdentifierPart(char)
3231:    * @see #isIdentifierIgnorable(char)
3232:    */
3233:   public static boolean isJavaLetterOrDigit(char ch)
3234:   {
3235:     return isJavaIdentifierPart(ch);
3236:   }
3237: 
3238:   /**
3239:    * Determines if a character can start a Java identifier. This is the
3240:    * combination of isLetter, any character where getType returns
3241:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3242:    * (like '_').
3243:    * <br>
3244:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
3245:    *
3246:    * @param ch character to test
3247:    * @return true if ch can start a Java identifier, else false
3248:    * @see #isJavaIdentifierPart(char)
3249:    * @see #isLetter(char)
3250:    * @see #isUnicodeIdentifierStart(char)
3251:    * @since 1.1
3252:    */
3253:   public static boolean isJavaIdentifierStart(char ch)
3254:   {
3255:     return isJavaIdentifierStart((int)ch);
3256:   }
3257: 
3258:   /**
3259:    * Determines if a character can start a Java identifier. This is the
3260:    * combination of isLetter, any character where getType returns
3261:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3262:    * (like '_').
3263:    * <br>
3264:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
3265:    *
3266:    * @param codePoint character to test
3267:    * @return true if ch can start a Java identifier, else false
3268:    * @see #isJavaIdentifierPart(char)
3269:    * @see #isLetter(char)
3270:    * @see #isUnicodeIdentifierStart(char)
3271:    * @since 1.5
3272:    */
3273:   public static boolean isJavaIdentifierStart(int codePoint)
3274:   {
3275:     return ((1 << getType(codePoint))
3276:             & ((1 << UPPERCASE_LETTER)
3277:                | (1 << LOWERCASE_LETTER)
3278:                | (1 << TITLECASE_LETTER)
3279:                | (1 << MODIFIER_LETTER)
3280:                | (1 << OTHER_LETTER)
3281:                | (1 << LETTER_NUMBER)
3282:                | (1 << CURRENCY_SYMBOL)
3283:                | (1 << CONNECTOR_PUNCTUATION))) != 0;
3284:   }
3285:   
3286:   /**
3287:    * Determines if a character can follow the first letter in
3288:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
3289:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3290:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3291:    * or isIdentifierIgnorable.
3292:    * <br>
3293:    * Java identifier extender =
3294:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
3295:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3296:    *
3297:    * @param ch character to test
3298:    * @return true if ch can follow the first letter in a Java identifier
3299:    * @see #isIdentifierIgnorable(char)
3300:    * @see #isJavaIdentifierStart(char)
3301:    * @see #isLetterOrDigit(char)
3302:    * @see #isUnicodeIdentifierPart(char)
3303:    * @since 1.1
3304:    */
3305:   public static boolean isJavaIdentifierPart(char ch)
3306:   {
3307:     return isJavaIdentifierPart((int)ch);
3308:   }
3309:   
3310:   /**
3311:    * Determines if a character can follow the first letter in
3312:    * a Java identifier.  This is the combination of isJavaLetter (isLetter,
3313:    * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3314:    * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3315:    * or isIdentifierIgnorable.
3316:    * <br>
3317:    * Java identifier extender =
3318:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
3319:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3320:    *
3321:    * @param codePoint character to test
3322:    * @return true if ch can follow the first letter in a Java identifier
3323:    * @see #isIdentifierIgnorable(char)
3324:    * @see #isJavaIdentifierStart(char)
3325:    * @see #isLetterOrDigit(char)
3326:    * @see #isUnicodeIdentifierPart(char)
3327:    * @since 1.5
3328:    */
3329:   public static boolean isJavaIdentifierPart(int codePoint)
3330:   {
3331:     int category = getType(codePoint);
3332:     return ((1 << category)
3333:             & ((1 << UPPERCASE_LETTER)
3334:                | (1 << LOWERCASE_LETTER)
3335:                | (1 << TITLECASE_LETTER)
3336:                | (1 << MODIFIER_LETTER)
3337:                | (1 << OTHER_LETTER)
3338:                | (1 << NON_SPACING_MARK)
3339:                | (1 << COMBINING_SPACING_MARK)
3340:                | (1 << DECIMAL_DIGIT_NUMBER)
3341:                | (1 << LETTER_NUMBER)
3342:                | (1 << CURRENCY_SYMBOL)
3343:                | (1 << CONNECTOR_PUNCTUATION)
3344:                | (1 << FORMAT))) != 0
3345:       || (category == CONTROL && isIdentifierIgnorable(codePoint));
3346:   }
3347: 
3348:   /**
3349:    * Determines if a character can start a Unicode identifier.  Only
3350:    * letters can start a Unicode identifier, but this includes characters
3351:    * in LETTER_NUMBER.
3352:    * <br>
3353:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
3354:    *
3355:    * @param ch character to test
3356:    * @return true if ch can start a Unicode identifier, else false
3357:    * @see #isJavaIdentifierStart(char)
3358:    * @see #isLetter(char)
3359:    * @see #isUnicodeIdentifierPart(char)
3360:    * @since 1.1
3361:    */
3362:   public static boolean isUnicodeIdentifierStart(char ch)
3363:   {
3364:     return isUnicodeIdentifierStart((int)ch);
3365:   }
3366: 
3367:   /**
3368:    * Determines if a character can start a Unicode identifier.  Only
3369:    * letters can start a Unicode identifier, but this includes characters
3370:    * in LETTER_NUMBER.
3371:    * <br>
3372:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
3373:    *
3374:    * @param codePoint character to test
3375:    * @return true if ch can start a Unicode identifier, else false
3376:    * @see #isJavaIdentifierStart(char)
3377:    * @see #isLetter(char)
3378:    * @see #isUnicodeIdentifierPart(char)
3379:    * @since 1.5
3380:    */
3381:   public static boolean isUnicodeIdentifierStart(int codePoint)
3382:   {
3383:     return ((1 << getType(codePoint))
3384:             & ((1 << UPPERCASE_LETTER)
3385:                | (1 << LOWERCASE_LETTER)
3386:                | (1 << TITLECASE_LETTER)
3387:                | (1 << MODIFIER_LETTER)
3388:                | (1 << OTHER_LETTER)
3389:                | (1 << LETTER_NUMBER))) != 0;
3390:   }
3391: 
3392:   /**
3393:    * Determines if a character can follow the first letter in
3394:    * a Unicode identifier. This includes letters, connecting punctuation,
3395:    * digits, numeric letters, combining marks, non-spacing marks, and
3396:    * isIdentifierIgnorable.
3397:    * <br>
3398:    * Unicode identifier extender =
3399:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]|
3400:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3401:    *
3402:    * @param ch character to test
3403:    * @return true if ch can follow the first letter in a Unicode identifier
3404:    * @see #isIdentifierIgnorable(char)
3405:    * @see #isJavaIdentifierPart(char)
3406:    * @see #isLetterOrDigit(char)
3407:    * @see #isUnicodeIdentifierStart(char)
3408:    * @since 1.1
3409:    */
3410:   public static boolean isUnicodeIdentifierPart(char ch)
3411:   {
3412:     return isUnicodeIdentifierPart((int)ch);
3413:   }
3414:   
3415:   /**
3416:    * Determines if a character can follow the first letter in
3417:    * a Unicode identifier. This includes letters, connecting punctuation,
3418:    * digits, numeric letters, combining marks, non-spacing marks, and
3419:    * isIdentifierIgnorable.
3420:    * <br>
3421:    * Unicode identifier extender =
3422:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]|
3423:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3424:    *
3425:    * @param codePoint character to test
3426:    * @return true if ch can follow the first letter in a Unicode identifier
3427:    * @see #isIdentifierIgnorable(char)
3428:    * @see #isJavaIdentifierPart(char)
3429:    * @see #isLetterOrDigit(char)
3430:    * @see #isUnicodeIdentifierStart(char)
3431:    * @since 1.5
3432:    */
3433:   public static boolean isUnicodeIdentifierPart(int codePoint)
3434:   {
3435:     int category = getType(codePoint);
3436:     return ((1 << category)
3437:             & ((1 << UPPERCASE_LETTER)
3438:                | (1 << LOWERCASE_LETTER)
3439:                | (1 << TITLECASE_LETTER)
3440:                | (1 << MODIFIER_LETTER)
3441:                | (1 << OTHER_LETTER)
3442:                | (1 << NON_SPACING_MARK)
3443:                | (1 << COMBINING_SPACING_MARK)
3444:                | (1 << DECIMAL_DIGIT_NUMBER)
3445:                | (1 << LETTER_NUMBER)
3446:                | (1 << CONNECTOR_PUNCTUATION)
3447:                | (1 << FORMAT))) != 0
3448:       || (category == CONTROL && isIdentifierIgnorable(codePoint));
3449:   }
3450: 
3451:   /**
3452:    * Determines if a character is ignorable in a Unicode identifier. This
3453:    * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
3454:    * through <code>'\u0008'</code>, <code>'\u000E'</code> through
3455:    * <code>'\u001B'</code>, and <code>'\u007F'</code> through
3456:    * <code>'\u009F'</code>), and FORMAT characters.
3457:    * <br>
3458:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
3459:    *    |U+007F-U+009F
3460:    *
3461:    * @param ch character to test
3462:    * @return true if ch is ignorable in a Unicode or Java identifier
3463:    * @see #isJavaIdentifierPart(char)
3464:    * @see #isUnicodeIdentifierPart(char)
3465:    * @since 1.1
3466:    */
3467:   public static boolean isIdentifierIgnorable(char ch)
3468:   {
3469:     return isIdentifierIgnorable((int)ch);
3470:   }
3471:   /**
3472:    * Determines if a character is ignorable in a Unicode identifier. This
3473:    * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
3474:    * through <code>'\u0008'</code>, <code>'\u000E'</code> through
3475:    * <code>'\u001B'</code>, and <code>'\u007F'</code> through
3476:    * <code>'\u009F'</code>), and FORMAT characters.
3477:    * <br>
3478:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
3479:    *    |U+007F-U+009F
3480:    *
3481:    * @param codePoint character to test
3482:    * @return true if ch is ignorable in a Unicode or Java identifier
3483:    * @see #isJavaIdentifierPart(char)
3484:    * @see #isUnicodeIdentifierPart(char)
3485:    * @since 1.5
3486:    */
3487:   public static boolean isIdentifierIgnorable(int codePoint)
3488:   {
3489:     if ((codePoint >= 0 && codePoint <= 0x0008)
3490:         || (codePoint >= 0x000E && codePoint <= 0x001B)
3491:         || (codePoint >= 0x007F && codePoint <= 0x009F)
3492:         || getType(codePoint) == FORMAT)
3493:       return true;
3494:     return false;
3495:   }
3496: 
3497:   /**
3498:    * Converts a Unicode character into its lowercase equivalent mapping.
3499:    * If a mapping does not exist, then the character passed is returned.
3500:    * Note that isLowerCase(toLowerCase(ch)) does not always return true.
3501:    *
3502:    * @param ch character to convert to lowercase
3503:    * @return lowercase mapping of ch, or ch if lowercase mapping does
3504:    *         not exist
3505:    * @see #isLowerCase(char)
3506:    * @see #isUpperCase(char)
3507:    * @see #toTitleCase(char)
3508:    * @see #toUpperCase(char)
3509:    */
3510:   public static char toLowerCase(char ch)
3511:   {
3512:     return (char) (lower[0][readCodePoint((int)ch) >>> 7] + ch);
3513:   }
3514:   
3515:   /**
3516:    * Converts a Unicode character into its lowercase equivalent mapping.
3517:    * If a mapping does not exist, then the character passed is returned.
3518:    * Note that isLowerCase(toLowerCase(ch)) does not always return true.
3519:    *
3520:    * @param codePoint character to convert to lowercase
3521:    * @return lowercase mapping of ch, or ch if lowercase mapping does
3522:    *         not exist
3523:    * @see #isLowerCase(char)
3524:    * @see #isUpperCase(char)
3525:    * @see #toTitleCase(char)
3526:    * @see #toUpperCase(char)
3527:    * 
3528:    * @since 1.5
3529:    */
3530:   public static int toLowerCase(int codePoint)
3531:   {
3532:     // If the code point is unassigned or in one of the private use areas
3533:     // then we delegate the call to the appropriate private static inner class.
3534:     int plane = codePoint >>> 16;
3535:     if (plane > 2 && plane < 14)
3536:       return UnassignedCharacters.toLowerCase(codePoint);
3537:     if (plane > 14)
3538:       return PrivateUseCharacters.toLowerCase(codePoint);
3539:     
3540:     // The short value stored in lower[plane] is the signed difference between
3541:     // codePoint and its lowercase conversion.
3542:     return ((short)lower[plane][readCodePoint(codePoint) >>> 7]) + codePoint;
3543:   }
3544: 
3545:   /**
3546:    * Converts a Unicode character into its uppercase equivalent mapping.
3547:    * If a mapping does not exist, then the character passed is returned.
3548:    * Note that isUpperCase(toUpperCase(ch)) does not always return true.
3549:    *
3550:    * @param ch character to convert to uppercase
3551:    * @return uppercase mapping of ch, or ch if uppercase mapping does
3552:    *         not exist
3553:    * @see #isLowerCase(char)
3554:    * @see #isUpperCase(char)
3555:    * @see #toLowerCase(char)
3556:    * @see #toTitleCase(char)
3557:    */
3558:   public static char toUpperCase(char ch)
3559:   {
3560:     return (char) (upper[0][readCodePoint((int)ch) >>> 7] + ch);
3561:   }
3562:   
3563:   /**
3564:    * Converts a Unicode character into its uppercase equivalent mapping.
3565:    * If a mapping does not exist, then the character passed is returned.
3566:    * Note that isUpperCase(toUpperCase(ch)) does not always return true.
3567:    *
3568:    * @param codePoint character to convert to uppercase
3569:    * @return uppercase mapping of ch, or ch if uppercase mapping does
3570:    *         not exist
3571:    * @see #isLowerCase(char)
3572:    * @see #isUpperCase(char)
3573:    * @see #toLowerCase(char)
3574:    * @see #toTitleCase(char)
3575:    * 
3576:    * @since 1.5
3577:    */
3578:   public static int toUpperCase(int codePoint)
3579:   {
3580:     // If the code point is unassigned or in one of the private use areas
3581:     // then we delegate the call to the appropriate private static inner class.
3582:     int plane = codePoint >>> 16;
3583:     if (plane > 2 && plane < 14)
3584:       return UnassignedCharacters.toUpperCase(codePoint);
3585:     if (plane > 14)
3586:       return PrivateUseCharacters.toUpperCase(codePoint);
3587:         
3588:     // The short value stored in upper[plane] is the signed difference between
3589:     // codePoint and its uppercase conversion.
3590:     return ((short)upper[plane][readCodePoint(codePoint) >>> 7]) + codePoint;
3591:   }
3592: 
3593:   /**
3594:    * Converts a Unicode character into its titlecase equivalent mapping.
3595:    * If a mapping does not exist, then the character passed is returned.
3596:    * Note that isTitleCase(toTitleCase(ch)) does not always return true.
3597:    *
3598:    * @param ch character to convert to titlecase
3599:    * @return titlecase mapping of ch, or ch if titlecase mapping does
3600:    *         not exist
3601:    * @see #isTitleCase(char)
3602:    * @see #toLowerCase(char)
3603:    * @see #toUpperCase(char)
3604:    */
3605:   public static char toTitleCase(char ch)
3606:   {
3607:     // As title is short, it doesn't hurt to exhaustively iterate over it.
3608:     for (int i = title.length - 2; i >= 0; i -= 2)
3609:       if (title[i] == ch)
3610:         return title[i + 1];
3611:     return toUpperCase(ch);
3612:   }
3613:   
3614:   /**
3615:    * Converts a Unicode character into its titlecase equivalent mapping.
3616:    * If a mapping does not exist, then the character passed is returned.
3617:    * Note that isTitleCase(toTitleCase(ch)) does not always return true.
3618:    *
3619:    * @param codePoint character to convert to titlecase
3620:    * @return titlecase mapping of ch, or ch if titlecase mapping does
3621:    *         not exist
3622:    * @see #isTitleCase(char)
3623:    * @see #toLowerCase(char)
3624:    * @see #toUpperCase(char)
3625:    * 
3626:    * @since 1.5
3627:    */
3628:   public static int toTitleCase(int codePoint)
3629:   {
3630:     // As of Unicode 4.0.0 no characters outside of plane 0 have
3631:     // titlecase mappings that are different from their uppercase
3632:     // mapping.
3633:     if (codePoint < 0x10000)
3634:       return (int) toTitleCase((char)codePoint);
3635:     return toUpperCase(codePoint);
3636:   }
3637: 
3638:   /**
3639:    * Converts a character into a digit of the specified radix. If the radix
3640:    * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch)
3641:    * exceeds the radix, or if ch is not a decimal digit or in the case
3642:    * insensitive set of 'a'-'z', the result is -1.
3643:    * <br>
3644:    * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3645:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3646:    *
3647:    * @param ch character to convert into a digit
3648:    * @param radix radix in which ch is a digit
3649:    * @return digit which ch represents in radix, or -1 not a valid digit
3650:    * @see #MIN_RADIX
3651:    * @see #MAX_RADIX
3652:    * @see #forDigit(int, int)
3653:    * @see #isDigit(char)
3654:    * @see #getNumericValue(char)
3655:    */
3656:   public static int digit(char ch, int radix)
3657:   {
3658:     if (radix < MIN_RADIX || radix > MAX_RADIX)
3659:       return -1;
3660:     char attr = readCodePoint((int)ch);
3661:     if (((1 << (attr & TYPE_MASK))
3662:          & ((1 << UPPERCASE_LETTER)
3663:             | (1 << LOWERCASE_LETTER)
3664:             | (1 << DECIMAL_DIGIT_NUMBER))) != 0)
3665:       {
3666:         // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
3667:         int digit = numValue[0][attr >> 7];
3668:         return (digit < radix) ? digit : -1;
3669:       }
3670:     return -1;
3671:   }
3672: 
3673:   /**
3674:    * Converts a character into a digit of the specified radix. If the radix
3675:    * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch)
3676:    * exceeds the radix, or if ch is not a decimal digit or in the case
3677:    * insensitive set of 'a'-'z', the result is -1.
3678:    * <br>
3679:    * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3680:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3681:    *
3682:    * @param codePoint character to convert into a digit
3683:    * @param radix radix in which ch is a digit
3684:    * @return digit which ch represents in radix, or -1 not a valid digit
3685:    * @see #MIN_RADIX
3686:    * @see #MAX_RADIX
3687:    * @see #forDigit(int, int)
3688:    * @see #isDigit(char)
3689:    * @see #getNumericValue(char)
3690:    */
3691:   public static int digit(int codePoint, int radix)
3692:   {
3693:     if (radix < MIN_RADIX || radix > MAX_RADIX)
3694:       return -1;
3695:     
3696:     // If the code point is unassigned or in one of the private use areas
3697:     // then we delegate the call to the appropriate private static inner class.
3698:     int plane = codePoint >>> 16;
3699:     if (plane > 2 && plane < 14)
3700:       return UnassignedCharacters.digit(codePoint, radix);
3701:     if (plane > 14)
3702:       return PrivateUseCharacters.digit(codePoint, radix);
3703:     char attr = readCodePoint(codePoint);
3704:     if (((1 << (attr & TYPE_MASK))
3705:          & ((1 << UPPERCASE_LETTER)
3706:             | (1 << LOWERCASE_LETTER)
3707:             | (1 << DECIMAL_DIGIT_NUMBER))) != 0)
3708:       {
3709:         // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
3710:         int digit = numValue[plane][attr >> 7];
3711:         
3712:         // If digit is less than or equal to -3 then the numerical value was 
3713:         // too large to fit into numValue and is stored in CharData.LARGENUMS.
3714:         if (digit <= -3)
3715:           digit = CharData.LARGENUMS[-digit - 3];
3716:         return (digit < radix) ? digit : -1;
3717:       }
3718:     return -1;
3719:   }
3720:   
3721:   /**
3722:    * Returns the Unicode numeric value property of a character. For example,
3723:    * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3724:    *
3725:    * <p>This method also returns values for the letters A through Z, (not
3726:    * specified by Unicode), in these ranges: <code>'\u0041'</code>
3727:    * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3728:    * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3729:    * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3730:    * <code>'\uFF5A'</code> (full width variants).
3731:    *
3732:    * <p>If the character lacks a numeric value property, -1 is returned.
3733:    * If the character has a numeric value property which is not representable
3734:    * as a nonnegative integer, such as a fraction, -2 is returned.
3735:    *
3736:    * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3737:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3738:    *
3739:    * @param ch character from which the numeric value property will
3740:    *        be retrieved
3741:    * @return the numeric value property of ch, or -1 if it does not exist, or
3742:    *         -2 if it is not representable as a nonnegative integer
3743:    * @see #forDigit(int, int)
3744:    * @see #digit(char, int)
3745:    * @see #isDigit(char)
3746:    * @since 1.1
3747:    */
3748:   public static int getNumericValue(char ch)
3749:   {
3750:     // Treat numValue as signed.
3751:     return (short) numValue[0][readCodePoint((int)ch) >> 7];
3752:   }
3753:   
3754:   /**
3755:    * Returns the Unicode numeric value property of a character. For example,
3756:    * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3757:    *
3758:    * <p>This method also returns values for the letters A through Z, (not
3759:    * specified by Unicode), in these ranges: <code>'\u0041'</code>
3760:    * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3761:    * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3762:    * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3763:    * <code>'\uFF5A'</code> (full width variants).
3764:    *
3765:    * <p>If the character lacks a numeric value property, -1 is returned.
3766:    * If the character has a numeric value property which is not representable
3767:    * as a nonnegative integer, such as a fraction, -2 is returned.
3768:    *
3769:    * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3770:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3771:    *
3772:    * @param codePoint character from which the numeric value property will
3773:    *        be retrieved
3774:    * @return the numeric value property of ch, or -1 if it does not exist, or
3775:    *         -2 if it is not representable as a nonnegative integer
3776:    * @see #forDigit(int, int)
3777:    * @see #digit(char, int)
3778:    * @see #isDigit(char)
3779:    * @since 1.5
3780:    */
3781:   public static int getNumericValue(int codePoint)
3782:   {
3783:     // If the code point is unassigned or in one of the private use areas
3784:     // then we delegate the call to the appropriate private static inner class.
3785:     int plane = codePoint >>> 16;
3786:     if (plane > 2 && plane < 14)
3787:       return UnassignedCharacters.getNumericValue(codePoint);
3788:     if (plane > 14)
3789:       return PrivateUseCharacters.getNumericValue(codePoint);
3790:     
3791:     // If the value N found in numValue[plane] is less than or equal to -3
3792:     // then the numeric value was too big to fit into 16 bits and is 
3793:     // stored in CharData.LARGENUMS at offset (-N - 3).
3794:     short num = (short)numValue[plane][readCodePoint(codePoint) >> 7];
3795:     if (num <= -3)
3796:       return CharData.LARGENUMS[-num - 3];
3797:     return num;
3798:   }
3799: 
3800:   /**
3801:    * Determines if a character is a ISO-LATIN-1 space. This is only the five
3802:    * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
3803:    * <code>'\r'</code>, and <code>' '</code>.
3804:    * <br>
3805:    * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
3806:    *
3807:    * @param ch character to test
3808:    * @return true if ch is a space, else false
3809:    * @deprecated Replaced by {@link #isWhitespace(char)}
3810:    * @see #isSpaceChar(char)
3811:    * @see #isWhitespace(char)
3812:    */
3813:   public static boolean isSpace(char ch)
3814:   {
3815:     // Performing the subtraction up front alleviates need to compare longs.
3816:     return ch-- <= ' ' && ((1 << ch)
3817:                            & ((1 << (' ' - 1))
3818:                               | (1 << ('\t' - 1))
3819:                               | (1 << ('\n' - 1))
3820:                               | (1 << ('\r' - 1))
3821:                               | (1 << ('\f' - 1)))) != 0;
3822:   }
3823: 
3824:   /**
3825:    * Determines if a character is a Unicode space character. This includes
3826:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
3827:    * <br>
3828:    * Unicode space = [Zs]|[Zp]|[Zl]
3829:    *
3830:    * @param ch character to test
3831:    * @return true if ch is a Unicode space, else false
3832:    * @see #isWhitespace(char)
3833:    * @since 1.1
3834:    */
3835:   public static boolean isSpaceChar(char ch)
3836:   {
3837:     return isSpaceChar((int)ch);
3838:   }
3839:   
3840:   /**
3841:    * Determines if a character is a Unicode space character. This includes
3842:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
3843:    * <br>
3844:    * Unicode space = [Zs]|[Zp]|[Zl]
3845:    *
3846:    * @param codePoint character to test
3847:    * @return true if ch is a Unicode space, else false
3848:    * @see #isWhitespace(char)
3849:    * @since 1.5
3850:    */
3851:   public static boolean isSpaceChar(int codePoint)
3852:   {
3853:     return ((1 << getType(codePoint))
3854:             & ((1 << SPACE_SEPARATOR)
3855:                | (1 << LINE_SEPARATOR)
3856:                | (1 << PARAGRAPH_SEPARATOR))) != 0;
3857:   }
3858: 
3859:   /**
3860:    * Determines if a character is Java whitespace. This includes Unicode
3861:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3862:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3863:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3864:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3865:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3866:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3867:    * and <code>'\u001F'</code>.
3868:    * <br>
3869:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3870:    *
3871:    * @param ch character to test
3872:    * @return true if ch is Java whitespace, else false
3873:    * @see #isSpaceChar(char)
3874:    * @since 1.1
3875:    */
3876:   public static boolean isWhitespace(char ch)
3877:   {
3878:     return isWhitespace((int) ch);
3879:   }
3880:   
3881:   /**
3882:    * Determines if a character is Java whitespace. This includes Unicode
3883:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3884:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3885:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3886:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3887:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3888:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3889:    * and <code>'\u001F'</code>.
3890:    * <br>
3891:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3892:    *
3893:    * @param codePoint character to test
3894:    * @return true if ch is Java whitespace, else false
3895:    * @see #isSpaceChar(char)
3896:    * @since 1.5
3897:    */
3898:   public static boolean isWhitespace(int codePoint)
3899:   {
3900:     int plane = codePoint >>> 16;
3901:     if (plane > 2 && plane < 14)
3902:       return UnassignedCharacters.isWhiteSpace(codePoint);
3903:     if (plane > 14)
3904:       return PrivateUseCharacters.isWhiteSpace(codePoint);
3905:     
3906:     int attr = readCodePoint(codePoint);
3907:     return ((((1 << (attr & TYPE_MASK))
3908:               & ((1 << SPACE_SEPARATOR)
3909:                  | (1 << LINE_SEPARATOR)
3910:                  | (1 << PARAGRAPH_SEPARATOR))) != 0)
3911:             && (attr & NO_BREAK_MASK) == 0)
3912:       || (codePoint <= '\u001F' && ((1 << codePoint)
3913:                              & ((1 << '\t')
3914:                                 | (1 << '\n')
3915:                                 | (1 << '\u000B')
3916:                                 | (1 << '\u000C')
3917:                                 | (1 << '\r')
3918:                                 | (1 << '\u001C')
3919:                                 | (1 << '\u001D')
3920:                                 | (1 << '\u001E')
3921:                                 | (1 << '\u001F'))) != 0);
3922:   }
3923: 
3924:   /**
3925:    * Determines if a character has the ISO Control property.
3926:    * <br>
3927:    * ISO Control = [Cc]
3928:    *
3929:    * @param ch character to test
3930:    * @return true if ch is an ISO Control character, else false
3931:    * @see #isSpaceChar(char)
3932:    * @see #isWhitespace(char)
3933:    * @since 1.1
3934:    */
3935:   public static boolean isISOControl(char ch)
3936:   {
3937:     return isISOControl((int)ch);
3938:   }
3939:   
3940:   /**
3941:    * Determines if the character is an ISO Control character.  This is true
3942:    * if the code point is in the range [0, 0x001F] or if it is in the range
3943:    * [0x007F, 0x009F].
3944:    * @param codePoint the character to check
3945:    * @return true if the character is in one of the above ranges
3946:    * 
3947:    * @since 1.5
3948:    */
3949:   public static boolean isISOControl(int codePoint)
3950:   {
3951:     if ((codePoint >= 0 && codePoint <= 0x001F)
3952:         || (codePoint >= 0x007F && codePoint <= 0x009F))
3953:       return true;
3954:     return false;      
3955:   }
3956: 
3957:   /**
3958:    * Returns the Unicode general category property of a character.
3959:    *
3960:    * @param ch character from which the general category property will
3961:    *        be retrieved
3962:    * @return the character category property of ch as an integer
3963:    * @see #UNASSIGNED
3964:    * @see #UPPERCASE_LETTER
3965:    * @see #LOWERCASE_LETTER
3966:    * @see #TITLECASE_LETTER
3967:    * @see #MODIFIER_LETTER
3968:    * @see #OTHER_LETTER
3969:    * @see #NON_SPACING_MARK
3970:    * @see #ENCLOSING_MARK
3971:    * @see #COMBINING_SPACING_MARK
3972:    * @see #DECIMAL_DIGIT_NUMBER
3973:    * @see #LETTER_NUMBER
3974:    * @see #OTHER_NUMBER
3975:    * @see #SPACE_SEPARATOR
3976:    * @see #LINE_SEPARATOR
3977:    * @see #PARAGRAPH_SEPARATOR
3978:    * @see #CONTROL
3979:    * @see #FORMAT
3980:    * @see #PRIVATE_USE
3981:    * @see #SURROGATE
3982:    * @see #DASH_PUNCTUATION
3983:    * @see #START_PUNCTUATION
3984:    * @see #END_PUNCTUATION
3985:    * @see #CONNECTOR_PUNCTUATION
3986:    * @see #OTHER_PUNCTUATION
3987:    * @see #MATH_SYMBOL
3988:    * @see #CURRENCY_SYMBOL
3989:    * @see #MODIFIER_SYMBOL
3990:    * @see #INITIAL_QUOTE_PUNCTUATION
3991:    * @see #FINAL_QUOTE_PUNCTUATION
3992:    * @since 1.1
3993:    */
3994:   public static int getType(char ch)
3995:   {
3996:     return getType((int)ch);
3997:   }
3998:   
3999:   /**
4000:    * Returns the Unicode general category property of a character.
4001:    *
4002:    * @param codePoint character from which the general category property will
4003:    *        be retrieved
4004:    * @return the character category property of ch as an integer
4005:    * @see #UNASSIGNED
4006:    * @see #UPPERCASE_LETTER
4007:    * @see #LOWERCASE_LETTER
4008:    * @see #TITLECASE_LETTER
4009:    * @see #MODIFIER_LETTER
4010:    * @see #OTHER_LETTER
4011:    * @see #NON_SPACING_MARK
4012:    * @see #ENCLOSING_MARK
4013:    * @see #COMBINING_SPACING_MARK
4014:    * @see #DECIMAL_DIGIT_NUMBER
4015:    * @see #LETTER_NUMBER
4016:    * @see #OTHER_NUMBER
4017:    * @see #SPACE_SEPARATOR
4018:    * @see #LINE_SEPARATOR
4019:    * @see #PARAGRAPH_SEPARATOR
4020:    * @see #CONTROL
4021:    * @see #FORMAT
4022:    * @see #PRIVATE_USE
4023:    * @see #SURROGATE
4024:    * @see #DASH_PUNCTUATION
4025:    * @see #START_PUNCTUATION
4026:    * @see #END_PUNCTUATION
4027:    * @see #CONNECTOR_PUNCTUATION
4028:    * @see #OTHER_PUNCTUATION
4029:    * @see #MATH_SYMBOL
4030:    * @see #CURRENCY_SYMBOL
4031:    * @see #MODIFIER_SYMBOL
4032:    * @see #INITIAL_QUOTE_PUNCTUATION
4033:    * @see #FINAL_QUOTE_PUNCTUATION
4034:    * 
4035:    * @since 1.5
4036:    */
4037:   public static int getType(int codePoint)
4038:   {
4039:     // If the codePoint is unassigned or in one of the private use areas
4040:     // then we delegate the call to the appropriate private static inner class.
4041:     int plane = codePoint >>> 16;
4042:     if (plane > 2 && plane < 14)
4043:       return UnassignedCharacters.getType(codePoint);
4044:     if (plane > 14)
4045:       return PrivateUseCharacters.getType(codePoint);
4046:     
4047:     return readCodePoint(codePoint) & TYPE_MASK;
4048:   }
4049: 
4050:   /**
4051:    * Converts a digit into a character which represents that digit
4052:    * in a specified radix. If the radix exceeds MIN_RADIX or MAX_RADIX,
4053:    * or the digit exceeds the radix, then the null character <code>'\0'</code>
4054:    * is returned.  Otherwise the return value is in '0'-'9' and 'a'-'z'.
4055:    * <br>
4056:    * return value boundary = U+0030-U+0039|U+0061-U+007A
4057:    *
4058:    * @param digit digit to be converted into a character
4059:    * @param radix radix of digit
4060:    * @return character representing digit in radix, or '\0'
4061:    * @see #MIN_RADIX
4062:    * @see #MAX_RADIX
4063:    * @see #digit(char, int)
4064:    */
4065:   public static char forDigit(int digit, int radix)
4066:   {
4067:     if (radix < MIN_RADIX || radix > MAX_RADIX
4068:         || digit < 0 || digit >= radix)
4069:       return '\0';
4070:     return Number.digits[digit];
4071:   }
4072: 
4073:   /**
4074:    * Returns the Unicode directionality property of the character. This
4075:    * is used in the visual ordering of text.
4076:    *
4077:    * @param ch the character to look up
4078:    * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
4079:    * @see #DIRECTIONALITY_UNDEFINED
4080:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT
4081:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT
4082:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4083:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER
4084:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4085:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4086:    * @see #DIRECTIONALITY_ARABIC_NUMBER
4087:    * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4088:    * @see #DIRECTIONALITY_NONSPACING_MARK
4089:    * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
4090:    * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
4091:    * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
4092:    * @see #DIRECTIONALITY_WHITESPACE
4093:    * @see #DIRECTIONALITY_OTHER_NEUTRALS
4094:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4095:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4096:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4097:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4098:    * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4099:    * @since 1.4
4100:    */
4101:   public static byte getDirectionality(char ch)
4102:   {
4103:     // The result will correctly be signed.
4104:     return getDirectionality((int)ch);
4105:   }
4106:   
4107:   /**
4108:    * Returns the Unicode directionality property of the character. This
4109:    * is used in the visual ordering of text.
4110:    *
4111:    * @param codePoint the character to look up
4112:    * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
4113:    * @see #DIRECTIONALITY_UNDEFINED
4114:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT
4115:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT
4116:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4117:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER
4118:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4119:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4120:    * @see #DIRECTIONALITY_ARABIC_NUMBER
4121:    * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4122:    * @see #DIRECTIONALITY_NONSPACING_MARK
4123:    * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
4124:    * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
4125:    * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
4126:    * @see #DIRECTIONALITY_WHITESPACE
4127:    * @see #DIRECTIONALITY_OTHER_NEUTRALS
4128:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4129:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4130:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4131:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4132:    * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4133:    * @since 1.5
4134:    */
4135:   public static byte getDirectionality(int codePoint)
4136:   {
4137:     // If the code point is unassigned or in one of the private use areas
4138:     // then we delegate the call to the appropriate private static inner class.
4139:     int plane = codePoint >>> 16;
4140:     if (plane > 2 && plane < 14)
4141:       return UnassignedCharacters.getDirectionality(codePoint);
4142:     if (plane > 14)
4143:       return PrivateUseCharacters.getDirectionality(codePoint);
4144:     
4145:     // The result will correctly be signed.
4146:     return (byte) (direction[plane][readCodePoint(codePoint) >> 7] >> 2);
4147:   }
4148:   
4149:   /**
4150:    * Determines whether the character is mirrored according to Unicode. For
4151:    * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
4152:    * left-to-right text, but ')' in right-to-left text.
4153:    *
4154:    * @param ch the character to look up
4155:    * @return true if the character is mirrored
4156:    * @since 1.4
4157:    */
4158:   public static boolean isMirrored(char ch)
4159:   {
4160:     return (readCodePoint((int)ch) & MIRROR_MASK) != 0;
4161:   }
4162:   
4163:   /**
4164:    * Determines whether the character is mirrored according to Unicode. For
4165:    * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
4166:    * left-to-right text, but ')' in right-to-left text.
4167:    *
4168:    * @param codePoint the character to look up
4169:    * @return true if the character is mirrored
4170:    * @since 1.5
4171:    */
4172:   public static boolean isMirrored(int codePoint)
4173:   {
4174:     // If the code point is unassigned or part of one of the private use areas
4175:     // then we delegate the call to the appropriate private static inner class.
4176:     int plane = codePoint >>> 16;
4177:     if (plane > 2 && plane < 14)
4178:       return UnassignedCharacters.isMirrored(codePoint);
4179:     if (plane > 14)
4180:       return PrivateUseCharacters.isMirrored(codePoint);
4181:     
4182:     return (readCodePoint(codePoint) & MIRROR_MASK) != 0;
4183:   }
4184: 
4185:   /**
4186:    * Compares another Character to this Character, numerically.
4187:    *
4188:    * @param anotherCharacter Character to compare with this Character
4189:    * @return a negative integer if this Character is less than
4190:    *         anotherCharacter, zero if this Character is equal, and
4191:    *         a positive integer if this Character is greater
4192:    * @throws NullPointerException if anotherCharacter is null
4193:    * @since 1.2
4194:    */
4195:   public int compareTo(Character anotherCharacter)
4196:   {
4197:     return value - anotherCharacter.value;
4198:   }
4199: 
4200:   /**
4201:    * Compares an object to this Character.  Assuming the object is a
4202:    * Character object, this method performs the same comparison as
4203:    * compareTo(Character).
4204:    *
4205:    * @param o object to compare
4206:    * @return the comparison value
4207:    * @throws ClassCastException if o is not a Character object
4208:    * @throws NullPointerException if o is null
4209:    * @see #compareTo(Character)
4210:    * @since 1.2
4211:    */
4212:   public int compareTo(Object o)
4213:   {
4214:     return compareTo((Character) o);
4215:   }
4216: 
4217:   /**
4218:    * Returns an <code>Character</code> object wrapping the value.
4219:    * In contrast to the <code>Character</code> constructor, this method
4220:    * will cache some values.  It is used by boxing conversion.
4221:    *
4222:    * @param val the value to wrap
4223:    * @return the <code>Character</code>
4224:    * 
4225:    * @since 1.5
4226:    */
4227:   public static Character valueOf(char val)
4228:   {
4229:     if (val > MAX_CACHE)
4230:       return new Character(val);
4231:     synchronized (charCache)
4232:       {
4233:     if (charCache[val - MIN_VALUE] == null)
4234:       charCache[val - MIN_VALUE] = new Character(val);
4235:     return charCache[val - MIN_VALUE];
4236:       }
4237:   }
4238: 
4239:   /**
4240:    * Reverse the bytes in val.
4241:    * @since 1.5
4242:    */
4243:   public static char reverseBytes(char val)
4244:   {
4245:     return (char) (((val >> 8) & 0xff) | ((val << 8) & 0xff00));
4246:   }
4247: 
4248:   /**
4249:    * Converts a unicode code point to a UTF-16 representation of that
4250:    * code point.
4251:    * 
4252:    * @param codePoint the unicode code point
4253:    *
4254:    * @return the UTF-16 representation of that code point
4255:    *
4256:    * @throws IllegalArgumentException if the code point is not a valid
4257:    *         unicode code point
4258:    *
4259:    * @since 1.5
4260:    */
4261:   public static char[] toChars(int codePoint)
4262:   {
4263:     if (!isValidCodePoint(codePoint))
4264:       throw new IllegalArgumentException("Illegal Unicode code point : "
4265:                                          + codePoint);
4266:     char[] result = new char[charCount(codePoint)];
4267:     int ignore = toChars(codePoint, result, 0);
4268:     return result;
4269:   }
4270: 
4271:   /**
4272:    * Converts a unicode code point to its UTF-16 representation.
4273:    *
4274:    * @param codePoint the unicode code point
4275:    * @param dst the target char array
4276:    * @param dstIndex the start index for the target
4277:    *
4278:    * @return number of characters written to <code>dst</code>
4279:    *
4280:    * @throws IllegalArgumentException if <code>codePoint</code> is not a
4281:    *         valid unicode code point
4282:    * @throws NullPointerException if <code>dst</code> is <code>null</code>
4283:    * @throws IndexOutOfBoundsException if <code>dstIndex</code> is not valid
4284:    *         in <code>dst</code> or if the UTF-16 representation does not
4285:    *         fit into <code>dst</code>
4286:    *
4287:    * @since 1.5
4288:    */
4289:   public static int toChars(int codePoint, char[] dst, int dstIndex)
4290:   {
4291:     if (!isValidCodePoint(codePoint))
4292:       {
4293:         throw new IllegalArgumentException("not a valid code point: "
4294:                                            + codePoint);
4295:       }
4296: 
4297:     int result;
4298:     if (isSupplementaryCodePoint(codePoint))
4299:       {
4300:         // Write second char first to cause IndexOutOfBoundsException
4301:         // immediately.
4302:         final int cp2 = codePoint - 0x10000;
4303:         dst[dstIndex + 1] = (char) ((cp2 % 0x400) + (int) MIN_LOW_SURROGATE);
4304:         dst[dstIndex] = (char) ((cp2 / 0x400) + (int) MIN_HIGH_SURROGATE);
4305:         result = 2;
4306:       }
4307:     else
4308:       {
4309:         dst[dstIndex] = (char) codePoint;
4310:         result = 1; 
4311:       }
4312:     return result;
4313:   }
4314: 
4315:   /**
4316:    * Return number of 16-bit characters required to represent the given
4317:    * code point.
4318:    *
4319:    * @param codePoint a unicode code point
4320:    *
4321:    * @return 2 if codePoint >= 0x10000, 1 otherwise.
4322:    *
4323:    * @since 1.5
4324:    */
4325:   public static int charCount(int codePoint)
4326:   {
4327:     return 
4328:       (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) 
4329:       ? 2 
4330:       : 1;
4331:   }
4332: 
4333:   /**
4334:    * Determines whether the specified code point is
4335:    * in the range 0x10000 .. 0x10FFFF, i.e. the character is within the Unicode
4336:    * supplementary character range.
4337:    *
4338:    * @param codePoint a Unicode code point
4339:    *
4340:    * @return <code>true</code> if code point is in supplementary range
4341:    *
4342:    * @since 1.5
4343:    */
4344:   public static boolean isSupplementaryCodePoint(int codePoint)
4345:   {
4346:     return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4347:       && codePoint <= MAX_CODE_POINT;
4348:   }
4349: 
4350:   /**
4351:    * Determines whether the specified code point is
4352:    * in the range 0x0000 .. 0x10FFFF, i.e. it is a valid Unicode code point.
4353:    *
4354:    * @param codePoint a Unicode code point
4355:    *
4356:    * @return <code>true</code> if code point is valid
4357:    *
4358:    * @since 1.5
4359:    */
4360:   public static boolean isValidCodePoint(int codePoint)
4361:   {
4362:     return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
4363:   }
4364: 
4365:   /**
4366:    * Return true if the given character is a high surrogate.
4367:    * @param ch the character
4368:    * @return true if the character is a high surrogate character
4369:    *
4370:    * @since 1.5
4371:    */
4372:   public static boolean isHighSurrogate(char ch)
4373:   {
4374:     return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
4375:   }
4376: 
4377:   /**
4378:    * Return true if the given character is a low surrogate.
4379:    * @param ch the character
4380:    * @return true if the character is a low surrogate character
4381:    *
4382:    * @since 1.5
4383:    */
4384:   public static boolean isLowSurrogate(char ch)
4385:   {
4386:     return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
4387:   }
4388: 
4389:   /**
4390:    * Return true if the given characters compose a surrogate pair.
4391:    * This is true if the first character is a high surrogate and the
4392:    * second character is a low surrogate.
4393:    * @param ch1 the first character
4394:    * @param ch2 the first character
4395:    * @return true if the characters compose a surrogate pair
4396:    *
4397:    * @since 1.5
4398:    */
4399:   public static boolean isSurrogatePair(char ch1, char ch2)
4400:   {
4401:     return isHighSurrogate(ch1) && isLowSurrogate(ch2);
4402:   }
4403: 
4404:   /**
4405:    * Given a valid surrogate pair, this returns the corresponding
4406:    * code point.
4407:    * @param high the high character of the pair
4408:    * @param low the low character of the pair
4409:    * @return the corresponding code point
4410:    *
4411:    * @since 1.5
4412:    */
4413:   public static int toCodePoint(char high, char low)
4414:   {
4415:     return ((high - MIN_HIGH_SURROGATE) * 0x400) +
4416:       (low - MIN_LOW_SURROGATE) + 0x10000;
4417:   }
4418: 
4419:   /**
4420:    * Get the code point at the specified index in the CharSequence.
4421:    * This is like CharSequence#charAt(int), but if the character is
4422:    * the start of a surrogate pair, and there is a following
4423:    * character, and this character completes the pair, then the
4424:    * corresponding supplementary code point is returned.  Otherwise,
4425:    * the character at the index is returned.
4426:    *
4427:    * @param sequence the CharSequence
4428:    * @param index the index of the codepoint to get, starting at 0
4429:    * @return the codepoint at the specified index
4430:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4431:    * @since 1.5
4432:    */
4433:   public static int codePointAt(CharSequence sequence, int index)
4434:   {
4435:     int len = sequence.length();
4436:     if (index < 0 || index >= len)
4437:       throw new IndexOutOfBoundsException();
4438:     char high = sequence.charAt(index);
4439:     if (! isHighSurrogate(high) || ++index >= len)
4440:       return high;
4441:     char low = sequence.charAt(index);
4442:     if (! isLowSurrogate(low))
4443:       return high;
4444:     return toCodePoint(high, low);
4445:   }
4446: 
4447:   /**
4448:    * Get the code point at the specified index in the CharSequence.
4449:    * If the character is the start of a surrogate pair, and there is a
4450:    * following character, and this character completes the pair, then
4451:    * the corresponding supplementary code point is returned.
4452:    * Otherwise, the character at the index is returned.
4453:    *
4454:    * @param chars the character array in which to look
4455:    * @param index the index of the codepoint to get, starting at 0
4456:    * @return the codepoint at the specified index
4457:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4458:    * @since 1.5
4459:    */
4460:   public static int codePointAt(char[] chars, int index)
4461:   {
4462:     return codePointAt(chars, index, chars.length);
4463:   }
4464: 
4465:   /**
4466:    * Get the code point at the specified index in the CharSequence.
4467:    * If the character is the start of a surrogate pair, and there is a
4468:    * following character within the specified range, and this
4469:    * character completes the pair, then the corresponding
4470:    * supplementary code point is returned.  Otherwise, the character
4471:    * at the index is returned.
4472:    *
4473:    * @param chars the character array in which to look
4474:    * @param index the index of the codepoint to get, starting at 0
4475:    * @param limit the limit past which characters should not be examined
4476:    * @return the codepoint at the specified index
4477:    * @throws IndexOutOfBoundsException if index is negative or &gt;=
4478:    * limit, or if limit is negative or &gt;= the length of the array
4479:    * @since 1.5
4480:    */
4481:   public static int codePointAt(char[] chars, int index, int limit)
4482:   {
4483:     if (index < 0 || index >= limit || limit < 0 || limit > chars.length)
4484:       throw new IndexOutOfBoundsException();
4485:     char high = chars[index];
4486:     if (! isHighSurrogate(high) || ++index >= limit)
4487:       return high;
4488:     char low = chars[index];
4489:     if (! isLowSurrogate(low))
4490:       return high;
4491:     return toCodePoint(high, low);
4492:   }
4493: 
4494:   /**
4495:    * Get the code point before the specified index.  This is like
4496:    * #codePointAt(char[], int), but checks the characters at
4497:    * <code>index-1</code> and <code>index-2</code> to see if they form
4498:    * a supplementary code point.  If they do not, the character at
4499:    * <code>index-1</code> is returned.
4500:    *
4501:    * @param chars the character array
4502:    * @param index the index just past the codepoint to get, starting at 0
4503:    * @return the codepoint at the specified index
4504:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4505:    * @since 1.5
4506:    */
4507:   public static int codePointBefore(char[] chars, int index)
4508:   {
4509:     return codePointBefore(chars, index, 1);
4510:   }
4511: 
4512:   /**
4513:    * Get the code point before the specified index.  This is like
4514:    * #codePointAt(char[], int), but checks the characters at
4515:    * <code>index-1</code> and <code>index-2</code> to see if they form
4516:    * a supplementary code point.  If they do not, the character at
4517:    * <code>index-1</code> is returned.  The start parameter is used to
4518:    * limit the range of the array which may be examined.
4519:    *
4520:    * @param chars the character array
4521:    * @param index the index just past the codepoint to get, starting at 0
4522:    * @param start the index before which characters should not be examined
4523:    * @return the codepoint at the specified index
4524:    * @throws IndexOutOfBoundsException if index is &gt; start or &gt;
4525:    * the length of the array, or if limit is negative or &gt;= the
4526:    * length of the array
4527:    * @since 1.5
4528:    */
4529:   public static int codePointBefore(char[] chars, int index, int start)
4530:   {
4531:     if (index < start || index > chars.length
4532:     || start < 0 || start >= chars.length)
4533:       throw new IndexOutOfBoundsException();
4534:     --index;
4535:     char low = chars[index];
4536:     if (! isLowSurrogate(low) || --index < start)
4537:       return low;
4538:     char high = chars[index];
4539:     if (! isHighSurrogate(high))
4540:       return low;
4541:     return toCodePoint(high, low);
4542:   }
4543: 
4544:   /**
4545:    * Get the code point before the specified index.  This is like
4546:    * #codePointAt(CharSequence, int), but checks the characters at
4547:    * <code>index-1</code> and <code>index-2</code> to see if they form
4548:    * a supplementary code point.  If they do not, the character at
4549:    * <code>index-1</code> is returned.
4550:    *
4551:    * @param sequence the CharSequence
4552:    * @param index the index just past the codepoint to get, starting at 0
4553:    * @return the codepoint at the specified index
4554:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4555:    * @since 1.5
4556:    */
4557:   public static int codePointBefore(CharSequence sequence, int index)
4558:   {
4559:     int len = sequence.length();
4560:     if (index < 1 || index > len)
4561:       throw new IndexOutOfBoundsException();
4562:     --index;
4563:     char low = sequence.charAt(index);
4564:     if (! isLowSurrogate(low) || --index < 0)
4565:       return low;
4566:     char high = sequence.charAt(index);
4567:     if (! isHighSurrogate(high))
4568:       return low;
4569:     return toCodePoint(high, low);
4570:   }
4571: } // class Character