Source for java.text.Bidi

   1: /* Bidi.java -- Bidirectional Algorithm implementation
   2:    Copyright (C) 2005, 2006  Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10:  
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.text;
  40: 
  41: import java.awt.font.NumericShaper;
  42: import java.awt.font.TextAttribute;
  43: import java.util.ArrayList;
  44: 
  45: 
  46: /**
  47:  * Bidirectional Algorithm implementation.
  48:  *
  49:  * The full algorithm is
  50:  * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard
  51:  * Annex #9: The Bidirectional Algorithm</a>.
  52:  *
  53:  * @since 1.4
  54:  */
  55: public final class Bidi
  56: {
  57:   /**
  58:    * This indicates that a strongly directional character in the text should
  59:    * set the initial direction, but if no such character is found, then the
  60:    * initial direction will be left-to-right.
  61:    */
  62:   public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
  63: 
  64:   /**
  65:    * This indicates that a strongly directional character in the text should
  66:    * set the initial direction, but if no such character is found, then the
  67:    * initial direction will be right-to-left.
  68:    */
  69:   public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
  70: 
  71:   /**
  72:    * This indicates that the initial direction should be left-to-right.
  73:    */
  74:   public static final int DIRECTION_LEFT_TO_RIGHT = 0;
  75: 
  76:   /**
  77:    * This indicates that the initial direction should be right-to-left.
  78:    */
  79:   public static final int DIRECTION_RIGHT_TO_LEFT = 1;
  80: 
  81:   // Flags used when computing the result.
  82:   private static final int LTOR = 1 << DIRECTION_LEFT_TO_RIGHT;
  83:   private static final int RTOL = 1 << DIRECTION_RIGHT_TO_LEFT;
  84: 
  85:   // The text we are examining, and the starting offset.
  86:   // If we had a better way to handle createLineBidi, we wouldn't
  87:   // need this at all -- which for the String case would be an
  88:   // efficiency win.
  89:   private char[] text;
  90:   private int textOffset;
  91:   // The embeddings corresponding to the text, and the starting offset.
  92:   private byte[] embeddings;
  93:   private int embeddingOffset;
  94:   // The length of the text (and embeddings) to use.
  95:   private int length;
  96:   // The flags.
  97:   private int flags;
  98: 
  99:   // All instance fields following this point are initialized
 100:   // during analysis.  Fields before this must be set by the constructor.
 101: 
 102:   // The initial embedding level.
 103:   private int baseEmbedding;
 104:   // The type of each character in the text.
 105:   private byte[] types;
 106:   // The levels we compute.
 107:   private byte[] levels;
 108: 
 109:   // A list of indices where a formatting code was found.  These
 110:   // are indicies into the original text -- not into the text after
 111:   // the codes have been removed.
 112:   private ArrayList formatterIndices;
 113: 
 114:   // Indices of the starts of runs in the text. 
 115:   private int[] runs;
 116: 
 117:   // A convenience field where we keep track of what kinds of runs
 118:   // we've seen.
 119:   private int resultFlags;
 120: 
 121:   /**
 122:    * Create a new Bidi object given an attributed character iterator.
 123:    * This constructor will examine various attributes of the text:
 124:    * <ul>
 125:    * <li> {@link TextAttribute#RUN_DIRECTION} is used to determine the
 126:    * paragraph's base embedding level.  This constructor will recognize
 127:    * either {@link TextAttribute#RUN_DIRECTION_LTR} or
 128:    * {@link TextAttribute#RUN_DIRECTION_RTL}.  If neither is given,
 129:    * {@link #DIRECTION_DEFAULT_LEFT_TO_RIGHT} is assumed.
 130:    * </li>
 131:    * 
 132:    * <li> If {@link TextAttribute#NUMERIC_SHAPING} is seen, then numeric
 133:    * shaping will be done before the Bidi algorithm is run.
 134:    * </li>
 135:    * 
 136:    * <li> If {@link TextAttribute#BIDI_EMBEDDING} is seen on a given
 137:    * character, then the value of this attribute will be used as an
 138:    * embedding level override.
 139:    * </li>
 140:    * </ul>
 141:    * @param iter the attributed character iterator to use
 142:    */
 143:   public Bidi(AttributedCharacterIterator iter)
 144:   {
 145:     // If set, this attribute should be set on all characters.
 146:     // We don't check this (should we?) but we do assume that we
 147:     // can simply examine the first character.
 148:     Object val = iter.getAttribute(TextAttribute.RUN_DIRECTION);
 149:     if (val == TextAttribute.RUN_DIRECTION_LTR)
 150:       this.flags = DIRECTION_LEFT_TO_RIGHT;
 151:     else if (val == TextAttribute.RUN_DIRECTION_RTL)
 152:       this.flags = DIRECTION_RIGHT_TO_LEFT;
 153:     else
 154:       this.flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
 155: 
 156:     // Likewise this attribute should be specified on the whole text.
 157:     // We read it here and then, if it is set, we apply the numeric shaper
 158:     // to the text before processing it.
 159:     NumericShaper shaper = null;
 160:     val = iter.getAttribute(TextAttribute.NUMERIC_SHAPING);
 161:     if (val instanceof NumericShaper)
 162:       shaper = (NumericShaper) val;
 163: 
 164:     char[] text = new char[iter.getEndIndex() - iter.getBeginIndex()];
 165:     this.embeddings = new byte[this.text.length];
 166:     this.embeddingOffset = 0;
 167:     this.length = text.length;
 168:     for (int i = 0; i < this.text.length; ++i)
 169:       {
 170:         this.text[i] = iter.current();
 171: 
 172:         val = iter.getAttribute(TextAttribute.BIDI_EMBEDDING);
 173:         if (val instanceof Integer)
 174:           {
 175:             int ival = ((Integer) val).intValue();
 176:             byte bval;
 177:             if (ival < -62 || ival > 62)
 178:               bval = 0;
 179:             else
 180:               bval = (byte) ival;
 181:             this.embeddings[i] = bval;
 182:           }
 183:       }
 184: 
 185:     // Invoke the numeric shaper, if specified.
 186:     if (shaper != null)
 187:       shaper.shape(this.text, 0, this.length);
 188: 
 189:     runBidi();
 190:   }
 191: 
 192:   /**
 193:    * Create a new Bidi object with the indicated text and, possibly, explicit
 194:    * embedding settings.
 195:    * 
 196:    * If the embeddings array is null, it is ignored.  Otherwise it is taken to
 197:    * be explicit embedding settings corresponding to the text.  Positive values
 198:    * from 1 to 61 are embedding levels, and negative values from -1 to -61 are
 199:    * embedding overrides.  (FIXME: not at all clear what this really means.)
 200:    * 
 201:    * @param text the text to use
 202:    * @param offset the offset of the first character of the text
 203:    * @param embeddings the explicit embeddings, or null if there are none
 204:    * @param embedOffset the offset of the first embedding value to use
 205:    * @param length the length of both the text and the embeddings
 206:    * @param flags a flag indicating the base embedding direction
 207:    */
 208:   public Bidi(char[] text, int offset, byte[] embeddings, int embedOffset,
 209:               int length, int flags)
 210:   {
 211:     if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
 212:         && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
 213:         && flags != DIRECTION_LEFT_TO_RIGHT
 214:         && flags != DIRECTION_RIGHT_TO_LEFT)
 215:       throw new IllegalArgumentException("unrecognized 'flags' argument: "
 216:                                          + flags);
 217:     this.text = text;
 218:     this.textOffset = offset;
 219:     this.embeddings = embeddings;
 220:     this.embeddingOffset = embedOffset;
 221:     this.length = length;
 222:     this.flags = flags;
 223: 
 224:     runBidi();
 225:   }
 226: 
 227:   /**
 228:    * Create a new Bidi object using the contents of the given String
 229:    * as the text.
 230:    * @param text the text to use
 231:    * @param flags a flag indicating the base embedding direction
 232:    */
 233:   public Bidi(String text, int flags)
 234:   {
 235:     if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
 236:         && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
 237:         && flags != DIRECTION_LEFT_TO_RIGHT
 238:         && flags != DIRECTION_RIGHT_TO_LEFT)
 239:       throw new IllegalArgumentException("unrecognized 'flags' argument: "
 240:                                          + flags);
 241: 
 242:     // This is inefficient, but it isn't clear whether it matters.
 243:     // If it does we can change our implementation a bit to allow either
 244:     // a String or a char[].
 245:     this.text = text.toCharArray();
 246:     this.textOffset = 0;
 247:     this.embeddings = null;
 248:     this.embeddingOffset = 0;
 249:     this.length = text.length();
 250:     this.flags = flags;
 251: 
 252:     runBidi();
 253:   }
 254: 
 255:   /**
 256:    * Implementation function which computes the initial type of
 257:    * each character in the input.
 258:    */
 259:   private void computeTypes()
 260:   {
 261:     types = new byte[length];
 262:     for (int i = 0; i < length; ++i)
 263:       types[i] = Character.getDirectionality(text[textOffset + i]);
 264:   }
 265: 
 266:   /**
 267:    * An internal function which implements rules P2 and P3.
 268:    * This computes the base embedding level.
 269:    * @return the paragraph's base embedding level
 270:    */
 271:   private int computeParagraphEmbeddingLevel()
 272:   {
 273:     // First check to see if the user supplied a directionality override.
 274:     if (flags == DIRECTION_LEFT_TO_RIGHT
 275:         || flags == DIRECTION_RIGHT_TO_LEFT)
 276:       return flags;
 277: 
 278:     // This implements rules P2 and P3.
 279:     // (Note that we don't need P1, as the user supplies
 280:     // a paragraph.)
 281:     for (int i = 0; i < length; ++i)
 282:       {
 283:         int dir = types[i];
 284:         if (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT)
 285:           return DIRECTION_LEFT_TO_RIGHT;
 286:         if (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT
 287:             || dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
 288:           return DIRECTION_RIGHT_TO_LEFT;
 289:       }
 290:     return (flags == DIRECTION_DEFAULT_LEFT_TO_RIGHT
 291:             ? DIRECTION_LEFT_TO_RIGHT
 292:             : DIRECTION_RIGHT_TO_LEFT);
 293:   }
 294: 
 295:   /**
 296:    * An internal function which implements rules X1 through X9.
 297:    * This computes the initial levels for the text, handling
 298:    * explicit overrides and embeddings.
 299:    */
 300:   private void computeExplicitLevels()
 301:   {
 302:     levels = new byte[length];
 303:     byte currentEmbedding = (byte) baseEmbedding;
 304:     // The directional override is a Character directionality
 305:     // constant.  -1 means there is no override.
 306:     byte directionalOverride = -1;
 307:     // The stack of pushed embeddings, and the stack pointer.
 308:     // Note that because the direction is inherent in the depth,
 309:     // and because we have a bit left over in a byte, we can encode
 310:     // the override, if any, directly in this value on the stack.
 311:     final int MAX_DEPTH = 62;
 312:     byte[] embeddingStack = new byte[MAX_DEPTH];
 313:     int sp = 0;
 314: 
 315:     for (int i = 0; i < length; ++i)
 316:       {
 317:         // If we see an explicit embedding, we use that, even if
 318:         // the current character is itself a directional override.
 319:         if (embeddings != null && embeddings[embeddingOffset + i] != 0)
 320:           {
 321:             // It isn't at all clear what we're supposed to do here.
 322:             // What does a negative value really mean?
 323:             // Should we push on the embedding stack here?
 324:             currentEmbedding = embeddings[embeddingOffset + i]; 
 325:             if (currentEmbedding < 0)
 326:               {
 327:                 currentEmbedding = (byte) -currentEmbedding;
 328:                 directionalOverride
 329:                   = (((currentEmbedding % 2) == 0)
 330:                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 331:                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 332:               }
 333:             else
 334:               directionalOverride = -1;
 335:             continue;
 336:           }
 337:         // No explicit embedding.
 338:         boolean isLtoR = false;
 339:         boolean isSpecial = true;
 340:         switch (types[i])
 341:           {
 342:             case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
 343:             case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
 344:               isLtoR = true;
 345:               // Fall through.
 346:             case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
 347:             case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
 348:               {
 349:                 byte newEmbedding;
 350:                 if (isLtoR)
 351:                   {
 352:                     // Least greater even.
 353:                     newEmbedding = (byte) ((currentEmbedding & ~1) + 2);
 354:                   }
 355:                 else
 356:                   {
 357:                     // Least greater odd.
 358:                     newEmbedding = (byte) ((currentEmbedding + 1) | 1);
 359:                   }
 360:                 // FIXME: we don't properly handle invalid pushes.
 361:                 if (newEmbedding < MAX_DEPTH)
 362:                   {
 363:                     // The new level is valid.  Push the old value.
 364:                     // See above for a comment on the encoding here.
 365:                     if (directionalOverride != -1)
 366:                       currentEmbedding |= Byte.MIN_VALUE;
 367:                     embeddingStack[sp++] = currentEmbedding;
 368:                     currentEmbedding = newEmbedding;
 369:                     if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE)
 370:                       directionalOverride = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
 371:                     else if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE)
 372:                       directionalOverride = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
 373:                     else
 374:                       directionalOverride = -1;
 375:                   }
 376:                 }
 377:               break;
 378:             case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
 379:               {
 380:                 // FIXME: we don't properly handle a pop with a corresponding
 381:                 // invalid push.
 382:                 if (sp == 0)
 383:                   {
 384:                     // We saw a pop without a push.  Just ignore it.
 385:                     break;
 386:                   }
 387:                 byte newEmbedding = embeddingStack[--sp];
 388:                 currentEmbedding = (byte) (newEmbedding & 0x7f);
 389:                 if (newEmbedding < 0)
 390:                   directionalOverride
 391:                   = (((newEmbedding & 1) == 0)
 392:                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 393:                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 394:                 else
 395:                   directionalOverride = -1;
 396:               }
 397:               break;
 398:             default:
 399:               isSpecial = false;
 400:               break;
 401:           }
 402:         levels[i] = currentEmbedding;
 403:         if (isSpecial)
 404:           {
 405:             // Mark this character for removal.
 406:             if (formatterIndices == null)
 407:               formatterIndices = new ArrayList();
 408:             formatterIndices.add(Integer.valueOf(i));
 409:           }
 410:         else if (directionalOverride != -1)
 411:           types[i] = directionalOverride;
 412:       }
 413: 
 414:     // Remove the formatting codes and update both the arrays
 415:     // and 'length'.  It would be more efficient not to remove
 416:     // these codes, but it is also more complicated.  Also, the
 417:     // Unicode algorithm reference does not properly describe
 418:     // how this is to be done -- from what I can tell, their suggestions
 419:     // in this area will not yield the correct results.
 420:     if (formatterIndices == null)
 421:       return;
 422:     int output = 0, input = 0;
 423:     final int size = formatterIndices.size();
 424:     for (int i = 0; i <= size; ++i)
 425:       {
 426:         int nextFmt;
 427:         if (i == size)
 428:           nextFmt = length;
 429:         else
 430:           nextFmt = ((Integer) formatterIndices.get(i)).intValue();
 431:         // Non-formatter codes are from 'input' to 'nextFmt'.
 432:         int len = nextFmt - input;
 433:         System.arraycopy(levels, input, levels, output, len);
 434:         System.arraycopy(types, input, types, output, len);
 435:         output += len;
 436:         input = nextFmt + 1;
 437:       }
 438:     length -= formatterIndices.size();
 439:   }
 440: 
 441:   /**
 442:    * An internal function to compute the boundaries of runs
 443:    * in the text.  It isn't strictly necessary to do this, but
 444:    * it lets us write some following passes in a less complicated
 445:    * way.  Also it lets us efficiently implement some of the public
 446:    * methods.  A run is simply a sequence of characters at the
 447:    * same level.
 448:    */
 449:   private void computeRuns()
 450:   {
 451:     int runCount = 0;
 452:     int currentEmbedding = baseEmbedding;
 453:     for (int i = 0; i < length; ++i)
 454:       {
 455:         if (levels[i] != currentEmbedding)
 456:           {
 457:             currentEmbedding = levels[i];
 458:             ++runCount;
 459:           }
 460:       }
 461: 
 462:     // This may be called multiple times.  If so, and if
 463:     // the number of runs has not changed, then don't bother
 464:     // allocating a new array.
 465:     if (runs == null || runs.length != runCount + 1)
 466:       runs = new int[runCount + 1];
 467:     int where = 0;
 468:     int lastRunStart = 0;
 469:     currentEmbedding = baseEmbedding;
 470:     for (int i = 0; i < length; ++i)
 471:       {
 472:         if (levels[i] != currentEmbedding)
 473:           {
 474:             runs[where++] = lastRunStart;
 475:             lastRunStart = i;
 476:             currentEmbedding = levels[i];
 477:           }
 478:       }
 479:     runs[where++] = lastRunStart;
 480:   }
 481: 
 482:   /**
 483:    * An internal method to resolve weak types.  This implements
 484:    * rules W1 through W7.
 485:    */
 486:   private void resolveWeakTypes()
 487:   {
 488:     final int runCount = getRunCount();
 489:     
 490:     int previousLevel = baseEmbedding;
 491:     for (int run = 0; run < runCount; ++run)
 492:       {
 493:         int start = getRunStart(run);
 494:         int end = getRunLimit(run);
 495:         int level = getRunLevel(run);
 496: 
 497:         // These are the names used in the Bidi algorithm.
 498:         byte sor = (((Math.max(previousLevel, level) % 2) == 0)
 499:                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 500:                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 501:         int nextLevel;
 502:         if (run == runCount - 1)
 503:           nextLevel = baseEmbedding;
 504:         else
 505:           nextLevel = getRunLevel(run + 1);
 506:         byte eor = (((Math.max(level, nextLevel) % 2) == 0)
 507:                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 508:                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 509: 
 510:         byte prevType = sor;
 511:         byte prevStrongType = sor;
 512:         for (int i = start; i < end; ++i)
 513:           {
 514:             final byte nextType = (i == end - 1) ? eor : types[i + 1];
 515: 
 516:             // Rule W1: change NSM to the prevailing direction.
 517:             if (types[i] == Character.DIRECTIONALITY_NONSPACING_MARK)
 518:               types[i] = prevType;
 519:             else
 520:               prevType = types[i];
 521: 
 522:             // Rule W2: change EN to AN in some cases.
 523:             if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 524:               {
 525:                 if (prevStrongType == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
 526:                   types[i] = Character.DIRECTIONALITY_ARABIC_NUMBER;
 527:               }
 528:             else if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
 529:                      || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT
 530:                      || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
 531:               prevStrongType = types[i];
 532: 
 533:             // Rule W3: change AL to R.
 534:             if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
 535:               types[i] = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
 536: 
 537:             // Rule W4: handle separators between two numbers.
 538:             if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER
 539:                 && nextType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 540:               {
 541:                 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
 542:                     || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
 543:                   types[i] = nextType;
 544:               }
 545:             else if (prevType == Character.DIRECTIONALITY_ARABIC_NUMBER
 546:                      && nextType == Character.DIRECTIONALITY_ARABIC_NUMBER
 547:                      && types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
 548:               types[i] = nextType;
 549: 
 550:             // Rule W5: change a sequence of european terminators to
 551:             // european numbers, if they are adjacent to european numbers.
 552:             // We also include BN characters in this.
 553:             if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 554:                 || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
 555:               {
 556:                 if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 557:                   types[i] = prevType;
 558:                 else
 559:                   {
 560:                     // Look ahead to see if there is an EN terminating this
 561:                     // sequence of ETs.
 562:                     int j = i + 1;
 563:                     while (j < end
 564:                            && (types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 565:                                || types[j] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL))
 566:                       ++j;
 567:                     if (j < end
 568:                         && types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 569:                       {
 570:                         // Change them all to EN now.
 571:                         for (int k = i; k < j; ++k)
 572:                           types[k] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
 573:                       }
 574:                   }
 575:               }
 576: 
 577:             // Rule W6: separators and terminators change to ON.
 578:             // Again we include BN.
 579:             if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 580:                 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 581:                 || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
 582:                 || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
 583:               types[i] = Character.DIRECTIONALITY_OTHER_NEUTRALS;
 584: 
 585:             // Rule W7: change european number types.
 586:             if (prevStrongType == Character.DIRECTIONALITY_LEFT_TO_RIGHT
 587:                 && types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 588:               types[i] = prevStrongType;
 589:           }
 590: 
 591:         previousLevel = level;
 592:       }
 593:   }
 594: 
 595:   /**
 596:    * An internal method to resolve neutral types.  This implements
 597:    * rules N1 and N2.
 598:    */
 599:   private void resolveNeutralTypes()
 600:   {
 601:     // This implements rules N1 and N2.
 602:     final int runCount = getRunCount();
 603:     
 604:     int previousLevel = baseEmbedding;
 605:     for (int run = 0; run < runCount; ++run)
 606:       {
 607:         int start = getRunStart(run);
 608:         int end = getRunLimit(run);
 609:         int level = getRunLevel(run);
 610: 
 611:         byte embeddingDirection
 612:           = (((level % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 613:                                 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 614:         // These are the names used in the Bidi algorithm.
 615:         byte sor = (((Math.max(previousLevel, level) % 2) == 0)
 616:                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 617:                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 618:         int nextLevel;
 619:         if (run == runCount - 1)
 620:           nextLevel = baseEmbedding;
 621:         else
 622:           nextLevel = getRunLevel(run + 1);
 623:         byte eor = (((Math.max(level, nextLevel) % 2) == 0)
 624:                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
 625:                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
 626: 
 627:         byte prevStrong = sor;
 628:         int neutralStart = -1;
 629:         for (int i = start; i <= end; ++i)
 630:           {
 631:             byte newStrong = -1;
 632:             byte thisType = i == end ? eor : types[i];
 633:             switch (thisType)
 634:               {
 635:               case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
 636:                 newStrong = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
 637:                 break;
 638:               case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
 639:               case Character.DIRECTIONALITY_ARABIC_NUMBER:
 640:               case Character.DIRECTIONALITY_EUROPEAN_NUMBER:
 641:                 newStrong = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
 642:                 break;
 643:               case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
 644:               case Character.DIRECTIONALITY_OTHER_NEUTRALS:
 645:               case Character.DIRECTIONALITY_SEGMENT_SEPARATOR:
 646:               case Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR:
 647:                 if (neutralStart == -1)
 648:                   neutralStart = i;
 649:                 break;
 650:               }
 651:             // If we see a strong character, update all the neutrals.
 652:             if (newStrong != -1)
 653:               {
 654:                 if (neutralStart != -1)
 655:                   {
 656:                     byte override = (prevStrong == newStrong
 657:                                      ? prevStrong
 658:                                      : embeddingDirection);
 659:                     for (int j = neutralStart; j < i; ++j)
 660:                       types[i] = override;
 661:                   }
 662:                 prevStrong = newStrong;
 663:                 neutralStart = -1;
 664:               }
 665:           }
 666: 
 667:         previousLevel = level;
 668:       }
 669:   }
 670: 
 671:   /**
 672:    * An internal method to resolve implicit levels.
 673:    * This implements rules I1 and I2.
 674:    */
 675:   private void resolveImplicitLevels()
 676:   {
 677:     // This implements rules I1 and I2.
 678:     for (int i = 0; i < length; ++i)
 679:       {
 680:         if ((levels[i] & 1) == 0)
 681:           {
 682:             if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
 683:               ++levels[i];
 684:             else if (types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
 685:                      || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 686:               levels[i] += 2;
 687:           }
 688:         else
 689:           {
 690:             if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
 691:                 || types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
 692:                 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
 693:               ++levels[i];
 694:           }
 695: 
 696:         // Update the result flags.
 697:         resultFlags |= 1 << (levels[i] & 1);
 698:       }
 699:     // One final update of the result flags, using the base level.
 700:     resultFlags |= 1 << baseEmbedding;
 701:   }
 702: 
 703:   /**
 704:    * This reinserts the formatting codes that we removed early on.
 705:    * Actually it does not insert formatting codes per se, but rather
 706:    * simply inserts new levels at the appropriate locations in the
 707:    * 'levels' array.
 708:    */
 709:   private void reinsertFormattingCodes()
 710:   {
 711:     if (formatterIndices == null)
 712:       return;
 713:     int input = length;
 714:     int output = levels.length;
 715:     // Process from the end as we are copying the array over itself here.
 716:     for (int index = formatterIndices.size() - 1; index >= 0; --index)
 717:       {
 718:         int nextFmt = ((Integer) formatterIndices.get(index)).intValue();
 719: 
 720:         // nextFmt points to a location in the original array.  So,
 721:         // nextFmt+1 is the target of our copying.  output is the location
 722:         // to which we last copied, thus we can derive the length of the
 723:         // copy from it.
 724:         int len = output - nextFmt - 1;
 725:         output = nextFmt;
 726:         input -= len;
 727:         // Note that we no longer need 'types' at this point, so we
 728:         // only edit 'levels'.
 729:         if (nextFmt + 1 < levels.length)
 730:           System.arraycopy(levels, input, levels, nextFmt + 1, len);
 731: 
 732:         // Now set the level at the reinsertion point.
 733:         int rightLevel;
 734:         if (output == levels.length - 1)
 735:           rightLevel = baseEmbedding;
 736:         else
 737:           rightLevel = levels[output + 1];
 738:         int leftLevel;
 739:         if (input == 0)
 740:           leftLevel = baseEmbedding;
 741:         else
 742:           leftLevel = levels[input];
 743:         levels[output] = (byte) Math.max(leftLevel, rightLevel);
 744:       }
 745:     length = levels.length;
 746:   }
 747: 
 748:   /**
 749:    * This is the main internal entry point.  After a constructor
 750:    * has initialized the appropriate local state, it will call
 751:    * this method to do all the work.
 752:    */
 753:   private void runBidi()
 754:   {
 755:     computeTypes();
 756:     baseEmbedding = computeParagraphEmbeddingLevel();
 757:     computeExplicitLevels();
 758:     computeRuns();
 759:     resolveWeakTypes();
 760:     resolveNeutralTypes();
 761:     resolveImplicitLevels();
 762:     // We're done with the types.  Let the GC clean up.
 763:     types = null;
 764:     reinsertFormattingCodes();
 765:     // After resolving the implicit levels, the number
 766:     // of runs may have changed.
 767:     computeRuns();
 768:   }
 769: 
 770:   /**
 771:    * Return true if the paragraph base embedding is left-to-right,
 772:    * false otherwise.
 773:    */
 774:   public boolean baseIsLeftToRight()
 775:   {
 776:     return baseEmbedding == DIRECTION_LEFT_TO_RIGHT;
 777:   }
 778: 
 779:   /**
 780:    * Create a new Bidi object for a single line of text, taken
 781:    * from the text used when creating the current Bidi object.
 782:    * @param start the index of the first character of the line
 783:    * @param end the index of the final character of the line
 784:    * @return a new Bidi object for the indicated line of text
 785:    */
 786:   public Bidi createLineBidi(int start, int end)
 787:   {
 788:     // This isn't the most efficient implementation possible.
 789:     // This probably does not matter, so we choose simplicity instead.
 790:     int level = getLevelAt(start);
 791:     int flag = (((level % 2) == 0)
 792:                 ? DIRECTION_LEFT_TO_RIGHT
 793:                 : DIRECTION_RIGHT_TO_LEFT);
 794:     return new Bidi(text, textOffset + start,
 795:                     embeddings, embeddingOffset + start,
 796:                     end - start, flag);
 797:   }
 798: 
 799:   /**
 800:    * Return the base embedding level of the paragraph.
 801:    */
 802:   public int getBaseLevel()
 803:   {
 804:     return baseEmbedding;
 805:   }
 806: 
 807:   /**
 808:    * Return the length of the paragraph, in characters.
 809:    */
 810:   public int getLength()
 811:   {
 812:     return length;
 813:   }
 814: 
 815:   /**
 816:    * Return the level at the indicated character.  If the
 817:    * supplied index is less than zero or greater than the length
 818:    * of the text, then the paragraph's base embedding level will
 819:    * be returned.
 820:    * @param offset the character to examine
 821:    * @return the level of that character
 822:    */
 823:   public int getLevelAt(int offset)
 824:   {
 825:     if (offset < 0 || offset >= length)
 826:       return getBaseLevel();
 827:     return levels[offset];
 828:   }
 829: 
 830:   /**
 831:    * Return the number of runs in the result.  A run is
 832:    * a sequence of characters at the same embedding level.
 833:    */
 834:   public int getRunCount()
 835:   {
 836:     return runs.length;
 837:   }
 838: 
 839:   /**
 840:    * Return the level of the indicated run.
 841:    * @param which the run to examine
 842:    * @return the level of that run
 843:    */
 844:   public int getRunLevel(int which)
 845:   {
 846:     return levels[runs[which]];
 847:   }
 848: 
 849:   /**
 850:    * Return the index of the character just following the end
 851:    * of the indicated run.
 852:    * @param which the run to examine
 853:    * @return the index of the character after the final character
 854:    * of the run
 855:    */
 856:   public int getRunLimit(int which)
 857:   {
 858:     if (which == runs.length - 1)
 859:       return length;
 860:     return runs[which + 1];
 861:   }
 862: 
 863:   /**
 864:    * Return the index of the first character in the indicated run.
 865:    * @param which the run to examine
 866:    * @return the index of the first character of the run
 867:    */
 868:   public int getRunStart(int which)
 869:   {
 870:     return runs[which];
 871:   }
 872: 
 873:   /**
 874:    * Return true if the text is entirely left-to-right, and the
 875:    * base embedding is also left-to-right.
 876:    */
 877:   public boolean isLeftToRight()
 878:   {
 879:     return resultFlags == LTOR;
 880:   }
 881: 
 882:   /**
 883:    * Return true if the text consists of mixed left-to-right and
 884:    * right-to-left runs, or if the text consists of one kind of run
 885:    * which differs from the base embedding direction.
 886:    */
 887:   public boolean isMixed()
 888:   {
 889:     return resultFlags == (LTOR | RTOL);
 890:   }
 891: 
 892:   /**
 893:    * Return true if the text is entirely right-to-left, and the
 894:    * base embedding is also right-to-left.
 895:    */
 896:   public boolean isRightToLeft()
 897:   {
 898:     return resultFlags == RTOL;
 899:   }
 900: 
 901:   /**
 902:    * Return a String describing the internal state of this object.
 903:    * This is only useful for debugging.
 904:    */
 905:   public String toString()
 906:   {
 907:     return "Bidi Bidi Bidi I like you, Buck!";
 908:   }
 909: 
 910:   /**
 911:    * Reorder objects according to the levels passed in.  This implements
 912:    * reordering as defined by the Unicode bidirectional layout specification.
 913:    * The levels are integers from 0 to 62; even numbers represent left-to-right
 914:    * runs, and odd numbers represent right-to-left runs.
 915:    *
 916:    * @param levels the levels associated with each object
 917:    * @param levelOffset the index of the first level to use
 918:    * @param objs the objects to reorder according to the levels
 919:    * @param objOffset the index of the first object to use
 920:    * @param count the number of objects (and levels) to manipulate
 921:    */
 922:   public static void reorderVisually(byte[] levels, int levelOffset,
 923:                                      Object[] objs, int objOffset, int count)
 924:   {
 925:     // We need a copy of the 'levels' array, as we are going to modify it.
 926:     // This is unfortunate but difficult to avoid.
 927:     byte[] levelCopy = new byte[count];
 928:     // Do this explicitly so we can also find the maximum depth at the
 929:     // same time.
 930:     int max = 0;
 931:     int lowestOdd = 63;
 932:     for (int i = 0; i < count; ++i)
 933:       {
 934:         levelCopy[i] = levels[levelOffset + i];
 935:         max = Math.max(levelCopy[i], max);
 936:         if (levelCopy[i] % 2 != 0)
 937:           lowestOdd = Math.min(lowestOdd, levelCopy[i]);
 938:       }
 939: 
 940:     // Reverse the runs starting with the deepest.
 941:     for (int depth = max; depth >= lowestOdd; --depth)
 942:       {
 943:         int start = 0;
 944:         while (start < count)
 945:           {
 946:             // Find the start of a run >= DEPTH.
 947:             while (start < count && levelCopy[start] < depth)
 948:               ++start;
 949:             if (start == count)
 950:               break;
 951:             // Find the end of the run.
 952:             int end = start + 1;
 953:             while (end < count && levelCopy[end] >= depth)
 954:               ++end;
 955: 
 956:             // Reverse this run.
 957:             for (int i = 0; i < (end - start) / 2; ++i)
 958:               {
 959:                 byte tmpb = levelCopy[end - i - 1];
 960:                 levelCopy[end - i - 1] = levelCopy[start + i];
 961:                 levelCopy[start + i] = tmpb;
 962:                 Object tmpo = objs[objOffset + end - i - 1];
 963:                 objs[objOffset + end - i - 1] = objs[objOffset + start + i];
 964:                 objs[objOffset + start + i] = tmpo;
 965:               }
 966: 
 967:             // Handle the next run.
 968:             start = end + 1;
 969:           }
 970:       }
 971:   }
 972: 
 973:   /**
 974:    * Returns false if all characters in the text between start and end
 975:    * are all left-to-right text. This implementation is just calls
 976:    * <code>Character.getDirectionality(char)</code> on all characters
 977:    * and makes sure all characters are either explicitly left-to-right
 978:    * or neutral in directionality (character types L, EN, ES, ET, AN,
 979:    * CS, S and WS).
 980:    */
 981:   public static boolean requiresBidi(char[] text, int start, int end)
 982:   {
 983:     for (int i = start; i < end; i++)
 984:       {
 985:     byte dir = Character.getDirectionality(text[i]);
 986:     if (dir != Character.DIRECTIONALITY_LEFT_TO_RIGHT
 987:         && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER
 988:         && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
 989:         && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
 990:         && dir != Character.DIRECTIONALITY_ARABIC_NUMBER
 991:         && dir != Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
 992:         && dir != Character.DIRECTIONALITY_SEGMENT_SEPARATOR
 993:         && dir != Character.DIRECTIONALITY_WHITESPACE)
 994:       return true;
 995:       }
 996: 
 997:     return false;
 998:   }
 999: }