GNU Classpath (0.91) | |
Frames | No Frames |
1: /* InputStreamReader.java -- Reader than transforms bytes to chars 2: Copyright (C) 1998, 1999, 2001, 2003, 2004, 2005, 2006 3: Free Software Foundation, Inc. 4: 5: This file is part of GNU Classpath. 6: 7: GNU Classpath is free software; you can redistribute it and/or modify 8: it under the terms of the GNU General Public License as published by 9: the Free Software Foundation; either version 2, or (at your option) 10: any later version. 11: 12: GNU Classpath is distributed in the hope that it will be useful, but 13: WITHOUT ANY WARRANTY; without even the implied warranty of 14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15: General Public License for more details. 16: 17: You should have received a copy of the GNU General Public License 18: along with GNU Classpath; see the file COPYING. If not, write to the 19: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 20: 02110-1301 USA. 21: 22: Linking this library statically or dynamically with other modules is 23: making a combined work based on this library. Thus, the terms and 24: conditions of the GNU General Public License cover the whole 25: combination. 26: 27: As a special exception, the copyright holders of this library give you 28: permission to link this library with independent modules to produce an 29: executable, regardless of the license terms of these independent 30: modules, and to copy and distribute the resulting executable under 31: terms of your choice, provided that you also meet, for each linked 32: independent module, the terms and conditions of the license of that 33: module. An independent module is a module which is not derived from 34: or based on this library. If you modify this library, you may extend 35: this exception to your version of the library, but you are not 36: obligated to do so. If you do not wish to do so, delete this 37: exception statement from your version. */ 38: 39: 40: package java.io; 41: 42: import gnu.classpath.SystemProperties; 43: import gnu.java.nio.charset.EncodingHelper; 44: 45: import java.nio.ByteBuffer; 46: import java.nio.CharBuffer; 47: import java.nio.charset.Charset; 48: import java.nio.charset.CharsetDecoder; 49: import java.nio.charset.CoderResult; 50: import java.nio.charset.CodingErrorAction; 51: 52: /** 53: * This class reads characters from a byte input stream. The characters 54: * read are converted from bytes in the underlying stream by a 55: * decoding layer. The decoding layer transforms bytes to chars according 56: * to an encoding standard. There are many available encodings to choose 57: * from. The desired encoding can either be specified by name, or if no 58: * encoding is selected, the system default encoding will be used. The 59: * system default encoding name is determined from the system property 60: * <code>file.encoding</code>. The only encodings that are guaranteed to 61: * be availalbe are "8859_1" (the Latin-1 character set) and "UTF8". 62: * Unforunately, Java does not provide a mechanism for listing the 63: * ecodings that are supported in a given implementation. 64: * <p> 65: * Here is a list of standard encoding names that may be available: 66: * <p> 67: * <ul> 68: * <li>8859_1 (ISO-8859-1/Latin-1)</li> 69: * <li>8859_2 (ISO-8859-2/Latin-2)</li> 70: * <li>8859_3 (ISO-8859-3/Latin-3)</li> 71: * <li>8859_4 (ISO-8859-4/Latin-4)</li> 72: * <li>8859_5 (ISO-8859-5/Latin-5)</li> 73: * <li>8859_6 (ISO-8859-6/Latin-6)</li> 74: * <li>8859_7 (ISO-8859-7/Latin-7)</li> 75: * <li>8859_8 (ISO-8859-8/Latin-8)</li> 76: * <li>8859_9 (ISO-8859-9/Latin-9)</li> 77: * <li>ASCII (7-bit ASCII)</li> 78: * <li>UTF8 (UCS Transformation Format-8)</li> 79: * <li>More later</li> 80: * </ul> 81: * <p> 82: * It is recommended that applications do not use 83: * <code>InputStreamReader</code>'s 84: * directly. Rather, for efficiency purposes, an object of this class 85: * should be wrapped by a <code>BufferedReader</code>. 86: * <p> 87: * Due to a deficiency the Java class library design, there is no standard 88: * way for an application to install its own byte-character encoding. 89: * 90: * @see BufferedReader 91: * @see InputStream 92: * 93: * @author Robert Schuster 94: * @author Aaron M. Renn (arenn@urbanophile.com) 95: * @author Per Bothner (bothner@cygnus.com) 96: * @date April 22, 1998. 97: */ 98: public class InputStreamReader extends Reader 99: { 100: /** 101: * The input stream. 102: */ 103: private InputStream in; 104: 105: /** 106: * The charset decoder. 107: */ 108: private CharsetDecoder decoder; 109: 110: /** 111: * End of stream reached. 112: */ 113: private boolean isDone = false; 114: 115: /** 116: * Need this. 117: */ 118: private float maxBytesPerChar; 119: 120: /** 121: * Buffer holding surplus loaded bytes (if any) 122: */ 123: private ByteBuffer byteBuffer; 124: 125: /** 126: * java.io canonical name of the encoding. 127: */ 128: private String encoding; 129: 130: /** 131: * We might decode to a 2-char UTF-16 surrogate, which won't fit in the 132: * output buffer. In this case we need to save the surrogate char. 133: */ 134: private char savedSurrogate; 135: private boolean hasSavedSurrogate = false; 136: 137: /** 138: * This method initializes a new instance of <code>InputStreamReader</code> 139: * to read from the specified stream using the default encoding. 140: * 141: * @param in The <code>InputStream</code> to read from 142: */ 143: public InputStreamReader(InputStream in) 144: { 145: if (in == null) 146: throw new NullPointerException(); 147: this.in = in; 148: try 149: { 150: encoding = SystemProperties.getProperty("file.encoding"); 151: // Don't use NIO if avoidable 152: if(EncodingHelper.isISOLatin1(encoding)) 153: { 154: encoding = "ISO8859_1"; 155: maxBytesPerChar = 1f; 156: decoder = null; 157: return; 158: } 159: Charset cs = EncodingHelper.getCharset(encoding); 160: decoder = cs.newDecoder(); 161: encoding = EncodingHelper.getOldCanonical(cs.name()); 162: try { 163: maxBytesPerChar = cs.newEncoder().maxBytesPerChar(); 164: } catch(UnsupportedOperationException _){ 165: maxBytesPerChar = 1f; 166: } 167: decoder.onMalformedInput(CodingErrorAction.REPLACE); 168: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 169: decoder.reset(); 170: } catch(RuntimeException e) { 171: encoding = "ISO8859_1"; 172: maxBytesPerChar = 1f; 173: decoder = null; 174: } catch(UnsupportedEncodingException e) { 175: encoding = "ISO8859_1"; 176: maxBytesPerChar = 1f; 177: decoder = null; 178: } 179: } 180: 181: /** 182: * This method initializes a new instance of <code>InputStreamReader</code> 183: * to read from the specified stream using a caller supplied character 184: * encoding scheme. Note that due to a deficiency in the Java language 185: * design, there is no way to determine which encodings are supported. 186: * 187: * @param in The <code>InputStream</code> to read from 188: * @param encoding_name The name of the encoding scheme to use 189: * 190: * @exception UnsupportedEncodingException If the encoding scheme 191: * requested is not available. 192: */ 193: public InputStreamReader(InputStream in, String encoding_name) 194: throws UnsupportedEncodingException 195: { 196: if (in == null 197: || encoding_name == null) 198: throw new NullPointerException(); 199: 200: this.in = in; 201: // Don't use NIO if avoidable 202: if(EncodingHelper.isISOLatin1(encoding_name)) 203: { 204: encoding = "ISO8859_1"; 205: maxBytesPerChar = 1f; 206: decoder = null; 207: return; 208: } 209: try { 210: Charset cs = EncodingHelper.getCharset(encoding_name); 211: try { 212: maxBytesPerChar = cs.newEncoder().maxBytesPerChar(); 213: } catch(UnsupportedOperationException _){ 214: maxBytesPerChar = 1f; 215: } 216: 217: decoder = cs.newDecoder(); 218: decoder.onMalformedInput(CodingErrorAction.REPLACE); 219: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 220: decoder.reset(); 221: 222: // The encoding should be the old name, if such exists. 223: encoding = EncodingHelper.getOldCanonical(cs.name()); 224: } catch(RuntimeException e) { 225: encoding = "ISO8859_1"; 226: maxBytesPerChar = 1f; 227: decoder = null; 228: } 229: } 230: 231: /** 232: * Creates an InputStreamReader that uses a decoder of the given 233: * charset to decode the bytes in the InputStream into 234: * characters. 235: * 236: * @since 1.4 237: */ 238: public InputStreamReader(InputStream in, Charset charset) { 239: if (in == null) 240: throw new NullPointerException(); 241: this.in = in; 242: decoder = charset.newDecoder(); 243: 244: try { 245: maxBytesPerChar = charset.newEncoder().maxBytesPerChar(); 246: } catch(UnsupportedOperationException _){ 247: maxBytesPerChar = 1f; 248: } 249: 250: decoder.onMalformedInput(CodingErrorAction.REPLACE); 251: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 252: decoder.reset(); 253: encoding = EncodingHelper.getOldCanonical(charset.name()); 254: } 255: 256: /** 257: * Creates an InputStreamReader that uses the given charset decoder 258: * to decode the bytes in the InputStream into characters. 259: * 260: * @since 1.4 261: */ 262: public InputStreamReader(InputStream in, CharsetDecoder decoder) { 263: if (in == null) 264: throw new NullPointerException(); 265: this.in = in; 266: this.decoder = decoder; 267: 268: Charset charset = decoder.charset(); 269: try { 270: if (charset == null) 271: maxBytesPerChar = 1f; 272: else 273: maxBytesPerChar = charset.newEncoder().maxBytesPerChar(); 274: } catch(UnsupportedOperationException _){ 275: maxBytesPerChar = 1f; 276: } 277: 278: decoder.onMalformedInput(CodingErrorAction.REPLACE); 279: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 280: decoder.reset(); 281: if (charset == null) 282: encoding = "US-ASCII"; 283: else 284: encoding = EncodingHelper.getOldCanonical(decoder.charset().name()); 285: } 286: 287: /** 288: * This method closes this stream, as well as the underlying 289: * <code>InputStream</code>. 290: * 291: * @exception IOException If an error occurs 292: */ 293: public void close() throws IOException 294: { 295: synchronized (lock) 296: { 297: // Makes sure all intermediate data is released by the decoder. 298: if (decoder != null) 299: decoder.reset(); 300: if (in != null) 301: in.close(); 302: in = null; 303: isDone = true; 304: decoder = null; 305: } 306: } 307: 308: /** 309: * This method returns the name of the encoding that is currently in use 310: * by this object. If the stream has been closed, this method is allowed 311: * to return <code>null</code>. 312: * 313: * @return The current encoding name 314: */ 315: public String getEncoding() 316: { 317: return in != null ? encoding : null; 318: } 319: 320: /** 321: * This method checks to see if the stream is ready to be read. It 322: * will return <code>true</code> if is, or <code>false</code> if it is not. 323: * If the stream is not ready to be read, it could (although is not required 324: * to) block on the next read attempt. 325: * 326: * @return <code>true</code> if the stream is ready to be read, 327: * <code>false</code> otherwise 328: * 329: * @exception IOException If an error occurs 330: */ 331: public boolean ready() throws IOException 332: { 333: if (in == null) 334: throw new IOException("Reader has been closed"); 335: 336: return in.available() != 0; 337: } 338: 339: /** 340: * This method reads up to <code>length</code> characters from the stream into 341: * the specified array starting at index <code>offset</code> into the 342: * array. 343: * 344: * @param buf The character array to recieve the data read 345: * @param offset The offset into the array to start storing characters 346: * @param length The requested number of characters to read. 347: * 348: * @return The actual number of characters read, or -1 if end of stream. 349: * 350: * @exception IOException If an error occurs 351: */ 352: public int read(char[] buf, int offset, int length) throws IOException 353: { 354: if (in == null) 355: throw new IOException("Reader has been closed"); 356: if (isDone) 357: return -1; 358: if(decoder != null){ 359: int totalBytes = (int)((double)length * maxBytesPerChar); 360: byte[] bytes = new byte[totalBytes]; 361: 362: int remaining = 0; 363: if(byteBuffer != null) 364: { 365: remaining = byteBuffer.remaining(); 366: byteBuffer.get(bytes, 0, remaining); 367: } 368: int read; 369: if(totalBytes - remaining > 0) 370: { 371: read = in.read(bytes, remaining, totalBytes - remaining); 372: if(read == -1){ 373: read = remaining; 374: isDone = true; 375: } else 376: read += remaining; 377: } else 378: read = remaining; 379: byteBuffer = ByteBuffer.wrap(bytes, 0, read); 380: CharBuffer cb = CharBuffer.wrap(buf, offset, length); 381: int startPos = cb.position(); 382: 383: if(hasSavedSurrogate){ 384: hasSavedSurrogate = false; 385: cb.put(savedSurrogate); 386: read++; 387: } 388: 389: CoderResult cr = decoder.decode(byteBuffer, cb, isDone); 390: decoder.reset(); 391: // 1 char remains which is the first half of a surrogate pair. 392: if(cr.isOverflow() && cb.hasRemaining()){ 393: CharBuffer overflowbuf = CharBuffer.allocate(2); 394: cr = decoder.decode(byteBuffer, overflowbuf, isDone); 395: overflowbuf.flip(); 396: if(overflowbuf.hasRemaining()) 397: { 398: cb.put(overflowbuf.get()); 399: savedSurrogate = overflowbuf.get(); 400: hasSavedSurrogate = true; 401: isDone = false; 402: } 403: } 404: 405: if(byteBuffer.hasRemaining()) { 406: byteBuffer.compact(); 407: byteBuffer.flip(); 408: isDone = false; 409: } else 410: byteBuffer = null; 411: 412: read = cb.position() - startPos; 413: return (read <= 0) ? -1 : read; 414: } else { 415: byte[] bytes = new byte[length]; 416: int read = in.read(bytes); 417: for(int i=0;i<read;i++) 418: buf[offset+i] = (char)(bytes[i]&0xFF); 419: return read; 420: } 421: } 422: 423: /** 424: * Reads an char from the input stream and returns it 425: * as an int in the range of 0-65535. This method also will return -1 if 426: * the end of the stream has been reached. 427: * <p> 428: * This method will block until the char can be read. 429: * 430: * @return The char read or -1 if end of stream 431: * 432: * @exception IOException If an error occurs 433: */ 434: public int read() throws IOException 435: { 436: char[] buf = new char[1]; 437: int count = read(buf, 0, 1); 438: return count > 0 ? buf[0] : -1; 439: } 440: 441: /** 442: * Skips the specified number of chars in the stream. It 443: * returns the actual number of chars skipped, which may be less than the 444: * requested amount. 445: * 446: * @param count The requested number of chars to skip 447: * 448: * @return The actual number of chars skipped. 449: * 450: * @exception IOException If an error occurs 451: */ 452: public long skip(long count) throws IOException 453: { 454: if (in == null) 455: throw new IOException("Reader has been closed"); 456: 457: return super.skip(count); 458: } 459: }
GNU Classpath (0.91) |