src/share/classes/java/io/DataInput.java

Print this page




  31  * reconstructing from them data in any of
  32  * the Java primitive types. There is also
  33  * a
  34  * facility for reconstructing a {@code String}
  35  * from data in
  36  * <a href="#modified-utf-8">modified UTF-8</a>
  37  * format.
  38  * <p>
  39  * It is generally true of all the reading
  40  * routines in this interface that if end of
  41  * file is reached before the desired number
  42  * of bytes has been read, an {@code EOFException}
  43  * (which is a kind of {@code IOException})
  44  * is thrown. If any byte cannot be read for
  45  * any reason other than end of file, an {@code IOException}
  46  * other than {@code EOFException} is
  47  * thrown. In particular, an {@code IOException}
  48  * may be thrown if the input stream has been
  49  * closed.
  50  *
  51  * <h4><a name="modified-utf-8">Modified UTF-8</a></h4>
  52  * <p>
  53  * Implementations of the DataInput and DataOutput interfaces represent
  54  * Unicode strings in a format that is a slight modification of UTF-8.
  55  * (For information regarding the standard UTF-8 format, see section
  56  * <i>3.9 Unicode Encoding Forms</i> of <i>The Unicode Standard, Version
  57  * 4.0</i>).
  58  * Note that in the following tables, the most significant bit appears in the
  59  * far left-hand column.
  60  * <p>
  61  * All characters in the range {@code '\u005Cu0001'} to
  62  * {@code '\u005Cu007F'} are represented by a single byte:
  63  *
  64  * <blockquote>
  65  *   <table border="1" cellspacing="0" cellpadding="8" width="50%"
  66  *          summary="Bit values and bytes">
  67  *     <tr>





  68  *       <td></td>
  69  *       <th id="bit_a">Bit Values</th>
  70  *     </tr>
  71  *     <tr>
  72  *       <th id="byte1_a">Byte 1</th>
  73  *       <td>
  74  *         <table border="1" cellspacing="0" width="100%">
  75  *           <tr>
  76  *             <td width="12%"><center>0</center>
  77  *             <td colspan="7"><center>bits 6-0</center>
  78  *           </tr>
  79  *         </table>
  80  *       </td>



  81  *     </tr>
  82  *   </table>
  83  * </blockquote>
  84  *
  85  * <p>
  86  * The null character {@code '\u005Cu0000'} and characters in the
  87  * range {@code '\u005Cu0080'} to {@code '\u005Cu07FF'} are
  88  * represented by a pair of bytes:
  89  *
  90  * <blockquote>
  91  *   <table border="1" cellspacing="0" cellpadding="8" width="50%"
  92  *          summary="Bit values and bytes">
  93  *     <tr>
  94  *       <td></td>
  95  *       <th id="bit_b">Bit Values</th>
  96  *     </tr>
  97  *     <tr>
  98  *       <th id="byte1_b">Byte 1</th>
  99  *       <td>
 100  *         <table border="1" cellspacing="0" width="100%">
 101  *           <tr>
 102  *             <td width="12%"><center>1</center>
 103  *             <td width="13%"><center>1</center>
 104  *             <td width="12%"><center>0</center>
 105  *             <td colspan="5"><center>bits 10-6</center>
 106  *           </tr>
 107  *         </table>
 108  *       </td>
 109  *     </tr>
 110  *     <tr>
 111  *       <th id="byte2_a">Byte 2</th>
 112  *       <td>
 113  *         <table border="1" cellspacing="0" width="100%">
 114  *           <tr>
 115  *             <td width="12%"><center>1</center>
 116  *             <td width="13%"><center>0</center>
 117  *             <td colspan="6"><center>bits 5-0</center>
 118  *           </tr>
 119  *         </table>
 120  *       </td>


 121  *     </tr>
 122  *   </table>
 123  *  </blockquote>
 124  *
 125  * <br>
 126  * {@code char} values in the range {@code '\u005Cu0800'} to
 127  * {@code '\u005CuFFFF'} are represented by three bytes:
 128  *
 129  * <blockquote>
 130  *   <table border="1" cellspacing="0" cellpadding="8" width="50%"
 131  *          summary="Bit values and bytes">
 132  *     <tr>
 133  *       <td></td>
 134  *       <th id="bit_c">Bit Values</th>
 135  *     </tr>
 136  *     <tr>
 137  *       <th id="byte1_c">Byte 1</th>
 138  *       <td>
 139  *         <table border="1" cellspacing="0" width="100%">
 140  *           <tr>
 141  *             <td width="12%"><center>1</center>
 142  *             <td width="13%"><center>1</center>
 143  *             <td width="12%"><center>1</center>
 144  *             <td width="13%"><center>0</center>
 145  *             <td colspan="4"><center>bits 15-12</center>
 146  *           </tr>
 147  *         </table>
 148  *       </td>
 149  *     </tr>
 150  *     <tr>
 151  *       <th id="byte2_b">Byte 2</th>
 152  *       <td>
 153  *         <table border="1" cellspacing="0" width="100%">
 154  *           <tr>
 155  *             <td width="12%"><center>1</center>
 156  *             <td width="13%"><center>0</center>
 157  *             <td colspan="6"><center>bits 11-6</center>
 158  *           </tr>
 159  *         </table>
 160  *       </td>
 161  *     </tr>
 162  *     <tr>
 163  *       <th id="byte3">Byte 3</th>
 164  *       <td>
 165  *         <table border="1" cellspacing="0" width="100%">
 166  *           <tr>
 167  *             <td width="12%"><center>1</center>
 168  *             <td width="13%"><center>0</center>
 169  *             <td colspan="6"><center>bits 5-0</center>
 170  *           </tr>
 171  *         </table>
 172  *       </td>
 173  *     </tr>
 174  *   </table>
 175  *  </blockquote>
 176  *
 177  * <p>
 178  * The differences between this format and the
 179  * standard UTF-8 format are the following:
 180  * <ul>
 181  * <li>The null character {@code '\u005Cu0000'} is encoded in 2-byte format
 182  *     rather than 1-byte, so that the encoded strings never have
 183  *     embedded nulls.
 184  * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
 185  * <li><a href="../lang/Character.html#unicode">Supplementary characters</a>
 186  *     are represented in the form of surrogate pairs.
 187  * </ul>
 188  * @author  Frank Yellin
 189  * @see     java.io.DataInputStream
 190  * @see     java.io.DataOutput
 191  * @since   JDK1.0
 192  */
 193 public
 194 interface DataInput {
 195     /**
 196      * Reads some bytes from an input




  31  * reconstructing from them data in any of
  32  * the Java primitive types. There is also
  33  * a
  34  * facility for reconstructing a {@code String}
  35  * from data in
  36  * <a href="#modified-utf-8">modified UTF-8</a>
  37  * format.
  38  * <p>
  39  * It is generally true of all the reading
  40  * routines in this interface that if end of
  41  * file is reached before the desired number
  42  * of bytes has been read, an {@code EOFException}
  43  * (which is a kind of {@code IOException})
  44  * is thrown. If any byte cannot be read for
  45  * any reason other than end of file, an {@code IOException}
  46  * other than {@code EOFException} is
  47  * thrown. In particular, an {@code IOException}
  48  * may be thrown if the input stream has been
  49  * closed.
  50  *
  51  * <h3><a name="modified-utf-8">Modified UTF-8</a></h3>
  52  * <p>
  53  * Implementations of the DataInput and DataOutput interfaces represent
  54  * Unicode strings in a format that is a slight modification of UTF-8.
  55  * (For information regarding the standard UTF-8 format, see section
  56  * <i>3.9 Unicode Encoding Forms</i> of <i>The Unicode Standard, Version
  57  * 4.0</i>).
  58  * Note that in the following table, the most significant bit appears in the
  59  * far left-hand column.



  60  *
  61  * <blockquote>
  62  *   <table border="1" cellspacing="0" cellpadding="8"
  63  *          summary="Bit values and bytes">
  64  *     <tr>
  65  *       <th colspan="9"><span style="font-weight:normal">
  66  *         All characters in the range {@code '\u005Cu0001'} to
  67  *         {@code '\u005Cu007F'} are represented by a single byte:</span></th>
  68  *     </tr>
  69  *     <tr>
  70  *       <td></td>
  71  *       <th colspan="8" id="bit_a">Bit Values</th>
  72  *     </tr>
  73  *     <tr>
  74  *       <th id="byte1_a">Byte 1</th>
  75  *       <td><center>0</center>



  76  *       <td colspan="7"><center>bits 6-0</center>
  77  *     </tr>
  78  *     <tr>
  79  *       <th colspan="9"><span style="font-weight:normal">
  80  *         The null character {@code '\u005Cu0000'} and characters
  81  *         in the range {@code '\u005Cu0080'} to {@code '\u005Cu07FF'} are
  82  *         represented by a pair of bytes:</span></th>
  83  *     </tr>











  84  *     <tr>
  85  *       <td></td>
  86  *       <th colspan="8" id="bit_b">Bit Values</th>
  87  *     </tr>
  88  *     <tr>
  89  *       <th id="byte1_b">Byte 1</th>
  90  *       <td><center>1</center>
  91  *       <td><center>1</center>
  92  *       <td><center>0</center>



  93  *       <td colspan="5"><center>bits 10-6</center>
  94  *     </tr>



  95  *     <tr>
  96  *       <th id="byte2_a">Byte 2</th>
  97  *       <td><center>1</center>
  98  *       <td><center>0</center>



  99  *       <td colspan="6"><center>bits 5-0</center>
 100  *     </tr>
 101  *     <tr>
 102  *       <th colspan="9"><span style="font-weight:normal">
 103  *         {@code char} values in the range {@code '\u005Cu0800'}
 104  *         to {@code '\u005CuFFFF'} are represented by three bytes:</span></th>
 105  *     </tr>










 106  *     <tr>
 107  *       <td></td>
 108  *       <th colspan="8" id="bit_c">Bit Values</th>
 109  *     </tr>
 110  *     <tr>
 111  *       <th id="byte1_c">Byte 1</th>
 112  *       <td><center>1</center>
 113  *       <td><center>1</center>
 114  *       <td><center>1</center>
 115  *       <td><center>0</center>



 116  *       <td colspan="4"><center>bits 15-12</center>
 117  *     </tr>



 118  *     <tr>
 119  *       <th id="byte2_b">Byte 2</th>
 120  *       <td><center>1</center>
 121  *       <td><center>0</center>



 122  *       <td colspan="6"><center>bits 11-6</center>
 123  *     </tr>



 124  *     <tr>
 125  *       <th id="byte3">Byte 3</th>
 126  *       <td><center>1</center>
 127  *       <td><center>0</center>



 128  *       <td colspan="6"><center>bits 5-0</center>
 129  *     </tr>
 130  *   </table>



 131  * </blockquote>

 132  * <p>
 133  * The differences between this format and the
 134  * standard UTF-8 format are the following:
 135  * <ul>
 136  * <li>The null character {@code '\u005Cu0000'} is encoded in 2-byte format
 137  *     rather than 1-byte, so that the encoded strings never have
 138  *     embedded nulls.
 139  * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
 140  * <li><a href="../lang/Character.html#unicode">Supplementary characters</a>
 141  *     are represented in the form of surrogate pairs.
 142  * </ul>
 143  * @author  Frank Yellin
 144  * @see     java.io.DataInputStream
 145  * @see     java.io.DataOutput
 146  * @since   JDK1.0
 147  */
 148 public
 149 interface DataInput {
 150     /**
 151      * Reads some bytes from an input