< prev index next >

src/java.base/share/classes/java/io/DataInput.java

Print this page




  42  * of bytes has been read, an {@code EOFException}
  43  * (which is a kind of {@code IOException})
  44  * is thrown. If any byte cannot be read for
  45  * any reason other than end of file, an {@code IOException}
  46  * other than {@code EOFException} is
  47  * thrown. In particular, an {@code IOException}
  48  * may be thrown if the input stream has been
  49  * closed.
  50  *
  51  * <h3><a id="modified-utf-8">Modified UTF-8</a></h3>
  52  * <p>
  53  * Implementations of the DataInput and DataOutput interfaces represent
  54  * Unicode strings in a format that is a slight modification of UTF-8.
  55  * (For information regarding the standard UTF-8 format, see section
  56  * <i>3.9 Unicode Encoding Forms</i> of <i>The Unicode Standard, Version
  57  * 4.0</i>).
  58  * Note that in the following table, the most significant bit appears in the
  59  * far left-hand column.
  60  *
  61  * <blockquote>
  62  *   <table border="1" cellspacing="0" cellpadding="8"
  63  *          summary="Bit values and bytes">

  64  *     <tr>
  65  *       <th colspan="9"><span style="font-weight:normal">
  66  *         All characters in the range {@code '\u005Cu0001'} to
  67  *         {@code '\u005Cu007F'} are represented by a single byte:</span></th>
  68  *     </tr>
  69  *     <tr>
  70  *       <td></td>
  71  *       <th colspan="8" id="bit_a">Bit Values</th>
  72  *     </tr>
  73  *     <tr>
  74  *       <th id="byte1_a">Byte 1</th>
  75  *       <td style="text-align:center">0
  76  *       <td colspan="7" style="text-align:center">bits 6-0
  77  *     </tr>
  78  *     <tr>
  79  *       <th colspan="9"><span style="font-weight:normal">
  80  *         The null character {@code '\u005Cu0000'} and characters
  81  *         in the range {@code '\u005Cu0080'} to {@code '\u005Cu07FF'} are
  82  *         represented by a pair of bytes:</span></th>
  83  *     </tr>
  84  *     <tr>
  85  *       <td></td>
  86  *       <th colspan="8" id="bit_b">Bit Values</th>
  87  *     </tr>
  88  *     <tr>
  89  *       <th id="byte1_b">Byte 1</th>
  90  *       <td style="text-align:center">1
  91  *       <td style="text-align:center">1
  92  *       <td style="text-align:center">0
  93  *       <td colspan="5" style="text-align:center">bits 10-6
  94  *     </tr>
  95  *     <tr>
  96  *       <th id="byte2_a">Byte 2</th>
  97  *       <td style="text-align:center">1
  98  *       <td style="text-align:center">0
  99  *       <td colspan="6" style="text-align:center">bits 5-0
 100  *     </tr>
 101  *     <tr>
 102  *       <th colspan="9"><span style="font-weight:normal">
 103  *         {@code char} values in the range {@code '\u005Cu0800'}
 104  *         to {@code '\u005CuFFFF'} are represented by three bytes:</span></th>
 105  *     </tr>
 106  *     <tr>
 107  *       <td></td>
 108  *       <th colspan="8" id="bit_c">Bit Values</th>
 109  *     </tr>
 110  *     <tr>
 111  *       <th id="byte1_c">Byte 1</th>
 112  *       <td style="text-align:center">1
 113  *       <td style="text-align:center">1
 114  *       <td style="text-align:center">1
 115  *       <td style="text-align:center">0
 116  *       <td colspan="4" style="text-align:center">bits 15-12
 117  *     </tr>
 118  *     <tr>
 119  *       <th id="byte2_b">Byte 2</th>
 120  *       <td style="text-align:center">1
 121  *       <td style="text-align:center">0
 122  *       <td colspan="6" style="text-align:center">bits 11-6
 123  *     </tr>
 124  *     <tr>
 125  *       <th id="byte3">Byte 3</th>
 126  *       <td style="text-align:center">1
 127  *       <td style="text-align:center">0
 128  *       <td colspan="6" style="text-align:center">bits 5-0
 129  *     </tr>

 130  *   </table>
 131  * </blockquote>
 132  * <p>
 133  * The differences between this format and the
 134  * standard UTF-8 format are the following:
 135  * <ul>
 136  * <li>The null byte {@code '\u005Cu0000'} is encoded in 2-byte format
 137  *     rather than 1-byte, so that the encoded strings never have
 138  *     embedded nulls.
 139  * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
 140  * <li><a href="../lang/Character.html#unicode">Supplementary characters</a>
 141  *     are represented in the form of surrogate pairs.
 142  * </ul>
 143  * @author  Frank Yellin
 144  * @see     java.io.DataInputStream
 145  * @see     java.io.DataOutput
 146  * @since   1.0
 147  */
 148 public
 149 interface DataInput {




  42  * of bytes has been read, an {@code EOFException}
  43  * (which is a kind of {@code IOException})
  44  * is thrown. If any byte cannot be read for
  45  * any reason other than end of file, an {@code IOException}
  46  * other than {@code EOFException} is
  47  * thrown. In particular, an {@code IOException}
  48  * may be thrown if the input stream has been
  49  * closed.
  50  *
  51  * <h3><a id="modified-utf-8">Modified UTF-8</a></h3>
  52  * <p>
  53  * Implementations of the DataInput and DataOutput interfaces represent
  54  * Unicode strings in a format that is a slight modification of UTF-8.
  55  * (For information regarding the standard UTF-8 format, see section
  56  * <i>3.9 Unicode Encoding Forms</i> of <i>The Unicode Standard, Version
  57  * 4.0</i>).
  58  * Note that in the following table, the most significant bit appears in the
  59  * far left-hand column.
  60  *
  61  * <blockquote>
  62  *   <table class="plain">
  63  *     <caption style="display:none">Bit values and bytes</caption>
  64  *     <tbody>
  65  *     <tr>
  66  *       <th colspan="9"><span style="font-weight:normal">
  67  *         All characters in the range {@code '\u005Cu0001'} to
  68  *         {@code '\u005Cu007F'} are represented by a single byte:</span></th>
  69  *     </tr>
  70  *     <tr>
  71  *       <td></td>
  72  *       <th colspan="8" id="bit_a">Bit Values</th>
  73  *     </tr>
  74  *     <tr>
  75  *       <th id="byte1_a" style="text-align:left">Byte 1</th>
  76  *       <td style="text-align:center">0
  77  *       <td colspan="7" style="text-align:center">bits 6-0
  78  *     </tr>
  79  *     <tr>
  80  *       <th colspan="9"><span style="font-weight:normal">
  81  *         The null character {@code '\u005Cu0000'} and characters
  82  *         in the range {@code '\u005Cu0080'} to {@code '\u005Cu07FF'} are
  83  *         represented by a pair of bytes:</span></th>
  84  *     </tr>
  85  *     <tr>
  86  *       <td></td>
  87  *       <th colspan="8" id="bit_b">Bit Values</th>
  88  *     </tr>
  89  *     <tr>
  90  *       <th id="byte1_b" style="text-align:left">Byte 1</th>
  91  *       <td style="text-align:center">1
  92  *       <td style="text-align:center">1
  93  *       <td style="text-align:center">0
  94  *       <td colspan="5" style="text-align:center">bits 10-6
  95  *     </tr>
  96  *     <tr>
  97  *       <th id="byte2_a" style="text-align:left">Byte 2</th>
  98  *       <td style="text-align:center">1
  99  *       <td style="text-align:center">0
 100  *       <td colspan="6" style="text-align:center">bits 5-0
 101  *     </tr>
 102  *     <tr>
 103  *       <th colspan="9"><span style="font-weight:normal">
 104  *         {@code char} values in the range {@code '\u005Cu0800'}
 105  *         to {@code '\u005CuFFFF'} are represented by three bytes:</span></th>
 106  *     </tr>
 107  *     <tr>
 108  *       <td></td>
 109  *       <th colspan="8" id="bit_c">Bit Values</th>
 110  *     </tr>
 111  *     <tr>
 112  *       <th id="byte1_c" style="text-align:left">Byte 1</th>
 113  *       <td style="text-align:center">1
 114  *       <td style="text-align:center">1
 115  *       <td style="text-align:center">1
 116  *       <td style="text-align:center">0
 117  *       <td colspan="4" style="text-align:center">bits 15-12
 118  *     </tr>
 119  *     <tr>
 120  *       <th id="byte2_b" style="text-align:left">Byte 2</th>
 121  *       <td style="text-align:center">1
 122  *       <td style="text-align:center">0
 123  *       <td colspan="6" style="text-align:center">bits 11-6
 124  *     </tr>
 125  *     <tr>
 126  *       <th id="byte3" style="text-align:left">Byte 3</th>
 127  *       <td style="text-align:center">1
 128  *       <td style="text-align:center">0
 129  *       <td colspan="6" style="text-align:center">bits 5-0
 130  *     </tr>
 131  *     </tbody>
 132  *   </table>
 133  * </blockquote>
 134  * <p>
 135  * The differences between this format and the
 136  * standard UTF-8 format are the following:
 137  * <ul>
 138  * <li>The null byte {@code '\u005Cu0000'} is encoded in 2-byte format
 139  *     rather than 1-byte, so that the encoded strings never have
 140  *     embedded nulls.
 141  * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
 142  * <li><a href="../lang/Character.html#unicode">Supplementary characters</a>
 143  *     are represented in the form of surrogate pairs.
 144  * </ul>
 145  * @author  Frank Yellin
 146  * @see     java.io.DataInputStream
 147  * @see     java.io.DataOutput
 148  * @since   1.0
 149  */
 150 public
 151 interface DataInput {


< prev index next >