src/share/jaxws_classes/com/sun/xml/internal/org/jvnet/mimepull/MIMEParser.java

Print this page


   1 /*
   2  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.org.jvnet.mimepull;
  27 
  28 import java.io.InputStream;
  29 import java.io.IOException;
  30 import java.util.*;
  31 import java.util.logging.Logger;
  32 import java.nio.ByteBuffer;

  33 
  34 /**
  35  * Pull parser for the MIME messages. Applications can use pull API to continue
  36  * the parsing MIME messages lazily.
  37  *
  38  * <pre>
  39  * for e.g.:
  40  * <p>
  41  *
  42  * MIMEParser parser = ...
  43  * Iterator<MIMEEvent> it = parser.iterator();
  44  * while(it.hasNext()) {
  45  *   MIMEEvent event = it.next();
  46  *   ...
  47  * }
  48  * </pre>
  49  *
  50  * @author Jitendra Kotamraju
  51  */
  52 class MIMEParser implements Iterable<MIMEEvent> {
  53 
  54     private static final Logger LOGGER = Logger.getLogger(MIMEParser.class.getName());
  55 


  56     // Actually, the grammar doesn't support whitespace characters
  57     // after boundary. But the mail implementation checks for it.
  58     // We will only check for these many whitespace characters after boundary
  59     private static final int NO_LWSP = 1000;
  60     private enum STATE {START_MESSAGE, SKIP_PREAMBLE, START_PART, HEADERS, BODY, END_PART, END_MESSAGE}
  61     private STATE state = STATE.START_MESSAGE;
  62 
  63     private final InputStream in;
  64     private final byte[] bndbytes;
  65     private final int bl;
  66     private final MIMEConfig config;
  67     private final int[] bcs = new int[128]; // BnM algo: Bad Character Shift table
  68     private final int[] gss;                // BnM algo : Good Suffix Shift table
  69 
  70     /**
  71      * Have we parsed the data from our InputStream yet?
  72      */
  73     private boolean parsed;
  74 
  75     /*


  89      */
  90     MIMEParser(InputStream in, String boundary, MIMEConfig config) {
  91         this.in = in;
  92         this.bndbytes = getBytes("--"+boundary);
  93         bl = bndbytes.length;
  94         this.config = config;
  95         gss = new int[bl];
  96         compileBoundaryPattern();
  97 
  98         // \r\n + boundary + "--\r\n" + lots of LWSP
  99         capacity = config.chunkSize+2+bl+4+NO_LWSP;
 100         createBuf(capacity);
 101     }
 102 
 103     /**
 104      * Returns iterator for the parsing events. Use the iterator to advance
 105      * the parsing.
 106      *
 107      * @return iterator for parsing events
 108      */

 109     public Iterator<MIMEEvent> iterator() {
 110         return new MIMEEventIterator();
 111     }
 112 
 113     class MIMEEventIterator implements Iterator<MIMEEvent> {
 114 

 115         public boolean hasNext() {
 116             return !parsed;
 117         }
 118 

 119         public MIMEEvent next() {
 120             switch(state) {
 121                 case START_MESSAGE :
 122                     LOGGER.finer("MIMEParser state="+STATE.START_MESSAGE);
 123                     state = STATE.SKIP_PREAMBLE;
 124                     return MIMEEvent.START_MESSAGE;
 125 
 126                 case SKIP_PREAMBLE :
 127                     LOGGER.finer("MIMEParser state="+STATE.SKIP_PREAMBLE);
 128                     skipPreamble();
 129                     // fall through
 130                 case START_PART :
 131                     LOGGER.finer("MIMEParser state="+STATE.START_PART);
 132                     state = STATE.HEADERS;
 133                     return MIMEEvent.START_PART;
 134 
 135                 case HEADERS :
 136                     LOGGER.finer("MIMEParser state="+STATE.HEADERS);
 137                     InternetHeaders ih = readHeaders();
 138                     state = STATE.BODY;
 139                     bol = true;
 140                     return new MIMEEvent.Headers(ih);
 141 
 142                 case BODY :
 143                     LOGGER.finer("MIMEParser state="+STATE.BODY);
 144                     ByteBuffer buf = readBody();
 145                     bol = false;
 146                     return new MIMEEvent.Content(buf);
 147 
 148                 case END_PART :
 149                     LOGGER.finer("MIMEParser state="+STATE.END_PART);
 150                     if (done) {
 151                         state = STATE.END_MESSAGE;
 152                     } else {
 153                         state = STATE.START_PART;
 154                     }
 155                     return MIMEEvent.END_PART;
 156 
 157                 case END_MESSAGE :
 158                     LOGGER.finer("MIMEParser state="+STATE.END_MESSAGE);
 159                     parsed = true;
 160                     return MIMEEvent.END_MESSAGE;
 161 
 162                 default :
 163                     throw new MIMEParsingException("Unknown Parser state = "+state);
 164             }
 165         }
 166 

 167         public void remove() {
 168             throw new UnsupportedOperationException();
 169         }
 170     }
 171 
 172     /**
  173      * Collects the headers for the current part by parsing the message stream.
 174      *
 175      * @return headers for the current part
 176      */
 177     private InternetHeaders readHeaders() {
 178         if (!eof) {
 179             fillBuf();
 180         }
 181         return new InternetHeaders(new LineInputStream());
 182     }
 183 
 184     /**
 185      * Reads and saves the part of the current attachment part's content.
 186      * At the end of this method, buf should have the remaining data


 299                 adjustBuf(start, len-start);
 300                 continue;
 301             }
 302             // Consider all the whitespace boundary+whitespace+"\r\n"
 303             int lwsp = 0;
 304             for(int i=start+bl; i < len && (buf[i] == ' ' || buf[i] == '\t'); i++) {
 305                 ++lwsp;
 306             }
 307             // Check for \n or \r\n
 308             if (start+bl+lwsp < len && (buf[start+bl+lwsp] == '\n' || buf[start+bl+lwsp] == '\r') ) {
 309                 if (buf[start+bl+lwsp] == '\n') {
 310                     adjustBuf(start+bl+lwsp+1, len-start-bl-lwsp-1);
 311                     break;
 312                 } else if (start+bl+lwsp+1 < len && buf[start+bl+lwsp+1] == '\n') {
 313                     adjustBuf(start+bl+lwsp+2, len-start-bl-lwsp-2);
 314                     break;
 315                 }
 316             }
 317             adjustBuf(start+1, len-start-1);
 318         }
 319         LOGGER.fine("Skipped the preamble. buffer len="+len);
 320     }
 321 
 322     private static byte[] getBytes(String s) {
 323         char [] chars= s.toCharArray();
 324         int size = chars.length;
 325         byte[] bytes = new byte[size];
 326 
 327         for (int i = 0; i < size;)
 328             bytes[i] = (byte) chars[i++];

 329         return bytes;
 330     }
 331 
 332         /**
 333      * Boyer-Moore search method. Copied from java.util.regex.Pattern.java
 334      *
 335      * Pre calculates arrays needed to generate the bad character
 336      * shift and the good suffix shift. Only the last seven bits
 337      * are used to see if chars match; This keeps the tables small
 338      * and covers the heavily used ASCII range, but occasionally
 339      * results in an aliased match for the bad character shift.
 340      */
 341     private void compileBoundaryPattern() {
 342         int i, j;
 343 
 344         // Precalculate part of the bad character shift
 345         // It is a table for where in the pattern each
 346         // lower 7-bit value occurs
 347         for (i = 0; i < bndbytes.length; i++) {
 348             bcs[bndbytes[i]&0x7F] = i + 1;


 392             // Loop over pattern from right to left
 393             for (int j = bndbytes.length - 1; j >= 0; j--) {
 394                 byte ch = mybuf[off+j];
 395                 if (ch != bndbytes[j]) {
 396                     // Shift search to the right by the maximum of the
 397                     // bad character shift and the good suffix shift
 398                     off += Math.max(j + 1 - bcs[ch&0x7F], gss[j]);
 399                     continue NEXT;
 400                 }
 401             }
 402             // Entire pattern matched starting at off
 403             return off;
 404         }
 405         return -1;
 406     }
 407 
 408     /**
 409      * Fills the remaining buf to the full capacity
 410      */
 411     private void fillBuf() {
 412         LOGGER.finer("Before fillBuf() buffer len="+len);
 413         assert !eof;
 414         while(len < buf.length) {
 415             int read;
 416             try {
 417                 read = in.read(buf, len, buf.length-len);
 418             } catch(IOException ioe) {
 419                 throw new MIMEParsingException(ioe);
 420             }
 421             if (read == -1) {
 422                 eof = true;
 423                 try {
 424                     LOGGER.fine("Closing the input stream.");
 425                     in.close();
 426                 } catch(IOException ioe) {
 427                     throw new MIMEParsingException(ioe);
 428                 }
 429                 break;
 430             } else {
 431                 len += read;
 432             }
 433         }
 434         LOGGER.finer("After fillBuf() buffer len="+len);
 435     }
 436 
 437     private void doubleBuf() {
 438         byte[] temp = new byte[2*len];
 439         System.arraycopy(buf, 0, temp, 0, len);
 440         buf = temp;
 441         if (!eof) {
 442             fillBuf();
 443         }
 444     }
 445 
 446     class LineInputStream {
 447         private int offset;
 448 
 449         /*
 450          * Read a line containing only ASCII characters from the input
 451          * stream. A line is terminated by a CR or NL or CR-NL sequence.
 452          * A common error is a CR-CR-NL sequence, which will also terminate
 453          * a line.
 454          * The line terminator is not returned as part of the returned


 467                     break;
 468                 }
 469                 if (offset+hdrLen+1 == len) {
 470                     doubleBuf();
 471                 }
 472                 if (offset+hdrLen+1 >= len) {   // No more data in the stream
 473                     assert eof;
 474                     return null;
 475                 }
 476                 if (buf[offset+hdrLen] == '\r' && buf[offset+hdrLen+1] == '\n') {
 477                     lwsp = 2;
 478                     break;
 479                 }
 480                 ++hdrLen;
 481             }
 482             if (hdrLen == 0) {
 483                 adjustBuf(offset+lwsp, len-offset-lwsp);
 484                 return null;
 485             }
 486 
 487             String hdr = new String(buf, offset, hdrLen);
 488             offset += hdrLen+lwsp;
 489             return hdr;
 490         }
 491 
 492     }
 493 
 494 }
   1 /*
   2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.org.jvnet.mimepull;
  27 
  28 import java.io.InputStream;
  29 import java.io.IOException;
  30 import java.util.*;
  31 import java.util.logging.Logger;
  32 import java.nio.ByteBuffer;
  33 import java.util.logging.Level;
  34 
  35 /**
  36  * Pull parser for the MIME messages. Applications can use pull API to continue
  37  * the parsing MIME messages lazily.
  38  *
  39  * <pre>
  40  * for e.g.:
  41  * <p>
  42  *
  43  * MIMEParser parser = ...
  44  * Iterator<MIMEEvent> it = parser.iterator();
  45  * while(it.hasNext()) {
  46  *   MIMEEvent event = it.next();
  47  *   ...
  48  * }
  49  * </pre>
  50  *
  51  * @author Jitendra Kotamraju
  52  */
  53 class MIMEParser implements Iterable<MIMEEvent> {
  54 
  55     private static final Logger LOGGER = Logger.getLogger(MIMEParser.class.getName());
  56 
  57     private static final String HEADER_ENCODING = "ISO8859-1";
  58 
  59     // Actually, the grammar doesn't support whitespace characters
  60     // after boundary. But the mail implementation checks for it.
  61     // We will only check for these many whitespace characters after boundary
  62     private static final int NO_LWSP = 1000;
  63     private enum STATE {START_MESSAGE, SKIP_PREAMBLE, START_PART, HEADERS, BODY, END_PART, END_MESSAGE}
  64     private STATE state = STATE.START_MESSAGE;
  65 
  66     private final InputStream in;
  67     private final byte[] bndbytes;
  68     private final int bl;
  69     private final MIMEConfig config;
  70     private final int[] bcs = new int[128]; // BnM algo: Bad Character Shift table
  71     private final int[] gss;                // BnM algo : Good Suffix Shift table
  72 
  73     /**
  74      * Have we parsed the data from our InputStream yet?
  75      */
  76     private boolean parsed;
  77 
  78     /*


  92      */
  93     MIMEParser(InputStream in, String boundary, MIMEConfig config) {
  94         this.in = in;
  95         this.bndbytes = getBytes("--"+boundary);
  96         bl = bndbytes.length;
  97         this.config = config;
  98         gss = new int[bl];
  99         compileBoundaryPattern();
 100 
 101         // \r\n + boundary + "--\r\n" + lots of LWSP
 102         capacity = config.chunkSize+2+bl+4+NO_LWSP;
 103         createBuf(capacity);
 104     }
 105 
 106     /**
 107      * Returns iterator for the parsing events. Use the iterator to advance
 108      * the parsing.
 109      *
 110      * @return iterator for parsing events
 111      */
 112     @Override
 113     public Iterator<MIMEEvent> iterator() {
 114         return new MIMEEventIterator();
 115     }
 116 
 117     class MIMEEventIterator implements Iterator<MIMEEvent> {
 118 
 119         @Override
 120         public boolean hasNext() {
 121             return !parsed;
 122         }
 123 
 124         @Override
 125         public MIMEEvent next() {
 126             switch(state) {
 127                 case START_MESSAGE :
 128                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.START_MESSAGE);}
 129                     state = STATE.SKIP_PREAMBLE;
 130                     return MIMEEvent.START_MESSAGE;
 131 
 132                 case SKIP_PREAMBLE :
 133                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.SKIP_PREAMBLE);}
 134                     skipPreamble();
 135                     // fall through
 136                 case START_PART :
 137                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.START_PART);}
 138                     state = STATE.HEADERS;
 139                     return MIMEEvent.START_PART;
 140 
 141                 case HEADERS :
 142                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.HEADERS);}
 143                     InternetHeaders ih = readHeaders();
 144                     state = STATE.BODY;
 145                     bol = true;
 146                     return new MIMEEvent.Headers(ih);
 147 
 148                 case BODY :
 149                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.BODY);}
 150                     ByteBuffer buf = readBody();
 151                     bol = false;
 152                     return new MIMEEvent.Content(buf);
 153 
 154                 case END_PART :
 155                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.END_PART);}
 156                     if (done) {
 157                         state = STATE.END_MESSAGE;
 158                     } else {
 159                         state = STATE.START_PART;
 160                     }
 161                     return MIMEEvent.END_PART;
 162 
 163                 case END_MESSAGE :
 164                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.END_MESSAGE);}
 165                     parsed = true;
 166                     return MIMEEvent.END_MESSAGE;
 167 
 168                 default :
 169                     throw new MIMEParsingException("Unknown Parser state = "+state);
 170             }
 171         }
 172 
 173         @Override
 174         public void remove() {
 175             throw new UnsupportedOperationException();
 176         }
 177     }
 178 
 179     /**
  180      * Collects the headers for the current part by parsing the message stream.
 181      *
 182      * @return headers for the current part
 183      */
 184     private InternetHeaders readHeaders() {
 185         if (!eof) {
 186             fillBuf();
 187         }
 188         return new InternetHeaders(new LineInputStream());
 189     }
 190 
 191     /**
 192      * Reads and saves the part of the current attachment part's content.
 193      * At the end of this method, buf should have the remaining data


 306                 adjustBuf(start, len-start);
 307                 continue;
 308             }
 309             // Consider all the whitespace boundary+whitespace+"\r\n"
 310             int lwsp = 0;
 311             for(int i=start+bl; i < len && (buf[i] == ' ' || buf[i] == '\t'); i++) {
 312                 ++lwsp;
 313             }
 314             // Check for \n or \r\n
 315             if (start+bl+lwsp < len && (buf[start+bl+lwsp] == '\n' || buf[start+bl+lwsp] == '\r') ) {
 316                 if (buf[start+bl+lwsp] == '\n') {
 317                     adjustBuf(start+bl+lwsp+1, len-start-bl-lwsp-1);
 318                     break;
 319                 } else if (start+bl+lwsp+1 < len && buf[start+bl+lwsp+1] == '\n') {
 320                     adjustBuf(start+bl+lwsp+2, len-start-bl-lwsp-2);
 321                     break;
 322                 }
 323             }
 324             adjustBuf(start+1, len-start-1);
 325         }
 326         if (LOGGER.isLoggable(Level.FINE)) {LOGGER.log(Level.FINE, "Skipped the preamble. buffer len={0}", len);}
 327     }
 328 
 329     private static byte[] getBytes(String s) {
 330         char [] chars= s.toCharArray();
 331         int size = chars.length;
 332         byte[] bytes = new byte[size];
 333 
 334         for (int i = 0; i < size;) {
 335             bytes[i] = (byte) chars[i++];
 336         }
 337         return bytes;
 338     }
 339 
 340         /**
 341      * Boyer-Moore search method. Copied from java.util.regex.Pattern.java
 342      *
 343      * Pre calculates arrays needed to generate the bad character
 344      * shift and the good suffix shift. Only the last seven bits
 345      * are used to see if chars match; This keeps the tables small
 346      * and covers the heavily used ASCII range, but occasionally
 347      * results in an aliased match for the bad character shift.
 348      */
 349     private void compileBoundaryPattern() {
 350         int i, j;
 351 
 352         // Precalculate part of the bad character shift
 353         // It is a table for where in the pattern each
 354         // lower 7-bit value occurs
 355         for (i = 0; i < bndbytes.length; i++) {
 356             bcs[bndbytes[i]&0x7F] = i + 1;


 400             // Loop over pattern from right to left
 401             for (int j = bndbytes.length - 1; j >= 0; j--) {
 402                 byte ch = mybuf[off+j];
 403                 if (ch != bndbytes[j]) {
 404                     // Shift search to the right by the maximum of the
 405                     // bad character shift and the good suffix shift
 406                     off += Math.max(j + 1 - bcs[ch&0x7F], gss[j]);
 407                     continue NEXT;
 408                 }
 409             }
 410             // Entire pattern matched starting at off
 411             return off;
 412         }
 413         return -1;
 414     }
 415 
 416     /**
 417      * Fills the remaining buf to the full capacity
 418      */
 419     private void fillBuf() {
 420         if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "Before fillBuf() buffer len={0}", len);}
 421         assert !eof;
 422         while(len < buf.length) {
 423             int read;
 424             try {
 425                 read = in.read(buf, len, buf.length-len);
 426             } catch(IOException ioe) {
 427                 throw new MIMEParsingException(ioe);
 428             }
 429             if (read == -1) {
 430                 eof = true;
 431                 try {
 432                     if (LOGGER.isLoggable(Level.FINE)) {LOGGER.fine("Closing the input stream.");}
 433                     in.close();
 434                 } catch(IOException ioe) {
 435                     throw new MIMEParsingException(ioe);
 436                 }
 437                 break;
 438             } else {
 439                 len += read;
 440             }
 441         }
 442         if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "After fillBuf() buffer len={0}", len);}
 443     }
 444 
 445     private void doubleBuf() {
 446         byte[] temp = new byte[2*len];
 447         System.arraycopy(buf, 0, temp, 0, len);
 448         buf = temp;
 449         if (!eof) {
 450             fillBuf();
 451         }
 452     }
 453 
 454     class LineInputStream {
 455         private int offset;
 456 
 457         /*
 458          * Read a line containing only ASCII characters from the input
 459          * stream. A line is terminated by a CR or NL or CR-NL sequence.
 460          * A common error is a CR-CR-NL sequence, which will also terminate
 461          * a line.
 462          * The line terminator is not returned as part of the returned


 475                     break;
 476                 }
 477                 if (offset+hdrLen+1 == len) {
 478                     doubleBuf();
 479                 }
 480                 if (offset+hdrLen+1 >= len) {   // No more data in the stream
 481                     assert eof;
 482                     return null;
 483                 }
 484                 if (buf[offset+hdrLen] == '\r' && buf[offset+hdrLen+1] == '\n') {
 485                     lwsp = 2;
 486                     break;
 487                 }
 488                 ++hdrLen;
 489             }
 490             if (hdrLen == 0) {
 491                 adjustBuf(offset+lwsp, len-offset-lwsp);
 492                 return null;
 493             }
 494 
 495             String hdr = new String(buf, offset, hdrLen, HEADER_ENCODING);
 496             offset += hdrLen+lwsp;
 497             return hdr;
 498         }
 499 
 500     }
 501 
 502 }