1 /*
2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package com.sun.xml.internal.org.jvnet.mimepull;
27
28 import java.io.InputStream;
29 import java.io.IOException;
30 import java.util.*;
31 import java.util.logging.Logger;
32 import java.nio.ByteBuffer;
33
34 /**
35 * Pull parser for MIME messages. Applications can use the pull API to continue
36 * parsing a MIME message lazily.
37 *
38 * <pre>
39 * for e.g.:
40 * <p>
41 *
42 * MIMEParser parser = ...
43 * Iterator&lt;MIMEEvent&gt; it = parser.iterator();
44 * while(it.hasNext()) {
45 * MIMEEvent event = it.next();
46 * ...
47 * }
48 * </pre>
49 *
50 * @author Jitendra Kotamraju
51 */
52 class MIMEParser implements Iterable<MIMEEvent> {
53
54 private static final Logger LOGGER = Logger.getLogger(MIMEParser.class.getName());
55
56 // Actually, the grammar doesn't support whitespace characters
57 // after the boundary. But the mail implementation checks for it.
58 // We will only check for this many whitespace characters after the boundary
59 private static final int NO_LWSP = 1000;
60 private enum STATE {START_MESSAGE, SKIP_PREAMBLE, START_PART, HEADERS, BODY, END_PART, END_MESSAGE} // parser states; MIMEEventIterator.next() switches on the current one
61 private STATE state = STATE.START_MESSAGE; // current parser state
62
63 private final InputStream in; // source stream; fillBuf() reads from it and closes it at end of stream
64 private final byte[] bndbytes; // delimiter bytes: "--" + boundary, converted by getBytes()
65 private final int bl; // bndbytes.length
66 private final MIMEConfig config; // supplies chunkSize, which the constructor uses to size the buffer
67 private final int[] bcs = new int[128]; // BnM algo: Bad Character Shift table, indexed by the low 7 bits of a byte
68 private final int[] gss; // BnM algo: Good Suffix Shift table, one entry per delimiter byte
69
70 /**
71 * Have we parsed the data from our InputStream yet? Set once the END_MESSAGE event has been returned.
72 */
73 private boolean parsed;
74
75 /*
89 */
90 MIMEParser(InputStream in, String boundary, MIMEConfig config) {
91 this.in = in;
92 this.bndbytes = getBytes("--"+boundary);
93 bl = bndbytes.length;
94 this.config = config;
95 gss = new int[bl];
96 compileBoundaryPattern();
97
98 // \r\n + boundary + "--\r\n" + lots of LWSP
99 capacity = config.chunkSize+2+bl+4+NO_LWSP;
100 createBuf(capacity);
101 }
102
103 /**
104 * Returns iterator for the parsing events. Use the iterator to advance
105 * the parsing.
106 *
107 * @return iterator for parsing events
108 */
109 public Iterator<MIMEEvent> iterator() {
110 return new MIMEEventIterator();
111 }
112
113 class MIMEEventIterator implements Iterator<MIMEEvent> {
114
115 public boolean hasNext() {
116 return !parsed;
117 }
118
119 public MIMEEvent next() {
120 switch(state) {
121 case START_MESSAGE :
122 LOGGER.finer("MIMEParser state="+STATE.START_MESSAGE);
123 state = STATE.SKIP_PREAMBLE;
124 return MIMEEvent.START_MESSAGE;
125
126 case SKIP_PREAMBLE :
127 LOGGER.finer("MIMEParser state="+STATE.SKIP_PREAMBLE);
128 skipPreamble();
129 // fall through
130 case START_PART :
131 LOGGER.finer("MIMEParser state="+STATE.START_PART);
132 state = STATE.HEADERS;
133 return MIMEEvent.START_PART;
134
135 case HEADERS :
136 LOGGER.finer("MIMEParser state="+STATE.HEADERS);
137 InternetHeaders ih = readHeaders();
138 state = STATE.BODY;
139 bol = true;
140 return new MIMEEvent.Headers(ih);
141
142 case BODY :
143 LOGGER.finer("MIMEParser state="+STATE.BODY);
144 ByteBuffer buf = readBody();
145 bol = false;
146 return new MIMEEvent.Content(buf);
147
148 case END_PART :
149 LOGGER.finer("MIMEParser state="+STATE.END_PART);
150 if (done) {
151 state = STATE.END_MESSAGE;
152 } else {
153 state = STATE.START_PART;
154 }
155 return MIMEEvent.END_PART;
156
157 case END_MESSAGE :
158 LOGGER.finer("MIMEParser state="+STATE.END_MESSAGE);
159 parsed = true;
160 return MIMEEvent.END_MESSAGE;
161
162 default :
163 throw new MIMEParsingException("Unknown Parser state = "+state);
164 }
165 }
166
167 public void remove() {
168 throw new UnsupportedOperationException();
169 }
170 }
171
172 /**
173 * Collects the headers for the current part by parsing mesage stream.
174 *
175 * @return headers for the current part
176 */
177 private InternetHeaders readHeaders() {
178 if (!eof) {
179 fillBuf();
180 }
181 return new InternetHeaders(new LineInputStream());
182 }
183
184 /**
185 * Reads and saves the part of the current attachment part's content.
186 * At the end of this method, buf should have the remaining data
299 adjustBuf(start, len-start);
300 continue;
301 }
302 // Consider all the whitespace boundary+whitespace+"\r\n"
303 int lwsp = 0;
304 for(int i=start+bl; i < len && (buf[i] == ' ' || buf[i] == '\t'); i++) {
305 ++lwsp;
306 }
307 // Check for \n or \r\n
308 if (start+bl+lwsp < len && (buf[start+bl+lwsp] == '\n' || buf[start+bl+lwsp] == '\r') ) {
309 if (buf[start+bl+lwsp] == '\n') {
310 adjustBuf(start+bl+lwsp+1, len-start-bl-lwsp-1);
311 break;
312 } else if (start+bl+lwsp+1 < len && buf[start+bl+lwsp+1] == '\n') {
313 adjustBuf(start+bl+lwsp+2, len-start-bl-lwsp-2);
314 break;
315 }
316 }
317 adjustBuf(start+1, len-start-1);
318 }
319 LOGGER.fine("Skipped the preamble. buffer len="+len);
320 }
321
322 private static byte[] getBytes(String s) {
323 char [] chars= s.toCharArray();
324 int size = chars.length;
325 byte[] bytes = new byte[size];
326
327 for (int i = 0; i < size;)
328 bytes[i] = (byte) chars[i++];
329 return bytes;
330 }
331
332 /**
333 * Boyer-Moore search method. Copied from java.util.regex.Pattern.java
334 *
335 * Pre calculates arrays needed to generate the bad character
336 * shift and the good suffix shift. Only the last seven bits
337 * are used to see if chars match; This keeps the tables small
338 * and covers the heavily used ASCII range, but occasionally
339 * results in an aliased match for the bad character shift.
340 */
341 private void compileBoundaryPattern() {
342 int i, j;
343
344 // Precalculate part of the bad character shift
345 // It is a table for where in the pattern each
346 // lower 7-bit value occurs
347 for (i = 0; i < bndbytes.length; i++) {
348 bcs[bndbytes[i]&0x7F] = i + 1;
392 // Loop over pattern from right to left
393 for (int j = bndbytes.length - 1; j >= 0; j--) {
394 byte ch = mybuf[off+j];
395 if (ch != bndbytes[j]) {
396 // Shift search to the right by the maximum of the
397 // bad character shift and the good suffix shift
398 off += Math.max(j + 1 - bcs[ch&0x7F], gss[j]);
399 continue NEXT;
400 }
401 }
402 // Entire pattern matched starting at off
403 return off;
404 }
405 return -1;
406 }
407
408 /**
409 * Fills the remaining buf to the full capacity
410 */
411 private void fillBuf() {
412 LOGGER.finer("Before fillBuf() buffer len="+len);
413 assert !eof;
414 while(len < buf.length) {
415 int read;
416 try {
417 read = in.read(buf, len, buf.length-len);
418 } catch(IOException ioe) {
419 throw new MIMEParsingException(ioe);
420 }
421 if (read == -1) {
422 eof = true;
423 try {
424 LOGGER.fine("Closing the input stream.");
425 in.close();
426 } catch(IOException ioe) {
427 throw new MIMEParsingException(ioe);
428 }
429 break;
430 } else {
431 len += read;
432 }
433 }
434 LOGGER.finer("After fillBuf() buffer len="+len);
435 }
436
437 private void doubleBuf() {
438 byte[] temp = new byte[2*len];
439 System.arraycopy(buf, 0, temp, 0, len);
440 buf = temp;
441 if (!eof) {
442 fillBuf();
443 }
444 }
445
446 class LineInputStream {
447 private int offset;
448
449 /*
450 * Read a line containing only ASCII characters from the input
451 * stream. A line is terminated by a CR or NL or CR-NL sequence.
452 * A common error is a CR-CR-NL sequence, which will also terminate
453 * a line.
454 * The line terminator is not returned as part of the returned
467 break;
468 }
469 if (offset+hdrLen+1 == len) {
470 doubleBuf();
471 }
472 if (offset+hdrLen+1 >= len) { // No more data in the stream
473 assert eof;
474 return null;
475 }
476 if (buf[offset+hdrLen] == '\r' && buf[offset+hdrLen+1] == '\n') {
477 lwsp = 2;
478 break;
479 }
480 ++hdrLen;
481 }
482 if (hdrLen == 0) {
483 adjustBuf(offset+lwsp, len-offset-lwsp);
484 return null;
485 }
486
487 String hdr = new String(buf, offset, hdrLen);
488 offset += hdrLen+lwsp;
489 return hdr;
490 }
491
492 }
493
494 }
|
1 /*
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package com.sun.xml.internal.org.jvnet.mimepull;
27
28 import java.io.InputStream;
29 import java.io.IOException;
30 import java.util.*;
31 import java.util.logging.Logger;
32 import java.nio.ByteBuffer;
33 import java.util.logging.Level;
34
35 /**
36 * Pull parser for MIME messages. Applications can use the pull API to continue
37 * parsing a MIME message lazily.
38 *
39 * <pre>
40 * for e.g.:
41 * <p>
42 *
43 * MIMEParser parser = ...
44 * Iterator&lt;MIMEEvent&gt; it = parser.iterator();
45 * while(it.hasNext()) {
46 * MIMEEvent event = it.next();
47 * ...
48 * }
49 * </pre>
50 *
51 * @author Jitendra Kotamraju
52 */
53 class MIMEParser implements Iterable<MIMEEvent> {
54
55 private static final Logger LOGGER = Logger.getLogger(MIMEParser.class.getName());
56
57 private static final String HEADER_ENCODING = "ISO8859-1"; // charset name used to decode header lines into Strings
58
59 // Actually, the grammar doesn't support whitespace characters
60 // after the boundary. But the mail implementation checks for it.
61 // We will only check for this many whitespace characters after the boundary
62 private static final int NO_LWSP = 1000;
63 private enum STATE {START_MESSAGE, SKIP_PREAMBLE, START_PART, HEADERS, BODY, END_PART, END_MESSAGE} // parser states; MIMEEventIterator.next() switches on the current one
64 private STATE state = STATE.START_MESSAGE; // current parser state
65
66 private final InputStream in; // source stream; fillBuf() reads from it and closes it at end of stream
67 private final byte[] bndbytes; // delimiter bytes: "--" + boundary, converted by getBytes()
68 private final int bl; // bndbytes.length
69 private final MIMEConfig config; // supplies chunkSize, which the constructor uses to size the buffer
70 private final int[] bcs = new int[128]; // BnM algo: Bad Character Shift table, indexed by the low 7 bits of a byte
71 private final int[] gss; // BnM algo: Good Suffix Shift table, one entry per delimiter byte
72
73 /**
74 * Have we parsed the data from our InputStream yet? Set once the END_MESSAGE event has been returned.
75 */
76 private boolean parsed;
77
78 /*
92 */
93 MIMEParser(InputStream in, String boundary, MIMEConfig config) {
94 this.in = in;
95 this.bndbytes = getBytes("--"+boundary);
96 bl = bndbytes.length;
97 this.config = config;
98 gss = new int[bl];
99 compileBoundaryPattern();
100
101 // \r\n + boundary + "--\r\n" + lots of LWSP
102 capacity = config.chunkSize+2+bl+4+NO_LWSP;
103 createBuf(capacity);
104 }
105
106 /**
107 * Returns iterator for the parsing events. Use the iterator to advance
108 * the parsing.
109 *
110 * @return iterator for parsing events
111 */
112 @Override
113 public Iterator<MIMEEvent> iterator() {
114 return new MIMEEventIterator();
115 }
116
117 class MIMEEventIterator implements Iterator<MIMEEvent> {
118
119 @Override
120 public boolean hasNext() {
121 return !parsed;
122 }
123
124 @Override
125 public MIMEEvent next() {
126 switch(state) {
127 case START_MESSAGE :
128 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.START_MESSAGE);}
129 state = STATE.SKIP_PREAMBLE;
130 return MIMEEvent.START_MESSAGE;
131
132 case SKIP_PREAMBLE :
133 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.SKIP_PREAMBLE);}
134 skipPreamble();
135 // fall through
136 case START_PART :
137 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.START_PART);}
138 state = STATE.HEADERS;
139 return MIMEEvent.START_PART;
140
141 case HEADERS :
142 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.HEADERS);}
143 InternetHeaders ih = readHeaders();
144 state = STATE.BODY;
145 bol = true;
146 return new MIMEEvent.Headers(ih);
147
148 case BODY :
149 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.BODY);}
150 ByteBuffer buf = readBody();
151 bol = false;
152 return new MIMEEvent.Content(buf);
153
154 case END_PART :
155 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.END_PART);}
156 if (done) {
157 state = STATE.END_MESSAGE;
158 } else {
159 state = STATE.START_PART;
160 }
161 return MIMEEvent.END_PART;
162
163 case END_MESSAGE :
164 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.END_MESSAGE);}
165 parsed = true;
166 return MIMEEvent.END_MESSAGE;
167
168 default :
169 throw new MIMEParsingException("Unknown Parser state = "+state);
170 }
171 }
172
173 @Override
174 public void remove() {
175 throw new UnsupportedOperationException();
176 }
177 }
178
179 /**
180 * Collects the headers for the current part by parsing mesage stream.
181 *
182 * @return headers for the current part
183 */
184 private InternetHeaders readHeaders() {
185 if (!eof) {
186 fillBuf();
187 }
188 return new InternetHeaders(new LineInputStream());
189 }
190
191 /**
192 * Reads and saves the part of the current attachment part's content.
193 * At the end of this method, buf should have the remaining data
306 adjustBuf(start, len-start);
307 continue;
308 }
309 // Consider all the whitespace boundary+whitespace+"\r\n"
310 int lwsp = 0;
311 for(int i=start+bl; i < len && (buf[i] == ' ' || buf[i] == '\t'); i++) {
312 ++lwsp;
313 }
314 // Check for \n or \r\n
315 if (start+bl+lwsp < len && (buf[start+bl+lwsp] == '\n' || buf[start+bl+lwsp] == '\r') ) {
316 if (buf[start+bl+lwsp] == '\n') {
317 adjustBuf(start+bl+lwsp+1, len-start-bl-lwsp-1);
318 break;
319 } else if (start+bl+lwsp+1 < len && buf[start+bl+lwsp+1] == '\n') {
320 adjustBuf(start+bl+lwsp+2, len-start-bl-lwsp-2);
321 break;
322 }
323 }
324 adjustBuf(start+1, len-start-1);
325 }
326 if (LOGGER.isLoggable(Level.FINE)) {LOGGER.log(Level.FINE, "Skipped the preamble. buffer len={0}", len);}
327 }
328
329 private static byte[] getBytes(String s) {
330 char [] chars= s.toCharArray();
331 int size = chars.length;
332 byte[] bytes = new byte[size];
333
334 for (int i = 0; i < size;) {
335 bytes[i] = (byte) chars[i++];
336 }
337 return bytes;
338 }
339
340 /**
341 * Boyer-Moore search method. Copied from java.util.regex.Pattern.java
342 *
343 * Pre calculates arrays needed to generate the bad character
344 * shift and the good suffix shift. Only the last seven bits
345 * are used to see if chars match; This keeps the tables small
346 * and covers the heavily used ASCII range, but occasionally
347 * results in an aliased match for the bad character shift.
348 */
349 private void compileBoundaryPattern() {
350 int i, j;
351
352 // Precalculate part of the bad character shift
353 // It is a table for where in the pattern each
354 // lower 7-bit value occurs
355 for (i = 0; i < bndbytes.length; i++) {
356 bcs[bndbytes[i]&0x7F] = i + 1;
400 // Loop over pattern from right to left
401 for (int j = bndbytes.length - 1; j >= 0; j--) {
402 byte ch = mybuf[off+j];
403 if (ch != bndbytes[j]) {
404 // Shift search to the right by the maximum of the
405 // bad character shift and the good suffix shift
406 off += Math.max(j + 1 - bcs[ch&0x7F], gss[j]);
407 continue NEXT;
408 }
409 }
410 // Entire pattern matched starting at off
411 return off;
412 }
413 return -1;
414 }
415
416 /**
417 * Fills the remaining buf to the full capacity
418 */
419 private void fillBuf() {
420 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "Before fillBuf() buffer len={0}", len);}
421 assert !eof;
422 while(len < buf.length) {
423 int read;
424 try {
425 read = in.read(buf, len, buf.length-len);
426 } catch(IOException ioe) {
427 throw new MIMEParsingException(ioe);
428 }
429 if (read == -1) {
430 eof = true;
431 try {
432 if (LOGGER.isLoggable(Level.FINE)) {LOGGER.fine("Closing the input stream.");}
433 in.close();
434 } catch(IOException ioe) {
435 throw new MIMEParsingException(ioe);
436 }
437 break;
438 } else {
439 len += read;
440 }
441 }
442 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "After fillBuf() buffer len={0}", len);}
443 }
444
445 private void doubleBuf() {
446 byte[] temp = new byte[2*len];
447 System.arraycopy(buf, 0, temp, 0, len);
448 buf = temp;
449 if (!eof) {
450 fillBuf();
451 }
452 }
453
454 class LineInputStream {
455 private int offset;
456
457 /*
458 * Read a line containing only ASCII characters from the input
459 * stream. A line is terminated by a CR or NL or CR-NL sequence.
460 * A common error is a CR-CR-NL sequence, which will also terminate
461 * a line.
462 * The line terminator is not returned as part of the returned
475 break;
476 }
477 if (offset+hdrLen+1 == len) {
478 doubleBuf();
479 }
480 if (offset+hdrLen+1 >= len) { // No more data in the stream
481 assert eof;
482 return null;
483 }
484 if (buf[offset+hdrLen] == '\r' && buf[offset+hdrLen+1] == '\n') {
485 lwsp = 2;
486 break;
487 }
488 ++hdrLen;
489 }
490 if (hdrLen == 0) {
491 adjustBuf(offset+lwsp, len-offset-lwsp);
492 return null;
493 }
494
495 String hdr = new String(buf, offset, hdrLen, HEADER_ENCODING);
496 offset += hdrLen+lwsp;
497 return hdr;
498 }
499
500 }
501
502 }
|