1 /*
2 * Copyright (c) 2004, 2018, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package com.sun.tools.javac.parser;
27
28 import com.sun.tools.javac.parser.Tokens.Comment;
29 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
30 import com.sun.tools.javac.util.*;
31
32 import java.nio.*;
33 import java.util.regex.Pattern;
34
35 import static com.sun.tools.javac.util.LayoutCharacters.*;
36
37 /** An extension to the base lexical analyzer that captures
38 * and processes the contents of doc comments. It does so by
39 * translating Unicode escape sequences and by stripping the
40 * leading whitespace and stars from each line of the comment.
41 *
42 * <p><b>This is NOT part of any supported API.
43 * If you write code that depends on this, you do so at your own risk.
44 * This code and its internal interfaces are subject to change or
45 * deletion without notice.</b>
46 */
47 public class JavadocTokenizer extends JavaTokenizer {
48
/**
 * Creates a Javadoc-aware tokenizer that reads from a character buffer.
 * The buffer must implement array() and compact(), and remaining() must
 * be less than limit().
 *
 * @param fac    the scanner factory handed through to the base tokenizer
 * @param buffer the character buffer holding the input
 */
protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) {
    // All of the set-up work is done by the base tokenizer.
    super(fac, buffer);
}
55
/**
 * Creates a Javadoc-aware tokenizer that reads from a character array.
 * The array must have at least a single character of extra space.
 *
 * @param fac         the scanner factory handed through to the base tokenizer
 * @param input       the array holding the input characters
 * @param inputLength the length value handed through to the base tokenizer
 */
protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {
    // All of the set-up work is done by the base tokenizer.
    super(fac, input, inputLength);
}
62
63 @Override
64 protected Comment processComment(int pos, int endPos, CommentStyle style) {
65 char[] buf = reader.getRawCharacters(pos, endPos);
66 return new JavadocComment(new DocReader(fac, buf, buf.length, pos), style);
67 }
68
69 /**
70 * This is a specialized version of UnicodeReader that keeps track of the
71 * column position within a given character stream (used for Javadoc processing),
72 * and which builds a table for mapping positions in the comment string to
73 * positions in the source file.
74 */
75 static class DocReader extends UnicodeReader {
76
/**
 * Running column counter. It is reset to 0 at a line terminator, moved
 * to the next multiple of TabInc on a tab, and incremented by one for
 * any other character (see {@link #scanChar()}).
 */
77 int col;
/**
 * Offset that is added to the buffer index {@code bp} whenever a source
 * position is recorded in {@link #pbuf}.
 */
78 int startPos;
79
80 /**
81 * A buffer for building a table for mapping positions in {@link #sbuf}
82 * to positions in the source buffer.
83 *
84 * The array is organized as a series of pairs of integers: the first
85 * number in each pair specifies a position in the comment text,
86 * the second number in each pair specifies the corresponding position
87 * in the source buffer. The pairs are sorted in ascending order.
88 *
89 * Since the mapping function is generally continuous, with successive
90 * positions in the string corresponding to successive positions in the
91 * source buffer, the table only needs to record discontinuities in
92 * the mapping. The values of intermediate positions can be inferred.
93 *
94 * Discontinuities may occur in a number of places: when a newline
95 * is followed by whitespace and asterisks (which are ignored),
96 * when a tab is expanded into spaces, and when unicode escapes
97 * are used in the source buffer.
98 *
99 * Thus, to find the source position of any position, p, in the comment
100 * string, find the index, i, of the pair whose string offset
101 * ({@code pbuf[i] }) is closest to but not greater than p. Then,
102 * {@code sourcePos(p) = pbuf[i+1] + (p - pbuf[i]) }.
103 */
104 int[] pbuf = new int[128];
105
106 /**
107 * The index of the next empty slot in the pbuf buffer.
108 */
109 int pp = 0;
110
111 /** The buffer index of the last double backslash sequence
112 */
113 private int doubleBackslashBp = -1;
114
/**
 * Creates a reader over the given comment characters.
 *
 * @param fac         scanner factory, passed to the superclass
 * @param input       characters of the comment, passed to the superclass
 * @param inputLength length value, passed to the superclass
 * @param startPos    offset added to {@code bp} when source positions
 *                    are recorded in {@link #pbuf}
 */
115 DocReader(ScannerFactory fac, char[] input, int inputLength, int startPos) {
116 super(fac, input, inputLength);
117 this.startPos = startPos;
118 }
119
/**
 * Translates a Unicode escape at the current position, keeping
 * {@link #col} in step with every change to {@code bp}.
 *
 * Nothing happens unless the current character is a backslash that was
 * not itself produced by a previous conversion
 * ({@code unicodeConversionBp != bp}). If the backslash is followed by
 * one or more 'u' characters and four hex digits, {@code ch} becomes the
 * decoded character and {@code unicodeConversionBp} is set to the index
 * of the last digit. If the backslash is not followed by 'u', the reader
 * steps back so that {@code ch} is the backslash again.
 */
120 @Override
121 protected void convertUnicode() {
122 if (ch == '\\' && unicodeConversionBp != bp) {
123 bp++; ch = buf[bp]; col++;
124 if (ch == 'u') {
// Any number of 'u' characters may follow the backslash.
125 do {
126 bp++; ch = buf[bp]; col++;
127 } while (ch == 'u');
// bp is on the first digit; the fourth digit is at bp + 3.
128 int limit = bp + 3;
129 if (limit < buflen) {
// NOTE(review): digit(...) appears to yield a negative value for a
// character that is not a hex digit - confirm in UnicodeReader.
130 int d = digit(bp, 16);
131 int code = d;
132 while (bp < limit && d >= 0) {
133 bp++; ch = buf[bp]; col++;
134 d = digit(bp, 16);
135 code = (code << 4) + d;
136 }
137 if (d >= 0) {
138 ch = (char)code;
139 unicodeConversionBp = bp;
140 return;
141 }
142 }
143 // "illegal.Unicode.esc", reported by base scanner
144 } else {
// Not an escape: undo the look-ahead.
145 bp--;
146 ch = '\\';
147 col--;
148 }
149 }
150 }
151
/**
 * Reads the next character of a comment. After {@link #scanChar()}, a
 * backslash that is followed by a second backslash (and for which
 * {@code isUnicode()} is false) is consumed as a pair: the reader skips
 * to the second backslash and remembers its index in
 * {@link #doubleBackslashBp}. Any other backslash is handed to
 * {@link #convertUnicode()}.
 */
152 @Override
153 protected void scanCommentChar() {
154 scanChar();
155 if (ch == '\\') {
// NOTE(review): peekChar() and isUnicode() are inherited; presumably
// they look at the next character and report whether ch came from a
// Unicode escape - confirm in UnicodeReader.
156 if (peekChar() == '\\' && !isUnicode()) {
157 bp++; col++;
158 doubleBackslashBp = bp;
159 } else {
160 convertUnicode();
161 }
162 }
163 }
164
/**
 * Advances to the next character in the buffer and updates {@link #col}
 * according to the kind of character read.
 */
165 @Override
166 protected void scanChar() {
167 bp++;
168 ch = buf[bp];
169 switch (ch) {
170 case '\r': // return
171 col = 0;
172 break;
173 case '\n': // newline
// For CR LF the column was already reset when the CR was read.
174 if (bp == 0 || buf[bp-1] != '\r') {
175 col = 0;
176 }
177 break;
178 case '\t': // tab
// Move to the next multiple of TabInc.
179 col = (col / TabInc * TabInc) + TabInc;
180 break;
181 case '\\': // possible Unicode
182 col++;
183 convertUnicode();
184 break;
185 default:
186 col++;
187 break;
188 }
189 }
190
/**
 * Records a position-map entry in {@link #pbuf} when needed, then
 * delegates to the superclass to store the character.
 *
 * @param ch   the character to store
 * @param scan passed unchanged to the superclass
 */
191 @Override
192 public void putChar(char ch, boolean scan) {
193 // At this point, bp is the position of the current character in buf,
194 // and sp is the position in sbuf where this character will be put.
195 // Record a new entry in pbuf if pbuf is empty or if sp and its
196 // corresponding source position are not equidistant from the
197 // corresponding values in the latest entry in the pbuf array.
198 // (i.e. there is a discontinuity in the map function.)
199 if ((pp == 0)
200 || (sp - pbuf[pp - 2] != (startPos + bp) - pbuf[pp - 1])) {
// Double the table when there is no room left for another pair.
201 if (pp + 1 >= pbuf.length) {
202 int[] new_pbuf = new int[pbuf.length * 2];
203 System.arraycopy(pbuf, 0, new_pbuf, 0, pbuf.length);
204 pbuf = new_pbuf;
205 }
206 pbuf[pp] = sp;
207 pbuf[pp + 1] = startPos + bp;
208 pp += 2;
209 }
210 super.putChar(ch, scan);
211 }
212
213 /** Whether the ch represents a sequence of two backslashes. */
214 boolean isDoubleBackslash() {
215 return doubleBackslashBp == bp;
216 }
217
218
219 }
220
/**
 * A comment whose text is produced by stripping the comment delimiters
 * and the leading whitespace and asterisks of each line from a Javadoc
 * comment, and which can map positions in that text back to source
 * positions using the table built by its {@link DocReader}.
 */
221 protected static class JavadocComment extends JavaTokenizer.BasicComment<DocReader> {
222
223 /**
224 * Translated and stripped contents of doc comment
225 */
226 private String docComment = null;
/**
 * Copy of the used part of the reader's position table (pairs of comment
 * offset and source position); stays null when the extracted text is empty.
 */
227 private int[] docPosns = null;
228
/**
 * Creates a comment over the given reader.
 *
 * @param reader reader positioned over the characters of the comment
 * @param cs     the style of the comment
 */
229 JavadocComment(DocReader reader, CommentStyle cs) {
230 super(reader, cs);
231 }
232
/**
 * Returns the extracted comment text, scanning the comment first if that
 * has not happened yet and the comment style is JAVADOC. The result is
 * null if no scan has produced a value.
 */
233 @Override
234 public String getText() {
235 if (!scanned && cs == CommentStyle.JAVADOC) {
236 scanDocComment();
237 }
238 return docComment;
239 }
240
/**
 * Maps an offset in the extracted comment text to a source position.
 *
 * @param pos offset into the text returned by {@link #getText()}
 * @return the source position; {@code Position.NOPOS} if {@code pos} is
 *         {@code Position.NOPOS} or no position table was recorded
 * @throws StringIndexOutOfBoundsException if {@code pos} is negative or
 *         greater than the length of the comment text
 */
241 @Override
242 public int getSourcePos(int pos) {
243 // Binary search to find the entry for which the string index is
244 // less than pos. Since docPosns is a list of pairs of integers
245 // we must make sure the index is always even.
246 // If we find an exact match for pos, the other item in the pair
247 // gives the source pos; otherwise, compute the source position
248 // relative to the best match found in the array.
249 if (pos == Position.NOPOS)
250 return Position.NOPOS;
251 if (pos < 0 || pos > docComment.length())
252 throw new StringIndexOutOfBoundsException(String.valueOf(pos));
253 if (docPosns == null)
254 return Position.NOPOS;
// start and end are indices into docPosns (two slots per pair).
255 int start = 0;
256 int end = docPosns.length;
257 while (start < end - 2) {
258 // find an even index midway between start and end
259 int index = ((start + end) / 4) * 2;
260 if (docPosns[index] < pos)
261 start = index;
262 else if (docPosns[index] == pos)
263 return docPosns[index + 1];
264 else
265 end = index;
266 }
267 return docPosns[start + 1] + (pos - docPosns[start]);
268 }
269
/**
 * Scans the comment and stores the extracted text in {@link #docComment}
 * and the position table in {@link #docPosns}.
 *
 * The opening slash and stars are skipped, then each line is processed:
 * leading blanks, tabs and form feeds are skipped, a run of leading
 * stars is dropped, and the rest of the line is copied to the reader's
 * buffer. Trailing stars are removed from the collected text. On exit
 * the comment is marked as scanned, the reader is released, and
 * {@code deprecatedFlag} is set if the text matches
 * {@link #DEPRECATED_PATTERN}.
 */
270 @Override
271 @SuppressWarnings("fallthrough")
272 protected void scanDocComment() {
273 try {
274 boolean firstLine = true;
275
276 // Skip over first slash
277 comment_reader.scanCommentChar();
278 // Skip over first star
279 comment_reader.scanCommentChar();
280
281 // consume any number of stars
282 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
283 comment_reader.scanCommentChar();
284 }
285 // is the comment in the form /**/, /***/, /****/, etc. ?
286 if (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '/') {
287 docComment = "";
288 return;
289 }
290
291 // skip a newline on the first line of the comment.
292 if (comment_reader.bp < comment_reader.buflen) {
293 if (comment_reader.ch == LF) {
294 comment_reader.scanCommentChar();
295 firstLine = false;
296 } else if (comment_reader.ch == CR) {
297 comment_reader.scanCommentChar();
298 if (comment_reader.ch == LF) {
299 comment_reader.scanCommentChar();
300 firstLine = false;
301 }
302 }
303 }
304
305 outerLoop:
306
307 // The outerLoop processes the doc comment, looping once
308 // for each line. For each line, it first strips off
309 // whitespace, then it consumes any stars, then it
310 // puts the rest of the line into our buffer.
311 while (comment_reader.bp < comment_reader.buflen) {
// Remember where the line starts so the skipped whitespace can be
// restored if the line turns out not to begin with a star.
312 int begin_bp = comment_reader.bp;
313 char begin_ch = comment_reader.ch;
314 // The wsLoop consumes whitespace from the beginning
315 // of each line.
316 wsLoop:
317
318 while (comment_reader.bp < comment_reader.buflen) {
319 switch(comment_reader.ch) {
320 case ' ':
321 comment_reader.scanCommentChar();
322 break;
323 case '\t':
324 comment_reader.col = ((comment_reader.col - 1) / TabInc * TabInc) + TabInc;
325 comment_reader.scanCommentChar();
326 break;
327 case FF:
328 comment_reader.col = 0;
329 comment_reader.scanCommentChar();
330 break;
331 // Treat newline at beginning of line (blank line, no star)
332 // as comment text. Old Javadoc compatibility requires this.
333 /*---------------------------------*
334 case CR: // (Spec 3.4)
335 doc_reader.scanCommentChar();
336 if (ch == LF) {
337 col = 0;
338 doc_reader.scanCommentChar();
339 }
340 break;
341 case LF: // (Spec 3.4)
342 doc_reader.scanCommentChar();
343 break;
344 *---------------------------------*/
345 default:
346 // we've seen something that isn't whitespace;
347 // jump out.
348 break wsLoop;
349 }
350 }
351
352 // Are there stars here? If so, consume them all
353 // and check for the end of comment.
354 if (comment_reader.ch == '*') {
355 // skip all of the stars
356 do {
357 comment_reader.scanCommentChar();
358 } while (comment_reader.ch == '*');
359
360 // check for the closing slash.
361 if (comment_reader.ch == '/') {
362 // We're done with the doc comment
363 // scanChar() and breakout.
364 break outerLoop;
365 }
366 } else if (! firstLine) {
367 // The current line does not begin with a '*' so we will
368 // treat it as comment
369 comment_reader.bp = begin_bp;
370 comment_reader.ch = begin_ch;
371 }
372 // The textLoop processes the rest of the characters
373 // on the line, adding them to our buffer.
374 textLoop:
375 while (comment_reader.bp < comment_reader.buflen) {
376 switch (comment_reader.ch) {
377 case '*':
378 // Is this just a star? Or is this the
379 // end of a comment?
380 comment_reader.scanCommentChar();
381 if (comment_reader.ch == '/') {
382 // This is the end of the comment,
383 // set ch and return our buffer.
384 break outerLoop;
385 }
386 // This is just an ordinary star. Add it to
387 // the buffer.
388 comment_reader.putChar('*', false);
389 break;
390 case '\\':
391 comment_reader.putChar('\\', false);
392 // If a double backslash was found, write two
393 if (comment_reader.isDoubleBackslash()) {
394 comment_reader.putChar('\\', false);
395 }
396 comment_reader.scanCommentChar();
397 break;
398 case ' ':
399 case '\t':
400 comment_reader.putChar(comment_reader.ch, false);
401 comment_reader.scanCommentChar();
402 break;
403 case FF:
404 comment_reader.scanCommentChar();
405 break textLoop; // treat as end of line
406 case CR: // (Spec 3.4)
407 comment_reader.scanCommentChar();
408 if (comment_reader.ch != LF) {
409 // Canonicalize CR-only line terminator to LF
410 comment_reader.putChar((char)LF, false);
411 break textLoop;
412 }
413 /* fall through to LF case */
414 case LF: // (Spec 3.4)
415 // We've seen a newline. Add it to our
416 // buffer and break out of this loop,
417 // starting fresh on a new line.
418 comment_reader.putChar(comment_reader.ch, false);
419 comment_reader.scanCommentChar();
420 break textLoop;
421 default:
422 // Add the character to our buffer.
423 comment_reader.putChar(comment_reader.ch, false);
424 comment_reader.scanCommentChar();
425 }
426 } // end textLoop
427 firstLine = false;
428 } // end outerLoop
429
430 if (comment_reader.sp > 0) {
// Drop any stars at the very end of the collected text.
431 int i = comment_reader.sp - 1;
432 trailLoop:
433 while (i > -1) {
434 switch (comment_reader.sbuf[i]) {
435 case '*':
436 i--;
437 break;
438 default:
439 break trailLoop;
440 }
441 }
442 comment_reader.sp = i + 1;
443
444 // Store the text of the doc comment
445 docComment = comment_reader.chars();
// Keep only the part of the position table that was filled in.
446 docPosns = new int[comment_reader.pp];
447 System.arraycopy(comment_reader.pbuf, 0, docPosns, 0, docPosns.length);
448 } else {
449 docComment = "";
450 }
451 } finally {
// Runs on every exit path, including the early return for /**/.
452 scanned = true;
453 comment_reader = null;
454 if (docComment != null &&
455 DEPRECATED_PATTERN.matcher(docComment).matches()) {
456 deprecatedFlag = true;
457 }
458 }
459 }
460 //where:
/**
 * Matches comment text in which some line consists of optional
 * whitespace followed by {@code @deprecated} and then a space or the
 * end of the line.
 */
461 private static final Pattern DEPRECATED_PATTERN =
462 Pattern.compile("(?sm).*^\\s*@deprecated( |$).*");
463
464 }
465
466 @Override
467 public Position.LineMap getLineMap() {
468 char[] buf = reader.getRawCharacters();
469 return Position.makeLineMap(buf, buf.length, true);
470 }
471 }
|
1 /*
2 * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package com.sun.tools.javac.parser;
27
28 import com.sun.tools.javac.parser.Tokens.Comment;
29 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
30 import com.sun.tools.javac.util.*;
31
32 import java.nio.CharBuffer;
33 import java.util.Arrays;
34 import java.util.regex.Pattern;
35
36 /**
37 * An extension to the base lexical analyzer (JavaTokenizer) that
38 * captures and processes the contents of doc comments. It does
39 * so by stripping the leading whitespace and comment starts from
40 * each line of the Javadoc comment.
41 *
42 * <p><b>This is NOT part of any supported API.
43 * If you write code that depends on this, you do so at your own risk.
44 * This code and its internal interfaces are subject to change or
45 * deletion without notice.</b>
46 */
47 public class JavadocTokenizer extends JavaTokenizer {
/**
 * The scanner factory this tokenizer was created with; handed on to the
 * comments created by this tokenizer.
 */
final ScannerFactory fac;

/**
 * Creates a tokenizer that reads from a character buffer. The buffer
 * content would typically be a Javadoc comment extracted by
 * JavaTokenizer.
 *
 * @param fac the factory which created this Scanner
 * @param cb  the character buffer to read from
 */
protected JavadocTokenizer(ScannerFactory fac, CharBuffer cb) {
    super(fac, cb);
    // Keep the factory so that comments can be created later.
    this.fac = fac;
}
65
/**
 * Creates a tokenizer that reads from a character array. The array
 * content would typically be a Javadoc comment extracted by
 * JavaTokenizer.
 *
 * @param fac    the factory which created this Scanner
 * @param array  the character array to read from
 * @param length length of the meaningful content in the array
 */
protected JavadocTokenizer(ScannerFactory fac, char[] array, int length) {
    super(fac, array, length);
    // Keep the factory so that comments can be created later.
    this.fac = fac;
}
79
80 @Override
81 protected Comment processComment(int pos, int endPos, CommentStyle style) {
82 char[] buf = getRawCharacters(pos, endPos);
83 return new JavadocComment(style, fac, buf, pos);
84 }
85
86 /**
87 * An extension of BasicComment used to extract the relevant portion
88 * of a Javadoc comment.
89 */
90 protected static class JavadocComment extends BasicComment {
91 /**
92 * Pattern used to detect a well formed @deprecated tag in a JaavDoc
93 * comment.
94 */
95 private static final Pattern DEPRECATED_PATTERN =
96 Pattern.compile("(?sm).*^\\s*@deprecated( |$).*");
97
98 /**
99 * The relevant portion of the comment that is of interest to Javadoc.
100 * Produced by invoking scanDocComment.
101 */
102 private String docComment = null;
103
104 /**
105 * StringBuilder used to extract the relevant portion of the Javadoc comment.
106 */
107 private StringBuilder sb;
108
109 /**
110 * Map used to map the extracted Javadoc comment's character positions back to
111 * the original source.
112 */
113 OffsetMap offsetMap = new OffsetMap();
114
115 JavadocComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) {
116 super( cs, sf, array, offset);
117 this.sb = new StringBuilder();
118 }
119
120 /**
121 * Add a character to the extraction buffer.
122 *
123 * @param ch character to add.
124 */
125 protected void put(char ch) {
126 offsetMap.add(sb.length(), offsetPosition());
127 sb.append(ch);
128 }
129
130 /**
131 * Add a code point to the extraction buffer.
132 *
133 * @param codePoint code point to add.
134 */
135 protected void putCodePoint(int codePoint) {
136 offsetMap.add(sb.length(), offsetPosition());
137 sb.appendCodePoint(codePoint);
138 }
139
140 /**
141 * Add current character or code point to the extraction buffer.
142 */
143 protected void put() {
144 if (isSurrogate()) {
145 putCodePoint(getCodepoint());
146 } else {
147 put(get());
148 }
149 }
150
151 @Override
152 public String getText() {
153 if (!scanned && cs == CommentStyle.JAVADOC) {
154 scanDocComment();
155 }
156 return docComment;
157 }
158
159 @Override
160 public int getSourcePos(int pos) {
161 if (pos == Position.NOPOS) {
162 return Position.NOPOS;
163 }
164
165 if (pos < 0 || pos > docComment.length()) {
166 throw new StringIndexOutOfBoundsException(String.valueOf(pos));
167 }
168
169 return offsetMap.getSourcePos(pos);
170 }
171
172 @Override
173 protected void scanDocComment() {
174 try {
175 boolean firstLine = true;
176
177 // Skip over /*
178 accept("/*");
179
180 // Consume any number of stars
181 skip('*');
182
183 // Is the comment in the form /**/, /***/, /****/, etc. ?
184 if (is('/')) {
185 docComment = "";
186 return;
187 }
188
189 // Skip line terminator on the first line of the comment.
190 if (isOneOf('\n', '\r')) {
191 accept('\r');
192 accept('\n');
193 firstLine = false;
194 }
195
196 outerLoop:
197 // The outerLoop processes the doc comment, looping once
198 // for each line. For each line, it first strips off
199 // whitespace, then it consumes any stars, then it
200 // puts the rest of the line into the extraction buffer.
201 while (!isEOF()) {
202 int begin_pos = position();
203 // Consume whitespace from the beginning of each line.
204 skipWhitespace();
205 // Are there stars here? If so, consume them all
206 // and check for the end of comment.
207 if (is('*')) {
208 // skip all of the stars
209 skip('*');
210
211 // check for the closing slash.
212 if (accept('/')) {
213 // We're done with the Javadoc comment
214 break outerLoop;
215 }
216 } else if (!firstLine) {
217 // The current line does not begin with a '*' so we will
218 // treat it as comment
219 reset(begin_pos);
220 }
221
222 textLoop:
223 // The textLoop processes the rest of the characters
224 // on the line, adding them to the extraction buffer.
225 while (!isEOF()) {
226 if (accept("*/")) {
227 // This is the end of the comment, return
228 // the contents of the extraction buffer.
229 break outerLoop;
230 } else if (isOneOf('\n', '\r')) {
231 // We've seen a newline. Add it to our
232 // buffer and break out of this loop,
233 // starting fresh on a new line.
234 put('\n');
235 accept('\r');
236 accept('\n');
237 break textLoop;
238 } else if (is('\f')){
239 next();
240 break textLoop; // treat as end of line
241
242 } else {
243 // Add the character to our buffer.
244 put();
245 next();
246 }
247 } // end textLoop
248 firstLine = false;
249 } // end outerLoop
250
251 // If extraction buffer is not empty.
252 if (sb.length() > 0) {
253 // Remove trailing asterisks.
254 int i = sb.length() - 1;
255 while (i > -1 && sb.charAt(i) == '*') {
256 i--;
257 }
258 sb.setLength(i + 1) ;
259
260 // Store the text of the doc comment
261 docComment = sb.toString();
262 } else {
263 docComment = "";
264 }
265 } finally {
266 scanned = true;
267
268 // Check if comment contains @deprecated comment.
269 if (docComment != null && DEPRECATED_PATTERN.matcher(docComment).matches()) {
270 deprecatedFlag = true;
271 }
272 }
273 }
274 }
275
276 /**
277 * Build a map for translating between line numbers and positions in the input.
278 * Overridden to expand tabs.
279 *
280 * @return a LineMap
281 */
282 @Override
283 public Position.LineMap getLineMap() {
284 char[] buf = getRawCharacters();
285 return Position.makeLineMap(buf, buf.length, true);
286 }
287
288 /**
289 * Build an int table to mapping positions in extracted Javadoc comment
290 * to positions in the JavaTokenizer source buffer.
291 *
292 * The array is organized as a series of pairs of integers: the first
293 * number in each pair specifies a position in the comment text,
294 * the second number in each pair specifies the corresponding position
295 * in the source buffer. The pairs are sorted in ascending order.
296 *
297 * Since the mapping function is generally continuous, with successive
298 * positions in the string corresponding to successive positions in the
299 * source buffer, the table only needs to record discontinuities in
300 * the mapping. The values of intermediate positions can be inferred.
301 *
302 * Discontinuities may occur in a number of places: when a newline
303 * is followed by whitespace and asterisks (which are ignored),
304 * when a tab is expanded into spaces, and when unicode escapes
305 * are used in the source buffer.
306 *
307 * Thus, to find the source position of any position, p, in the comment
308 * string, find the index, i, of the pair whose string offset
309 * ({@code map[i + SB_OFFSET] }) is closest to but not greater than p. Then,
310 * {@code sourcePos(p) = map[i + POS_OFFSET] + (p - map[i + SB_OFFSET]) }.
311 */
312 static class OffsetMap {
313 /**
314 * map entry offset for comment offset member of pair.
315 */
316 private static final int SB_OFFSET = 0;
317
318 /**
319 * map entry offset of input offset member of pair.
320 */
321 private static final int POS_OFFSET = 1;
322
323 /**
324 * Number of elements in each entry.
325 */
326 private static final int NOFFSETS = 2;
327
328 /**
329 * Array storing entries in map.
330 */
331 private int[] map;
332
333 /**
334 * Logical size of map (number of valid entries.)
335 */
336 private int size;
337
338 /**
339 * Constructor.
340 */
341 OffsetMap() {
342 this.map = new int[128];
343 this.size = 0;
344 }
345
346 /**
347 * Returns true if it is worthwhile adding the entry pair to the map. That is
348 * if there is a change in relative offset.
349 *
350 * @param sbOffset comment offset member of pair.
351 * @param posOffet input offset member of pair.
352 *
353 * @return true if it is worthwhile adding the entry pair.
354 */
355 boolean shouldAdd(int sbOffset, int posOffet) {
356 return sbOffset - lastSBOffset() != posOffet - lastPosOffset();
357 }
358
359 /**
360 * Adds entry pair if worthwhile.
361 *
362 * @param sbOffset comment offset member of pair.
363 * @param posOffet input offset member of pair.
364 */
365 void add(int sbOffset, int posOffet) {
366 if (size == 0 || shouldAdd(sbOffset, posOffet)) {
367 ensure(NOFFSETS);
368 map[size + SB_OFFSET] = sbOffset;
369 map[size + POS_OFFSET] = posOffet;
370 size += NOFFSETS;
371 }
372 }
373
374 /**
375 * Returns the previous comment offset.
376 *
377 * @return the previous comment offset.
378 */
379 private int lastSBOffset() {
380 return size == 0 ? 0 : map[size - NOFFSETS + SB_OFFSET];
381 }
382
383 /**
384 * Returns the previous input offset.
385 *
386 * @return the previous input offset.
387 */
388 private int lastPosOffset() {
389 return size == 0 ? 0 : map[size - NOFFSETS + POS_OFFSET];
390 }
391
392 /**
393 * Ensures there is enough space for a new entry.
394 *
395 * @param need number of array slots needed.
396 */
397 private void ensure(int need) {
398 need += size;
399 int grow = map.length;
400
401 while (need > grow) {
402 grow <<= 1;
403 }
404
405 // Handle overflow.
406 if (grow < map.length) {
407 throw new IndexOutOfBoundsException();
408 } else if (grow != map.length) {
409 map = Arrays.copyOf(map, grow);
410 }
411 }
412
413 /**
414 * Binary search to find the entry for which the string index is less
415 * than pos. Since the map is a list of pairs of integers we must make
416 * sure the index is always NOFFSETS scaled. If we find an exact match
417 * for pos, the other item in the pair gives the source pos; otherwise,
418 * compute the source position relative to the best match found in the
419 * array.
420 */
421 int getSourcePos(int pos) {
422 if (size == 0) {
423 return Position.NOPOS;
424 }
425
426 int start = 0;
427 int end = size / NOFFSETS;
428
429 while (start < end - NOFFSETS) {
430 // find an index midway between start and end
431 int index = (start + end) / 2;
432 int indexScaled = index * NOFFSETS;
433
434 if (map[indexScaled + SB_OFFSET] < pos) {
435 start = index;
436 } else if (map[indexScaled + SB_OFFSET] == pos) {
437 return map[indexScaled + POS_OFFSET];
438 } else {
439 end = index;
440 }
441 }
442
443 int startScaled = start * NOFFSETS;
444
445 return map[startScaled + POS_OFFSET] + (pos - map[startScaled + SB_OFFSET]);
446 }
447 }
448 }
|