1 /* 2 * Copyright (c) 2004, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.tools.javac.parser; 27 28 import com.sun.tools.javac.parser.Tokens.Comment; 29 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; 30 import com.sun.tools.javac.util.*; 31 32 import java.nio.*; 33 import java.util.regex.Pattern; 34 35 import static com.sun.tools.javac.util.LayoutCharacters.*; 36 37 /** An extension to the base lexical analyzer that captures 38 * and processes the contents of doc comments. It does so by 39 * translating Unicode escape sequences and by stripping the 40 * leading whitespace and starts from each line of the comment. 41 * 42 * <p><b>This is NOT part of any supported API. 43 * If you write code that depends on this, you do so at your own risk. 44 * This code and its internal interfaces are subject to change or 45 * deletion without notice.</b> 46 */ 47 public class JavadocTokenizer extends JavaTokenizer { 48 49 /** Create a scanner from the input buffer. buffer must implement 50 * array() and compact(), and remaining() must be less than limit(). 51 */ 52 protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) { 53 super(fac, buffer); 54 } 55 56 /** Create a scanner from the input array. The array must have at 57 * least a single character of extra space. 58 */ 59 protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) { 60 super(fac, input, inputLength); 61 } 62 63 @Override 64 protected Comment processComment(int pos, int endPos, CommentStyle style) { 65 char[] buf = reader.getRawCharacters(pos, endPos); 66 return new JavadocComment(new DocReader(fac, buf, buf.length, pos), style); 67 } 68 69 /** 70 * This is a specialized version of UnicodeReader that keeps track of the 71 * column position within a given character stream (used for Javadoc processing), 72 * and which builds a table for mapping positions in the comment string to 73 * positions in the source file. 74 */ 75 static class DocReader extends UnicodeReader { 76 77 int col; 78 int startPos; 79 80 /** 81 * A buffer for building a table for mapping positions in {@link #sbuf} 82 * to positions in the source buffer. 83 * 84 * The array is organized as a series of pairs of integers: the first 85 * number in each pair specifies a position in the comment text, 86 * the second number in each pair specifies the corresponding position 87 * in the source buffer. The pairs are sorted in ascending order. 88 * 89 * Since the mapping function is generally continuous, with successive 90 * positions in the string corresponding to successive positions in the 91 * source buffer, the table only needs to record discontinuities in 92 * the mapping. The values of intermediate positions can be inferred. 93 * 94 * Discontinuities may occur in a number of places: when a newline 95 * is followed by whitespace and asterisks (which are ignored), 96 * when a tab is expanded into spaces, and when unicode escapes 97 * are used in the source buffer. 98 * 99 * Thus, to find the source position of any position, p, in the comment 100 * string, find the index, i, of the pair whose string offset 101 * ({@code pbuf[i] }) is closest to but not greater than p. Then, 102 * {@code sourcePos(p) = pbuf[i+1] + (p - pbuf[i]) }. 103 */ 104 int[] pbuf = new int[128]; 105 106 /** 107 * The index of the next empty slot in the pbuf buffer. 108 */ 109 int pp = 0; 110 111 /** The buffer index of the last double backslash sequence 112 */ 113 private int doubleBackslashBp = -1; 114 115 DocReader(ScannerFactory fac, char[] input, int inputLength, int startPos) { 116 super(fac, input, inputLength); 117 this.startPos = startPos; 118 } 119 120 @Override 121 protected void convertUnicode() { 122 if (ch == '\\' && unicodeConversionBp != bp) { 123 bp++; ch = buf[bp]; col++; 124 if (ch == 'u') { 125 do { 126 bp++; ch = buf[bp]; col++; 127 } while (ch == 'u'); 128 int limit = bp + 3; 129 if (limit < buflen) { 130 int d = digit(bp, 16); 131 int code = d; 132 while (bp < limit && d >= 0) { 133 bp++; ch = buf[bp]; col++; 134 d = digit(bp, 16); 135 code = (code << 4) + d; 136 } 137 if (d >= 0) { 138 ch = (char)code; 139 unicodeConversionBp = bp; 140 return; 141 } 142 } 143 // "illegal.Unicode.esc", reported by base scanner 144 } else { 145 bp--; 146 ch = '\\'; 147 col--; 148 } 149 } 150 } 151 152 @Override 153 protected void scanCommentChar() { 154 scanChar(); 155 if (ch == '\\') { 156 if (peekChar() == '\\' && !isUnicode()) { 157 bp++; col++; 158 doubleBackslashBp = bp; 159 } else { 160 convertUnicode(); 161 } 162 } 163 } 164 165 @Override 166 protected void scanChar() { 167 bp++; 168 ch = buf[bp]; 169 switch (ch) { 170 case '\r': // return 171 col = 0; 172 break; 173 case '\n': // newline 174 if (bp == 0 || buf[bp-1] != '\r') { 175 col = 0; 176 } 177 break; 178 case '\t': // tab 179 col = (col / TabInc * TabInc) + TabInc; 180 break; 181 case '\\': // possible Unicode 182 col++; 183 convertUnicode(); 184 break; 185 default: 186 col++; 187 break; 188 } 189 } 190 191 @Override 192 public void putChar(char ch, boolean scan) { 193 // At this point, bp is the position of the current character in buf, 194 // and sp is the position in sbuf where this character will be put. 195 // Record a new entry in pbuf if pbuf is empty or if sp and its 196 // corresponding source position are not equidistant from the 197 // corresponding values in the latest entry in the pbuf array. 198 // (i.e. there is a discontinuity in the map function.) 199 if ((pp == 0) 200 || (sp - pbuf[pp - 2] != (startPos + bp) - pbuf[pp - 1])) { 201 if (pp + 1 >= pbuf.length) { 202 int[] new_pbuf = new int[pbuf.length * 2]; 203 System.arraycopy(pbuf, 0, new_pbuf, 0, pbuf.length); 204 pbuf = new_pbuf; 205 } 206 pbuf[pp] = sp; 207 pbuf[pp + 1] = startPos + bp; 208 pp += 2; 209 } 210 super.putChar(ch, scan); 211 } 212 213 /** Whether the ch represents a sequence of two backslashes. */ 214 boolean isDoubleBackslash() { 215 return doubleBackslashBp == bp; 216 } 217 218 219 } 220 221 protected static class JavadocComment extends JavaTokenizer.BasicComment<DocReader> { 222 223 /** 224 * Translated and stripped contents of doc comment 225 */ 226 private String docComment = null; 227 private int[] docPosns = null; 228 229 JavadocComment(DocReader reader, CommentStyle cs) { 230 super(reader, cs); 231 } 232 233 @Override 234 public String getText() { 235 if (!scanned && cs == CommentStyle.JAVADOC) { 236 scanDocComment(); 237 } 238 return docComment; 239 } 240 241 @Override 242 public int getSourcePos(int pos) { 243 // Binary search to find the entry for which the string index is 244 // less than pos. Since docPosns is a list of pairs of integers 245 // we must make sure the index is always even. 246 // If we find an exact match for pos, the other item in the pair 247 // gives the source pos; otherwise, compute the source position 248 // relative to the best match found in the array. 249 if (pos == Position.NOPOS) 250 return Position.NOPOS; 251 if (pos < 0 || pos > docComment.length()) 252 throw new StringIndexOutOfBoundsException(String.valueOf(pos)); 253 if (docPosns == null) 254 return Position.NOPOS; 255 int start = 0; 256 int end = docPosns.length; 257 while (start < end - 2) { 258 // find an even index midway between start and end 259 int index = ((start + end) / 4) * 2; 260 if (docPosns[index] < pos) 261 start = index; 262 else if (docPosns[index] == pos) 263 return docPosns[index + 1]; 264 else 265 end = index; 266 } 267 return docPosns[start + 1] + (pos - docPosns[start]); 268 } 269 270 @Override 271 @SuppressWarnings("fallthrough") 272 protected void scanDocComment() { 273 try { 274 boolean firstLine = true; 275 276 // Skip over first slash 277 comment_reader.scanCommentChar(); 278 // Skip over first star 279 comment_reader.scanCommentChar(); 280 281 // consume any number of stars 282 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') { 283 comment_reader.scanCommentChar(); 284 } 285 // is the comment in the form /**/, /***/, /****/, etc. ? 286 if (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '/') { 287 docComment = ""; 288 return; 289 } 290 291 // skip a newline on the first line of the comment. 292 if (comment_reader.bp < comment_reader.buflen) { 293 if (comment_reader.ch == LF) { 294 comment_reader.scanCommentChar(); 295 firstLine = false; 296 } else if (comment_reader.ch == CR) { 297 comment_reader.scanCommentChar(); 298 if (comment_reader.ch == LF) { 299 comment_reader.scanCommentChar(); 300 firstLine = false; 301 } 302 } 303 } 304 305 outerLoop: 306 307 // The outerLoop processes the doc comment, looping once 308 // for each line. For each line, it first strips off 309 // whitespace, then it consumes any stars, then it 310 // puts the rest of the line into our buffer. 311 while (comment_reader.bp < comment_reader.buflen) { 312 int begin_bp = comment_reader.bp; 313 char begin_ch = comment_reader.ch; 314 // The wsLoop consumes whitespace from the beginning 315 // of each line. 316 wsLoop: 317 318 while (comment_reader.bp < comment_reader.buflen) { 319 switch(comment_reader.ch) { 320 case ' ': 321 comment_reader.scanCommentChar(); 322 break; 323 case '\t': 324 comment_reader.col = ((comment_reader.col - 1) / TabInc * TabInc) + TabInc; 325 comment_reader.scanCommentChar(); 326 break; 327 case FF: 328 comment_reader.col = 0; 329 comment_reader.scanCommentChar(); 330 break; 331 // Treat newline at beginning of line (blank line, no star) 332 // as comment text. Old Javadoc compatibility requires this. 333 /*---------------------------------* 334 case CR: // (Spec 3.4) 335 doc_reader.scanCommentChar(); 336 if (ch == LF) { 337 col = 0; 338 doc_reader.scanCommentChar(); 339 } 340 break; 341 case LF: // (Spec 3.4) 342 doc_reader.scanCommentChar(); 343 break; 344 *---------------------------------*/ 345 default: 346 // we've seen something that isn't whitespace; 347 // jump out. 348 break wsLoop; 349 } 350 } 351 352 // Are there stars here? If so, consume them all 353 // and check for the end of comment. 354 if (comment_reader.ch == '*') { 355 // skip all of the stars 356 do { 357 comment_reader.scanCommentChar(); 358 } while (comment_reader.ch == '*'); 359 360 // check for the closing slash. 361 if (comment_reader.ch == '/') { 362 // We're done with the doc comment 363 // scanChar() and breakout. 364 break outerLoop; 365 } 366 } else if (! firstLine) { 367 // The current line does not begin with a '*' so we will 368 // treat it as comment 369 comment_reader.bp = begin_bp; 370 comment_reader.ch = begin_ch; 371 } 372 // The textLoop processes the rest of the characters 373 // on the line, adding them to our buffer. 374 textLoop: 375 while (comment_reader.bp < comment_reader.buflen) { 376 switch (comment_reader.ch) { 377 case '*': 378 // Is this just a star? Or is this the 379 // end of a comment? 380 comment_reader.scanCommentChar(); 381 if (comment_reader.ch == '/') { 382 // This is the end of the comment, 383 // set ch and return our buffer. 384 break outerLoop; 385 } 386 // This is just an ordinary star. Add it to 387 // the buffer. 388 comment_reader.putChar('*', false); 389 break; 390 case '\\': 391 comment_reader.putChar('\\', false); 392 // If a double backslash was found, write two 393 if (comment_reader.isDoubleBackslash()) { 394 comment_reader.putChar('\\', false); 395 } 396 comment_reader.scanCommentChar(); 397 break; 398 case ' ': 399 case '\t': 400 comment_reader.putChar(comment_reader.ch, false); 401 comment_reader.scanCommentChar(); 402 break; 403 case FF: 404 comment_reader.scanCommentChar(); 405 break textLoop; // treat as end of line 406 case CR: // (Spec 3.4) 407 comment_reader.scanCommentChar(); 408 if (comment_reader.ch != LF) { 409 // Canonicalize CR-only line terminator to LF 410 comment_reader.putChar((char)LF, false); 411 break textLoop; 412 } 413 /* fall through to LF case */ 414 case LF: // (Spec 3.4) 415 // We've seen a newline. Add it to our 416 // buffer and break out of this loop, 417 // starting fresh on a new line. 418 comment_reader.putChar(comment_reader.ch, false); 419 comment_reader.scanCommentChar(); 420 break textLoop; 421 default: 422 // Add the character to our buffer. 423 comment_reader.putChar(comment_reader.ch, false); 424 comment_reader.scanCommentChar(); 425 } 426 } // end textLoop 427 firstLine = false; 428 } // end outerLoop 429 430 if (comment_reader.sp > 0) { 431 int i = comment_reader.sp - 1; 432 trailLoop: 433 while (i > -1) { 434 switch (comment_reader.sbuf[i]) { 435 case '*': 436 i--; 437 break; 438 default: 439 break trailLoop; 440 } 441 } 442 comment_reader.sp = i + 1; 443 444 // Store the text of the doc comment 445 docComment = comment_reader.chars(); 446 docPosns = new int[comment_reader.pp]; 447 System.arraycopy(comment_reader.pbuf, 0, docPosns, 0, docPosns.length); 448 } else { 449 docComment = ""; 450 } 451 } finally { 452 scanned = true; 453 comment_reader = null; 454 if (docComment != null && 455 DEPRECATED_PATTERN.matcher(docComment).matches()) { 456 deprecatedFlag = true; 457 } 458 } 459 } 460 //where: 461 private static final Pattern DEPRECATED_PATTERN = 462 Pattern.compile("(?sm).*^\\s*@deprecated( |$).*"); 463 464 } 465 466 @Override 467 public Position.LineMap getLineMap() { 468 char[] buf = reader.getRawCharacters(); 469 return Position.makeLineMap(buf, buf.length, true); 470 } 471 }