1 /* 2 * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "utilities/debug.hpp" 27 #include "utilities/ostream.hpp" 28 #include "utilities/stringUtils.hpp" 29 30 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) { 31 int replace_count = 0; 32 size_t from_len = strlen(from); 33 size_t to_len = strlen(to); 34 assert(from_len >= to_len, "must not expand input"); 35 36 for (char* dst = string; *dst && (dst = strstr(dst, from)) != NULL;) { 37 char* left_over = dst + from_len; 38 memmove(dst, to, to_len); // does not copy trailing 0 of <to> 39 dst += to_len; // skip over the replacement. 40 memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over> 41 ++ replace_count; 42 } 43 44 return replace_count; 45 } 46 47 double StringUtils::similarity(const char* str1, size_t len1, const char* str2, size_t len2) { 48 assert(str1 != NULL && str2 != NULL, "sanity"); 49 50 // filter out zero-length strings else we will underflow on len-1 below 51 if (len1 == 0 || len2 == 0) { 52 return 0.0; 53 } 54 55 size_t total = len1 + len2; 56 size_t hit = 0; 57 58 for (size_t i = 0; i < len1 - 1; i++) { 59 for (size_t j = 0; j < len2 - 1; j++) { 60 if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) { 61 ++hit; 62 break; 63 } 64 } 65 } 66 67 return 2.0 * (double) hit / (double) total; 68 } 69 70 class StringMatcher { 71 public: 72 typedef int getc_function_t(const char* &source, const char* limit); 73 const getc_function_t* _pattern_getc; 74 const getc_function_t* _string_getc; 75 76 StringMatcher(getc_function_t pattern_getc, 77 getc_function_t string_getc) 78 : _pattern_getc(pattern_getc), 79 _string_getc(string_getc) 80 { } 81 82 enum { // special results from _pattern_getc 83 string_match_comma = -0x100 + ',', 84 string_match_star = -0x100 + '*', 85 string_match_eos = -0x100 + '\0' 86 }; 87 88 private: 89 const char* 90 skip_anchor_word(const char* match, 91 const char* match_end, 92 int anchor_length, 93 const char* pattern, 94 const char* pattern_end) { 95 assert(pattern < pattern_end && anchor_length > 0, ""); 96 const char* begp = pattern; 97 int ch1 = _pattern_getc(begp, pattern_end); 98 // note that begp is now advanced over ch1 99 assert(ch1 > 0, "regular char only"); 100 const char* matchp = match; 101 const char* limitp = match_end - anchor_length; 102 while (matchp <= limitp) { 103 int mch = _string_getc(matchp, match_end); 104 if (mch == ch1) { 105 const char* patp = begp; 106 const char* anchorp = matchp; 107 while (patp < pattern_end) { 108 char ch = _pattern_getc(patp, pattern_end); 109 char mch = _string_getc(anchorp, match_end); 110 if (mch != ch) { 111 anchorp = NULL; 112 break; 113 } 114 } 115 if (anchorp != NULL) { 116 return anchorp; // Found a full copy of the anchor. 117 } 118 // That did not work, so restart the search for ch1. 119 } 120 } 121 return NULL; 122 } 123 124 public: 125 bool string_match(const char* pattern, 126 const char* string) { 127 return string_match(pattern, pattern + strlen(pattern), 128 string, string + strlen(string)); 129 } 130 bool string_match(const char* pattern, const char* pattern_end, 131 const char* string, const char* string_end) { 132 const char* patp = pattern; 133 switch (_pattern_getc(patp, pattern_end)) { 134 case string_match_eos: 135 return false; // Empty pattern is always false. 136 case string_match_star: 137 if (patp == pattern_end) { 138 return true; // Lone star pattern is always true. 139 } 140 break; 141 } 142 patp = pattern; // Reset after lookahead. 143 const char* matchp = string; // NULL if failing 144 for (;;) { 145 int ch = _pattern_getc(patp, pattern_end); 146 switch (ch) { 147 case string_match_eos: 148 case string_match_comma: 149 // End of a list item; see if it's a match. 150 if (matchp == string_end) { 151 return true; 152 } 153 if (ch == string_match_comma) { 154 // Get ready to match the next item. 155 matchp = string; 156 continue; 157 } 158 return false; // End of all items. 159 160 case string_match_star: 161 if (matchp != NULL) { 162 // Wildcard: Parse out following anchor word and look for it. 163 const char* begp = patp; 164 const char* endp = patp; 165 int anchor_len = 0; 166 for (;;) { 167 // get as many following regular characters as possible 168 endp = patp; 169 ch = _pattern_getc(patp, pattern_end); 170 if (ch <= 0) { 171 break; 172 } 173 anchor_len += 1; 174 } 175 // Anchor word [begp..endp) does not contain ch, so back up. 176 // Now do an eager match to the anchor word, and commit to it. 177 patp = endp; 178 if (ch == string_match_eos || 179 ch == string_match_comma) { 180 // Anchor word is at end of pattern, so treat it as a fixed pattern. 181 const char* limitp = (matchp + strlen(matchp)) - anchor_len; 182 matchp = limitp; 183 patp = begp; 184 // Resume normal scanning at the only possible match position. 185 continue; 186 } 187 // Find a floating occurrence of the anchor and continue matching. 188 // Note: This is greedy; there is no backtrack here. Good enough. 189 matchp = skip_anchor_word(matchp, string_end, anchor_len, begp, endp); 190 } 191 continue; 192 } 193 // Normal character. 194 if (matchp != NULL) { 195 int mch = _string_getc(matchp, string_end); 196 if (mch != ch) { 197 matchp = NULL; 198 } 199 } 200 } 201 } 202 }; 203 204 // Match a wildcarded class list to a proposed class name (in internal form). 205 // Commas or newlines separate multiple possible matches; stars are shell-style wildcards. 206 class ClassListMatcher : public StringMatcher { 207 public: 208 ClassListMatcher() 209 : StringMatcher(pattern_list_getc, class_name_getc) 210 { } 211 212 private: 213 static int pattern_list_getc(const char* &pattern_ptr, 214 const char* pattern_end) { 215 if (pattern_ptr == pattern_end) { 216 return string_match_eos; 217 } 218 int ch = (unsigned char) *pattern_ptr++; 219 switch (ch) { 220 case ' ': case '\t': case '\n': case '\r': 221 case ',': 222 // End of list item. 223 for (;;) { 224 switch (*pattern_ptr) { 225 case ' ': case '\t': case '\n': case '\r': 226 case ',': 227 pattern_ptr += 1; // Collapse multiple commas or spaces. 228 continue; 229 } 230 break; 231 } 232 return string_match_comma; 233 234 case '*': 235 // Wildcard, matching any number of chars. 236 while (*pattern_ptr == '*') { 237 pattern_ptr += 1; // Collapse multiple stars. 238 } 239 return string_match_star; 240 241 case '.': 242 ch = '/'; // Look for internal form of package separator 243 break; 244 245 case '\\': 246 // Superquote in pattern escapes * , whitespace, and itself. 247 if (pattern_ptr < pattern_end) { 248 ch = (unsigned char) *pattern_ptr++; 249 } 250 break; 251 } 252 253 assert(ch > 0, "regular char only"); 254 return ch; 255 } 256 257 static int class_name_getc(const char* &name_ptr, 258 const char* name_end) { 259 if (name_ptr == name_end) { 260 return string_match_eos; 261 } 262 int ch = (unsigned char) *name_ptr++; 263 if (ch == '.') { 264 ch = '/'; // Normalize to internal form of package separator 265 } 266 return ch; // plain character 267 } 268 }; 269 270 static bool class_list_match_sane(); 271 272 bool StringUtils::class_list_match(const char* class_pattern_list, 273 const char* class_name) { 274 assert(class_list_match_sane(), ""); 275 if (class_pattern_list == NULL || class_name == NULL || class_name[0] == '\0') 276 return false; 277 ClassListMatcher clm; 278 return clm.string_match(class_pattern_list, class_name); 279 } 280 281 #ifdef ASSERT 282 static void 283 class_list_match_sane(const char* pat, const char* str, bool result = true) { 284 if (result) { 285 assert(StringUtils::class_list_match(pat, str), "%s ~ %s", pat, str); 286 } else { 287 assert(!StringUtils::class_list_match(pat, str), "%s !~ %s", pat, str); 288 } 289 } 290 291 static bool 292 class_list_match_sane() { 293 static bool done = false; 294 if (done) return true; 295 done = true; 296 class_list_match_sane("foo", "foo"); 297 class_list_match_sane("foo,", "foo"); 298 class_list_match_sane(",foo,", "foo"); 299 class_list_match_sane("bar,foo", "foo"); 300 class_list_match_sane("bar,foo,", "foo"); 301 class_list_match_sane("*", "foo"); 302 class_list_match_sane("foo.bar", "foo/bar"); 303 class_list_match_sane("foo/bar", "foo.bar"); 304 class_list_match_sane("\\foo", "foo"); 305 class_list_match_sane("\\*foo", "*foo"); 306 const char* foo = "foo!"; 307 char buf[100], buf2[100]; 308 const int m = strlen(foo); 309 for (int n = 0; n <= 1; n++) { 310 for (int a = -1; a <= 1; a++) { 311 for (int i = 0; i <= m; i++) { 312 for (int j = i; j <= m; j++) { 313 if (j == i && j > 0) continue; 314 for (int k = j; k <= m; k++) { 315 if (k == j && k > i) continue; 316 for (int l = k; l <= m; l++) { 317 if (l == k && l > j) continue; 318 char* bp = &buf[0]; 319 strncpy(bp, foo + 0, i - 0); bp += i - 0; 320 *bp++ = '*'; 321 strncpy(bp, foo + j, k - j); bp += k - j; 322 *bp++ = '*'; 323 strncpy(bp, foo + l, m - l); bp += m - l; 324 if (n) { 325 *bp++ = 'N'; // make it fail 326 } 327 *bp++ = '\0'; 328 if (a != 0) { 329 if (a < 0) { 330 strcpy(buf2, buf); 331 strcat(buf, "X*, "); 332 strcat(buf, buf2); 333 } else { 334 strcat(buf, ", Y"); 335 } 336 } 337 class_list_match_sane(buf, foo, !n); 338 } 339 } 340 } 341 } 342 } 343 } 344 return true; 345 } 346 #endif //ASSERT