1 /*
   2  * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "utilities/debug.hpp"
  27 #include "utilities/ostream.hpp"
  28 #include "utilities/stringUtils.hpp"
  29 
  30 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) {
  31   int replace_count = 0;
  32   size_t from_len = strlen(from);
  33   size_t to_len = strlen(to);
  34   assert(from_len >= to_len, "must not expand input");
  35 
  36   for (char* dst = string; *dst && (dst = strstr(dst, from)) != NULL;) {
  37     char* left_over = dst + from_len;
  38     memmove(dst, to, to_len);                       // does not copy trailing 0 of <to>
  39     dst += to_len;                                  // skip over the replacement.
  40     memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over>
  41     ++ replace_count;
  42   }
  43 
  44   return replace_count;
  45 }
  46 
  47 double StringUtils::similarity(const char* str1, size_t len1, const char* str2, size_t len2) {
  48   assert(str1 != NULL && str2 != NULL, "sanity");
  49 
  50   // filter out zero-length strings else we will underflow on len-1 below
  51   if (len1 == 0 || len2 == 0) {
  52     return 0.0;
  53   }
  54 
  55   size_t total = len1 + len2;
  56   size_t hit = 0;
  57 
  58   for (size_t i = 0; i < len1 - 1; i++) {
  59     for (size_t j = 0; j < len2 - 1; j++) {
  60       if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) {
  61         ++hit;
  62         break;
  63       }
  64     }
  65   }
  66 
  67   return 2.0 * (double) hit / (double) total;
  68 }
  69 
  70 class StringMatcher {
  71  public:
  72   typedef int getc_function_t(const char* &source, const char* limit);
  73   const getc_function_t* _pattern_getc;
  74   const getc_function_t* _string_getc;
  75 
  76   StringMatcher(getc_function_t pattern_getc,
  77                 getc_function_t string_getc)
  78     : _pattern_getc(pattern_getc),
  79       _string_getc(string_getc)
  80   { }
  81 
  82   enum {  // special results from _pattern_getc
  83     string_match_comma  = -0x100 + ',',
  84     string_match_star   = -0x100 + '*',
  85     string_match_eos    = -0x100 + '\0'
  86   };
  87 
  88  private:
  89   const char*
  90   skip_anchor_word(const char* match,
  91                    const char* match_end,
  92                    int anchor_length,
  93                    const char* pattern,
  94                    const char* pattern_end) {
  95     assert(pattern < pattern_end && anchor_length > 0, "");
  96     const char* begp = pattern;
  97     int ch1 = _pattern_getc(begp, pattern_end);
  98     // note that begp is now advanced over ch1
  99     assert(ch1 > 0, "regular char only");
 100     const char* matchp = match;
 101     const char* limitp = match_end - anchor_length;
 102     while (matchp <= limitp) {
 103       int mch = _string_getc(matchp, match_end);
 104       if (mch == ch1) {
 105         const char* patp = begp;
 106         const char* anchorp = matchp;
 107         while (patp < pattern_end) {
 108           char ch = _pattern_getc(patp, pattern_end);
 109           char mch = _string_getc(anchorp, match_end);
 110           if (mch != ch) {
 111             anchorp = NULL;
 112             break;
 113           }
 114         }
 115         if (anchorp != NULL) {
 116           return anchorp;  // Found a full copy of the anchor.
 117         }
 118         // That did not work, so restart the search for ch1.
 119       }
 120     }
 121     return NULL;
 122   }
 123 
 124  public:
 125   bool string_match(const char* pattern,
 126                     const char* string) {
 127     return string_match(pattern, pattern + strlen(pattern),
 128                         string, string + strlen(string));
 129   }
 130   bool string_match(const char* pattern, const char* pattern_end,
 131                     const char* string, const char* string_end) {
 132     const char* patp = pattern;
 133     switch (_pattern_getc(patp, pattern_end)) {
 134     case string_match_eos:
 135       return false;  // Empty pattern is always false.
 136     case string_match_star:
 137       if (patp == pattern_end) {
 138         return true;   // Lone star pattern is always true.
 139       }
 140       break;
 141     }
 142     patp = pattern;  // Reset after lookahead.
 143     const char* matchp = string;  // NULL if failing
 144     for (;;) {
 145       int ch = _pattern_getc(patp, pattern_end);
 146       switch (ch) {
 147       case string_match_eos:
 148       case string_match_comma:
 149         // End of a list item; see if it's a match.
 150         if (matchp == string_end) {
 151           return true;
 152         }
 153         if (ch == string_match_comma) {
 154           // Get ready to match the next item.
 155           matchp = string;
 156           continue;
 157         }
 158         return false;  // End of all items.
 159 
 160       case string_match_star:
 161         if (matchp != NULL) {
 162           // Wildcard:  Parse out following anchor word and look for it.
 163           const char* begp = patp;
 164           const char* endp = patp;
 165           int anchor_len = 0;
 166           for (;;) {
 167             // get as many following regular characters as possible
 168             endp = patp;
 169             ch = _pattern_getc(patp, pattern_end);
 170             if (ch <= 0) {
 171               break;
 172             }
 173             anchor_len += 1;
 174           }
 175           // Anchor word [begp..endp) does not contain ch, so back up.
 176           // Now do an eager match to the anchor word, and commit to it.
 177           patp = endp;
 178           if (ch == string_match_eos ||
 179               ch == string_match_comma) {
 180             // Anchor word is at end of pattern, so treat it as a fixed pattern.
 181             const char* limitp = (matchp + strlen(matchp)) - anchor_len;
 182             matchp = limitp;
 183             patp = begp;
 184             // Resume normal scanning at the only possible match position.
 185             continue;
 186           }
 187           // Find a floating occurrence of the anchor and continue matching.
 188           // Note:  This is greedy; there is no backtrack here.  Good enough.
 189           matchp = skip_anchor_word(matchp, string_end, anchor_len, begp, endp);
 190         }
 191         continue;
 192       }
 193       // Normal character.
 194       if (matchp != NULL) {
 195         int mch = _string_getc(matchp, string_end);
 196         if (mch != ch) {
 197           matchp = NULL;
 198         }
 199       }
 200     }
 201   }
 202 };
 203 
 204 // Match a wildcarded class list to a proposed class name (in internal form).
 205 // Commas or newlines separate multiple possible matches; stars are shell-style wildcards.
 206 class ClassListMatcher : public StringMatcher {
 207  public:
 208   ClassListMatcher()
 209     : StringMatcher(pattern_list_getc, class_name_getc)
 210   { }
 211 
 212  private:
 213   static int pattern_list_getc(const char* &pattern_ptr,
 214                                const char* pattern_end) {
 215     if (pattern_ptr == pattern_end) {
 216       return string_match_eos;
 217     }
 218     int ch = (unsigned char) *pattern_ptr++;
 219     switch (ch) {
 220     case ' ': case '\t': case '\n': case '\r':
 221     case ',':
 222       // End of list item.
 223       for (;;) {
 224         switch (*pattern_ptr) {
 225         case ' ': case '\t': case '\n': case '\r':
 226         case ',':
 227           pattern_ptr += 1;  // Collapse multiple commas or spaces.
 228           continue;
 229         }
 230         break;
 231       }
 232       return string_match_comma;
 233 
 234     case '*':
 235       // Wildcard, matching any number of chars.
 236       while (*pattern_ptr == '*') {
 237         pattern_ptr += 1;  // Collapse multiple stars.
 238       }
 239       return string_match_star;
 240 
 241     case '.':
 242       ch = '/';   // Look for internal form of package separator
 243       break;
 244 
 245     case '\\':
 246       // Superquote in pattern escapes * , whitespace, and itself.
 247       if (pattern_ptr < pattern_end) {
 248         ch = (unsigned char) *pattern_ptr++;
 249       }
 250       break;
 251     }
 252 
 253     assert(ch > 0, "regular char only");
 254     return ch;
 255   }
 256 
 257   static int class_name_getc(const char* &name_ptr,
 258                              const char* name_end) {
 259     if (name_ptr == name_end) {
 260       return string_match_eos;
 261     }
 262     int ch = (unsigned char) *name_ptr++;
 263     if (ch == '.') {
 264       ch = '/';   // Normalize to internal form of package separator
 265     }
 266     return ch;  // plain character
 267   }
 268 };
 269 
 270 static bool class_list_match_sane();
 271 
 272 bool StringUtils::class_list_match(const char* class_pattern_list,
 273                                    const char* class_name) {
 274   assert(class_list_match_sane(), "");
 275   if (class_pattern_list == NULL || class_name == NULL || class_name[0] == '\0')
 276     return false;
 277   ClassListMatcher clm;
 278   return clm.string_match(class_pattern_list, class_name);
 279 }
 280 
 281 #ifdef ASSERT
 282 static void
 283 class_list_match_sane(const char* pat, const char* str, bool result = true) {
 284   if (result) {
 285     assert(StringUtils::class_list_match(pat, str), "%s ~ %s", pat, str);
 286   } else {
 287     assert(!StringUtils::class_list_match(pat, str), "%s !~ %s", pat, str);
 288   }
 289 }
 290 
 291 static bool
 292 class_list_match_sane() {
 293   static bool done = false;
 294   if (done)  return true;
 295   done = true;
 296   class_list_match_sane("foo", "foo");
 297   class_list_match_sane("foo,", "foo");
 298   class_list_match_sane(",foo,", "foo");
 299   class_list_match_sane("bar,foo", "foo");
 300   class_list_match_sane("bar,foo,", "foo");
 301   class_list_match_sane("*", "foo");
 302   class_list_match_sane("foo.bar", "foo/bar");
 303   class_list_match_sane("foo/bar", "foo.bar");
 304   class_list_match_sane("\\foo", "foo");
 305   class_list_match_sane("\\*foo", "*foo");
 306   const char* foo = "foo!";
 307   char buf[100], buf2[100];
 308   const int m = strlen(foo);
 309   for (int n = 0; n <= 1; n++) {
 310     for (int a = -1; a <= 1; a++) {
 311       for (int i = 0; i <= m; i++) {
 312         for (int j = i; j <= m; j++) {
 313           if (j == i && j > 0)  continue;
 314           for (int k = j; k <= m; k++) {
 315             if (k == j && k > i)  continue;
 316             for (int l = k; l <= m; l++) {
 317               if (l == k && l > j)  continue;
 318               char* bp = &buf[0];
 319               strncpy(bp, foo + 0, i - 0); bp += i - 0;
 320               *bp++ = '*';
 321               strncpy(bp, foo + j, k - j); bp += k - j;
 322               *bp++ = '*';
 323               strncpy(bp, foo + l, m - l); bp += m - l;
 324               if (n) {
 325                 *bp++ = 'N';  // make it fail
 326               }
 327               *bp++ = '\0';
 328               if (a != 0) {
 329                 if (a < 0) {
 330                   strcpy(buf2, buf);
 331                   strcat(buf, "X*, ");
 332                   strcat(buf, buf2);
 333                 } else {
 334                   strcat(buf, ", Y");
 335                 }
 336               }
 337               class_list_match_sane(buf, foo, !n);
 338             }
 339           }
 340         }
 341       }
 342     }
 343   }
 344   return true;
 345 }
 346 #endif //ASSERT