1 /* 2 * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 25 package org.graalvm.compiler.lir.amd64; 26 27 import static jdk.vm.ci.amd64.AMD64.k7; 28 import static jdk.vm.ci.amd64.AMD64.rax; 29 import static jdk.vm.ci.amd64.AMD64.rcx; 30 import static jdk.vm.ci.amd64.AMD64.rdx; 31 import static jdk.vm.ci.code.ValueUtil.asRegister; 32 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL; 33 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; 34 35 import java.util.EnumSet; 36 37 import org.graalvm.compiler.asm.Label; 38 import org.graalvm.compiler.asm.amd64.AMD64Address; 39 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale; 40 import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag; 41 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; 42 import org.graalvm.compiler.core.common.LIRKind; 43 import org.graalvm.compiler.lir.LIRInstructionClass; 44 import org.graalvm.compiler.lir.Opcode; 45 import org.graalvm.compiler.lir.asm.CompilationResultBuilder; 46 import org.graalvm.compiler.lir.gen.LIRGeneratorTool; 47 48 import jdk.vm.ci.amd64.AMD64; 49 import jdk.vm.ci.amd64.AMD64.CPUFeature; 50 import jdk.vm.ci.amd64.AMD64Kind; 51 import jdk.vm.ci.code.Register; 52 import jdk.vm.ci.code.TargetDescription; 53 import jdk.vm.ci.meta.JavaKind; 54 import jdk.vm.ci.meta.Value; 55 56 /** 57 * Emits code which compares two arrays lexicographically. If the CPU supports any vector 58 * instructions specialized code is emitted to leverage these instructions. 59 */ 60 @Opcode("ARRAY_COMPARE_TO") 61 public final class AMD64ArrayCompareToOp extends AMD64LIRInstruction { 62 public static final LIRInstructionClass<AMD64ArrayCompareToOp> TYPE = LIRInstructionClass.create(AMD64ArrayCompareToOp.class); 63 64 private final JavaKind kind1; 65 private final JavaKind kind2; 66 private final int array1BaseOffset; 67 private final int array2BaseOffset; 68 69 @Def({REG}) protected Value resultValue; 70 @Alive({REG}) protected Value array1Value; 71 @Alive({REG}) protected Value array2Value; 72 @Use({REG}) protected Value length1Value; 73 @Use({REG}) protected Value length2Value; 74 @Temp({REG}) protected Value length1ValueTemp; 75 @Temp({REG}) protected Value length2ValueTemp; 76 @Temp({REG}) protected Value temp1; 77 @Temp({REG}) protected Value temp2; 78 79 @Temp({REG, ILLEGAL}) protected Value vectorTemp1; 80 81 public AMD64ArrayCompareToOp(LIRGeneratorTool tool, JavaKind kind1, JavaKind kind2, Value result, Value array1, Value array2, Value length1, Value length2) { 82 super(TYPE); 83 this.kind1 = kind1; 84 this.kind2 = kind2; 85 86 // Both offsets should be the same but better be safe than sorry. 87 this.array1BaseOffset = tool.getProviders().getMetaAccess().getArrayBaseOffset(kind1); 88 this.array2BaseOffset = tool.getProviders().getMetaAccess().getArrayBaseOffset(kind2); 89 90 this.resultValue = result; 91 this.array1Value = array1; 92 this.array2Value = array2; 93 /* 94 * The length values are inputs but are also killed like temporaries so need both Use and 95 * Temp annotations, which will only work with fixed registers. 96 */ 97 this.length1Value = length1; 98 this.length2Value = length2; 99 this.length1ValueTemp = length1; 100 this.length2ValueTemp = length2; 101 102 // Allocate some temporaries. 103 this.temp1 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind())); 104 this.temp2 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind())); 105 106 // We only need the vector temporaries if we generate SSE code. 107 if (supportsSSE42(tool.target())) { 108 this.vectorTemp1 = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 109 } else { 110 this.vectorTemp1 = Value.ILLEGAL; 111 } 112 } 113 114 private static boolean supportsSSE42(TargetDescription target) { 115 AMD64 arch = (AMD64) target.arch; 116 return arch.getFeatures().contains(CPUFeature.SSE4_2); 117 } 118 119 private static boolean supportsAVX2(TargetDescription target) { 120 AMD64 arch = (AMD64) target.arch; 121 return arch.getFeatures().contains(CPUFeature.AVX2); 122 } 123 124 private static boolean supportsAVX512VLBW(TargetDescription target) { 125 EnumSet<CPUFeature> features = ((AMD64) target.arch).getFeatures(); 126 return features.contains(CPUFeature.AVX512BW) && features.contains(CPUFeature.AVX512VL); 127 } 128 129 @Override 130 public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { 131 Register result = asRegister(resultValue); 132 Register str1 = asRegister(temp1); 133 Register str2 = asRegister(temp2); 134 135 // Load array base addresses. 136 masm.leaq(str1, new AMD64Address(asRegister(array1Value), array1BaseOffset)); 137 masm.leaq(str2, new AMD64Address(asRegister(array2Value), array2BaseOffset)); 138 Register cnt1 = asRegister(length1Value); 139 Register cnt2 = asRegister(length2Value); 140 141 // Checkstyle: stop 142 Label LENGTH_DIFF_LABEL = new Label(); 143 Label POP_LABEL = new Label(); 144 Label DONE_LABEL = new Label(); 145 Label WHILE_HEAD_LABEL = new Label(); 146 Label COMPARE_WIDE_VECTORS_LOOP_FAILED = new Label(); // used only _LP64 && AVX3 147 int stride, stride2; 148 int adr_stride = -1; 149 int adr_stride1 = -1; 150 int adr_stride2 = -1; 151 // Checkstyle: resume 152 int stride2x2 = 0x40; 153 AMD64Address.Scale scale = null; 154 AMD64Address.Scale scale1 = null; 155 AMD64Address.Scale scale2 = null; 156 157 // if (ae != StrIntrinsicNode::LL) { 158 if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) { 159 stride2x2 = 0x20; 160 } 161 162 // if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) { 163 if (kind1 != kind2) { 164 masm.shrl(cnt2, 1); 165 } 166 // Compute the minimum of the string lengths and the 167 // difference of the string lengths (stack). 168 // Do the conditional move stuff 169 masm.movl(result, cnt1); 170 masm.subl(cnt1, cnt2); 171 masm.push(cnt1); 172 masm.cmovl(ConditionFlag.LessEqual, cnt2, result); // cnt2 = min(cnt1, cnt2) 173 174 // Is the minimum length zero? 175 masm.testl(cnt2, cnt2); 176 masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL); 177 // if (ae == StrIntrinsicNode::LL) { 178 if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) { 179 // Load first bytes 180 masm.movzbl(result, new AMD64Address(str1, 0)); // result = str1[0] 181 masm.movzbl(cnt1, new AMD64Address(str2, 0)); // cnt1 = str2[0] 182 // } else if (ae == StrIntrinsicNode::UU) { 183 } else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) { 184 // Load first characters 185 masm.movzwl(result, new AMD64Address(str1, 0)); 186 masm.movzwl(cnt1, new AMD64Address(str2, 0)); 187 } else { 188 masm.movzbl(result, new AMD64Address(str1, 0)); 189 masm.movzwl(cnt1, new AMD64Address(str2, 0)); 190 } 191 masm.subl(result, cnt1); 192 masm.jcc(ConditionFlag.NotZero, POP_LABEL); 193 194 // if (ae == StrIntrinsicNode::UU) { 195 if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) { 196 // Divide length by 2 to get number of chars 197 masm.shrl(cnt2, 1); 198 } 199 masm.cmpl(cnt2, 1); 200 masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL); 201 202 // Check if the strings start at the same location and setup scale and stride 203 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 204 if (kind1 == kind2) { 205 masm.cmpptr(str1, str2); 206 masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL); 207 // if (ae == StrIntrinsicNode::LL) { 208 if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) { 209 scale = AMD64Address.Scale.Times1; 210 stride = 16; 211 } else { 212 scale = AMD64Address.Scale.Times2; 213 stride = 8; 214 } 215 } else { 216 scale1 = AMD64Address.Scale.Times1; 217 scale2 = AMD64Address.Scale.Times2; 218 // scale not used 219 stride = 8; 220 } 221 222 // if (UseAVX >= 2 && UseSSE42Intrinsics) { 223 if (supportsAVX2(crb.target) && supportsSSE42(crb.target)) { 224 Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE); 225 226 // Checkstyle: stop 227 Label COMPARE_WIDE_VECTORS = new Label(); 228 Label VECTOR_NOT_EQUAL = new Label(); 229 Label COMPARE_WIDE_TAIL = new Label(); 230 Label COMPARE_SMALL_STR = new Label(); 231 Label COMPARE_WIDE_VECTORS_LOOP = new Label(); 232 Label COMPARE_16_CHARS = new Label(); 233 Label COMPARE_INDEX_CHAR = new Label(); 234 Label COMPARE_WIDE_VECTORS_LOOP_AVX2 = new Label(); 235 Label COMPARE_TAIL_LONG = new Label(); 236 Label COMPARE_WIDE_VECTORS_LOOP_AVX3 = new Label(); // used only _LP64 && AVX3 237 // Checkstyle: resume 238 239 int pcmpmask = 0x19; 240 // if (ae == StrIntrinsicNode::LL) { 241 if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) { 242 pcmpmask &= ~0x01; 243 } 244 245 // Setup to compare 16-chars (32-bytes) vectors, 246 // start from first character again because it has aligned address. 247 // if (ae == StrIntrinsicNode::LL) { 248 if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) { 249 stride2 = 32; 250 } else { 251 stride2 = 16; 252 } 253 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 254 if (kind1 == kind2) { 255 adr_stride = stride << scale.log2; 256 } else { 257 adr_stride1 = 8; // stride << scale1; 258 adr_stride2 = 16; // stride << scale2; 259 } 260 261 assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri"; 262 // rax and rdx are used by pcmpestri as elements counters 263 masm.movl(result, cnt2); 264 masm.andl(cnt2, ~(stride2 - 1)); // cnt2 holds the vector count 265 masm.jcc(ConditionFlag.Zero, COMPARE_TAIL_LONG); 266 267 // fast path : compare first 2 8-char vectors. 268 masm.bind(COMPARE_16_CHARS); 269 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 270 if (kind1 == kind2) { 271 masm.movdqu(vec1, new AMD64Address(str1, 0)); 272 } else { 273 masm.pmovzxbw(vec1, new AMD64Address(str1, 0)); 274 } 275 masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask); 276 masm.jccb(ConditionFlag.Below, COMPARE_INDEX_CHAR); 277 278 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 279 if (kind1 == kind2) { 280 masm.movdqu(vec1, new AMD64Address(str1, adr_stride)); 281 masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride), pcmpmask); 282 } else { 283 masm.pmovzxbw(vec1, new AMD64Address(str1, adr_stride1)); 284 masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride2), pcmpmask); 285 } 286 masm.jccb(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS); 287 masm.addl(cnt1, stride); 288 289 // Compare the characters at index in cnt1 290 masm.bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character 291 loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1); 292 masm.subl(result, cnt2); 293 masm.jmp(POP_LABEL); 294 295 // Setup the registers to start vector comparison loop 296 masm.bind(COMPARE_WIDE_VECTORS); 297 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 298 if (kind1 == kind2) { 299 masm.leaq(str1, new AMD64Address(str1, result, scale)); 300 masm.leaq(str2, new AMD64Address(str2, result, scale)); 301 } else { 302 masm.leaq(str1, new AMD64Address(str1, result, scale1)); 303 masm.leaq(str2, new AMD64Address(str2, result, scale2)); 304 } 305 masm.subl(result, stride2); 306 masm.subl(cnt2, stride2); 307 masm.jcc(ConditionFlag.Zero, COMPARE_WIDE_TAIL); 308 masm.negq(result); 309 310 // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest) 311 masm.bind(COMPARE_WIDE_VECTORS_LOOP); 312 313 // if (VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop 314 if (supportsAVX512VLBW(crb.target)) { 315 masm.cmpl(cnt2, stride2x2); 316 masm.jccb(ConditionFlag.Below, COMPARE_WIDE_VECTORS_LOOP_AVX2); 317 masm.testl(cnt2, stride2x2 - 1); // cnt2 holds the vector count 318 // means we cannot subtract by 0x40 319 masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX2); 320 321 masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop 322 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 323 if (kind1 == kind2) { 324 masm.evmovdqu64(vec1, new AMD64Address(str1, result, scale)); 325 // k7 == 11..11, if operands equal, otherwise k7 has some 0 326 masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale)); 327 } else { 328 masm.evpmovzxbw(vec1, new AMD64Address(str1, result, scale1)); 329 // k7 == 11..11, if operands equal, otherwise k7 has some 0 330 masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale2)); 331 } 332 masm.kortestq(k7, k7); 333 masm.jcc(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED); // miscompare 334 masm.addq(result, stride2x2); // update since we already compared at this addr 335 masm.subl(cnt2, stride2x2); // and sub the size too 336 masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX3); 337 338 masm.vpxor(vec1, vec1, vec1); 339 masm.jmpb(COMPARE_WIDE_TAIL); 340 } 341 342 masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX2); 343 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 344 if (kind1 == kind2) { 345 masm.vmovdqu(vec1, new AMD64Address(str1, result, scale)); 346 masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale)); 347 } else { 348 masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1)); 349 masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale2)); 350 } 351 masm.vptest(vec1, vec1); 352 masm.jcc(ConditionFlag.NotZero, VECTOR_NOT_EQUAL); 353 masm.addq(result, stride2); 354 masm.subl(cnt2, stride2); 355 masm.jcc(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP); 356 // clean upper bits of YMM registers 357 masm.vpxor(vec1, vec1, vec1); 358 359 // compare wide vectors tail 360 masm.bind(COMPARE_WIDE_TAIL); 361 masm.testq(result, result); 362 masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL); 363 364 masm.movl(result, stride2); 365 masm.movl(cnt2, result); 366 masm.negq(result); 367 masm.jmp(COMPARE_WIDE_VECTORS_LOOP_AVX2); 368 369 // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors. 370 masm.bind(VECTOR_NOT_EQUAL); 371 // clean upper bits of YMM registers 372 masm.vpxor(vec1, vec1, vec1); 373 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 374 if (kind1 == kind2) { 375 masm.leaq(str1, new AMD64Address(str1, result, scale)); 376 masm.leaq(str2, new AMD64Address(str2, result, scale)); 377 } else { 378 masm.leaq(str1, new AMD64Address(str1, result, scale1)); 379 masm.leaq(str2, new AMD64Address(str2, result, scale2)); 380 } 381 masm.jmp(COMPARE_16_CHARS); 382 383 // Compare tail chars, length between 1 to 15 chars 384 masm.bind(COMPARE_TAIL_LONG); 385 masm.movl(cnt2, result); 386 masm.cmpl(cnt2, stride); 387 masm.jcc(ConditionFlag.Less, COMPARE_SMALL_STR); 388 389 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 390 if (kind1 == kind2) { 391 masm.movdqu(vec1, new AMD64Address(str1, 0)); 392 } else { 393 masm.pmovzxbw(vec1, new AMD64Address(str1, 0)); 394 } 395 masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask); 396 masm.jcc(ConditionFlag.Below, COMPARE_INDEX_CHAR); 397 masm.subq(cnt2, stride); 398 masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL); 399 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 400 if (kind1 == kind2) { 401 masm.leaq(str1, new AMD64Address(str1, result, scale)); 402 masm.leaq(str2, new AMD64Address(str2, result, scale)); 403 } else { 404 masm.leaq(str1, new AMD64Address(str1, result, scale1)); 405 masm.leaq(str2, new AMD64Address(str2, result, scale2)); 406 } 407 masm.negq(cnt2); 408 masm.jmpb(WHILE_HEAD_LABEL); 409 410 masm.bind(COMPARE_SMALL_STR); 411 } else if (supportsSSE42(crb.target)) { 412 Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE); 413 414 // Checkstyle: stop 415 Label COMPARE_WIDE_VECTORS = new Label(); 416 Label VECTOR_NOT_EQUAL = new Label(); 417 Label COMPARE_TAIL = new Label(); 418 // Checkstyle: resume 419 int pcmpmask = 0x19; 420 // Setup to compare 8-char (16-byte) vectors, 421 // start from first character again because it has aligned address. 422 masm.movl(result, cnt2); 423 masm.andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count 424 // if (ae == StrIntrinsicNode::LL) { 425 if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) { 426 pcmpmask &= ~0x01; 427 } 428 masm.jcc(ConditionFlag.Zero, COMPARE_TAIL); 429 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 430 if (kind1 == kind2) { 431 masm.leaq(str1, new AMD64Address(str1, result, scale)); 432 masm.leaq(str2, new AMD64Address(str2, result, scale)); 433 } else { 434 masm.leaq(str1, new AMD64Address(str1, result, scale1)); 435 masm.leaq(str2, new AMD64Address(str2, result, scale2)); 436 } 437 masm.negq(result); 438 439 // pcmpestri 440 // inputs: 441 // vec1- substring 442 // rax - negative string length (elements count) 443 // mem - scanned string 444 // rdx - string length (elements count) 445 // pcmpmask - cmp mode: 11000 (string compare with negated result) 446 // + 00 (unsigned bytes) or + 01 (unsigned shorts) 447 // outputs: 448 // rcx - first mismatched element index 449 assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri"; 450 451 masm.bind(COMPARE_WIDE_VECTORS); 452 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 453 if (kind1 == kind2) { 454 masm.movdqu(vec1, new AMD64Address(str1, result, scale)); 455 masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask); 456 } else { 457 masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1)); 458 masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask); 459 } 460 // After pcmpestri cnt1(rcx) contains mismatched element index 461 462 masm.jccb(ConditionFlag.Below, VECTOR_NOT_EQUAL); // CF==1 463 masm.addq(result, stride); 464 masm.subq(cnt2, stride); 465 masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS); 466 467 // compare wide vectors tail 468 masm.testq(result, result); 469 masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL); 470 471 masm.movl(cnt2, stride); 472 masm.movl(result, stride); 473 masm.negq(result); 474 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 475 if (kind1 == kind2) { 476 masm.movdqu(vec1, new AMD64Address(str1, result, scale)); 477 masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask); 478 } else { 479 masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1)); 480 masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask); 481 } 482 masm.jccb(ConditionFlag.AboveEqual, LENGTH_DIFF_LABEL); 483 484 // Mismatched characters in the vectors 485 masm.bind(VECTOR_NOT_EQUAL); 486 masm.addq(cnt1, result); 487 loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1); 488 masm.subl(result, cnt2); 489 masm.jmpb(POP_LABEL); 490 491 masm.bind(COMPARE_TAIL); // limit is zero 492 masm.movl(cnt2, result); 493 // Fallthru to tail compare 494 } 495 496 // Shift str2 and str1 to the end of the arrays, negate min 497 // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { 498 if (kind1 == kind2) { 499 masm.leaq(str1, new AMD64Address(str1, cnt2, scale)); 500 masm.leaq(str2, new AMD64Address(str2, cnt2, scale)); 501 } else { 502 masm.leaq(str1, new AMD64Address(str1, cnt2, scale1)); 503 masm.leaq(str2, new AMD64Address(str2, cnt2, scale2)); 504 } 505 masm.decrementl(cnt2); // first character was compared already 506 masm.negq(cnt2); 507 508 // Compare the rest of the elements 509 masm.bind(WHILE_HEAD_LABEL); 510 loadNextElements(masm, result, cnt1, str1, str2, scale, scale1, scale2, cnt2); 511 masm.subl(result, cnt1); 512 masm.jccb(ConditionFlag.NotZero, POP_LABEL); 513 masm.incrementq(cnt2, 1); 514 masm.jccb(ConditionFlag.NotZero, WHILE_HEAD_LABEL); 515 516 // Strings are equal up to min length. Return the length difference. 517 masm.bind(LENGTH_DIFF_LABEL); 518 masm.pop(result); 519 // if (ae == StrIntrinsicNode::UU) { 520 if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) { 521 // Divide diff by 2 to get number of chars 522 masm.sarl(result, 1); 523 } 524 masm.jmpb(DONE_LABEL); 525 526 // if (VM_Version::supports_avx512vlbw()) { 527 if (supportsAVX512VLBW(crb.target)) { 528 masm.bind(COMPARE_WIDE_VECTORS_LOOP_FAILED); 529 530 masm.kmovq(cnt1, k7); 531 masm.notq(cnt1); 532 masm.bsfq(cnt2, cnt1); 533 // if (ae != StrIntrinsicNode::LL) { 534 if (kind1 != JavaKind.Byte && kind2 != JavaKind.Byte) { 535 // Divide diff by 2 to get number of chars 536 masm.sarl(cnt2, 1); 537 } 538 masm.addq(result, cnt2); 539 // if (ae == StrIntrinsicNode::LL) { 540 if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) { 541 masm.movzbl(cnt1, new AMD64Address(str2, result, Scale.Times1)); 542 masm.movzbl(result, new AMD64Address(str1, result, Scale.Times1)); 543 } else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) { 544 masm.movzwl(cnt1, new AMD64Address(str2, result, scale)); 545 masm.movzwl(result, new AMD64Address(str1, result, scale)); 546 } else { 547 masm.movzwl(cnt1, new AMD64Address(str2, result, scale2)); 548 masm.movzbl(result, new AMD64Address(str1, result, scale1)); 549 } 550 masm.subl(result, cnt1); 551 masm.jmpb(POP_LABEL); 552 } 553 554 // Discard the stored length difference 555 masm.bind(POP_LABEL); 556 masm.pop(cnt1); 557 558 // That's it 559 masm.bind(DONE_LABEL); 560 // if (ae == StrIntrinsicNode::UL) { 561 if (kind1 == JavaKind.Char && kind2 == JavaKind.Byte) { 562 masm.negl(result); 563 } 564 } 565 566 private void loadNextElements(AMD64MacroAssembler masm, Register elem1, Register elem2, Register str1, Register str2, 567 AMD64Address.Scale scale, AMD64Address.Scale scale1, 568 AMD64Address.Scale scale2, Register index) { 569 // if (ae == StrIntrinsicNode::LL) { 570 if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) { 571 masm.movzbl(elem1, new AMD64Address(str1, index, scale, 0)); 572 masm.movzbl(elem2, new AMD64Address(str2, index, scale, 0)); 573 // } else if (ae == StrIntrinsicNode::UU) { 574 } else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) { 575 masm.movzwl(elem1, new AMD64Address(str1, index, scale, 0)); 576 masm.movzwl(elem2, new AMD64Address(str2, index, scale, 0)); 577 } else { 578 masm.movzbl(elem1, new AMD64Address(str1, index, scale1, 0)); 579 masm.movzwl(elem2, new AMD64Address(str2, index, scale2, 0)); 580 } 581 } 582 }