< prev index next >

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

Print this page


   1 /*
   2  * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.


4068     __ ldr(tmp3, Address(__ post(cnt1, 8)));
4069     __ umov(tmpL, vtmp, __ D, 1);
4070     __ eor(rscratch2, tmpU, tmpL);
4071     __ cbnz(rscratch2, DIFF1);
4072   }
4073 
4074   // r0  = result
4075   // r1  = str1
4076   // r2  = cnt1
4077   // r3  = str2
4078   // r4  = cnt2
4079   // r10 = tmp1
4080   // r11 = tmp2
4081   address generate_compare_long_string_different_encoding(bool isLU) {
4082     __ align(CodeEntryAlignment);
4083     StubCodeMark mark(this, "StubRoutines", isLU
4084         ? "compare_long_string_different_encoding LU"
4085         : "compare_long_string_different_encoding UL");
4086     address entry = __ pc();
4087     Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2,
4088         DONE, CALCULATE_DIFFERENCE, LARGE_LOOP_PREFETCH, SMALL_LOOP_ENTER,
4089         LARGE_LOOP_PREFETCH_REPEAT1, LARGE_LOOP_PREFETCH_REPEAT2;
4090     Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
4091         tmp1 = r10, tmp2 = r11, tmp3 = r12, tmp4 = r14;
4092     FloatRegister vtmpZ = v0, vtmp = v1, vtmp3 = v2;
4093     RegSet spilled_regs = RegSet::of(tmp3, tmp4);
4094 
4095     int prefetchLoopExitCondition = MAX(32, SoftwarePrefetchHintDistance/2);
4096 
4097     __ eor(vtmpZ, __ T16B, vtmpZ, vtmpZ);
4098     // cnt2 == number of characters left to compare
4099     // Check the first 4 symbols, which are already loaded (vtmp and tmp2(LU)/tmp1(UL))
4100     __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
4101     __ add(str1, str1, isLU ? wordSize/2 : wordSize);
4102     __ add(str2, str2, isLU ? wordSize : wordSize/2);
4103     __ fmovd(isLU ? tmp1 : tmp2, vtmp);
4104     __ subw(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case.
4105     __ add(str1, str1, cnt2, __ LSL, isLU ? 0 : 1);
4106     __ eor(rscratch2, tmp1, tmp2);
4107     __ add(str2, str2, cnt2, __ LSL, isLU ? 1 : 0);
4108     __ mov(rscratch1, tmp2);
4109     __ cbnz(rscratch2, CALCULATE_DIFFERENCE);
4110     Register strU = isLU ? str2 : str1,
4111              strL = isLU ? str1 : str2,
4112              tmpU = isLU ? rscratch1 : tmp1, // where to keep U for comparison
4113              tmpL = isLU ? tmp1 : rscratch1; // where to keep L for comparison
4114     __ push(spilled_regs, sp);
4115     __ sub(tmp2, strL, cnt2); // strL pointer to load from
4116     __ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from
4117 
4118     __ ldr(tmp3, Address(__ post(cnt1, 8)));
4119 
4120     if (SoftwarePrefetchHintDistance >= 0) {
4121       __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
4122       __ br(__ LT, SMALL_LOOP);
4123       __ bind(LARGE_LOOP_PREFETCH);
4124         __ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
4125         __ mov(tmp4, 2);
4126         __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
4127         __ bind(LARGE_LOOP_PREFETCH_REPEAT1);
4128           compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
4129           __ subs(tmp4, tmp4, 1);
4130           __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT1);
4131           __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
4132           __ mov(tmp4, 2);
4133         __ bind(LARGE_LOOP_PREFETCH_REPEAT2);
4134           compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
4135           __ subs(tmp4, tmp4, 1);
4136           __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
4137           __ sub(cnt2, cnt2, 64);
4138           __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
4139           __ br(__ GE, LARGE_LOOP_PREFETCH);
4140     }
4141     __ cbz(cnt2, LOAD_LAST); // no characters left except last load

4142     __ subs(cnt2, cnt2, 16);
4143     __ br(__ LT, TAIL);
4144     __ b(SMALL_LOOP_ENTER);
4145     __ bind(SMALL_LOOP); // smaller loop
4146       __ subs(cnt2, cnt2, 16);
4147     __ bind(SMALL_LOOP_ENTER);
4148       compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
4149       __ br(__ GE, SMALL_LOOP);
4150       __ cbz(cnt2, LOAD_LAST);
4151     __ bind(TAIL); // 1..15 characters left
4152       __ subs(zr, cnt2, -8);
4153       __ br(__ GT, TAIL_LOAD_16);
4154       __ ldrd(vtmp, Address(tmp2));
4155       __ zip1(vtmp3, __ T8B, vtmp, vtmpZ);
4156 
4157       __ ldr(tmpU, Address(__ post(cnt1, 8)));
4158       __ fmovd(tmpL, vtmp3);
4159       __ eor(rscratch2, tmp3, tmpL);
4160       __ cbnz(rscratch2, DIFF2);
4161       __ umov(tmpL, vtmp3, __ D, 1);
4162       __ eor(rscratch2, tmpU, tmpL);
4163       __ cbnz(rscratch2, DIFF1);
4164       __ b(LOAD_LAST);
4165     __ bind(TAIL_LOAD_16);
4166       __ ldrq(vtmp, Address(tmp2));
4167       __ ldr(tmpU, Address(__ post(cnt1, 8)));
4168       __ zip1(vtmp3, __ T16B, vtmp, vtmpZ);
4169       __ zip2(vtmp, __ T16B, vtmp, vtmpZ);
4170       __ fmovd(tmpL, vtmp3);
4171       __ eor(rscratch2, tmp3, tmpL);
4172       __ cbnz(rscratch2, DIFF2);
4173 
4174       __ ldr(tmp3, Address(__ post(cnt1, 8)));
4175       __ umov(tmpL, vtmp3, __ D, 1);
4176       __ eor(rscratch2, tmpU, tmpL);
4177       __ cbnz(rscratch2, DIFF1);
4178 
4179       __ ldr(tmpU, Address(__ post(cnt1, 8)));
4180       __ fmovd(tmpL, vtmp);
4181       __ eor(rscratch2, tmp3, tmpL);
4182       __ cbnz(rscratch2, DIFF2);
4183 
4184       __ umov(tmpL, vtmp, __ D, 1);
4185       __ eor(rscratch2, tmpU, tmpL);
4186       __ cbnz(rscratch2, DIFF1);
4187       __ b(LOAD_LAST);
4188     __ bind(DIFF2);
4189       __ mov(tmpU, tmp3);
4190     __ bind(DIFF1);
4191       __ pop(spilled_regs, sp);
4192       __ b(CALCULATE_DIFFERENCE);
4193     __ bind(LOAD_LAST);



4194       __ pop(spilled_regs, sp);
4195 
4196       __ ldrs(vtmp, Address(strL));
4197       __ ldr(tmpU, Address(strU));
4198       __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
4199       __ fmovd(tmpL, vtmp);
4200 
4201       __ eor(rscratch2, tmpU, tmpL);
4202       __ cbz(rscratch2, DONE);
4203 
4204     // Find the first different characters in the longwords and
4205     // compute their difference.
4206     __ bind(CALCULATE_DIFFERENCE);
4207       __ rev(rscratch2, rscratch2);
4208       __ clz(rscratch2, rscratch2);
4209       __ andr(rscratch2, rscratch2, -16);
4210       __ lsrv(tmp1, tmp1, rscratch2);
4211       __ uxthw(tmp1, tmp1);
4212       __ lsrv(rscratch1, rscratch1, rscratch2);
4213       __ uxthw(rscratch1, rscratch1);
4214       __ subw(result, tmp1, rscratch1);
4215     __ bind(DONE);
4216       __ ret(lr);
4217     return entry;


4239     // to prefetch memory behind array border
4240     int largeLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2);
4241     // cnt1/cnt2 contain the number of characters to compare. cnt1 can be re-used.
4242     // Update the cnt2 counter to account for the 8 bytes already loaded.
4243     __ sub(cnt2, cnt2, wordSize/(isLL ? 1 : 2));
4244     // update pointers, because of previous read
4245     __ add(str1, str1, wordSize);
4246     __ add(str2, str2, wordSize);
4247     if (SoftwarePrefetchHintDistance >= 0) {
4248       __ bind(LARGE_LOOP_PREFETCH);
4249         __ prfm(Address(str1, SoftwarePrefetchHintDistance));
4250         __ prfm(Address(str2, SoftwarePrefetchHintDistance));
4251         compare_string_16_bytes_same(DIFF, DIFF2);
4252         compare_string_16_bytes_same(DIFF, DIFF2);
4253         __ sub(cnt2, cnt2, isLL ? 64 : 32);
4254         compare_string_16_bytes_same(DIFF, DIFF2);
4255         __ subs(rscratch2, cnt2, largeLoopExitCondition);
4256         compare_string_16_bytes_same(DIFF, DIFF2);
4257         __ br(__ GT, LARGE_LOOP_PREFETCH);
4258         __ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?

4259         // less than 16 bytes left?
4260         __ subs(cnt2, cnt2, isLL ? 16 : 8);
4261         __ br(__ LT, TAIL);
4262     }
4263     __ bind(SMALL_LOOP);
4264       compare_string_16_bytes_same(DIFF, DIFF2);
4265       __ subs(cnt2, cnt2, isLL ? 16 : 8);
4266       __ br(__ GE, SMALL_LOOP);
4267     __ bind(TAIL);
4268       __ adds(cnt2, cnt2, isLL ? 16 : 8);
4269       __ br(__ EQ, LAST_CHECK_AND_LENGTH_DIFF);
4270       __ subs(cnt2, cnt2, isLL ? 8 : 4);
4271       __ br(__ LE, CHECK_LAST);
4272       __ eor(rscratch2, tmp1, tmp2);
4273       __ cbnz(rscratch2, DIFF);
4274       __ ldr(tmp1, Address(__ post(str1, 8)));
4275       __ ldr(tmp2, Address(__ post(str2, 8)));
4276       __ sub(cnt2, cnt2, isLL ? 8 : 4);
4277     __ bind(CHECK_LAST);
4278       if (!isLL) {
4279         __ add(cnt2, cnt2, cnt2); // now in bytes
4280       }
4281       __ eor(rscratch2, tmp1, tmp2);
4282       __ cbnz(rscratch2, DIFF);


   1 /*
   2  * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.


4068     __ ldr(tmp3, Address(__ post(cnt1, 8)));
4069     __ umov(tmpL, vtmp, __ D, 1);
4070     __ eor(rscratch2, tmpU, tmpL);
4071     __ cbnz(rscratch2, DIFF1);
4072   }
4073 
4074   // r0  = result
4075   // r1  = str1
4076   // r2  = cnt1
4077   // r3  = str2
4078   // r4  = cnt2
4079   // r10 = tmp1
4080   // r11 = tmp2
4081   address generate_compare_long_string_different_encoding(bool isLU) {
4082     __ align(CodeEntryAlignment);
4083     StubCodeMark mark(this, "StubRoutines", isLU
4084         ? "compare_long_string_different_encoding LU"
4085         : "compare_long_string_different_encoding UL");
4086     address entry = __ pc();
4087     Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2,
4088         DONE, CALCULATE_DIFFERENCE, LARGE_LOOP_PREFETCH, NO_PREFETCH,
4089         LARGE_LOOP_PREFETCH_REPEAT1, LARGE_LOOP_PREFETCH_REPEAT2;
4090     Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
4091         tmp1 = r10, tmp2 = r11, tmp3 = r12, tmp4 = r14;
4092     FloatRegister vtmpZ = v0, vtmp = v1, vtmp3 = v2;
4093     RegSet spilled_regs = RegSet::of(tmp3, tmp4);
4094 
4095     int prefetchLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance/2);
4096 
4097     __ eor(vtmpZ, __ T16B, vtmpZ, vtmpZ);
4098     // cnt2 == number of characters left to compare
4099     // Check the first 4 symbols, which are already loaded (vtmp and tmp2(LU)/tmp1(UL))
4100     __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
4101     __ add(str1, str1, isLU ? wordSize/2 : wordSize);
4102     __ add(str2, str2, isLU ? wordSize : wordSize/2);
4103     __ fmovd(isLU ? tmp1 : tmp2, vtmp);
4104     __ subw(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case.
4105     __ add(str1, str1, cnt2, __ LSL, isLU ? 0 : 1);
4106     __ eor(rscratch2, tmp1, tmp2);
4107     __ add(str2, str2, cnt2, __ LSL, isLU ? 1 : 0);
4108     __ mov(rscratch1, tmp2);
4109     __ cbnz(rscratch2, CALCULATE_DIFFERENCE);
4110     Register strU = isLU ? str2 : str1,
4111              strL = isLU ? str1 : str2,
4112              tmpU = isLU ? rscratch1 : tmp1, // where to keep U for comparison
4113              tmpL = isLU ? tmp1 : rscratch1; // where to keep L for comparison
4114     __ push(spilled_regs, sp);
4115     __ sub(tmp2, strL, cnt2); // strL pointer to load from
4116     __ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from
4117 
4118     __ ldr(tmp3, Address(__ post(cnt1, 8)));
4119 
4120     if (SoftwarePrefetchHintDistance >= 0) {
4121       __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
4122       __ br(__ LT, NO_PREFETCH);
4123       __ bind(LARGE_LOOP_PREFETCH);
4124         __ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
4125         __ mov(tmp4, 2);
4126         __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
4127         __ bind(LARGE_LOOP_PREFETCH_REPEAT1);
4128           compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
4129           __ subs(tmp4, tmp4, 1);
4130           __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT1);
4131           __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
4132           __ mov(tmp4, 2);
4133         __ bind(LARGE_LOOP_PREFETCH_REPEAT2);
4134           compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
4135           __ subs(tmp4, tmp4, 1);
4136           __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
4137           __ sub(cnt2, cnt2, 64);
4138           __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
4139           __ br(__ GE, LARGE_LOOP_PREFETCH);
4140     }
4141     __ cbz(cnt2, LOAD_LAST); // no characters left except last load
4142     __ bind(NO_PREFETCH);
4143     __ subs(cnt2, cnt2, 16);
4144     __ br(__ LT, TAIL);

4145     __ bind(SMALL_LOOP); // smaller loop
4146       __ subs(cnt2, cnt2, 16);

4147       compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
4148       __ br(__ GE, SMALL_LOOP);
4149       __ cmn(cnt2, (u1)16);
4150       __ br(__ EQ, LOAD_LAST);
4151     __ bind(TAIL); // 1..15 characters left until last load (last 4 characters)
4152       __ add(cnt1, cnt1, cnt2, __ LSL, 1); // Address of 8 bytes before last 4 characters in UTF-16 string
4153       __ add(tmp2, tmp2, cnt2); // Address of 16 bytes before last 4 characters in Latin1 string
4154       __ ldr(tmp3, Address(cnt1, -8));
4155       compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); // last 16 characters before last load






























4156       __ b(LOAD_LAST);
4157     __ bind(DIFF2);
4158       __ mov(tmpU, tmp3);
4159     __ bind(DIFF1);
4160       __ pop(spilled_regs, sp);
4161       __ b(CALCULATE_DIFFERENCE);
4162     __ bind(LOAD_LAST);
4163       // The last 4 UTF-16 characters are already pre-loaded into tmp3 by compare_string_16_x_LU.
4164       // No need to load them again.
4165       __ mov(tmpU, tmp3);
4166       __ pop(spilled_regs, sp);
4167 
4168       __ ldrs(vtmp, Address(strL));

4169       __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
4170       __ fmovd(tmpL, vtmp);
4171 
4172       __ eor(rscratch2, tmpU, tmpL);
4173       __ cbz(rscratch2, DONE);
4174 
4175     // Find the first different characters in the longwords and
4176     // compute their difference.
4177     __ bind(CALCULATE_DIFFERENCE);
4178       __ rev(rscratch2, rscratch2);
4179       __ clz(rscratch2, rscratch2);
4180       __ andr(rscratch2, rscratch2, -16);
4181       __ lsrv(tmp1, tmp1, rscratch2);
4182       __ uxthw(tmp1, tmp1);
4183       __ lsrv(rscratch1, rscratch1, rscratch2);
4184       __ uxthw(rscratch1, rscratch1);
4185       __ subw(result, tmp1, rscratch1);
4186     __ bind(DONE);
4187       __ ret(lr);
4188     return entry;


4210     // to prefetch memory behind array border
4211     int largeLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2);
4212     // cnt1/cnt2 contain the number of characters to compare. cnt1 can be re-used.
4213     // Update the cnt2 counter to account for the 8 bytes already loaded.
4214     __ sub(cnt2, cnt2, wordSize/(isLL ? 1 : 2));
4215     // update pointers, because of previous read
4216     __ add(str1, str1, wordSize);
4217     __ add(str2, str2, wordSize);
4218     if (SoftwarePrefetchHintDistance >= 0) {
4219       __ bind(LARGE_LOOP_PREFETCH);
4220         __ prfm(Address(str1, SoftwarePrefetchHintDistance));
4221         __ prfm(Address(str2, SoftwarePrefetchHintDistance));
4222         compare_string_16_bytes_same(DIFF, DIFF2);
4223         compare_string_16_bytes_same(DIFF, DIFF2);
4224         __ sub(cnt2, cnt2, isLL ? 64 : 32);
4225         compare_string_16_bytes_same(DIFF, DIFF2);
4226         __ subs(rscratch2, cnt2, largeLoopExitCondition);
4227         compare_string_16_bytes_same(DIFF, DIFF2);
4228         __ br(__ GT, LARGE_LOOP_PREFETCH);
4229         __ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
4230     }
4231     // less than 16 bytes left?
4232     __ subs(cnt2, cnt2, isLL ? 16 : 8);
4233     __ br(__ LT, TAIL);

4234     __ bind(SMALL_LOOP);
4235       compare_string_16_bytes_same(DIFF, DIFF2);
4236       __ subs(cnt2, cnt2, isLL ? 16 : 8);
4237       __ br(__ GE, SMALL_LOOP);
4238     __ bind(TAIL);
4239       __ adds(cnt2, cnt2, isLL ? 16 : 8);
4240       __ br(__ EQ, LAST_CHECK_AND_LENGTH_DIFF);
4241       __ subs(cnt2, cnt2, isLL ? 8 : 4);
4242       __ br(__ LE, CHECK_LAST);
4243       __ eor(rscratch2, tmp1, tmp2);
4244       __ cbnz(rscratch2, DIFF);
4245       __ ldr(tmp1, Address(__ post(str1, 8)));
4246       __ ldr(tmp2, Address(__ post(str2, 8)));
4247       __ sub(cnt2, cnt2, isLL ? 8 : 4);
4248     __ bind(CHECK_LAST);
4249       if (!isLL) {
4250         __ add(cnt2, cnt2, cnt2); // now in bytes
4251       }
4252       __ eor(rscratch2, tmp1, tmp2);
4253       __ cbnz(rscratch2, DIFF);


< prev index next >