/*
 * Copyright (c) 2016, Intel Corporation.
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"

// Emits the SHA-1 block compression loop using the Intel SHA extensions
// (sha1rnds4 / sha1nexte / sha1msg1 / sha1msg2). The 80 rounds are emitted
// as twenty 4-round groups; the immediate on sha1rnds4 (0..3) selects the
// round-function/constant quadrant for rounds 0-19 / 20-39 / 40-59 / 60-79.
//
//   abcd        - packed A,B,C,D working state (loaded from state+0)
//   e0, e1      - alternate as the E-word operand between consecutive
//                 4-round groups (E is kept in dword 3, see pinsrd/pand)
//   msg0..msg3  - rotating 4-dword message-schedule registers
//   shuf_mask   - scratch; holds the upper-word mask, then the byte-flip
//                 (endianness) mask used on every 16-byte message load
//   buf         - input data pointer (advanced by 64 per block if multi_block)
//   state       - SHA-1 state: A..D at state+0, E at state+16
//   ofs, limit  - byte offsets for the multi-block case; the loop repeats
//                 while ofs <= limit, and the final ofs is returned in rax
//   rsp         - scratch area: 32 bytes at [rsp, rsp+31] save the entering
//                 state so it can be added back after the rounds
//
// ofs and limit are used for multi-block byte array.
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
void MacroAssembler::fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
  XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
  Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block) {

  Label start, done_hash, loop0;

  address upper_word_mask = StubRoutines::x86::upper_word_mask_addr();
  address shuffle_byte_flip_mask = StubRoutines::x86::shuffle_byte_flip_mask_addr();

  bind(start);
  movdqu(abcd, Address(state, 0));
  // Place E in dword 3 of e0 and clear the remaining dwords with the mask.
  pinsrd(e0, Address(state, 16), 3);
  movdqu(shuf_mask, ExternalAddress(upper_word_mask)); // 0xFFFFFFFF000000000000000000000000
  pand(e0, shuf_mask);
  // 0x1B reverses the four dwords so A lands in the most-significant lane.
  pshufd(abcd, abcd, 0x1B);
  movdqu(shuf_mask, ExternalAddress(shuffle_byte_flip_mask)); //0x000102030405060708090a0b0c0d0e0f

  bind(loop0);
  // Save hash values for addition after rounds
  movdqu(Address(rsp, 0), e0);
  movdqu(Address(rsp, 16), abcd);


  // Rounds 0 - 3
  movdqu(msg0, Address(buf, 0));
  pshufb(msg0, shuf_mask);     // big-endian byte flip of the message words
  paddd(e0, msg0);             // first group adds W0..W3 directly (no sha1nexte)
  movdqa(e1, abcd);
  sha1rnds4(abcd, e0, 0);

  // Rounds 4 - 7
  movdqu(msg1, Address(buf, 16));
  pshufb(msg1, shuf_mask);
  sha1nexte(e1, msg1);         // fold rotated E into the next 4 message words
  movdqa(e0, abcd);
  sha1rnds4(abcd, e1, 0);
  sha1msg1(msg0, msg1);        // begin message-schedule expansion for W16..W19

  // Rounds 8 - 11
  movdqu(msg2, Address(buf, 32));
  pshufb(msg2, shuf_mask);
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1rnds4(abcd, e0, 0);
  sha1msg1(msg1, msg2);
  pxor(msg0, msg2);

  // Rounds 12 - 15
  movdqu(msg3, Address(buf, 48));
  pshufb(msg3, shuf_mask);
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1msg2(msg0, msg3);
  sha1rnds4(abcd, e1, 0);
  sha1msg1(msg2, msg3);
  pxor(msg1, msg3);

  // From here on the schedule is fully register-resident: each group
  // consumes one msg register and updates the others via
  // sha1msg1 / pxor / sha1msg2 in a 4-register rotation.

  // Rounds 16 - 19
  sha1nexte(e0, msg0);
  movdqa(e1, abcd);
  sha1msg2(msg1, msg0);
  sha1rnds4(abcd, e0, 0);
  sha1msg1(msg3, msg0);
  pxor(msg2, msg0);

  // Rounds 20 - 23
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1msg2(msg2, msg1);
  sha1rnds4(abcd, e1, 1);
  sha1msg1(msg0, msg1);
  pxor(msg3, msg1);

  // Rounds 24 - 27
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1msg2(msg3, msg2);
  sha1rnds4(abcd, e0, 1);
  sha1msg1(msg1, msg2);
  pxor(msg0, msg2);

  // Rounds 28 - 31
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1msg2(msg0, msg3);
  sha1rnds4(abcd, e1, 1);
  sha1msg1(msg2, msg3);
  pxor(msg1, msg3);

  // Rounds 32 - 35
  sha1nexte(e0, msg0);
  movdqa(e1, abcd);
  sha1msg2(msg1, msg0);
  sha1rnds4(abcd, e0, 1);
  sha1msg1(msg3, msg0);
  pxor(msg2, msg0);

  // Rounds 36 - 39
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1msg2(msg2, msg1);
  sha1rnds4(abcd, e1, 1);
  sha1msg1(msg0, msg1);
  pxor(msg3, msg1);

  // Rounds 40 - 43
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1msg2(msg3, msg2);
  sha1rnds4(abcd, e0, 2);
  sha1msg1(msg1, msg2);
  pxor(msg0, msg2);

  // Rounds 44 - 47
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1msg2(msg0, msg3);
  sha1rnds4(abcd, e1, 2);
  sha1msg1(msg2, msg3);
  pxor(msg1, msg3);

  // Rounds 48 - 51
  sha1nexte(e0, msg0);
  movdqa(e1, abcd);
  sha1msg2(msg1, msg0);
  sha1rnds4(abcd, e0, 2);
  sha1msg1(msg3, msg0);
  pxor(msg2, msg0);

  // Rounds 52 - 55
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1msg2(msg2, msg1);
  sha1rnds4(abcd, e1, 2);
  sha1msg1(msg0, msg1);
  pxor(msg3, msg1);

  // Rounds 56 - 59
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1msg2(msg3, msg2);
  sha1rnds4(abcd, e0, 2);
  sha1msg1(msg1, msg2);
  pxor(msg0, msg2);

  // Rounds 60 - 63
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1msg2(msg0, msg3);
  sha1rnds4(abcd, e1, 3);
  sha1msg1(msg2, msg3);
  pxor(msg1, msg3);

  // Rounds 64 - 67
  sha1nexte(e0, msg0);
  movdqa(e1, abcd);
  sha1msg2(msg1, msg0);
  sha1rnds4(abcd, e0, 3);
  sha1msg1(msg3, msg0);
  pxor(msg2, msg0);

  // Rounds 68 - 71
  // Schedule expansion winds down: only words still needed for rounds <= 79
  // are produced, so the sha1msg1 step drops out here and below.
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1msg2(msg2, msg1);
  sha1rnds4(abcd, e1, 3);
  pxor(msg3, msg1);

  // Rounds 72 - 75
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1msg2(msg3, msg2);
  sha1rnds4(abcd, e0, 3);

  // Rounds 76 - 79
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1rnds4(abcd, e1, 3);

  // add current hash values with previously saved
  movdqu(msg0, Address(rsp, 0));
  sha1nexte(e0, msg0);         // combines the saved E with the rotated new E
  movdqu(msg0, Address(rsp, 16));
  paddd(abcd, msg0);

  if (multi_block) {
    // increment data pointer and loop if more to process
    addptr(buf, 64);
    addptr(ofs, 64);
    cmpptr(ofs, limit);
    jcc(Assembler::belowEqual, loop0);
    movptr(rax, ofs); //return ofs
  }
  // write hash values back in the correct order
  pshufd(abcd, abcd, 0x1b);    // undo the initial dword reversal
  movdqu(Address(state, 0), abcd);
  pextrd(Address(state, 16), e0, 3);

  bind(done_hash);

}
// Emits the SHA-256 block compression loop using the Intel SHA extensions
// (sha256rnds2 / sha256msg1 / sha256msg2). The 64 rounds are emitted as
// sixteen 4-round groups; each group issues sha256rnds2 twice, with
// pshufd(msg, msg, 0x0E) moving the upper two round keys into the low
// qword for the second issue.
//
//   msg              - must be xmm0: sha256rnds2 reads it implicitly, so
//                      state0/state1 can never be xmm0 (see note above)
//   state0, state1   - packed working state; the pshufd/palignr/pblendw
//                      prologue repacks the linear A..H layout into the
//                      two-register lane order sha256rnds2 operates on,
//                      and the epilogue reverses that repacking
//   msgtmp0..msgtmp3 - rotating message-schedule registers; msgtmp4 is
//                      scratch for the palignr in the schedule update
//   buf              - input data pointer (advanced by 64 per block if
//                      multi_block)
//   state            - SHA-256 state: 32 bytes at state+0
//   ofs, limit       - byte offsets for the multi-block case; the loop
//                      repeats while ofs <= limit, and the final ofs is
//                      returned in rax
//   rsp              - scratch area: 32 bytes at [rsp, rsp+31] save the
//                      entering state so it can be added back after rounds
//
// rax is clobbered: during the rounds it holds the K256 round-constant
// table pointer (Address(rax, N) below indexes the constants).
// On 64-bit an extra XMM register (shuf_mask) caches the byte-flip mask;
// the 32-bit variant re-reads it from memory at every pshufb instead.
//
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
#ifdef _LP64
void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
  XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
  Register buf, Register state, Register ofs, Register limit, Register rsp,
  bool multi_block, XMMRegister shuf_mask) {
#else
void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
  XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
  Register buf, Register state, Register ofs, Register limit, Register rsp,
  bool multi_block) {
#endif
  Label start, done_hash, loop0;

  address K256 = StubRoutines::x86::k256_addr();
  address pshuffle_byte_flip_mask = StubRoutines::x86::pshuffle_byte_flip_mask_addr();

  bind(start);
  movdqu(state0, Address(state, 0));
  movdqu(state1, Address(state, 16));

  // Repack the linear state into the lane order sha256rnds2 consumes:
  // 0xB1 swaps adjacent dword pairs, 0x1B reverses all four dwords, then
  // palignr/pblendw redistribute the halves across state0/state1.
  pshufd(state0, state0, 0xB1);
  pshufd(state1, state1, 0x1B);
  movdqa(msgtmp4, state0);
  palignr(state0, state1, 8);
  pblendw(state1, msgtmp4, 0xF0);

#ifdef _LP64
  movdqu(shuf_mask, ExternalAddress(pshuffle_byte_flip_mask));
#endif
  lea(rax, ExternalAddress(K256));

  bind(loop0);
  // Save hash values for addition after rounds.
  movdqu(Address(rsp, 0), state0);
  movdqu(Address(rsp, 16), state1);

  // Rounds 0-3
  movdqu(msg, Address(buf, 0));
#ifdef _LP64
  pshufb(msg, shuf_mask);      // big-endian byte flip of the message words
#else
  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
#endif
  movdqa(msgtmp0, msg);        // keep the raw words for schedule expansion
  paddd(msg, Address(rax, 0)); // add round constants K[0..3]
  sha256rnds2(state1, state0);
  pshufd(msg, msg, 0x0E);      // upper two keys -> low qword for rounds 2-3
  sha256rnds2(state0, state1);

  // Rounds 4-7
  movdqu(msg, Address(buf, 16));
#ifdef _LP64
  pshufb(msg, shuf_mask);
#else
  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
#endif
  movdqa(msgtmp1, msg);
  paddd(msg, Address(rax, 16));
  sha256rnds2(state1, state0);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp0, msgtmp1); // begin expansion toward W16..W19

  // Rounds 8-11
  movdqu(msg, Address(buf, 32));
#ifdef _LP64
  pshufb(msg, shuf_mask);
#else
  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
#endif
  movdqa(msgtmp2, msg);
  paddd(msg, Address(rax, 32));
  sha256rnds2(state1, state0);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp1, msgtmp2);

  // Rounds 12-15
  movdqu(msg, Address(buf, 48));
#ifdef _LP64
  pshufb(msg, shuf_mask);
#else
  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
#endif
  movdqa(msgtmp3, msg);
  paddd(msg, Address(rax, 48));
  sha256rnds2(state1, state0);
  // Full schedule update: palignr supplies the W[t-7] term which
  // sha256msg1/sha256msg2 do not cover on their own.
  movdqa(msgtmp4, msgtmp3);
  palignr(msgtmp4, msgtmp2, 4);
  paddd(msgtmp0, msgtmp4);
  sha256msg2(msgtmp0, msgtmp3);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp2, msgtmp3);

  // From here on the schedule is fully register-resident; each group
  // consumes one msgtmp register and rotates the others.

  // Rounds 16-19
  movdqa(msg, msgtmp0);
  paddd(msg, Address(rax, 64));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp0);
  palignr(msgtmp4, msgtmp3, 4);
  paddd(msgtmp1, msgtmp4);
  sha256msg2(msgtmp1, msgtmp0);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp3, msgtmp0);

  // Rounds 20-23
  movdqa(msg, msgtmp1);
  paddd(msg, Address(rax, 80));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp1);
  palignr(msgtmp4, msgtmp0, 4);
  paddd(msgtmp2, msgtmp4);
  sha256msg2(msgtmp2, msgtmp1);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp0, msgtmp1);

  // Rounds 24-27
  movdqa(msg, msgtmp2);
  paddd(msg, Address(rax, 96));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp2);
  palignr(msgtmp4, msgtmp1, 4);
  paddd(msgtmp3, msgtmp4);
  sha256msg2(msgtmp3, msgtmp2);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp1, msgtmp2);

  // Rounds 28-31
  movdqa(msg, msgtmp3);
  paddd(msg, Address(rax, 112));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp3);
  palignr(msgtmp4, msgtmp2, 4);
  paddd(msgtmp0, msgtmp4);
  sha256msg2(msgtmp0, msgtmp3);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp2, msgtmp3);

  // Rounds 32-35
  movdqa(msg, msgtmp0);
  paddd(msg, Address(rax, 128));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp0);
  palignr(msgtmp4, msgtmp3, 4);
  paddd(msgtmp1, msgtmp4);
  sha256msg2(msgtmp1, msgtmp0);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp3, msgtmp0);

  // Rounds 36-39
  movdqa(msg, msgtmp1);
  paddd(msg, Address(rax, 144));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp1);
  palignr(msgtmp4, msgtmp0, 4);
  paddd(msgtmp2, msgtmp4);
  sha256msg2(msgtmp2, msgtmp1);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp0, msgtmp1);

  // Rounds 40-43
  movdqa(msg, msgtmp2);
  paddd(msg, Address(rax, 160));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp2);
  palignr(msgtmp4, msgtmp1, 4);
  paddd(msgtmp3, msgtmp4);
  sha256msg2(msgtmp3, msgtmp2);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp1, msgtmp2);

  // Rounds 44-47
  movdqa(msg, msgtmp3);
  paddd(msg, Address(rax, 176));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp3);
  palignr(msgtmp4, msgtmp2, 4);
  paddd(msgtmp0, msgtmp4);
  sha256msg2(msgtmp0, msgtmp3);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp2, msgtmp3);

  // Rounds 48-51
  movdqa(msg, msgtmp0);
  paddd(msg, Address(rax, 192));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp0);
  palignr(msgtmp4, msgtmp3, 4);
  paddd(msgtmp1, msgtmp4);
  sha256msg2(msgtmp1, msgtmp0);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp3, msgtmp0);

  // Rounds 52-55
  // Schedule expansion winds down: no further sha256msg1 is needed for
  // words consumed by rounds <= 63.
  movdqa(msg, msgtmp1);
  paddd(msg, Address(rax, 208));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp1);
  palignr(msgtmp4, msgtmp0, 4);
  paddd(msgtmp2, msgtmp4);
  sha256msg2(msgtmp2, msgtmp1);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);

  // Rounds 56-59
  movdqa(msg, msgtmp2);
  paddd(msg, Address(rax, 224));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp2);
  palignr(msgtmp4, msgtmp1, 4);
  paddd(msgtmp3, msgtmp4);
  sha256msg2(msgtmp3, msgtmp2);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);

  // Rounds 60-63
  movdqa(msg, msgtmp3);
  paddd(msg, Address(rax, 240));
  sha256rnds2(state1, state0);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  // Add the state saved before the rounds (Davies-Meyer feed-forward).
  movdqu(msg, Address(rsp, 0));
  paddd(state0, msg);
  movdqu(msg, Address(rsp, 16));
  paddd(state1, msg);

  if (multi_block) {
    // increment data pointer and loop if more to process
    addptr(buf, 64);
    addptr(ofs, 64);
    cmpptr(ofs, limit);
    jcc(Assembler::belowEqual, loop0);
    movptr(rax, ofs); //return ofs
  }

  // Undo the prologue's lane repacking before storing the state back.
  pshufd(state0, state0, 0x1B);
  pshufd(state1, state1, 0xB1);
  movdqa(msgtmp4, state0);
  pblendw(state0, state1, 0xF0);
  palignr(state1, msgtmp4, 8);

  movdqu(Address(state, 0), state0);
  movdqu(Address(state, 16), state1);

  bind(done_hash);

}