/*
 * Copyright (c) 2016, Intel Corporation.
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "macroAssembler_x86.hpp"

// Emit a SHA-1 compression function using the Intel SHA extensions
// (sha1rnds4 / sha1nexte / sha1msg1 / sha1msg2).  Compresses 64-byte
// block(s) from [buf] into the 160-bit SHA-1 state at [state].
//
// Register roles:
//   abcd        - working variables A,B,C,D (one dword each)
//   e0, e1      - working variable E, ping-ponged between round groups
//   msg0..msg3  - 4 x 16-byte message-schedule words W[t]
//   shuf_mask   - scratch for the two constant masks loaded below
//   buf         - input data pointer (advanced by 64 per block if multi_block)
//   state       - SHA-1 state: A..D at offset 0, E at offset 16
//   rsp         - points at 32 bytes of scratch memory used to save the
//                 pre-round state (named rsp by convention; presumably the
//                 actual stack pointer -- confirm at the stub call sites)
//
// When multi_block is true, loops while ofs <= limit and leaves the updated
// ofs in rax as the intrinsic's return value.
//
// ofs and limit are used for multi-block byte array.
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
void MacroAssembler::fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
  XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
  Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block) {

  Label start, done_hash, loop0;

  address upper_word_mask = StubRoutines::x86::upper_word_mask_addr();
  address shuffle_byte_flip_mask = StubRoutines::x86::shuffle_byte_flip_mask_addr();

  bind(start);
  // Load A,B,C,D, and place E in the top dword of e0 with the low three
  // dwords masked to zero (the layout sha1nexte/sha1rnds4 expect).
  movdqu(abcd, Address(state, 0));
  pinsrd(e0, Address(state, 16), 3);
  movdqu(shuf_mask, ExternalAddress(upper_word_mask)); // 0xFFFFFFFF000000000000000000000000
  pand(e0, shuf_mask);
  pshufd(abcd, abcd, 0x1B);              // 0x1B reverses the dword order
  movdqu(shuf_mask, ExternalAddress(shuffle_byte_flip_mask)); //0x000102030405060708090a0b0c0d0e0f

  bind(loop0);
  // Save hash values for addition after rounds
  movdqu(Address(rsp, 0), e0);
  movdqu(Address(rsp, 16), abcd);

  // The 80 rounds run in groups of 4.  The sha1rnds4 immediate (0..3)
  // selects the round function and K constant for rounds 0-19, 20-39,
  // 40-59, and 60-79 respectively.  pshufb byte-swaps each loaded message
  // word from big-endian; sha1msg1/pxor/sha1msg2 compute the message
  // schedule for rounds >= 16.

  // Rounds 0 - 3
  movdqu(msg0, Address(buf, 0));
  pshufb(msg0, shuf_mask);
  paddd(e0, msg0);                       // E += W[0..3]
  movdqa(e1, abcd);
  sha1rnds4(abcd, e0, 0);

  // Rounds 4 - 7
  movdqu(msg1, Address(buf, 16));
  pshufb(msg1, shuf_mask);
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1rnds4(abcd, e1, 0);
  sha1msg1(msg0, msg1);

  // Rounds 8 - 11
  movdqu(msg2, Address(buf, 32));
  pshufb(msg2, shuf_mask);
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1rnds4(abcd, e0, 0);
  sha1msg1(msg1, msg2);
  pxor(msg0, msg2);

  // Rounds 12 - 15
  movdqu(msg3, Address(buf, 48));
  pshufb(msg3, shuf_mask);
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1msg2(msg0, msg3);
  sha1rnds4(abcd, e1, 0);
  sha1msg1(msg2, msg3);
  pxor(msg1, msg3);

  // Rounds 16 - 19
  sha1nexte(e0, msg0);
  movdqa(e1, abcd);
  sha1msg2(msg1, msg0);
  sha1rnds4(abcd, e0, 0);
  sha1msg1(msg3, msg0);
  pxor(msg2, msg0);

  // Rounds 20 - 23
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1msg2(msg2, msg1);
  sha1rnds4(abcd, e1, 1);
  sha1msg1(msg0, msg1);
  pxor(msg3, msg1);

  // Rounds 24 - 27
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1msg2(msg3, msg2);
  sha1rnds4(abcd, e0, 1);
  sha1msg1(msg1, msg2);
  pxor(msg0, msg2);

  // Rounds 28 - 31
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1msg2(msg0, msg3);
  sha1rnds4(abcd, e1, 1);
  sha1msg1(msg2, msg3);
  pxor(msg1, msg3);

  // Rounds 32 - 35
  sha1nexte(e0, msg0);
  movdqa(e1, abcd);
  sha1msg2(msg1, msg0);
  sha1rnds4(abcd, e0, 1);
  sha1msg1(msg3, msg0);
  pxor(msg2, msg0);

  // Rounds 36 - 39
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1msg2(msg2, msg1);
  sha1rnds4(abcd, e1, 1);
  sha1msg1(msg0, msg1);
  pxor(msg3, msg1);

  // Rounds 40 - 43
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1msg2(msg3, msg2);
  sha1rnds4(abcd, e0, 2);
  sha1msg1(msg1, msg2);
  pxor(msg0, msg2);

  // Rounds 44 - 47
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1msg2(msg0, msg3);
  sha1rnds4(abcd, e1, 2);
  sha1msg1(msg2, msg3);
  pxor(msg1, msg3);

  // Rounds 48 - 51
  sha1nexte(e0, msg0);
  movdqa(e1, abcd);
  sha1msg2(msg1, msg0);
  sha1rnds4(abcd, e0, 2);
  sha1msg1(msg3, msg0);
  pxor(msg2, msg0);

  // Rounds 52 - 55
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1msg2(msg2, msg1);
  sha1rnds4(abcd, e1, 2);
  sha1msg1(msg0, msg1);
  pxor(msg3, msg1);

  // Rounds 56 - 59
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1msg2(msg3, msg2);
  sha1rnds4(abcd, e0, 2);
  sha1msg1(msg1, msg2);
  pxor(msg0, msg2);

  // Rounds 60 - 63
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1msg2(msg0, msg3);
  sha1rnds4(abcd, e1, 3);
  sha1msg1(msg2, msg3);
  pxor(msg1, msg3);

  // Rounds 64 - 67
  sha1nexte(e0, msg0);
  movdqa(e1, abcd);
  sha1msg2(msg1, msg0);
  sha1rnds4(abcd, e0, 3);
  sha1msg1(msg3, msg0);
  pxor(msg2, msg0);

  // Rounds 68 - 71 (schedule winds down: no further sha1msg1 needed)
  sha1nexte(e1, msg1);
  movdqa(e0, abcd);
  sha1msg2(msg2, msg1);
  sha1rnds4(abcd, e1, 3);
  pxor(msg3, msg1);

  // Rounds 72 - 75
  sha1nexte(e0, msg2);
  movdqa(e1, abcd);
  sha1msg2(msg3, msg2);
  sha1rnds4(abcd, e0, 3);

  // Rounds 76 - 79
  sha1nexte(e1, msg3);
  movdqa(e0, abcd);
  sha1rnds4(abcd, e1, 3);

  // add current hash values with previously saved
  movdqu(msg0, Address(rsp, 0));
  sha1nexte(e0, msg0);                   // fold saved E into the new E
  movdqu(msg0, Address(rsp, 16));
  paddd(abcd, msg0);

  if (multi_block) {
    // increment data pointer and loop if more to process
    addptr(buf, 64);
    addptr(ofs, 64);
    cmpptr(ofs, limit);
    jcc(Assembler::belowEqual, loop0);
    movptr(rax, ofs); //return ofs
  }
  // write hash values back in the correct order (undo the 0x1B shuffle,
  // extract E from the top dword)
  pshufd(abcd, abcd, 0x1b);
  movdqu(Address(state, 0), abcd);
  pextrd(Address(state, 16), e0, 3);

  bind(done_hash);

}

// xmm0 (msg) is used as an implicit argument to sha256rnds2
// and state0 and state1 can never use xmm0 register.
// ofs and limit are used for multi-block byte array.
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
//
// Emit a SHA-256 compression function using the Intel SHA extensions
// (sha256rnds2 / sha256msg1 / sha256msg2).  Compresses 64-byte block(s)
// from [buf] into the 256-bit SHA-256 state at [state].
//
// Register roles:
//   msg             - must be xmm0: implicit third operand of sha256rnds2
//   state0, state1  - working state in the {ABEF, CDGH} arrangement that
//                     sha256rnds2 operates on (never xmm0)
//   msgtmp0..msgtmp3 - 4 x 16-byte message-schedule words
//   msgtmp4         - scratch for schedule/state shuffles
//   buf             - input data pointer (advanced by 64 per block if multi_block)
//   state           - SHA-256 state: A..D at offset 0, E..H at offset 16
//   rsp             - points at 32 bytes of scratch memory used to save the
//                     pre-round state (named rsp by convention; presumably
//                     the actual stack pointer -- confirm at call sites)
//   shuf_mask       - LP64 only: caches the byte-flip mask in a register;
//                     32-bit falls back to a memory operand each round
//
// rax is used as the pointer into the K256 round-constant table while the
// rounds run; when multi_block is true and the loop exits, rax is
// overwritten with the updated ofs as the intrinsic's return value.
void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
  XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
  Register buf, Register state, Register ofs, Register limit, Register rsp,
  bool multi_block LP64_ONLY(COMMA XMMRegister shuf_mask)) {
  Label start, done_hash, loop0;

  address K256 = StubRoutines::x86::k256_addr();
  address pshuffle_byte_flip_mask = StubRoutines::x86::pshuffle_byte_flip_mask_addr();

  bind(start);
  movdqu(state0, Address(state, 0));
  movdqu(state1, Address(state, 16));

  // Rearrange the in-memory A..H dwords into the {ABEF, CDGH} register
  // layout required by sha256rnds2 (undone symmetrically at the end).
  pshufd(state0, state0, 0xB1);    // swap within dword pairs
  pshufd(state1, state1, 0x1B);    // reverse dword order
  movdqa(msgtmp4, state0);
  palignr(state0, state1, 8);
  pblendw(state1, msgtmp4, 0xF0);

#ifdef _LP64
  movdqu(shuf_mask, ExternalAddress(pshuffle_byte_flip_mask));
#endif
  lea(rax, ExternalAddress(K256)); // rax -> round-constant table

  bind(loop0);
  // Save hash values for addition after rounds
  movdqu(Address(rsp, 0), state0);
  movdqu(Address(rsp, 16), state1);

  // Each 4-round group: add K256[t..t+3] to the schedule words in msg,
  // run sha256rnds2 twice (low then, after pshufd 0x0E moves the high
  // qword down, high), while sha256msg1/palignr/paddd/sha256msg2 derive
  // the schedule words for rounds >= 16.

  // Rounds 0-3
  movdqu(msg, Address(buf, 0));
#ifdef _LP64
  pshufb(msg, shuf_mask);          // byte-swap from big-endian input
#else
  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
#endif
  movdqa(msgtmp0, msg);
  paddd(msg, Address(rax, 0));
  sha256rnds2(state1, state0);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);

  // Rounds 4-7
  movdqu(msg, Address(buf, 16));
#ifdef _LP64
  pshufb(msg, shuf_mask);
#else
  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
#endif
  movdqa(msgtmp1, msg);
  paddd(msg, Address(rax, 16));
  sha256rnds2(state1, state0);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp0, msgtmp1);

  // Rounds 8-11
  movdqu(msg, Address(buf, 32));
#ifdef _LP64
  pshufb(msg, shuf_mask);
#else
  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
#endif
  movdqa(msgtmp2, msg);
  paddd(msg, Address(rax, 32));
  sha256rnds2(state1, state0);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp1, msgtmp2);

  // Rounds 12-15
  movdqu(msg, Address(buf, 48));
#ifdef _LP64
  pshufb(msg, shuf_mask);
#else
  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
#endif
  movdqa(msgtmp3, msg);
  paddd(msg, Address(rax, 48));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp3);
  palignr(msgtmp4, msgtmp2, 4);
  paddd(msgtmp0, msgtmp4);
  sha256msg2(msgtmp0, msgtmp3);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp2, msgtmp3);

  // Rounds 16-19
  movdqa(msg, msgtmp0);
  paddd(msg, Address(rax, 64));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp0);
  palignr(msgtmp4, msgtmp3, 4);
  paddd(msgtmp1, msgtmp4);
  sha256msg2(msgtmp1, msgtmp0);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp3, msgtmp0);

  // Rounds 20-23
  movdqa(msg, msgtmp1);
  paddd(msg, Address(rax, 80));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp1);
  palignr(msgtmp4, msgtmp0, 4);
  paddd(msgtmp2, msgtmp4);
  sha256msg2(msgtmp2, msgtmp1);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp0, msgtmp1);

  // Rounds 24-27
  movdqa(msg, msgtmp2);
  paddd(msg, Address(rax, 96));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp2);
  palignr(msgtmp4, msgtmp1, 4);
  paddd(msgtmp3, msgtmp4);
  sha256msg2(msgtmp3, msgtmp2);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp1, msgtmp2);

  // Rounds 28-31
  movdqa(msg, msgtmp3);
  paddd(msg, Address(rax, 112));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp3);
  palignr(msgtmp4, msgtmp2, 4);
  paddd(msgtmp0, msgtmp4);
  sha256msg2(msgtmp0, msgtmp3);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp2, msgtmp3);

  // Rounds 32-35
  movdqa(msg, msgtmp0);
  paddd(msg, Address(rax, 128));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp0);
  palignr(msgtmp4, msgtmp3, 4);
  paddd(msgtmp1, msgtmp4);
  sha256msg2(msgtmp1, msgtmp0);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp3, msgtmp0);

  // Rounds 36-39
  movdqa(msg, msgtmp1);
  paddd(msg, Address(rax, 144));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp1);
  palignr(msgtmp4, msgtmp0, 4);
  paddd(msgtmp2, msgtmp4);
  sha256msg2(msgtmp2, msgtmp1);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp0, msgtmp1);

  // Rounds 40-43
  movdqa(msg, msgtmp2);
  paddd(msg, Address(rax, 160));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp2);
  palignr(msgtmp4, msgtmp1, 4);
  paddd(msgtmp3, msgtmp4);
  sha256msg2(msgtmp3, msgtmp2);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp1, msgtmp2);

  // Rounds 44-47
  movdqa(msg, msgtmp3);
  paddd(msg, Address(rax, 176));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp3);
  palignr(msgtmp4, msgtmp2, 4);
  paddd(msgtmp0, msgtmp4);
  sha256msg2(msgtmp0, msgtmp3);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp2, msgtmp3);

  // Rounds 48-51
  movdqa(msg, msgtmp0);
  paddd(msg, Address(rax, 192));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp0);
  palignr(msgtmp4, msgtmp3, 4);
  paddd(msgtmp1, msgtmp4);
  sha256msg2(msgtmp1, msgtmp0);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  sha256msg1(msgtmp3, msgtmp0);

  // Rounds 52-55 (schedule winds down: no further sha256msg1 needed)
  movdqa(msg, msgtmp1);
  paddd(msg, Address(rax, 208));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp1);
  palignr(msgtmp4, msgtmp0, 4);
  paddd(msgtmp2, msgtmp4);
  sha256msg2(msgtmp2, msgtmp1);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);

  // Rounds 56-59
  movdqa(msg, msgtmp2);
  paddd(msg, Address(rax, 224));
  sha256rnds2(state1, state0);
  movdqa(msgtmp4, msgtmp2);
  palignr(msgtmp4, msgtmp1, 4);
  paddd(msgtmp3, msgtmp4);
  sha256msg2(msgtmp3, msgtmp2);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);

  // Rounds 60-63
  movdqa(msg, msgtmp3);
  paddd(msg, Address(rax, 240));
  sha256rnds2(state1, state0);
  pshufd(msg, msg, 0x0E);
  sha256rnds2(state0, state1);
  // add current hash values with previously saved
  movdqu(msg, Address(rsp, 0));
  paddd(state0, msg);
  movdqu(msg, Address(rsp, 16));
  paddd(state1, msg);

  if (multi_block) {
    // increment data pointer and loop if more to process
    addptr(buf, 64);
    addptr(ofs, 64);
    cmpptr(ofs, limit);
    jcc(Assembler::belowEqual, loop0);
    movptr(rax, ofs); //return ofs
  }

  // Undo the {ABEF, CDGH} arrangement and write A..H back in memory order.
  pshufd(state0, state0, 0x1B);
  pshufd(state1, state1, 0xB1);
  movdqa(msgtmp4, state0);
  pblendw(state0, state1, 0xF0);
  palignr(state1, msgtmp4, 8);

  movdqu(Address(state, 0), state0);
  movdqu(Address(state, 16), state1);

  bind(done_hash);

}