1 /* 2 * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2016 SAP SE. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #ifndef OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP 27 #define OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP 28 29 #include "runtime/atomic.hpp" 30 #include "runtime/os.hpp" 31 #include "vm_version_s390.hpp" 32 33 // Note that the compare-and-swap instructions on System z perform 34 // a serialization function before the storage operand is fetched 35 // and again after the operation is completed. 36 // 37 // Used constraint modifiers: 38 // = write-only access: Value on entry to inline-assembler code irrelevant. 39 // + read/write access: Value on entry is used; on exit value is changed. 40 // read-only access: Value on entry is used and never changed. 41 // & early-clobber access: Might be modified before all read-only operands 42 // have been used. 43 // a address register operand (not GR0). 
44 // d general register operand (including GR0) 45 // Q memory operand w/o index register. 46 // 0..9 operand reference (by operand position). 47 // Used for operands that fill multiple roles. One example would be a 48 // write-only operand receiving its initial value from a read-only operand. 49 // Refer to cmpxchg(..) operand #0 and variable cmp_val for a real-life example. 50 // 51 52 // On System z, all store operations are atomic if the address where the data is stored into 53 // is an integer multiple of the data length. Furthermore, all stores are ordered: 54 // a store which occurs conceptually before another store becomes visible to other CPUs 55 // before the other store becomes visible. 56 57 58 //------------ 59 // Atomic::add 60 //------------ 61 // These methods force the value in memory to be augmented by the passed increment. 62 // Both, memory value and increment, are treated as 32bit signed binary integers. 63 // No overflow exceptions are recognized, and the condition code does not hold 64 // information about the value in memory. 65 // 66 // The value in memory is updated by using a compare-and-swap instruction. The 67 // instruction is retried as often as required. 68 // 69 // The return value of the method is the value that was successfully stored. At the 70 // time the caller receives back control, the value in memory may have changed already. 

template <>
inline int32_t Atomic::specialized_add<int32_t>(int32_t inc, volatile int32_t* dest) {
  // Atomically augment *dest by 'inc'; returns the updated (post-add) value.
  unsigned int old, upd;

  if (VM_Version::has_LoadAndALUAtomicV1()) {
    // Fast path: LAA (load and add) performs the atomic update in a single
    // instruction. It is emitted as raw .byte values, presumably because the
    // minimum supported assembler does not know the mnemonic — TODO confirm;
    // the commented-out lines show the intended instruction. Register use is
    // fixed by the hand encoding: GR0 = increment, GR2 = old value,
    // GR3 = data address.
    __asm__ __volatile__ (
      "   LGFR    0,%[inc]                \n\t" // save increment (sign-extended) in GR0
      "   LA      3,%[mem]                \n\t" // force data address into GR3
//    "   LAA     %[upd],%[inc],%[mem]    \n\t" // increment and get old value
//    "   LAA     2,0,0(3)                \n\t" // actually coded instruction
      "   .byte   0xeb                    \n\t" // LAA main opcode
      "   .byte   0x20                    \n\t" // R1,R3
      "   .byte   0x30                    \n\t" // R2,disp1
      "   .byte   0x00                    \n\t" // disp2,disp3
      "   .byte   0x00                    \n\t" // disp4,disp5
      "   .byte   0xf8                    \n\t" // LAA minor opcode
      "   AR      2,0                     \n\t" // calc new value in register (old + inc)
      "   LR      %[upd],2                \n\t" // move to result register
      //---< outputs >---
      : [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---< inputs >---
      : [inc] "a"   (inc)    // read-only.
      //---< clobbered >---
      : "cc", "r0", "r2", "r3"
    );
  } else {
    // Fallback: compare-and-swap loop. On mismatch, CS reloads the current
    // memory value into %[old], so the retry needs no extra load.
    __asm__ __volatile__ (
      "   LLGF    %[old],%[mem]           \n\t" // get old value
      "0: LA      %[upd],0(%[inc],%[old]) \n\t" // calc result (old + inc; LA does not change cc)
      "   CS      %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
      "   JNE     0b                      \n\t" // no success? -> retry
      //---< outputs >---
      : [old] "=&a" (old)    // write-only, old counter value
      , [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---< inputs >---
      : [inc] "a"   (inc)    // read-only.
      //---< clobbered >---
      : "cc"
    );
  }

  return (int32_t)upd;
}


template <>
inline int64_t Atomic::specialized_add<int64_t>(int64_t inc, volatile int64_t* dest) {
  // 64-bit variant of Atomic::add; see the int32_t specialization above for
  // a description of the two code paths.
  unsigned long old, upd;

  if (VM_Version::has_LoadAndALUAtomicV1()) {
    // Fast path: LAAG (load and add, 64-bit), hand-encoded as .byte values.
    __asm__ __volatile__ (
      "   LGR     0,%[inc]                \n\t" // save increment in GR0
      "   LA      3,%[mem]                \n\t" // force data address into GR3
//    "   LAAG    %[upd],%[inc],%[mem]    \n\t" // increment and get old value
//    "   LAAG    2,0,0(3)                \n\t" // actually coded instruction
      "   .byte   0xeb                    \n\t" // LAAG main opcode
      "   .byte   0x20                    \n\t" // R1,R3
      "   .byte   0x30                    \n\t" // R2,disp1
      "   .byte   0x00                    \n\t" // disp2,disp3
      "   .byte   0x00                    \n\t" // disp4,disp5
      "   .byte   0xe8                    \n\t" // LAAG minor opcode (0xe8; LAA would be 0xf8)
      "   AGR     2,0                     \n\t" // calc new value in register (old + inc)
      "   LGR     %[upd],2                \n\t" // move to result register
      //---< outputs >---
      : [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---< inputs >---
      : [inc] "a"   (inc)    // read-only.
      //---< clobbered >---
      : "cc", "r0", "r2", "r3"
    );
  } else {
    // Fallback: 64-bit compare-and-swap (CSG) loop.
    __asm__ __volatile__ (
      "   LG      %[old],%[mem]           \n\t" // get old value
      "0: LA      %[upd],0(%[inc],%[old]) \n\t" // calc result (old + inc)
      "   CSG     %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
      "   JNE     0b                      \n\t" // no success? -> retry
      //---< outputs >---
      : [old] "=&a" (old)    // write-only, old counter value
      , [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---< inputs >---
      : [inc] "a"   (inc)    // read-only.
      //---< clobbered >---
      : "cc"
    );
  }

  return (int64_t)upd;
}

//------------
// Atomic::inc
//------------
// These methods force the value in memory to be incremented (augmented by 1).
// Both, memory value and increment, are treated as 32bit signed binary integers.
// No overflow exceptions are recognized, and the condition code does not hold
// information about the value in memory.
//
// The value in memory is updated by using a compare-and-swap instruction. The
// instruction is retried as often as required.

template <>
inline void Atomic::specialized_inc<int32_t>(volatile int32_t* dest) {
  // Atomically increment *dest by 1. No value is returned.
  unsigned int old, upd;

  if (VM_Version::has_LoadAndALUAtomicV1()) {
    // Fast path: hand-encoded LAA (see specialized_add). Here GR2 is both the
    // increment source and the result register (R1 == R3 == GR2: byte 0x22).
    __asm__ __volatile__ (
      "   LGHI    2,1                     \n\t" // load increment into GR2
      "   LA      3,%[mem]                \n\t" // force data address into GR3
//    "   LAA     %[upd],%[inc],%[mem]    \n\t" // increment and get old value
//    "   LAA     2,2,0(3)                \n\t" // actually coded instruction
      "   .byte   0xeb                    \n\t" // LAA main opcode
      "   .byte   0x22                    \n\t" // R1,R3
      "   .byte   0x30                    \n\t" // R2,disp1
      "   .byte   0x00                    \n\t" // disp2,disp3
      "   .byte   0x00                    \n\t" // disp4,disp5
      "   .byte   0xf8                    \n\t" // LAA minor opcode
      "   AGHI    2,1                     \n\t" // calc new value in register
      "   LR      %[upd],2                \n\t" // move to result register (value unused; void return)
      //---< outputs >---
      : [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---< inputs >---
      :
//    : [inc] "a"   (inc)    // read-only.
      //---< clobbered >---
      : "cc", "r2", "r3"
    );
  } else {
    // Fallback: compare-and-swap loop.
    __asm__ __volatile__ (
      "   LLGF    %[old],%[mem]           \n\t" // get old value
      "0: LA      %[upd],1(,%[old])       \n\t" // calc result (old + 1)
      "   CS      %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
      "   JNE     0b                      \n\t" // no success? -> retry
      //---< outputs >---
      : [old] "=&a" (old)    // write-only, old counter value
      , [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---< inputs >---
      :
      //---< clobbered >---
      : "cc"
    );
  }
}

template <>
inline void Atomic::specialized_inc<int64_t>(volatile int64_t* dest) {
  // 64-bit variant: atomically increment *dest by 1. No value is returned.
  unsigned long old, upd;

  if (VM_Version::has_LoadAndALUAtomicV1()) {
    // Fast path: hand-encoded LAAG (64-bit load and add).
    __asm__ __volatile__ (
      "   LGHI    2,1                     \n\t" // load increment into GR2
      "   LA      3,%[mem]                \n\t" // force data address into GR3
//    "   LAAG    %[upd],%[inc],%[mem]    \n\t" // increment and get old value
//    "   LAAG    2,2,0(3)                \n\t" // actually coded instruction
      "   .byte   0xeb                    \n\t" // LAAG main opcode
      "   .byte   0x22                    \n\t" // R1,R3
      "   .byte   0x30                    \n\t" // R2,disp1
      "   .byte   0x00                    \n\t" // disp2,disp3
      "   .byte   0x00                    \n\t" // disp4,disp5
      "   .byte   0xe8                    \n\t" // LAAG minor opcode (0xe8; LAA would be 0xf8)
      "   AGHI    2,1                     \n\t" // calc new value in register
      // NOTE(review): LR copies only the low 32 bits of the 64-bit result.
      // Harmless since [upd] is never consumed (void return), but LGR would
      // be the consistent choice — confirm before relying on [upd].
      "   LR      %[upd],2                \n\t" // move to result register
      //---< outputs >---
      : [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---< inputs >---
      :
//    : [inc] "a"   (inc)    // read-only.
      //---< clobbered >---
      : "cc", "r2", "r3"
    );
  } else {
    // Fallback: 64-bit compare-and-swap (CSG) loop.
    __asm__ __volatile__ (
      "   LG      %[old],%[mem]           \n\t" // get old value
      "0: LA      %[upd],1(,%[old])       \n\t" // calc result (old + 1)
      "   CSG     %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
      "   JNE     0b                      \n\t" // no success? -> retry
      //---< outputs >---
      : [old] "=&a" (old)    // write-only, old counter value
      , [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---< inputs >---
      :
      //---< clobbered >---
      : "cc"
    );
  }
}


//------------
// Atomic::dec
//------------
// These methods force the value in memory to be decremented (augmented by -1).
// Both, memory value and decrement, are treated as 32bit signed binary integers.
// No overflow exceptions are recognized, and the condition code does not hold
// information about the value in memory.
//
// The value in memory is updated by using a compare-and-swap instruction. The
// instruction is retried as often as required.

template <>
inline void Atomic::specialized_dec<int32_t>(volatile int32_t* dest) {
  // Atomically decrement *dest by 1. No value is returned.
  unsigned int old, upd;

  if (VM_Version::has_LoadAndALUAtomicV1()) {
    // Fast path: hand-encoded LAA with a -1 "increment" in GR2.
    __asm__ __volatile__ (
      "   LGHI    2,-1                    \n\t" // load decrement (-1) into GR2
      "   LA      3,%[mem]                \n\t" // force data address into GR3
//    "   LAA     %[upd],%[inc],%[mem]    \n\t" // increment and get old value
//    "   LAA     2,2,0(3)                \n\t" // actually coded instruction
      "   .byte   0xeb                    \n\t" // LAA main opcode
      "   .byte   0x22                    \n\t" // R1,R3
      "   .byte   0x30                    \n\t" // R2,disp1
      "   .byte   0x00                    \n\t" // disp2,disp3
      "   .byte   0x00                    \n\t" // disp4,disp5
      "   .byte   0xf8                    \n\t" // LAA minor opcode
      "   AGHI    2,-1                    \n\t" // calc new value in register
      "   LR      %[upd],2                \n\t" // move to result register (value unused; void return)
      //---< outputs >---
      : [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---< inputs >---
      :
//    : [inc] "a"   (inc)    // read-only.
      //---< clobbered >---
      : "cc", "r2", "r3"
    );
  } else {
    // Fallback: compare-and-swap loop. The negative displacement needs LAY,
    // which the inline assembler does not support, so use LR + AHI instead.
    __asm__ __volatile__ (
      "   LLGF    %[old],%[mem]           \n\t" // get old value
  // LAY not supported by inline assembler
  //  "0: LAY     %[upd],-1(,%[old])      \n\t" // calc result
      "0: LR      %[upd],%[old]           \n\t" // calc result (copy old ...
      "   AHI     %[upd],-1               \n\t" //              ... and subtract 1)
      "   CS      %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
      "   JNE     0b                      \n\t" // no success? -> retry
      //---< outputs >---
      : [old] "=&a" (old)    // write-only, old counter value
      , [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---< inputs >---
      :
      //---< clobbered >---
      : "cc"
    );
  }
}

template <>
inline void Atomic::specialized_dec<int64_t>(volatile int64_t* dest) {
  // 64-bit variant: atomically decrement *dest by 1. No value is returned.
  unsigned long old, upd;

  if (VM_Version::has_LoadAndALUAtomicV1()) {
    // Fast path: hand-encoded LAAG with a -1 "increment" in GR2.
    __asm__ __volatile__ (
      "   LGHI    2,-1                    \n\t" // load decrement (-1) into GR2
      "   LA      3,%[mem]                \n\t" // force data address into GR3
//    "   LAAG    %[upd],%[inc],%[mem]    \n\t" // increment and get old value
//    "   LAAG    2,2,0(3)                \n\t" // actually coded instruction
      "   .byte   0xeb                    \n\t" // LAAG main opcode
      "   .byte   0x22                    \n\t" // R1,R3
      "   .byte   0x30                    \n\t" // R2,disp1
      "   .byte   0x00                    \n\t" // disp2,disp3
      "   .byte   0x00                    \n\t" // disp4,disp5
      "   .byte   0xe8                    \n\t" // LAAG minor opcode (0xe8; LAA would be 0xf8)
      "   AGHI    2,-1                    \n\t" // calc new value in register
      // NOTE(review): LR copies only the low 32 bits; harmless since [upd]
      // is never consumed (void return), but LGR would be consistent — confirm.
      "   LR      %[upd],2                \n\t" // move to result register
      //---< outputs >---
      : [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---< inputs >---
      :
//    : [inc] "a"   (inc)    // read-only.
      //---< clobbered >---
      : "cc", "r2", "r3"
    );
  } else {
    // Fallback: 64-bit compare-and-swap (CSG) loop; LGR + AGHI instead of
    // the unsupported LAY.
    __asm__ __volatile__ (
      "   LG      %[old],%[mem]           \n\t" // get old value
  // LAY not supported by inline assembler
  //  "0: LAY     %[upd],-1(,%[old])      \n\t" // calc result
      "0: LGR     %[upd],%[old]           \n\t" // calc result (copy old ...
      "   AGHI    %[upd],-1               \n\t" //              ... and subtract 1)
      "   CSG     %[old],%[upd],%[mem]    \n\t" // try to xchg res with mem
      "   JNE     0b                      \n\t" // no success? -> retry
      //---< outputs >---
      : [old] "=&a" (old)    // write-only, old counter value
      , [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---< inputs >---
      :
      //---< clobbered >---
      : "cc"
    );
  }
}


//-------------
// Atomic::xchg
//-------------
// These methods force the value in memory to be replaced by the new value passed
// in as argument.
//
// The value in memory is replaced by using a compare-and-swap instruction. The
// instruction is retried as often as required. This makes sure that the new
// value can be seen, at least for a very short period of time, by other CPUs.
//
// If we would use a normal "load(old value) store(new value)" sequence,
// the new value could be lost unnoticed, due to a store(new value) from
// another thread.
//
// The return value is the (unchanged) value from memory as it was when the
// replacement succeeded.
template <>
inline int32_t Atomic::specialized_xchg<int32_t>(int32_t xchg_val, volatile int32_t* dest) {
  // Atomically store xchg_val into *dest; returns the previous memory value.
  unsigned int old;

  // CS loop: on mismatch CS reloads the current memory value into %[old],
  // so only the initial load is explicit.
  __asm__ __volatile__ (
    "   LLGF    %[old],%[mem]           \n\t" // get old value
    "0: CS      %[old],%[upd],%[mem]    \n\t" // try to xchg upd with mem
    "   JNE     0b                      \n\t" // no success? -> retry
    //---< outputs >---
    : [old] "=&d" (old)    // write-only, prev value irrelevant
    , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
    //---< inputs >---
    : [upd] "d"   (xchg_val) // read-only, value to be written to memory
    //---< clobbered >---
    : "cc"
  );

  return (int32_t)old;
}

template <>
inline int64_t Atomic::specialized_xchg<int64_t>(int64_t xchg_val, volatile int64_t* dest) {
  // 64-bit variant: atomically store xchg_val into *dest; returns the
  // previous memory value.
  unsigned long old;

  __asm__ __volatile__ (
    "   LG      %[old],%[mem]           \n\t" // get old value
    "0: CSG     %[old],%[upd],%[mem]    \n\t" // try to xchg upd with mem
    "   JNE     0b                      \n\t" // no success? -> retry
    //---< outputs >---
    : [old] "=&d" (old)    // write-only, init from memory
    , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
    //---< inputs >---
    : [upd] "d"   (xchg_val) // read-only, value to be written to memory
    //---< clobbered >---
    : "cc"
  );

  // Cast via intptr_t (same width as int64_t on this 64-bit platform);
  // equivalent to the (int64_t) cast used by the other specializations.
  return (intptr_t)old;
}


//----------------
// Atomic::cmpxchg
//----------------
// These methods compare the value in memory with a given compare value.
// If both values compare equal, the value in memory is replaced with
// the exchange value.
//
// The value in memory is compared and replaced by using a compare-and-swap
// instruction. The instruction is NOT retried (one shot only).
//
// The return value is the (unchanged) value from memory as it was when the
// compare-and-swap instruction completed. A successful exchange operation
// is indicated by (return value == compare_value). If unsuccessful, a new
// exchange value can be calculated based on the return value which is the
// latest contents of the memory location.
//
// Inspecting the return value is the only way for the caller to determine
// if the compare-and-swap instruction was successful:
// - If return value and compare value compare equal, the compare-and-swap
//   instruction was successful and the value in memory was replaced by the
//   exchange value.
// - If return value and compare value compare unequal, the compare-and-swap
//   instruction was not successful. The value in memory was left unchanged.
//
// The s390 processors always fence before and after the csg instructions.
// Thus we ignore the memory ordering argument. The docu says: "A serialization
// function is performed before the operand is fetched and again after the
// operation is completed."

template <>
inline int32_t Atomic::specialized_cmpxchg<int32_t>(int32_t xchg_val, volatile int32_t* dest, int32_t cmp_val, cmpxchg_memory_order order) {
  // One-shot 32-bit compare-and-swap; 'order' is intentionally ignored
  // (CS serializes before and after the operation, see comment above).
  // 'old' is deliberately 64 bits wide; only its low 32 bits are significant.
  unsigned long old;

  __asm__ __volatile__ (
    "   CS      %[old],%[upd],%[mem]    \n\t" // Try to xchg upd with mem.
    // outputs
    : [old] "=&d" (old)    // Write-only; receives current memory value on mismatch.
    , [mem] "+Q"  (*dest)  // Read/write, memory to be updated atomically.
    // inputs
    : [upd] "d"   (xchg_val) // Read-only, exchange value.
    ,       "0"   (cmp_val)  // Read-only, initial value for [old] (ties to operand #0).
    // clobbered
    : "cc"
  );

  return (int32_t)old;
}

template <>
inline int64_t Atomic::specialized_cmpxchg<int64_t>(int64_t xchg_val, volatile int64_t* dest, int64_t cmp_val, cmpxchg_memory_order order) {
  // One-shot 64-bit compare-and-swap (CSG); 'order' is intentionally ignored
  // (CSG serializes before and after the operation, see comment above).
  unsigned long old;

  __asm__ __volatile__ (
    "   CSG     %[old],%[upd],%[mem]    \n\t" // Try to xchg upd with mem.
    // outputs
    : [old] "=&d" (old)    // Write-only; receives current memory value on mismatch.
    , [mem] "+Q"  (*dest)  // Read/write, memory to be updated atomically.
    // inputs
    : [upd] "d"   (xchg_val) // Read-only, exchange value.
    ,       "0"   (cmp_val)  // Read-only, initial value for [old] (ties to operand #0).
    // clobbered
    : "cc"
  );

  return (int64_t)old;
}

#endif // OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP