/*
 * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2018 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP
#define OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP

#include "runtime/atomic.hpp"
#include "runtime/os.hpp"
#include "vm_version_s390.hpp"

// Note that the compare-and-swap instructions on System z perform
// a serialization function before the storage operand is fetched
// and again after the operation is completed.
//
// Constraint modifiers and operand constraints used in the inline assembler
// statements below:
//   =       write-only access: Value on entry to inline-assembler code is irrelevant.
//   +       read/write access: Value on entry is used; on exit the value is changed.
//   (none)  read-only access: Value on entry is used and never changed.
//   &       early-clobber access: Might be modified before all read-only operands
//           have been used.
//   a       address register operand (not GR0).
//   d       general register operand (including GR0).
//   Q       memory operand w/o index register.
//   0..9    operand reference (by operand position).
//           Used for operands that fill multiple roles. One example would be a
//           write-only operand receiving its initial value from a read-only operand.
//           Refer to cmpxchg(..) operand #0 and variable cmp_val for a real-life example.
//

// On System z, all store operations are atomic if the address where the data is stored into
// is an integer multiple of the data length. Furthermore, all stores are ordered:
// a store which occurs conceptually before another store becomes visible to other CPUs
// before the other store becomes visible.

//------------
// Atomic::add
//------------
// These methods force the value in memory to be augmented by the passed increment.
// Both memory value and increment are treated as signed binary integers of the
// operand size: 32 bit for PlatformAdd<4>, 64 bit for PlatformAdd<8>.
// No overflow exceptions are recognized, and the condition code does not hold
// information about the value in memory.
//
// Two code paths exist, selected at run time by VM_Version::has_LoadAndALUAtomicV1():
//  - If it returns true, the value in memory is updated with a single
//    load-and-add instruction (LAA/LAAG), which is emitted as raw bytes.
//  - Otherwise, the value in memory is updated by using a compare-and-swap
//    instruction (CS/CSG). The instruction is retried as often as required.
//
// The return value of the method is the value that was successfully stored. At the
// time the caller receives back control, the value in memory may have changed already.

// New atomic operations only include specific-operand-serialization, not full
// memory barriers. We can use the Fast-BCR-Serialization Facility for them.
//
// Emits a single "bcr 14, 0" instruction. The asm statement lists "memory"
// as clobbered and has no input or output operands.
inline void z196_fast_sync() {
  __asm__ __volatile__ ("bcr 14, 0" : : : "memory");
}

// Platform-specific implementation of Atomic::add for operands of byte_size bytes.
// Only add_and_fetch() is declared here; the struct derives from
// Atomic::AddAndFetch, parameterized with this struct itself.
// Specializations of add_and_fetch() for byte_size 4 and 8 follow below.
template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};

// Atomically adds inc to the 4-byte value at *dest and returns the updated value.
//
//   inc    increment; must be 4 bytes wide (checked at compile time).
//   dest   address of the 4-byte value to be updated.
//   order  only consulted on the load-and-add path: for memory_order_conservative,
//          z196_fast_sync() is called before and after the update. The
//          compare-and-swap path does not look at it.
//
// Returns the sum that was stored to *dest (old memory value + inc).
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I inc, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D old, upd;   // old is only used as a work register on the compare-and-swap path.

  if (VM_Version::has_LoadAndALUAtomicV1()) {
    if (order == memory_order_conservative) { z196_fast_sync(); }
    // The LAA instruction is emitted byte by byte with fixed register numbers.
    // Therefore, the operands are first moved into exactly those registers
    // (GR0: increment, GR3: address), and GR0, GR2, GR3 are declared clobbered.
    __asm__ __volatile__ (
      " LGFR 0,%[inc] \n\t"                 // save increment in GR0
      " LA 3,%[mem] \n\t"                   // force data address into GR3 (ARG2)
//    " LAA %[upd],%[inc],%[mem] \n\t"      // increment and get old value
//    " LAA 2,0,0(3) \n\t"                  // actually coded instruction
      " .byte 0xeb \n\t"                    // LAA main opcode
      " .byte 0x20 \n\t"                    // R1 (=2), R3 (=0)
      " .byte 0x30 \n\t"                    // base register (=3), disp1
      " .byte 0x00 \n\t"                    // disp2,disp3
      " .byte 0x00 \n\t"                    // disp4,disp5
      " .byte 0xf8 \n\t"                    // LAA minor opcode
      " AR 2,0 \n\t"                        // calc new value in register (old value in GR2 + increment in GR0)
      " LR %[upd],2 \n\t"                   // move to result register
      //---<  outputs  >---
      : [upd]  "=&d" (upd)    // write-only, updated counter value
      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---<  inputs  >---
      : [inc]  "a"   (inc)    // read-only.
      //---<  clobbered  >---
      : "cc", "r0", "r2", "r3", "memory"
    );
    if (order == memory_order_conservative) { z196_fast_sync(); }
  } else {
    // Compare-and-swap loop. When CS does not succeed, the loop branches back
    // to label 0 and recalculates the sum from %[old] before trying again.
    __asm__ __volatile__ (
      " LLGF %[old],%[mem] \n\t"            // get old value
      "0: LA %[upd],0(%[inc],%[old]) \n\t"  // calc result (old + inc)
      " CS %[old],%[upd],%[mem] \n\t"       // try to xchg res with mem
      " JNE 0b \n\t"                        // no success? -> retry
      //---<  outputs  >---
      : [old] "=&a" (old)    // write-only, old counter value
      , [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---<  inputs  >---
      : [inc] "a"   (inc)    // read-only.
      //---<  clobbered  >---
      : "cc", "memory"
    );
  }

  return upd;
}


// Atomically adds inc to the 8-byte value at *dest and returns the updated value.
//
//   inc    increment; must be 8 bytes wide (checked at compile time).
//   dest   address of the 8-byte value to be updated.
//   order  only consulted on the load-and-add path: for memory_order_conservative,
//          z196_fast_sync() is called before and after the update. The
//          compare-and-swap path does not look at it.
//
// Returns the sum that was stored to *dest (old memory value + inc).
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I inc, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D old, upd;   // old is only used as a work register on the compare-and-swap path.

  if (VM_Version::has_LoadAndALUAtomicV1()) {
    if (order == memory_order_conservative) { z196_fast_sync(); }
    // The LAAG instruction is emitted byte by byte with fixed register numbers.
    // Therefore, the operands are first moved into exactly those registers
    // (GR0: increment, GR3: address), and GR0, GR2, GR3 are declared clobbered.
    __asm__ __volatile__ (
      " LGR 0,%[inc] \n\t"                  // save increment in GR0
      " LA 3,%[mem] \n\t"                   // force data address into GR3 (ARG2)
//    " LAAG %[upd],%[inc],%[mem] \n\t"     // increment and get old value
//    " LAAG 2,0,0(3) \n\t"                 // actually coded instruction
      " .byte 0xeb \n\t"                    // LAAG main opcode
      " .byte 0x20 \n\t"                    // R1 (=2), R3 (=0)
      " .byte 0x30 \n\t"                    // base register (=3), disp1
      " .byte 0x00 \n\t"                    // disp2,disp3
      " .byte 0x00 \n\t"                    // disp4,disp5
      " .byte 0xe8 \n\t"                    // LAAG minor opcode
      " AGR 2,0 \n\t"                       // calc new value in register (old value in GR2 + increment in GR0)
      " LGR %[upd],2 \n\t"                  // move to result register
      //---<  outputs  >---
      : [upd]  "=&d" (upd)    // write-only, updated counter value
      , [mem]  "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---<  inputs  >---
      : [inc]  "a"   (inc)    // read-only.
      //---<  clobbered  >---
      : "cc", "r0", "r2", "r3", "memory"
    );
    if (order == memory_order_conservative) { z196_fast_sync(); }
  } else {
    // Compare-and-swap loop. When CSG does not succeed, the loop branches back
    // to label 0 and recalculates the sum from %[old] before trying again.
    __asm__ __volatile__ (
      " LG %[old],%[mem] \n\t"              // get old value
      "0: LA %[upd],0(%[inc],%[old]) \n\t"  // calc result (old + inc)
      " CSG %[old],%[upd],%[mem] \n\t"      // try to xchg res with mem
      " JNE 0b \n\t"                        // no success? -> retry
      //---<  outputs  >---
      : [old] "=&a" (old)    // write-only, old counter value
      , [upd] "=&d" (upd)    // write-only, updated counter value
      , [mem] "+Q"  (*dest)  // read/write, memory to be updated atomically
      //---<  inputs  >---
      : [inc] "a"   (inc)    // read-only.
      //---<  clobbered  >---
      : "cc", "memory"
    );
  }

  return upd;
}


//-------------
// Atomic::xchg
//-------------
// These methods force the value in memory to be replaced by the new value passed
// in as argument.
//
// The value in memory is replaced by using a compare-and-swap instruction. The
// instruction is retried as often as required. This makes sure that the new
// value can be seen, at least for a very short period of time, by other CPUs.
//
// If we would use a normal "load(old value) store(new value)" sequence,
// the new value could be lost unnoticed, due to a store(new value) from
// another thread.
//
// The return value is the (unchanged) value from memory as it was when the
// replacement succeeded.

// Atomically replaces the 4-byte value at *dest with exchange_value.
//
//   exchange_value  value to be written to memory.
//   dest            address of the 4-byte value to be replaced.
//   unused          memory ordering argument; not referenced by this implementation.
//
// Returns the value that was in memory when the replacement succeeded.
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order unused) const {
  STATIC_ASSERT(4 == sizeof(T));
  T old;

  __asm__ __volatile__ (
    " LLGF %[old],%[mem] \n\t"              // get old value
    "0: CS %[old],%[upd],%[mem] \n\t"       // try to xchg upd with mem
    " JNE 0b \n\t"                          // no success? -> retry
    //---<  outputs  >---
    : [old] "=&d" (old)      // write-only, prev value irrelevant (loaded from memory)
    , [mem] "+Q"  (*dest)    // read/write, memory to be updated atomically
    //---<  inputs  >---
    : [upd] "d"   (exchange_value) // read-only, value to be written to memory
    //---<  clobbered  >---
    : "cc", "memory"
  );

  return old;
}

// Atomically replaces the 8-byte value at *dest with exchange_value.
//
//   exchange_value  value to be written to memory.
//   dest            address of the 8-byte value to be replaced.
//   unused          memory ordering argument; not referenced by this implementation.
//
// Returns the value that was in memory when the replacement succeeded.
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order unused) const {
  STATIC_ASSERT(8 == sizeof(T));
  T old;

  __asm__ __volatile__ (
    " LG %[old],%[mem] \n\t"                // get old value
    "0: CSG %[old],%[upd],%[mem] \n\t"      // try to xchg upd with mem
    " JNE 0b \n\t"                          // no success? -> retry
    //---<  outputs  >---
    : [old] "=&d" (old)      // write-only, prev value irrelevant (loaded from memory)
    , [mem] "+Q"  (*dest)    // read/write, memory to be updated atomically
    //---<  inputs  >---
    : [upd] "d"   (exchange_value) // read-only, value to be written to memory
    //---<  clobbered  >---
    : "cc", "memory"
  );

  return old;
}

//----------------
// Atomic::cmpxchg
//----------------
// These methods compare the value in memory with a given compare value.
// If both values compare equal, the value in memory is replaced with
// the exchange value.
//
// The value in memory is compared and replaced by using a compare-and-swap
// instruction. The instruction is NOT retried (one shot only).
//
// The return value is the (unchanged) value from memory as it was when the
// compare-and-swap instruction completed. A successful exchange operation
// is indicated by (return value == compare_value). If unsuccessful, a new
// exchange value can be calculated based on the return value which is the
// latest contents of the memory location.
//
// Inspecting the return value is the only way for the caller to determine
// if the compare-and-swap instruction was successful:
// - If return value and compare value compare equal, the compare-and-swap
//   instruction was successful and the value in memory was replaced by the
//   exchange value.
// - If return value and compare value compare unequal, the compare-and-swap
//   instruction was not successful. The value in memory was left unchanged.
//
// The s390 processors always fence before and after the csg instructions.
// Thus we ignore the memory ordering argument. The documentation says: "A
// serialization function is performed before the operand is fetched and again
// after the operation is completed."

// No direct support for cmpxchg of bytes; emulate using int.
template<>
struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};

// One-shot compare-and-swap on the 4-byte value at *dest.
//
//   xchg_val  value to be stored if the value in memory equals cmp_val.
//   dest      address of the 4-byte value to be compared and replaced.
//   cmp_val   expected value; also the initial value of the result register.
//   unused    memory ordering argument; not referenced by this implementation.
//
// Returns the contents of the result register after the CS instruction.
// The exchange took place if and only if the return value equals cmp_val.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T xchg_val,
                                                T volatile* dest,
                                                T cmp_val,
                                                atomic_memory_order unused) const {
  STATIC_ASSERT(4 == sizeof(T));
  T old;

  __asm__ __volatile__ (
    " CS %[old],%[upd],%[mem] \n\t"         // Try to xchg upd with mem.
    // outputs
    : [old] "=&d" (old)      // Write-only, prev value irrelevant.
    , [mem] "+Q"  (*dest)    // Read/write, memory to be updated atomically.
    // inputs
    : [upd] "d"   (xchg_val) // Read-only, value to be written to memory.
    ,       "0"   (cmp_val)  // Read-only, initial value for [old] (operand #0).
    // clobbered
    : "cc", "memory"
  );

  return old;
}

// One-shot compare-and-swap on the 8-byte value at *dest.
//
//   xchg_val  value to be stored if the value in memory equals cmp_val.
//   dest      address of the 8-byte value to be compared and replaced.
//   cmp_val   expected value; also the initial value of the result register.
//   unused    memory ordering argument; not referenced by this implementation.
//
// Returns the contents of the result register after the CSG instruction.
// The exchange took place if and only if the return value equals cmp_val.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T xchg_val,
                                                T volatile* dest,
                                                T cmp_val,
                                                atomic_memory_order unused) const {
  STATIC_ASSERT(8 == sizeof(T));
  T old;

  __asm__ __volatile__ (
    " CSG %[old],%[upd],%[mem] \n\t"        // Try to xchg upd with mem.
    // outputs
    : [old] "=&d" (old)      // Write-only, prev value irrelevant.
    , [mem] "+Q"  (*dest)    // Read/write, memory to be updated atomically.
    // inputs
    : [upd] "d"   (xchg_val) // Read-only, value to be written to memory.
    ,       "0"   (cmp_val)  // Read-only, initial value for [old] (operand #0).
    // clobbered
    : "cc", "memory"
  );

  return old;
}

#endif // OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP