61 // Both, memory value and increment, are treated as 32bit signed binary integers. 62 // No overflow exceptions are recognized, and the condition code does not hold 63 // information about the value in memory. 64 // 65 // The value in memory is updated by using a compare-and-swap instruction. The 66 // instruction is retried as often as required. 67 // 68 // The return value of the method is the value that was successfully stored. At the 69 // time the caller receives back control, the value in memory may have changed already. 70 71 // New atomic operations only include specific-operand-serialization, not full 72 // memory barriers. We can use the Fast-BCR-Serialization Facility for them. 73 inline void z196_fast_sync() { 74 __asm__ __volatile__ ("bcr 14, 0" : : : "memory"); 75 } 76 77 template<size_t byte_size> 78 struct Atomic::PlatformAdd 79 : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > 80 { 81 template<typename I, typename D> 82 D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const; 83 }; 84 85 template<> 86 template<typename I, typename D> 87 inline D Atomic::PlatformAdd<4>::add_and_fetch(I inc, D volatile* dest, 88 atomic_memory_order order) const { 89 STATIC_ASSERT(4 == sizeof(I)); 90 STATIC_ASSERT(4 == sizeof(D)); 91 92 D old, upd; 93 94 if (VM_Version::has_LoadAndALUAtomicV1()) { 95 if (order == memory_order_conservative) { z196_fast_sync(); } 96 __asm__ __volatile__ ( 97 " LGFR 0,%[inc] \n\t" // save increment 98 " LA 3,%[mem] \n\t" // force data address into ARG2 99 // " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value 100 // " LAA 2,0,0(3) \n\t" // actually coded instruction 101 " .byte 0xeb \n\t" // LAA main opcode 102 " .byte 0x20 \n\t" // R1,R3 103 " .byte 0x30 \n\t" // R2,disp1 104 " .byte 0x00 \n\t" // disp2,disp3 105 " .byte 0x00 \n\t" // disp4,disp5 106 " .byte 0xf8 \n\t" // LAA minor opcode 107 " AR 2,0 \n\t" // calc new value in register 120 " LLGF %[old],%[mem] \n\t" // get old value 121 "0: LA %[upd],0(%[inc],%[old]) \n\t" // calc result 122 " CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem 123 " JNE 0b \n\t" // no success? -> retry 124 //---< outputs >--- 125 : [old] "=&a" (old) // write-only, old counter value 126 , [upd] "=&d" (upd) // write-only, updated counter value 127 , [mem] "+Q" (*dest) // read/write, memory to be updated atomically 128 //---< inputs >--- 129 : [inc] "a" (inc) // read-only. 130 //---< clobbered >--- 131 : "cc", "memory" 132 ); 133 } 134 135 return upd; 136 } 137 138 139 template<> 140 template<typename I, typename D> 141 inline D Atomic::PlatformAdd<8>::add_and_fetch(I inc, D volatile* dest, 142 atomic_memory_order order) const { 143 STATIC_ASSERT(8 == sizeof(I)); 144 STATIC_ASSERT(8 == sizeof(D)); 145 146 D old, upd; 147 148 if (VM_Version::has_LoadAndALUAtomicV1()) { 149 if (order == memory_order_conservative) { z196_fast_sync(); } 150 __asm__ __volatile__ ( 151 " LGR 0,%[inc] \n\t" // save increment 152 " LA 3,%[mem] \n\t" // force data address into ARG2 153 // " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value 154 // " LAAG 2,0,0(3) \n\t" // actually coded instruction 155 " .byte 0xeb \n\t" // LAA main opcode 156 " .byte 0x20 \n\t" // R1,R3 157 " .byte 0x30 \n\t" // R2,disp1 158 " .byte 0x00 \n\t" // disp2,disp3 159 " .byte 0x00 \n\t" // disp4,disp5 160 " .byte 0xe8 \n\t" // LAA minor opcode 161 " AGR 2,0 \n\t" // calc new value in register 191 192 193 //------------- 194 // Atomic::xchg 195 //------------- 196 // These methods force the value in memory to be replaced by the new value passed 197 // in as argument. 198 // 199 // The value in memory is replaced by using a compare-and-swap instruction. The 200 // instruction is retried as often as required. This makes sure that the new 201 // value can be seen, at least for a very short period of time, by other CPUs. 202 // 203 // If we would use a normal "load(old value) store(new value)" sequence, 204 // the new value could be lost unnoticed, due to a store(new value) from 205 // another thread. 206 // 207 // The return value is the (unchanged) value from memory as it was when the 208 // replacement succeeded. 209 template<> 210 template<typename T> 211 inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, 212 T volatile* dest, 213 atomic_memory_order unused) const { 214 STATIC_ASSERT(4 == sizeof(T)); 215 T old; 216 217 __asm__ __volatile__ ( 218 " LLGF %[old],%[mem] \n\t" // get old value 219 "0: CS %[old],%[upd],%[mem] \n\t" // try to xchg upd with mem 220 " JNE 0b \n\t" // no success? -> retry 221 //---< outputs >--- 222 : [old] "=&d" (old) // write-only, prev value irrelevant 223 , [mem] "+Q" (*dest) // read/write, memory to be updated atomically 224 //---< inputs >--- 225 : [upd] "d" (exchange_value) // read-only, value to be written to memory 226 //---< clobbered >--- 227 : "cc", "memory" 228 ); 229 230 return old; 231 } 232 233 template<> 234 template<typename T> 235 inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, 236 T volatile* dest, 237 atomic_memory_order unused) const { 238 STATIC_ASSERT(8 == sizeof(T)); 239 T old; 240 241 __asm__ __volatile__ ( 242 " LG %[old],%[mem] \n\t" // get old value 243 "0: CSG %[old],%[upd],%[mem] \n\t" // try to xchg upd with mem 244 " JNE 0b \n\t" // no success? -> retry 245 //---< outputs >--- 246 : [old] "=&d" (old) // write-only, init from memory 247 , [mem] "+Q" (*dest) // read/write, memory to be updated atomically 248 //---< inputs >--- 249 : [upd] "d" (exchange_value) // read-only, value to be written to memory 250 //---< clobbered >--- 251 : "cc", "memory" 252 ); 253 254 return old; 255 } 256 272 // 273 // Inspecting the return value is the only way for the caller to determine 274 // if the compare-and-swap instruction was successful: 275 // - If return value and compare value compare equal, the compare-and-swap 276 // instruction was successful and the value in memory was replaced by the 277 // exchange value. 278 // - If return value and compare value compare unequal, the compare-and-swap 279 // instruction was not successful. The value in memory was left unchanged. 280 // 281 // The s390 processors always fence before and after the csg instructions. 282 // Thus we ignore the memory ordering argument. The docu says: "A serialization 283 // function is performed before the operand is fetched and again after the 284 // operation is completed." 285 286 // No direct support for cmpxchg of bytes; emulate using int. 287 template<> 288 struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; 289 290 template<> 291 template<typename T> 292 inline T Atomic::PlatformCmpxchg<4>::operator()(T xchg_val, 293 T volatile* dest, 294 T cmp_val, 295 atomic_memory_order unused) const { 296 STATIC_ASSERT(4 == sizeof(T)); 297 T old; 298 299 __asm__ __volatile__ ( 300 " CS %[old],%[upd],%[mem] \n\t" // Try to xchg upd with mem. 301 // outputs 302 : [old] "=&d" (old) // Write-only, prev value irrelevant. 303 , [mem] "+Q" (*dest) // Read/write, memory to be updated atomically. 304 // inputs 305 : [upd] "d" (xchg_val) 306 , "0" (cmp_val) // Read-only, initial value for [old] (operand #0). 307 // clobbered 308 : "cc", "memory" 309 ); 310 311 return old; 312 } 313 314 template<> 315 template<typename T> 316 inline T Atomic::PlatformCmpxchg<8>::operator()(T xchg_val, 317 T volatile* dest, 318 T cmp_val, 319 atomic_memory_order unused) const { 320 STATIC_ASSERT(8 == sizeof(T)); 321 T old; 322 323 __asm__ __volatile__ ( 324 " CSG %[old],%[upd],%[mem] \n\t" // Try to xchg upd with mem. 325 // outputs 326 : [old] "=&d" (old) // Write-only, prev value irrelevant. 327 , [mem] "+Q" (*dest) // Read/write, memory to be updated atomically. 328 // inputs 329 : [upd] "d" (xchg_val) 330 , "0" (cmp_val) // Read-only, initial value for [old] (operand #0). 331 // clobbered 332 : "cc", "memory" 333 ); 334 335 return old; 336 } 337 338 template<size_t byte_size> | 61 // Both, memory value and increment, are treated as 32bit signed binary integers. 62 // No overflow exceptions are recognized, and the condition code does not hold 63 // information about the value in memory. 64 // 65 // The value in memory is updated by using a compare-and-swap instruction. The 66 // instruction is retried as often as required. 67 // 68 // The return value of the method is the value that was successfully stored. At the 69 // time the caller receives back control, the value in memory may have changed already. 70 71 // New atomic operations only include specific-operand-serialization, not full 72 // memory barriers. We can use the Fast-BCR-Serialization Facility for them. 73 inline void z196_fast_sync() { 74 __asm__ __volatile__ ("bcr 14, 0" : : : "memory"); 75 } 76 77 template<size_t byte_size> 78 struct Atomic::PlatformAdd 79 : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > 80 { 81 template<typename D, typename I> 82 D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const; 83 }; 84 85 template<> 86 template<typename D, typename I> 87 inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I inc, 88 atomic_memory_order order) const { 89 STATIC_ASSERT(4 == sizeof(I)); 90 STATIC_ASSERT(4 == sizeof(D)); 91 92 D old, upd; 93 94 if (VM_Version::has_LoadAndALUAtomicV1()) { 95 if (order == memory_order_conservative) { z196_fast_sync(); } 96 __asm__ __volatile__ ( 97 " LGFR 0,%[inc] \n\t" // save increment 98 " LA 3,%[mem] \n\t" // force data address into ARG2 99 // " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value 100 // " LAA 2,0,0(3) \n\t" // actually coded instruction 101 " .byte 0xeb \n\t" // LAA main opcode 102 " .byte 0x20 \n\t" // R1,R3 103 " .byte 0x30 \n\t" // R2,disp1 104 " .byte 0x00 \n\t" // disp2,disp3 105 " .byte 0x00 \n\t" // disp4,disp5 106 " .byte 0xf8 \n\t" // LAA minor opcode 107 " AR 2,0 \n\t" // calc new value in register 120 " LLGF %[old],%[mem] \n\t" // get old value 121 "0: LA %[upd],0(%[inc],%[old]) \n\t" // calc result 122 " CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem 123 " JNE 0b \n\t" // no success? -> retry 124 //---< outputs >--- 125 : [old] "=&a" (old) // write-only, old counter value 126 , [upd] "=&d" (upd) // write-only, updated counter value 127 , [mem] "+Q" (*dest) // read/write, memory to be updated atomically 128 //---< inputs >--- 129 : [inc] "a" (inc) // read-only. 130 //---< clobbered >--- 131 : "cc", "memory" 132 ); 133 } 134 135 return upd; 136 } 137 138 139 template<> 140 template<typename D, typename I> 141 inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I inc, 142 atomic_memory_order order) const { 143 STATIC_ASSERT(8 == sizeof(I)); 144 STATIC_ASSERT(8 == sizeof(D)); 145 146 D old, upd; 147 148 if (VM_Version::has_LoadAndALUAtomicV1()) { 149 if (order == memory_order_conservative) { z196_fast_sync(); } 150 __asm__ __volatile__ ( 151 " LGR 0,%[inc] \n\t" // save increment 152 " LA 3,%[mem] \n\t" // force data address into ARG2 153 // " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value 154 // " LAAG 2,0,0(3) \n\t" // actually coded instruction 155 " .byte 0xeb \n\t" // LAA main opcode 156 " .byte 0x20 \n\t" // R1,R3 157 " .byte 0x30 \n\t" // R2,disp1 158 " .byte 0x00 \n\t" // disp2,disp3 159 " .byte 0x00 \n\t" // disp4,disp5 160 " .byte 0xe8 \n\t" // LAA minor opcode 161 " AGR 2,0 \n\t" // calc new value in register 191 192 193 //------------- 194 // Atomic::xchg 195 //------------- 196 // These methods force the value in memory to be replaced by the new value passed 197 // in as argument. 198 // 199 // The value in memory is replaced by using a compare-and-swap instruction. The 200 // instruction is retried as often as required. This makes sure that the new 201 // value can be seen, at least for a very short period of time, by other CPUs. 202 // 203 // If we would use a normal "load(old value) store(new value)" sequence, 204 // the new value could be lost unnoticed, due to a store(new value) from 205 // another thread. 206 // 207 // The return value is the (unchanged) value from memory as it was when the 208 // replacement succeeded. 209 template<> 210 template<typename T> 211 inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, 212 T exchange_value, 213 atomic_memory_order unused) const { 214 STATIC_ASSERT(4 == sizeof(T)); 215 T old; 216 217 __asm__ __volatile__ ( 218 " LLGF %[old],%[mem] \n\t" // get old value 219 "0: CS %[old],%[upd],%[mem] \n\t" // try to xchg upd with mem 220 " JNE 0b \n\t" // no success? -> retry 221 //---< outputs >--- 222 : [old] "=&d" (old) // write-only, prev value irrelevant 223 , [mem] "+Q" (*dest) // read/write, memory to be updated atomically 224 //---< inputs >--- 225 : [upd] "d" (exchange_value) // read-only, value to be written to memory 226 //---< clobbered >--- 227 : "cc", "memory" 228 ); 229 230 return old; 231 } 232 233 template<> 234 template<typename T> 235 inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, 236 T exchange_value, 237 atomic_memory_order unused) const { 238 STATIC_ASSERT(8 == sizeof(T)); 239 T old; 240 241 __asm__ __volatile__ ( 242 " LG %[old],%[mem] \n\t" // get old value 243 "0: CSG %[old],%[upd],%[mem] \n\t" // try to xchg upd with mem 244 " JNE 0b \n\t" // no success? -> retry 245 //---< outputs >--- 246 : [old] "=&d" (old) // write-only, init from memory 247 , [mem] "+Q" (*dest) // read/write, memory to be updated atomically 248 //---< inputs >--- 249 : [upd] "d" (exchange_value) // read-only, value to be written to memory 250 //---< clobbered >--- 251 : "cc", "memory" 252 ); 253 254 return old; 255 } 256 272 // 273 // Inspecting the return value is the only way for the caller to determine 274 // if the compare-and-swap instruction was successful: 275 // - If return value and compare value compare equal, the compare-and-swap 276 // instruction was successful and the value in memory was replaced by the 277 // exchange value. 278 // - If return value and compare value compare unequal, the compare-and-swap 279 // instruction was not successful. The value in memory was left unchanged. 280 // 281 // The s390 processors always fence before and after the csg instructions. 282 // Thus we ignore the memory ordering argument. The docu says: "A serialization 283 // function is performed before the operand is fetched and again after the 284 // operation is completed." 285 286 // No direct support for cmpxchg of bytes; emulate using int. 287 template<> 288 struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; 289 290 template<> 291 template<typename T> 292 inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, 293 T cmp_val, 294 T xchg_val, 295 atomic_memory_order unused) const { 296 STATIC_ASSERT(4 == sizeof(T)); 297 T old; 298 299 __asm__ __volatile__ ( 300 " CS %[old],%[upd],%[mem] \n\t" // Try to xchg upd with mem. 301 // outputs 302 : [old] "=&d" (old) // Write-only, prev value irrelevant. 303 , [mem] "+Q" (*dest) // Read/write, memory to be updated atomically. 304 // inputs 305 : [upd] "d" (xchg_val) 306 , "0" (cmp_val) // Read-only, initial value for [old] (operand #0). 307 // clobbered 308 : "cc", "memory" 309 ); 310 311 return old; 312 } 313 314 template<> 315 template<typename T> 316 inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, 317 T cmp_val, 318 T xchg_val, 319 atomic_memory_order unused) const { 320 STATIC_ASSERT(8 == sizeof(T)); 321 T old; 322 323 __asm__ __volatile__ ( 324 " CSG %[old],%[upd],%[mem] \n\t" // Try to xchg upd with mem. 325 // outputs 326 : [old] "=&d" (old) // Write-only, prev value irrelevant. 327 , [mem] "+Q" (*dest) // Read/write, memory to be updated atomically. 328 // inputs 329 : [upd] "d" (xchg_val) 330 , "0" (cmp_val) // Read-only, initial value for [old] (operand #0). 331 // clobbered 332 : "cc", "memory" 333 ); 334 335 return old; 336 } 337 338 template<size_t byte_size> |