--- /dev/null Sat Jan 5 17:46:58 2013 +++ new/src/share/classes/java/util/concurrent/atomic/Striped64.java Sat Jan 5 17:46:57 2013 @@ -0,0 +1,429 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * This file is available under and governed by the GNU General Public + * License version 2 only, as published by the Free Software Foundation. + * However, the following notice accompanied the original version of this + * file: + * + * Written by Doug Lea with assistance from members of JCP JSR-166 + * Expert Group and released to the public domain, as explained at + * http://creativecommons.org/publicdomain/zero/1.0/ + */ + +package java.util.concurrent.atomic; +import java.util.function.LongBinaryOperator; +import java.util.function.DoubleBinaryOperator; + +/** + * A package-local class holding common representation and mechanics + * for classes supporting dynamic striping on 64bit values. The class + * extends Number so that concrete subclasses must publicly do so. + */ +abstract class Striped64 extends Number { + /* + * This class maintains a lazily-initialized table of atomically + * updated variables, plus an extra "base" field. The table size + * is a power of two. Indexing uses masked per-thread hash codes. + * Nearly all declarations in this class are package-private, + * accessed directly by subclasses. + * + * Table entries are of class Cell; a variant of AtomicLong padded + * to reduce cache contention on most processors. Padding is + * overkill for most Atomics because they are usually irregularly + * scattered in memory and thus don't interfere much with each + * other. But Atomic objects residing in arrays will tend to be + * placed adjacent to each other, and so will most often share + * cache lines (with a huge negative performance impact) without + * this precaution. + * + * In part because Cells are relatively large, we avoid creating + * them until they are needed. When there is no contention, all + * updates are made to the base field. Upon first contention (a + * failed CAS on base update), the table is initialized to size 2. + * The table size is doubled upon further contention until + * reaching the nearest power of two greater than or equal to the + * number of CPUS. Table slots remain empty (null) until they are + * needed. + * + * A single spinlock ("cellsBusy") is used for initializing and + * resizing the table, as well as populating slots with new Cells. + * There is no need for a blocking lock: When the lock is not + * available, threads try other slots (or the base). During these + * retries, there is increased contention and reduced locality, + * which is still better than alternatives. + * + * Per-thread hash codes are initialized to values that typically + * do not often conflict. Contention and/or table collisions are + * indicated by failed CASes when performing an update operation + * (see method retryUpdate). Upon a collision, if the table size + * is less than the capacity, it is doubled in size unless some + * other thread holds the lock. If a hashed slot is empty, and + * lock is available, a new Cell is created. Otherwise, if the + * slot exists, a CAS is tried. Retries proceed by "double + * hashing", using a secondary hash (Marsaglia XorShift) to try to + * find a free slot. + * + * The table size is capped because, when there are more threads + * than CPUs, supposing that each thread were bound to a CPU, + * there would exist a perfect hash function mapping threads to + * slots that eliminates collisions. When we reach capacity, we + * search for this mapping by randomly varying the hash codes of + * colliding threads. Because search is random, and collisions + * only become known via CAS failures, convergence can be slow, + * and because threads are typically not bound to CPUS forever, + * may not occur at all. However, despite these limitations, + * observed contention rates are typically low in these cases. + * + * It is possible for a Cell to become unused when threads that + * once hashed to it terminate, as well as in the case where + * doubling the table causes no thread to hash to it under + * expanded mask. We do not try to detect or remove such cells, + * under the assumption that for long-running instances, observed + * contention levels will recur, so the cells will eventually be + * needed again; and for short-lived ones, it does not matter. + */ + + /** + * Padded variant of AtomicLong supporting only raw accesses plus CAS. + * The value field is placed between pads, hoping that the JVM doesn't + * reorder them. + * + * JVM intrinsics note: It would be possible to use a release-only + * form of CAS here, if it were provided. + */ + static final class Cell { + volatile long p0, p1, p2, p3, p4, p5, p6; + volatile long value; + volatile long q0, q1, q2, q3, q4, q5, q6; + Cell(long x) { value = x; } + + final boolean cas(long cmp, long val) { + return UNSAFE.compareAndSwapLong(this, valueOffset, cmp, val); + } + + // Unsafe mechanics + private static final sun.misc.Unsafe UNSAFE; + private static final long valueOffset; + static { + try { + UNSAFE = sun.misc.Unsafe.getUnsafe(); + Class ak = Cell.class; + valueOffset = UNSAFE.objectFieldOffset + (ak.getDeclaredField("value")); + } catch (Exception e) { + throw new Error(e); + } + } + } + + /** + * Holder for the thread-local hash code determining which Cell to + * use. The code is initialized via the cellHashCodeGenerator, but + * may be moved upon collisions. + */ + static final class CellHashCode { + int code; + } + + /** + * Generates initial value for per-thread CellHashCodes + */ + static final AtomicInteger cellHashCodeGenerator = new AtomicInteger(); + + /** + * Increment for cellHashCodeGenerator. See class ThreadLocal + * for explanation. + */ + static final int SEED_INCREMENT = 0x61c88647; + + /** + * Per-thread cell hash codes. Shared across all instances + */ + static final ThreadLocal threadCellHashCode = + new ThreadLocal(); + + /** Number of CPUS, to place bound on table size */ + static final int NCPU = Runtime.getRuntime().availableProcessors(); + + /** + * Table of cells. When non-null, size is a power of 2. + */ + transient volatile Cell[] cells; + + /** + * Base value, used mainly when there is no contention, but also as + * a fallback during table initialization races. Updated via CAS. + */ + transient volatile long base; + + /** + * Spinlock (locked via CAS) used when resizing and/or creating Cells. + */ + transient volatile int cellsBusy; + + /** + * Package-private default constructor + */ + Striped64() { + } + + /** + * CASes the base field. + */ + final boolean casBase(long cmp, long val) { + return UNSAFE.compareAndSwapLong(this, BASE, cmp, val); + } + + /** + * CASes the cellsBusy field from 0 to 1 to acquire lock. + */ + final boolean casCellsBusy() { + return UNSAFE.compareAndSwapInt(this, CELLSBUSY, 0, 1); + } + + /** + * Handles cases of updates involving initialization, resizing, + * creating new Cells, and/or contention. See above for + * explanation. This method suffers the usual non-modularity + * problems of optimistic retry code, relying on rechecked sets of + * reads. + * + * @param x the value + * @param hc the hash code holder + * @param fn the update function, or null for add (this convention + * avoids the need for an extra field or function in LongAdder). + * @param wasUncontended false if CAS failed before call + */ + final void longAccumulate(long x, CellHashCode hc, + LongBinaryOperator fn, + boolean wasUncontended) { + int h; + if (hc == null) { + hc = new CellHashCode(); + int s = cellHashCodeGenerator.addAndGet(SEED_INCREMENT); + h = hc.code = (s == 0) ? 1 : s; // Avoid zero + threadCellHashCode.set(hc); + } + else + h = hc.code; + boolean collide = false; // True if last slot nonempty + for (;;) { + Cell[] as; Cell a; int n; long v; + if ((as = cells) != null && (n = as.length) > 0) { + if ((a = as[(n - 1) & h]) == null) { + if (cellsBusy == 0) { // Try to attach new Cell + Cell r = new Cell(x); // Optimistically create + if (cellsBusy == 0 && casCellsBusy()) { + boolean created = false; + try { // Recheck under lock + Cell[] rs; int m, j; + if ((rs = cells) != null && + (m = rs.length) > 0 && + rs[j = (m - 1) & h] == null) { + rs[j] = r; + created = true; + } + } finally { + cellsBusy = 0; + } + if (created) + break; + continue; // Slot is now non-empty + } + } + collide = false; + } + else if (!wasUncontended) // CAS already known to fail + wasUncontended = true; // Continue after rehash + else if (a.cas(v = a.value, ((fn == null) ? v + x : + fn.operateAsLong(v, x)))) // ## rename when JDK8 syncs with lambda, applyAsLong + break; + else if (n >= NCPU || cells != as) + collide = false; // At max size or stale + else if (!collide) + collide = true; + else if (cellsBusy == 0 && casCellsBusy()) { + try { + if (cells == as) { // Expand table unless stale + Cell[] rs = new Cell[n << 1]; + for (int i = 0; i < n; ++i) + rs[i] = as[i]; + cells = rs; + } + } finally { + cellsBusy = 0; + } + collide = false; + continue; // Retry with expanded table + } + h ^= h << 13; // Rehash + h ^= h >>> 17; + h ^= h << 5; + } + else if (cellsBusy == 0 && cells == as && casCellsBusy()) { + boolean init = false; + try { // Initialize table + if (cells == as) { + Cell[] rs = new Cell[2]; + rs[h & 1] = new Cell(x); + cells = rs; + init = true; + } + } finally { + cellsBusy = 0; + } + if (init) + break; + } + else if (casBase(v = base, ((fn == null) ? v + x : + fn.operateAsLong(v, x)))) // ## rename when JDK8 syncs with lambda, applyAsLong + break; // Fall back on using base + } + hc.code = h; // Record index for next time + } + + /** + * Same as longAccumulate, but injecting long/double conversions + * in too many places to sensibly merge with long version, given + * the low-overhead requirements of this class. So must instead be + * maintained by copy/paste/adapt. + */ + final void doubleAccumulate(double x, CellHashCode hc, + DoubleBinaryOperator fn, + boolean wasUncontended) { + int h; + if (hc == null) { + hc = new CellHashCode(); + int s = cellHashCodeGenerator.addAndGet(SEED_INCREMENT); + h = hc.code = (s == 0) ? 1 : s; // Avoid zero + threadCellHashCode.set(hc); + } + else + h = hc.code; + boolean collide = false; // True if last slot nonempty + for (;;) { + Cell[] as; Cell a; int n; long v; + if ((as = cells) != null && (n = as.length) > 0) { + if ((a = as[(n - 1) & h]) == null) { + if (cellsBusy == 0) { // Try to attach new Cell + Cell r = new Cell(Double.doubleToRawLongBits(x)); + if (cellsBusy == 0 && casCellsBusy()) { + boolean created = false; + try { // Recheck under lock + Cell[] rs; int m, j; + if ((rs = cells) != null && + (m = rs.length) > 0 && + rs[j = (m - 1) & h] == null) { + rs[j] = r; + created = true; + } + } finally { + cellsBusy = 0; + } + if (created) + break; + continue; // Slot is now non-empty + } + } + collide = false; + } + else if (!wasUncontended) // CAS already known to fail + wasUncontended = true; // Continue after rehash + else if (a.cas(v = a.value, + ((fn == null) ? + Double.doubleToRawLongBits + (Double.longBitsToDouble(v) + x) : + Double.doubleToRawLongBits + (fn.operateAsDouble // ## rename when JDK8 syncs with lambda, applyAsDouble + (Double.longBitsToDouble(v), x))))) + break; + else if (n >= NCPU || cells != as) + collide = false; // At max size or stale + else if (!collide) + collide = true; + else if (cellsBusy == 0 && casCellsBusy()) { + try { + if (cells == as) { // Expand table unless stale + Cell[] rs = new Cell[n << 1]; + for (int i = 0; i < n; ++i) + rs[i] = as[i]; + cells = rs; + } + } finally { + cellsBusy = 0; + } + collide = false; + continue; // Retry with expanded table + } + h ^= h << 13; // Rehash + h ^= h >>> 17; + h ^= h << 5; + } + else if (cellsBusy == 0 && cells == as && casCellsBusy()) { + boolean init = false; + try { // Initialize table + if (cells == as) { + Cell[] rs = new Cell[2]; + rs[h & 1] = new Cell(Double.doubleToRawLongBits(x)); + cells = rs; + init = true; + } + } finally { + cellsBusy = 0; + } + if (init) + break; + } + else if (casBase(v = base, + ((fn == null) ? + Double.doubleToRawLongBits + (Double.longBitsToDouble(v) + x) : + Double.doubleToRawLongBits + (fn.operateAsDouble // ## rename when JDK8 syncs with lambda, applyAsDouble + (Double.longBitsToDouble(v), x))))) + break; // Fall back on using base + } + hc.code = h; // Record index for next time + } + + // Unsafe mechanics + private static final sun.misc.Unsafe UNSAFE; + private static final long BASE; + private static final long CELLSBUSY; + static { + try { + UNSAFE = sun.misc.Unsafe.getUnsafe(); + Class sk = Striped64.class; + BASE = UNSAFE.objectFieldOffset + (sk.getDeclaredField("base")); + CELLSBUSY = UNSAFE.objectFieldOffset + (sk.getDeclaredField("cellsBusy")); + } catch (Exception e) { + throw new Error(e); + } + } + +}