1 /*
   2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   3  *
   4  * This code is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License version 2 only, as
   6  * published by the Free Software Foundation.  Oracle designates this
   7  * particular file as subject to the "Classpath" exception as provided
   8  * by Oracle in the LICENSE file that accompanied this code.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  */
  24 
  25 /*
  26  * This file is available under and governed by the GNU General Public
  27  * License version 2 only, as published by the Free Software Foundation.
  28  * However, the following notice accompanied the original version of this
  29  * file:
  30  *
  31  * Written by Doug Lea with assistance from members of JCP JSR-166
  32  * Expert Group and released to the public domain, as explained at
  33  * http://creativecommons.org/publicdomain/zero/1.0/
  34  */
  35 
  36 package java.util.concurrent.atomic;
  37 
  38 import java.lang.invoke.MethodHandles;
  39 import java.lang.invoke.VarHandle;
  40 import java.util.Arrays;
  41 import java.util.concurrent.ThreadLocalRandom;
  42 import java.util.function.DoubleBinaryOperator;
  43 import java.util.function.LongBinaryOperator;
  44 
  45 /**
  46  * A package-local class holding common representation and mechanics
 * for classes supporting dynamic striping on 64-bit values. The class
  48  * extends Number so that concrete subclasses must publicly do so.
  49  */
  50 @SuppressWarnings("serial")
  51 abstract class Striped64 extends Number {
  52     /*
  53      * This class maintains a lazily-initialized table of atomically
  54      * updated variables, plus an extra "base" field. The table size
  55      * is a power of two. Indexing uses masked per-thread hash codes.
  56      * Nearly all declarations in this class are package-private,
  57      * accessed directly by subclasses.
  58      *
  59      * Table entries are of class Cell; a variant of AtomicLong padded
  60      * (via @Contended) to reduce cache contention. Padding is
  61      * overkill for most Atomics because they are usually irregularly
  62      * scattered in memory and thus don't interfere much with each
  63      * other. But Atomic objects residing in arrays will tend to be
  64      * placed adjacent to each other, and so will most often share
  65      * cache lines (with a huge negative performance impact) without
  66      * this precaution.
  67      *
  68      * In part because Cells are relatively large, we avoid creating
  69      * them until they are needed.  When there is no contention, all
  70      * updates are made to the base field.  Upon first contention (a
  71      * failed CAS on base update), the table is initialized to size 2.
  72      * The table size is doubled upon further contention until
  73      * reaching the nearest power of two greater than or equal to the
     * number of CPUs. Table slots remain empty (null) until they are
  75      * needed.
  76      *
  77      * A single spinlock ("cellsBusy") is used for initializing and
  78      * resizing the table, as well as populating slots with new Cells.
  79      * There is no need for a blocking lock; when the lock is not
  80      * available, threads try other slots (or the base).  During these
  81      * retries, there is increased contention and reduced locality,
  82      * which is still better than alternatives.
  83      *
  84      * The Thread probe fields maintained via ThreadLocalRandom serve
  85      * as per-thread hash codes. We let them remain uninitialized as
  86      * zero (if they come in this way) until they contend at slot
     * 0. They are then initialized to values that typically do not
     * conflict with others.  Contention and/or table collisions
  89      * are indicated by failed CASes when performing an update
  90      * operation. Upon a collision, if the table size is less than
  91      * the capacity, it is doubled in size unless some other thread
  92      * holds the lock. If a hashed slot is empty, and lock is
  93      * available, a new Cell is created. Otherwise, if the slot
  94      * exists, a CAS is tried.  Retries proceed by "double hashing",
  95      * using a secondary hash (Marsaglia XorShift) to try to find a
  96      * free slot.
  97      *
  98      * The table size is capped because, when there are more threads
  99      * than CPUs, supposing that each thread were bound to a CPU,
 100      * there would exist a perfect hash function mapping threads to
 101      * slots that eliminates collisions. When we reach capacity, we
 102      * search for this mapping by randomly varying the hash codes of
 103      * colliding threads.  Because search is random, and collisions
 104      * only become known via CAS failures, convergence can be slow,
     * and because threads are typically not bound to CPUs forever,
 106      * may not occur at all. However, despite these limitations,
 107      * observed contention rates are typically low in these cases.
 108      *
 109      * It is possible for a Cell to become unused when threads that
 110      * once hashed to it terminate, as well as in the case where
 111      * doubling the table causes no thread to hash to it under
 112      * expanded mask.  We do not try to detect or remove such cells,
 113      * under the assumption that for long-running instances, observed
 114      * contention levels will recur, so the cells will eventually be
 115      * needed again; and for short-lived ones, it does not matter.
 116      */
 117 
 118     /**
 119      * Padded variant of AtomicLong supporting only raw accesses plus CAS.
 120      *
 121      * JVM intrinsics note: It would be possible to use a release-only
 122      * form of CAS here, if it were provided.
 123      */
 124     @jdk.internal.vm.annotation.Contended static final class Cell {
 125         volatile long value;
 126         Cell(long x) { value = x; }
 127         final boolean cas(long cmp, long val) {
 128             return VALUE.compareAndSet(this, cmp, val);
 129         }
 130         final void reset() {
 131             VALUE.setVolatile(this, 0L);
 132         }
 133         final void reset(long identity) {
 134             VALUE.setVolatile(this, identity);
 135         }
 136         final long getAndSet(long val) {
 137             return (long)VALUE.getAndSet(this, val);
 138         }
 139 
 140         // VarHandle mechanics
 141         private static final VarHandle VALUE;
 142         static {
 143             try {
 144                 MethodHandles.Lookup l = MethodHandles.lookup();
 145                 VALUE = l.findVarHandle(Cell.class, "value", long.class);
 146             } catch (ReflectiveOperationException e) {
 147                 throw new ExceptionInInitializerError(e);
 148             }
 149         }
 150     }
 151 
    /** Number of CPUs, used to bound the table size. */
 153     static final int NCPU = Runtime.getRuntime().availableProcessors();
 154 
 155     /**
 156      * Table of cells. When non-null, size is a power of 2.
 157      */
 158     transient volatile Cell[] cells;
 159 
 160     /**
 161      * Base value, used mainly when there is no contention, but also as
 162      * a fallback during table initialization races. Updated via CAS.
 163      */
 164     transient volatile long base;
 165 
 166     /**
 167      * Spinlock (locked via CAS) used when resizing and/or creating Cells.
 168      */
 169     transient volatile int cellsBusy;
 170 
 171     /**
 172      * Package-private default constructor.
 173      */
 174     Striped64() {
 175     }
 176 
 177     /**
 178      * CASes the base field.
 179      */
 180     final boolean casBase(long cmp, long val) {
 181         return BASE.compareAndSet(this, cmp, val);
 182     }
 183 
 184     final long getAndSetBase(long val) {
 185         return (long)BASE.getAndSet(this, val);
 186     }
 187 
 188     /**
 189      * CASes the cellsBusy field from 0 to 1 to acquire lock.
 190      */
 191     final boolean casCellsBusy() {
 192         return CELLSBUSY.compareAndSet(this, 0, 1);
 193     }
 194 
 195     /**
 196      * Returns the probe value for the current thread.
 197      * Duplicated from ThreadLocalRandom because of packaging restrictions.
 198      */
 199     static final int getProbe() {
 200         return (int) THREAD_PROBE.get(Thread.currentThread());
 201     }
 202 
 203     /**
 204      * Pseudo-randomly advances and records the given probe value for the
 205      * given thread.
 206      * Duplicated from ThreadLocalRandom because of packaging restrictions.
 207      */
 208     static final int advanceProbe(int probe) {
 209         probe ^= probe << 13;   // xorshift
 210         probe ^= probe >>> 17;
 211         probe ^= probe << 5;
 212         THREAD_PROBE.set(Thread.currentThread(), probe);
 213         return probe;
 214     }
 215 
 216     /**
 217      * Handles cases of updates involving initialization, resizing,
 218      * creating new Cells, and/or contention. See above for
 219      * explanation. This method suffers the usual non-modularity
 220      * problems of optimistic retry code, relying on rechecked sets of
 221      * reads.
 222      *
 223      * @param x the value
 224      * @param fn the update function, or null for add (this convention
 225      * avoids the need for an extra field or function in LongAdder).
 226      * @param wasUncontended false if CAS failed before call
 227      */
 228     final void longAccumulate(long x, LongBinaryOperator fn,
 229                               boolean wasUncontended) {
 230         int h;
 231         if ((h = getProbe()) == 0) {
 232             ThreadLocalRandom.current(); // force initialization
 233             h = getProbe();
 234             wasUncontended = true;
 235         }
 236         boolean collide = false;                // True if last slot nonempty
 237         done: for (;;) {
 238             Cell[] cs; Cell c; int n; long v;
 239             if ((cs = cells) != null && (n = cs.length) > 0) {
 240                 if ((c = cs[(n - 1) & h]) == null) {
 241                     if (cellsBusy == 0) {       // Try to attach new Cell
 242                         Cell r = new Cell(x);   // Optimistically create
 243                         if (cellsBusy == 0 && casCellsBusy()) {
 244                             try {               // Recheck under lock
 245                                 Cell[] rs; int m, j;
 246                                 if ((rs = cells) != null &&
 247                                     (m = rs.length) > 0 &&
 248                                     rs[j = (m - 1) & h] == null) {
 249                                     rs[j] = r;
 250                                     break done;
 251                                 }
 252                             } finally {
 253                                 cellsBusy = 0;
 254                             }
 255                             continue;           // Slot is now non-empty
 256                         }
 257                     }
 258                     collide = false;
 259                 }
 260                 else if (!wasUncontended)       // CAS already known to fail
 261                     wasUncontended = true;      // Continue after rehash
 262                 else if (c.cas(v = c.value,
 263                                (fn == null) ? v + x : fn.applyAsLong(v, x)))
 264                     break;
 265                 else if (n >= NCPU || cells != cs)
 266                     collide = false;            // At max size or stale
 267                 else if (!collide)
 268                     collide = true;
 269                 else if (cellsBusy == 0 && casCellsBusy()) {
 270                     try {
 271                         if (cells == cs)        // Expand table unless stale
 272                             cells = Arrays.copyOf(cs, n << 1);
 273                     } finally {
 274                         cellsBusy = 0;
 275                     }
 276                     collide = false;
 277                     continue;                   // Retry with expanded table
 278                 }
 279                 h = advanceProbe(h);
 280             }
 281             else if (cellsBusy == 0 && cells == cs && casCellsBusy()) {
 282                 try {                           // Initialize table
 283                     if (cells == cs) {
 284                         Cell[] rs = new Cell[2];
 285                         rs[h & 1] = new Cell(x);
 286                         cells = rs;
 287                         break done;
 288                     }
 289                 } finally {
 290                     cellsBusy = 0;
 291                 }
 292             }
 293             // Fall back on using base
 294             else if (casBase(v = base,
 295                              (fn == null) ? v + x : fn.applyAsLong(v, x)))
 296                 break done;
 297         }
 298     }
 299 
 300     private static long apply(DoubleBinaryOperator fn, long v, double x) {
 301         double d = Double.longBitsToDouble(v);
 302         d = (fn == null) ? d + x : fn.applyAsDouble(d, x);
 303         return Double.doubleToRawLongBits(d);
 304     }
 305 
 306     /**
 307      * Same as longAccumulate, but injecting long/double conversions
 308      * in too many places to sensibly merge with long version, given
 309      * the low-overhead requirements of this class. So must instead be
 310      * maintained by copy/paste/adapt.
 311      */
 312     final void doubleAccumulate(double x, DoubleBinaryOperator fn,
 313                                 boolean wasUncontended) {
 314         int h;
 315         if ((h = getProbe()) == 0) {
 316             ThreadLocalRandom.current(); // force initialization
 317             h = getProbe();
 318             wasUncontended = true;
 319         }
 320         boolean collide = false;                // True if last slot nonempty
 321         done: for (;;) {
 322             Cell[] cs; Cell c; int n; long v;
 323             if ((cs = cells) != null && (n = cs.length) > 0) {
 324                 if ((c = cs[(n - 1) & h]) == null) {
 325                     if (cellsBusy == 0) {       // Try to attach new Cell
 326                         Cell r = new Cell(Double.doubleToRawLongBits(x));
 327                         if (cellsBusy == 0 && casCellsBusy()) {
 328                             try {               // Recheck under lock
 329                                 Cell[] rs; int m, j;
 330                                 if ((rs = cells) != null &&
 331                                     (m = rs.length) > 0 &&
 332                                     rs[j = (m - 1) & h] == null) {
 333                                     rs[j] = r;
 334                                     break done;
 335                                 }
 336                             } finally {
 337                                 cellsBusy = 0;
 338                             }
 339                             continue;           // Slot is now non-empty
 340                         }
 341                     }
 342                     collide = false;
 343                 }
 344                 else if (!wasUncontended)       // CAS already known to fail
 345                     wasUncontended = true;      // Continue after rehash
 346                 else if (c.cas(v = c.value, apply(fn, v, x)))
 347                     break;
 348                 else if (n >= NCPU || cells != cs)
 349                     collide = false;            // At max size or stale
 350                 else if (!collide)
 351                     collide = true;
 352                 else if (cellsBusy == 0 && casCellsBusy()) {
 353                     try {
 354                         if (cells == cs)        // Expand table unless stale
 355                             cells = Arrays.copyOf(cs, n << 1);
 356                     } finally {
 357                         cellsBusy = 0;
 358                     }
 359                     collide = false;
 360                     continue;                   // Retry with expanded table
 361                 }
 362                 h = advanceProbe(h);
 363             }
 364             else if (cellsBusy == 0 && cells == cs && casCellsBusy()) {
 365                 try {                           // Initialize table
 366                     if (cells == cs) {
 367                         Cell[] rs = new Cell[2];
 368                         rs[h & 1] = new Cell(Double.doubleToRawLongBits(x));
 369                         cells = rs;
 370                         break done;
 371                     }
 372                 } finally {
 373                     cellsBusy = 0;
 374                 }
 375             }
 376             // Fall back on using base
 377             else if (casBase(v = base, apply(fn, v, x)))
 378                 break done;
 379         }
 380     }
 381 
 382     // VarHandle mechanics
 383     private static final VarHandle BASE;
 384     private static final VarHandle CELLSBUSY;
 385     private static final VarHandle THREAD_PROBE;
 386     static {
 387         try {
 388             MethodHandles.Lookup l = MethodHandles.lookup();
 389             BASE = l.findVarHandle(Striped64.class,
 390                     "base", long.class);
 391             CELLSBUSY = l.findVarHandle(Striped64.class,
 392                     "cellsBusy", int.class);
 393             l = java.security.AccessController.doPrivileged(
 394                     new java.security.PrivilegedAction<>() {
 395                         public MethodHandles.Lookup run() {
 396                             try {
 397                                 return MethodHandles.privateLookupIn(Thread.class, MethodHandles.lookup());
 398                             } catch (ReflectiveOperationException e) {
 399                                 throw new ExceptionInInitializerError(e);
 400                             }
 401                         }});
 402             THREAD_PROBE = l.findVarHandle(Thread.class,
 403                     "threadLocalRandomProbe", int.class);
 404         } catch (ReflectiveOperationException e) {
 405             throw new ExceptionInInitializerError(e);
 406         }
 407     }
 408 
 409 }