1 /*
   2  * Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Sun designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Sun in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 
  26 package sun.nio.ch;
  27 
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
  31 
  32 /**
  33  * Manipulates a native array of epoll_event structs on Linux:
  34  *
  35  * typedef union epoll_data {
  36  *     void *ptr;
  37  *     int fd;
  38  *     __uint32_t u32;
  39  *     __uint64_t u64;
  40  *  } epoll_data_t;
  41  *
  42  * struct epoll_event {
  43  *     __uint32_t events;
  44  *     epoll_data_t data;
  45  * };
  46  *
  47  * The system call to wait for I/O events is epoll_wait(2). It populates an
  48  * array of epoll_event structures that are passed to the call. The data
  49  * member of the epoll_event structure contains the same data as was set
  50  * when the file descriptor was registered to epoll via epoll_ctl(2). In
  51  * this implementation we set data.fd to be the file descriptor that we
  52  * register. That way, we have the file descriptor available when we
  53  * process the events.
  54  *
  55  * All file descriptors registered with epoll have the POLLHUP and POLLERR
  56  * events enabled even when registered with an event set of 0. To ensure
  57  * that epoll_wait doesn't poll an idle file descriptor when the underlying
  58  * connection is closed or reset then its registration is deleted from
  59  * epoll (it will be re-added again if the event set is changed)
  60  */
  61 
  62 class EPollArrayWrapper {
  63     // EPOLL_EVENTS
  64     static final int EPOLLIN      = 0x001;
  65 
  66     // opcodes
  67     static final int EPOLL_CTL_ADD      = 1;
  68     static final int EPOLL_CTL_DEL      = 2;
  69     static final int EPOLL_CTL_MOD      = 3;
  70 
  71     // Miscellaneous constants
  72     static final int SIZE_EPOLLEVENT  = sizeofEPollEvent();
  73     static final int EVENT_OFFSET     = 0;
  74     static final int DATA_OFFSET      = offsetofData();
  75     static final int FD_OFFSET        = DATA_OFFSET;
  76     static final int NUM_EPOLLEVENTS  = Math.min(fdLimit(), 8192);
  77 
  78     // Base address of the native pollArray
  79     private final long pollArrayAddress;
  80 
  81     // Set of "idle" channels
  82     private final HashSet<SelChImpl> idleSet;
  83 
  84     EPollArrayWrapper() {
  85         // creates the epoll file descriptor
  86         epfd = epollCreate();
  87 
  88         // the epoll_event array passed to epoll_wait
  89         int allocationSize = NUM_EPOLLEVENTS * SIZE_EPOLLEVENT;
  90         pollArray = new AllocatedNativeObject(allocationSize, true);
  91         pollArrayAddress = pollArray.address();
  92 
  93         for (int i=0; i<NUM_EPOLLEVENTS; i++) {
  94             putEventOps(i, 0);
  95             putData(i, 0L);
  96         }
  97 
  98         // create idle set
  99         idleSet = new HashSet<SelChImpl>();
 100     }
 101 
 102     // Used to update file description registrations
 103     private static class Updator {
 104         SelChImpl channel;
 105         int opcode;
 106         int events;
 107         Updator(SelChImpl channel, int opcode, int events) {
 108             this.channel = channel;
 109             this.opcode = opcode;
 110             this.events = events;
 111         }
 112         Updator(SelChImpl channel, int opcode) {
 113             this(channel, opcode, 0);
 114         }
 115     }
 116 
 117     private LinkedList<Updator> updateList = new LinkedList<Updator>();
 118 
 119     // The epoll_event array for results from epoll_wait
 120     private AllocatedNativeObject pollArray;
 121 
 122     // The fd of the epoll driver
 123     final int epfd;
 124 
 125     // The fd of the interrupt line going out
 126     int outgoingInterruptFD;
 127 
 128     // The fd of the interrupt line coming in
 129     int incomingInterruptFD;
 130 
 131     // The index of the interrupt FD
 132     int interruptedIndex;
 133 
 134     // Number of updated pollfd entries
 135     int updated;
 136 
 137     void initInterrupt(int fd0, int fd1) {
 138         outgoingInterruptFD = fd1;
 139         incomingInterruptFD = fd0;
 140         epollCtl(epfd, EPOLL_CTL_ADD, fd0, EPOLLIN);
 141     }
 142 
 143     void putEventOps(int i, int event) {
 144         int offset = SIZE_EPOLLEVENT * i + EVENT_OFFSET;
 145         pollArray.putInt(offset, event);
 146     }
 147 
 148     void putData(int i, long value) {
 149         int offset = SIZE_EPOLLEVENT * i + DATA_OFFSET;
 150         pollArray.putLong(offset, value);
 151     }
 152 
 153     void putDescriptor(int i, int fd) {
 154         int offset = SIZE_EPOLLEVENT * i + FD_OFFSET;
 155         pollArray.putInt(offset, fd);
 156     }
 157 
 158     int getEventOps(int i) {
 159         int offset = SIZE_EPOLLEVENT * i + EVENT_OFFSET;
 160         return pollArray.getInt(offset);
 161     }
 162 
 163     int getDescriptor(int i) {
 164         int offset = SIZE_EPOLLEVENT * i + FD_OFFSET;
 165         return pollArray.getInt(offset);
 166     }
 167 
 168     /**
 169      * Update the events for a given channel.
 170      */
 171     void setInterest(SelChImpl channel, int mask) {
 172         synchronized (updateList) {
 173             // if the previous pending operation is to add this file descriptor
 174             // to epoll then update its event set
 175             if (updateList.size() > 0) {
 176                 Updator last = updateList.getLast();
 177                 if (last.channel == channel && last.opcode == EPOLL_CTL_ADD) {
 178                     last.events = mask;
 179                     return;
 180                 }
 181             }
 182 
 183             // update existing registration
 184             updateList.add(new Updator(channel, EPOLL_CTL_MOD, mask));
 185         }
 186     }
 187 
 188     /**
 189      * Add a channel's file descriptor to epoll
 190      */
 191     void add(SelChImpl channel) {
 192         synchronized (updateList) {
 193             updateList.add(new Updator(channel, EPOLL_CTL_ADD));
 194         }
 195     }
 196 
 197     /**
 198      * Remove a channel's file descriptor from epoll
 199      */
 200     void release(SelChImpl channel) {
 201         synchronized (updateList) {
 202             // flush any pending updates
 203             int i = 0;
 204             while (i < updateList.size()) {
 205                 if (updateList.get(i).channel == channel) {
 206                     updateList.remove(i);
 207                 } else {
 208                     i++;
 209                 }
 210             }
 211 
 212             // remove from the idle set (if present)
 213             idleSet.remove(channel);
 214 
 215             // remove from epoll (if registered)
 216             epollCtl(epfd, EPOLL_CTL_DEL, channel.getFDVal(), 0);
 217         }
 218     }
 219 
 220     /**
 221      * Close epoll file descriptor and free poll array
 222      */
 223     void closeEPollFD() throws IOException {
 224         FileDispatcherImpl.closeIntFD(epfd);
 225         pollArray.free();
 226     }
 227 
 228     int poll(long timeout) throws IOException {
 229         updateRegistrations();
 230         updated = epollWait(pollArrayAddress, NUM_EPOLLEVENTS, timeout, epfd);
 231         for (int i=0; i<updated; i++) {
 232             if (getDescriptor(i) == incomingInterruptFD) {
 233                 interruptedIndex = i;
 234                 interrupted = true;
 235                 break;
 236             }
 237         }
 238         return updated;
 239     }
 240 
 241     /**
 242      * Update the pending registrations.
 243      */
 244     void updateRegistrations() {
 245         synchronized (updateList) {
 246             Updator u = null;
 247             while ((u = updateList.poll()) != null) {
 248                 SelChImpl ch = u.channel;
 249                 if (!ch.isOpen())
 250                     continue;
 251 
 252                 // if the events are 0 then file descriptor is put into "idle
 253                 // set" to prevent it being polled
 254                 if (u.events == 0) {
 255                     boolean added = idleSet.add(u.channel);
 256                     // if added to idle set then remove from epoll if registered
 257                     if (added && (u.opcode == EPOLL_CTL_MOD))
 258                         epollCtl(epfd, EPOLL_CTL_DEL, ch.getFDVal(), 0);
 259                 } else {
 260                     // events are specified. If file descriptor was in idle set
 261                     // it must be re-registered (by converting opcode to ADD)
 262                     boolean idle = false;
 263                     if (!idleSet.isEmpty())
 264                         idle = idleSet.remove(u.channel);
 265                     int opcode = (idle) ? EPOLL_CTL_ADD : u.opcode;
 266                     epollCtl(epfd, opcode, ch.getFDVal(), u.events);
 267                 }
 268             }
 269         }
 270     }
 271 
 272     // interrupt support
 273     boolean interrupted = false;
 274 
 275     public void interrupt() {
 276         interrupt(outgoingInterruptFD);
 277     }
 278 
 279     public int interruptedIndex() {
 280         return interruptedIndex;
 281     }
 282 
 283     boolean interrupted() {
 284         return interrupted;
 285     }
 286 
 287     void clearInterrupted() {
 288         interrupted = false;
 289     }
 290 
 291     static {
 292         init();
 293     }
 294 
 295     private native int epollCreate();
 296     private native void epollCtl(int epfd, int opcode, int fd, int events);
 297     private native int epollWait(long pollAddress, int numfds, long timeout,
 298                                  int epfd) throws IOException;
 299     private static native int sizeofEPollEvent();
 300     private static native int offsetofData();
 301     private static native int fdLimit();
 302     private static native void interrupt(int fd);
 303     private static native void init();
 304 }