1 /*
   2  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef OS_CPU_LINUX_X86_VM_COPY_LINUX_X86_INLINE_HPP
  26 #define OS_CPU_LINUX_X86_VM_COPY_LINUX_X86_INLINE_HPP
  27 
// Copies 'count' HeapWords from 'from' to 'to'; the two ranges may overlap.
//
// AMD64 builds delegate to memmove(). Other builds run the 32-bit x86
// sequence below, which chooses a forward or a backward copy depending on
// where 'to' lies relative to the source range.
//
// Asm operand map (from the constraint lists; every input is tied to the
// output in the same register through a matching constraint):
//   %0 / %4 = from   ("S", ESI)
//   %1 / %5 = to     ("D", EDI)
//   %2 / %6 = count  ("c", ECX)
//   %3 / %7 = temp   ("r", compiler-chosen scratch register)
static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  (void)memmove(to, from, count * HeapWordSize);
#else
  // Includes a zero-count check.
  intx temp = 0;
  __asm__ volatile("        testl   %6,%6         ;"  // count == 0 ?
                   "        jz      7f            ;"  //   yes: nothing to copy
                   "        cmpl    %4,%5         ;"  // flags = to - from (used by 'jbe 1f')
                   "        leal    -4(%4,%6,4),%3;"  // temp = address of last source word
                   "        jbe     1f            ;"  // to <= from: copy forward
                   "        cmpl    %7,%5         ;"  // to vs. last source word
                   "        jbe     4f            ;"  // to lies inside the source: copy backward
                   "1:      cmpl    $32,%6        ;"  // ---- forward copy ----
                   "        ja      3f            ;"  // more than 32 words: string move
                   "        subl    %4,%1         ;"  // to = to - from, so (%5,%4,1) is the destination
                   "2:      movl    (%4),%3       ;"  // temp = *from
                   "        movl    %7,(%5,%4,1)  ;"  // *(from + (to - from)) = temp
                   "        addl    $4,%0         ;"  // advance from by one word
                   "        subl    $1,%2          ;" // --count
                   "        jnz     2b            ;"  // loop until count reaches 0
                   "        jmp     7f            ;"
                   "3:      rep;    smovl         ;"  // rep-prefixed 32-bit string move, ECX times
                   "        jmp     7f            ;"
                   "4:      cmpl    $32,%2        ;"  // ---- backward copy ---- (flags used by 'ja 6f')
                   "        movl    %7,%0         ;"  // from = last source word
                   "        leal    -4(%5,%6,4),%1;"  // to = last destination word
                   "        ja      6f            ;"  // more than 32 words: string move
                   "        subl    %4,%1         ;"  // to = to - from (offset form, as above)
                   "5:      movl    (%4),%3       ;"  // temp = *from
                   "        movl    %7,(%5,%4,1)  ;"  // store to the matching destination word
                   "        subl    $4,%0         ;"  // step from down by one word
                   "        subl    $1,%2          ;" // --count
                   "        jnz     5b            ;"  // loop until count reaches 0
                   "        jmp     7f            ;"
                   "6:      std                   ;"  // set direction flag: string move walks downward
                   "        rep;    smovl         ;"
                   "        cld                   ;"  // clear the direction flag again
                   "7:      nop                    "
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0"  (from), "1"  (to), "2"  (count), "3"  (temp)
                   : "memory", "flags");
#endif // AMD64
}
  72 
// Copies 'count' HeapWords from 'from' to 'to'. The AMD64 path hands larger
// counts to memcpy(), so the two ranges must not overlap.
//
// Asm operand map for the non-AMD64 path (from the constraint lists; every
// input is tied to the output in the same register):
//   %0 / %4 = from   ("S", ESI)
//   %1 / %5 = to     ("D", EDI)
//   %2 / %6 = count  ("c", ECX)
//   %3 / %7 = temp   ("r", compiler-chosen scratch register)
static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  // Counts 0..8 are copied inline: control enters at 'case count' and falls
  // through every lower case, storing words from index count-1 down to 0.
  switch (count) {
  case 8:  to[7] = from[7];  // fall through
  case 7:  to[6] = from[6];  // fall through
  case 6:  to[5] = from[5];  // fall through
  case 5:  to[4] = from[4];  // fall through
  case 4:  to[3] = from[3];  // fall through
  case 3:  to[2] = from[2];  // fall through
  case 2:  to[1] = from[1];  // fall through
  case 1:  to[0] = from[0];  // fall through
  case 0:  break;
  default:
    // More than 8 words: let the C library do the copy.
    (void)memcpy(to, from, count * HeapWordSize);
    break;
  }
#else
  // Includes a zero-count check.
  intx temp = 0;
  __asm__ volatile("        testl   %6,%6       ;"  // count == 0 ?
                   "        jz      3f          ;"  //   yes: nothing to copy
                   "        cmpl    $32,%6      ;"
                   "        ja      2f          ;"  // more than 32 words: string move
                   "        subl    %4,%1       ;"  // to = to - from, so (%5,%4,1) is the destination
                   "1:      movl    (%4),%3     ;"  // temp = *from
                   "        movl    %7,(%5,%4,1);"  // *(from + (to - from)) = temp
                   "        addl    $4,%0       ;"  // advance from by one word
                   "        subl    $1,%2        ;" // --count
                   "        jnz     1b          ;"  // loop until count reaches 0
                   "        jmp     3f          ;"
                   "2:      rep;    smovl       ;"  // rep-prefixed 32-bit string move, ECX times
                   "3:      nop                  "
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0"  (from), "1"  (to), "2"  (count), "3"  (temp)
                   : "memory", "cc");
#endif // AMD64
}
 110 
 111 static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) {
 112 #ifdef AMD64
 113   switch (count) {
 114   case 8:  to[7] = from[7];
 115   case 7:  to[6] = from[6];
 116   case 6:  to[5] = from[5];
 117   case 5:  to[4] = from[4];
 118   case 4:  to[3] = from[3];
 119   case 3:  to[2] = from[2];
 120   case 2:  to[1] = from[1];
 121   case 1:  to[0] = from[0];
 122   case 0:  break;
 123   default:
 124     while (count-- > 0) {
 125       *to++ = *from++;
 126     }
 127     break;
 128   }
 129 #else
 130   // pd_disjoint_words is word-atomic in this implementation.
 131   pd_disjoint_words(from, to, count);
 132 #endif // AMD64
 133 }
 134 
// Aligned variant of the conjoint word copy. This port has no separate
// implementation: the call is forwarded unchanged to pd_conjoint_words.
static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
  pd_conjoint_words(from, to, count);
}
 138 
// Aligned variant of the disjoint word copy. This port has no separate
// implementation: the call is forwarded unchanged to pd_disjoint_words.
static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
  pd_disjoint_words(from, to, count);
}
 142 
// Copies 'count' bytes from 'from' to 'to'; the two ranges may overlap.
//
// AMD64 builds call memmove(). Other builds run the 32-bit x86 sequence
// below. It picks a forward or backward copy from the position of 'to'
// relative to the source range, and for longer copies moves the bulk of the
// data four bytes at a time after first bringing the source pointer to a
// 4-byte boundary.
//
// Asm operand map (from the constraint lists; every input is tied to the
// output in the same register through a matching constraint):
//   %0 / %4 = from   ("S", ESI)
//   %1 / %5 = to     ("D", EDI)
//   %2 / %6 = count  ("c", ECX)
//   %3 / %7 = temp   ("r", compiler-chosen scratch register)
// EDX/DL serves as an additional scratch register and is listed as a clobber.
static void pd_conjoint_bytes(void* from, void* to, size_t count) {
#ifdef AMD64
  (void)memmove(to, from, count);
#else
  // Includes a zero-count check.
  intx temp = 0;
  __asm__ volatile("        testl   %6,%6          ;"  // count == 0 ?
                   "        jz      13f            ;"  //   yes: nothing to copy
                   "        cmpl    %4,%5          ;"  // flags = to - from (used by 'jbe 1f')
                   "        leal    -1(%4,%6),%3   ;"  // temp = address of last source byte
                   "        jbe     1f             ;"  // to <= from: copy forward
                   "        cmpl    %7,%5          ;"  // to vs. last source byte
                   "        jbe     8f             ;"  // to lies inside the source: copy backward
                   "1:      cmpl    $3,%6          ;"  // ---- forward copy ----
                   "        jbe     6f             ;"  // 3 bytes or fewer: byte loop only
                   "        movl    %6,%3          ;"  // temp = count
                   "        movl    $4,%2          ;"
                   "        subl    %4,%2          ;"
                   "        andl    $3,%2          ;"  // ECX = (4 - from) & 3: bytes up to a 4-byte boundary
                   "        jz      2f             ;"  // source already aligned
                   "        subl    %6,%3          ;"  // temp -= leading bytes
                   "        rep;    smovb          ;"  // copy the leading bytes
                   "2:      movl    %7,%2          ;"
                   "        shrl    $2,%2          ;"  // ECX = remaining bytes / 4
                   "        jz      5f             ;"  // no whole dword left
                   "        cmpl    $32,%2         ;"
                   "        ja      4f             ;"  // more than 32 dwords: string move
                   "        subl    %4,%1          ;"  // to = to - from, so (%5,%4,1) is the destination
                   "3:      movl    (%4),%%edx     ;"  // EDX = *from
                   "        movl    %%edx,(%5,%4,1);"  // store to the matching destination dword
                   "        addl    $4,%0          ;"  // advance from by 4 bytes
                   "        subl    $1,%2           ;" // one dword fewer to go
                   "        jnz     3b             ;"
                   "        addl    %4,%1          ;"  // to += from: back to an absolute address
                   "        jmp     5f             ;"
                   "4:      rep;    smovl          ;"  // rep-prefixed 32-bit string move
                   "5:      movl    %7,%2          ;"
                   "        andl    $3,%2          ;"  // ECX = remaining bytes & 3 (tail)
                   "        jz      13f            ;"  // no tail bytes
                   "6:      xorl    %7,%3          ;"  // temp = 0: byte index for the loop below
                   "7:      movb    (%4,%7,1),%%dl ;"  // DL = from[temp]
                   "        movb    %%dl,(%5,%7,1) ;"  // to[temp] = DL
                   "        addl    $1,%3          ;"  // ++temp
                   "        subl    $1,%2           ;" // one byte fewer to go
                   "        jnz     7b             ;"
                   "        jmp     13f            ;"
                   "8:      std                    ;"  // ---- backward copy ----: string moves walk downward
                   "        cmpl    $12,%2         ;"
                   "        ja      9f             ;"  // more than 12 bytes: use the dword path
                   "        movl    %7,%0          ;"  // from = last source byte
                   "        leal    -1(%6,%5),%1   ;"  // to = last destination byte
                   "        jmp     11f            ;"  // byte-wise string move of all 'count' bytes
                   "9:      xchgl   %3,%2          ;"  // temp = count, ECX = address of last source byte
                   "        movl    %6,%0          ;"  // from = last source byte
                   "        addl    $1,%2          ;"  // ECX = address one past the source end
                   "        leal    -1(%7,%5),%1   ;"  // to = last destination byte
                   "        andl    $3,%2          ;"  // ECX = bytes above the last 4-byte boundary
                   "        jz      10f            ;"  // source end already aligned
                   "        subl    %6,%3          ;"  // temp -= those trailing bytes
                   "        rep;    smovb          ;"  // copy them
                   "10:     movl    %7,%2          ;"
                   "        subl    $3,%0          ;"  // from: last byte -> first byte of the last dword
                   "        shrl    $2,%2          ;"  // ECX = remaining bytes / 4
                   "        subl    $3,%1          ;"  // same adjustment for to
                   "        rep;    smovl          ;"
                   "        andl    $3,%3          ;"  // temp = leftover bytes
                   "        jz      12f            ;"
                   "        movl    %7,%2          ;"  // ECX = leftover bytes
                   "        addl    $3,%0          ;"  // back to byte addressing
                   "        addl    $3,%1          ;"
                   "11:     rep;    smovb          ;"
                   "12:     cld                    ;"  // clear the direction flag again
                   "13:     nop                    ;"
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0"  (from), "1"  (to), "2"  (count), "3"  (temp)
                   : "memory", "flags", "%edx");
#endif // AMD64
}
 221 
// Atomic variant of the conjoint byte copy. No separate implementation here:
// the call is forwarded unchanged to pd_conjoint_bytes.
static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) {
  pd_conjoint_bytes(from, to, count);
}
 225 
// Copies 'count' jshorts from 'from' to 'to' by calling
// _Copy_conjoint_jshorts_atomic, which is defined outside this file.
static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
  _Copy_conjoint_jshorts_atomic(from, to, count);
}
 229 
 230 static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
 231 #ifdef AMD64
 232   _Copy_conjoint_jints_atomic(from, to, count);
 233 #else
 234   assert(HeapWordSize == BytesPerInt, "heapwords and jints must be the same size");
 235   // pd_conjoint_words is word-atomic in this implementation.
 236   pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count);
 237 #endif // AMD64
 238 }
 239 
 240 static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) {
 241 #ifdef AMD64
 242   _Copy_conjoint_jlongs_atomic(from, to, count);
 243 #else
 244   // Guarantee use of fild/fistp or xmm regs via some asm code, because compilers won't.
 245   if (from > to) {
 246     while (count-- > 0) {
 247       __asm__ volatile("fildll (%0); fistpll (%1)"
 248                        :
 249                        : "r" (from), "r" (to)
 250                        : "memory" );
 251       ++from;
 252       ++to;
 253     }
 254   } else {
 255     while (count-- > 0) {
 256       __asm__ volatile("fildll (%0,%2,8); fistpll (%1,%2,8)"
 257                        :
 258                        : "r" (from), "r" (to), "r" (count)
 259                        : "memory" );
 260     }
 261   }
 262 #endif // AMD64
 263 }
 264 
 265 static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) {
 266 #ifdef AMD64
 267   assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
 268   _Copy_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count);
 269 #else
 270   assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size");
 271   // pd_conjoint_words is word-atomic in this implementation.
 272   pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count);
 273 #endif // AMD64
 274 }
 275 
// Array-of-bytes conjoint copy: forwards 'from', 'to' and 'count' unchanged
// to _Copy_arrayof_conjoint_bytes, which is defined outside this file.
static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_bytes(from, to, count);
}
 279 
// Array-of-jshorts conjoint copy: forwards 'from', 'to' and 'count' unchanged
// to _Copy_arrayof_conjoint_jshorts, which is defined outside this file.
static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jshorts(from, to, count);
}
 283 
 284 static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
 285 #ifdef AMD64
 286    _Copy_arrayof_conjoint_jints(from, to, count);
 287 #else
 288   pd_conjoint_jints_atomic((jint*)from, (jint*)to, count);
 289 #endif // AMD64
 290 }
 291 
 292 static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) {
 293 #ifdef AMD64
 294   _Copy_arrayof_conjoint_jlongs(from, to, count);
 295 #else
 296   pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count);
 297 #endif // AMD64
 298 }
 299 
 300 static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) {
 301 #ifdef AMD64
 302   assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
 303   _Copy_arrayof_conjoint_jlongs(from, to, count);
 304 #else
 305   pd_conjoint_oops_atomic((oop*)from, (oop*)to, count);
 306 #endif // AMD64
 307 }
 308 
 309 #endif // OS_CPU_LINUX_X86_VM_COPY_LINUX_X86_INLINE_HPP