#
# Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#


  25 #ifdef __APPLE__
  26 # Darwin uses _ prefixed global symbols
  27 #define SYMBOL(s) _ ## s
  28 #define ELF_TYPE(name, description)
  29 #else
  30 #define SYMBOL(s) s
  31 #define ELF_TYPE(name, description) .type name,description
  32 #endif
  33 
  34         .globl SYMBOL(fixcw)
  35         
  36         # NOTE WELL!  The _Copy functions are called directly
  37         # from server-compiler-generated code via CallLeafNoFP,
  38         # which means that they *must* either not use floating
  39         # point or use it in the same manner as does the server
  40         # compiler.
  41         
  42         .globl SYMBOL(_Copy_conjoint_bytes)
  43         .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
  44         .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
  45         .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
  46         .globl SYMBOL(_Copy_conjoint_jints_atomic)
  47         .globl SYMBOL(_Copy_arrayof_conjoint_jints)
  48         .globl SYMBOL(_Copy_conjoint_jlongs_atomic)
  49         .globl SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts)
  50 
  51         .globl SYMBOL(_Atomic_cmpxchg_long)
  52         .globl SYMBOL(_Atomic_move_long)
  53 
  54         .text
  55 
  56 # Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp
  57 # Set fpu to 53 bit precision.  This happens too early to use a stub.
  58 # ported from solaris_x86_32.s
  59         .p2align 4,,15
  60 SYMBOL(fixcw):
  61         pushl    $0x27f
  62         fldcw    0(%esp)
  63         popl     %eax
  64         ret
  65 
  66         .globl  SYMBOL(SafeFetch32), SYMBOL(Fetch32PFI), SYMBOL(Fetch32Resume)
  67         .globl  SYMBOL(SafeFetchN)
  68         ## TODO: avoid exposing Fetch32PFI and Fetch32Resume.
  69         ## Instead, the signal handler would call a new SafeFetchTriage(FaultingEIP)
  70         ## routine to vet the address.  If the address is the faulting LD then
  71         ## SafeFetchTriage() would return the resume-at EIP, otherwise null.
  72         ELF_TYPE(SafeFetch32,@function)
  73         .p2align 4,,15
  74 SYMBOL(SafeFetch32):
  75 SYMBOL(SafeFetchN):
  76          movl    0x8(%esp), %eax
  77          movl    0x4(%esp), %ecx
  78 SYMBOL(Fetch32PFI):
  79          movl    (%ecx), %eax
  80 SYMBOL(Fetch32Resume):
  81          ret
  82 
  83 
  84         .globl  SYMBOL(SpinPause)
  85         ELF_TYPE(SpinPause,@function)
  86         .p2align 4,,15
  87 SYMBOL(SpinPause):
  88         rep
  89         nop
  90         movl    $1, %eax
  91         ret
  92 
  93         # Support for void Copy::conjoint_bytes(void* from,
  94         #                                       void* to,
  95         #                                       size_t count)
  96         .p2align 4,,15
  97         ELF_TYPE(_Copy_conjoint_bytes,@function)
  98 SYMBOL(_Copy_conjoint_bytes):
  99         pushl    %esi
 100         movl     4+12(%esp),%ecx      # count
 101         pushl    %edi
 102         movl     8+ 4(%esp),%esi      # from
 103         movl     8+ 8(%esp),%edi      # to
 104         cmpl     %esi,%edi
 105         leal     -1(%esi,%ecx),%eax   # from + count - 1
 106         jbe      cb_CopyRight
 107         cmpl     %eax,%edi
 108         jbe      cb_CopyLeft
 109         # copy from low to high
 110 cb_CopyRight:
 111         cmpl     $3,%ecx
 112         jbe      5f                   # <= 3 bytes
 113         # align source address at dword address boundary
 114         movl     %ecx,%eax            # original count
 115         movl     $4,%ecx
 116         subl     %esi,%ecx
 117         andl     $3,%ecx              # prefix byte count
 118         jz       1f                   # no prefix
 119         subl     %ecx,%eax            # byte count less prefix
 120         # copy prefix
 121         subl     %esi,%edi
 122 0:      movb     (%esi),%dl
 123         movb     %dl,(%edi,%esi,1)
 124         addl     $1,%esi
 125         subl     $1,%ecx
 126         jnz      0b
 127         addl     %esi,%edi
 128 1:      movl     %eax,%ecx            # byte count less prefix
 129         shrl     $2,%ecx              # dword count
 130         jz       4f                   # no dwords to move
 131         cmpl     $32,%ecx
 132         jbe      2f                   # <= 32 dwords
 133         # copy aligned dwords
 134         rep;     smovl
 135         jmp      4f
 136         # copy aligned dwords
 137 2:      subl     %esi,%edi
 138         .p2align 4,,15
 139 3:      movl     (%esi),%edx
 140         movl     %edx,(%edi,%esi,1)
 141         addl     $4,%esi
 142         subl     $1,%ecx
 143         jnz      3b
 144         addl     %esi,%edi
 145 4:      movl     %eax,%ecx            # byte count less prefix
 146 5:      andl     $3,%ecx              # suffix byte count
 147         jz       7f                   # no suffix
 148         # copy suffix
 149         xorl     %eax,%eax
 150 6:      movb     (%esi,%eax,1),%dl
 151         movb     %dl,(%edi,%eax,1)
 152         addl     $1,%eax
 153         subl     $1,%ecx
 154         jnz      6b
 155 7:      popl     %edi
 156         popl     %esi
 157         ret
 158         # copy from high to low
 159 cb_CopyLeft:
 160         std
 161         leal     -4(%edi,%ecx),%edi   # to + count - 4
 162         movl     %eax,%esi            # from + count - 1
 163         movl     %ecx,%eax
 164         subl     $3,%esi              # from + count - 4
 165         cmpl     $3,%ecx
 166         jbe      5f                   # <= 3 bytes
 167 1:      shrl     $2,%ecx              # dword count
 168         jz       4f                   # no dwords to move
 169         cmpl     $32,%ecx
 170         ja       3f                   # > 32 dwords
 171         # copy dwords, aligned or not
 172         subl     %esi,%edi
 173         .p2align 4,,15
 174 2:      movl     (%esi),%edx
 175         movl     %edx,(%edi,%esi,1)
 176         subl     $4,%esi
 177         subl     $1,%ecx
 178         jnz      2b
 179         addl     %esi,%edi
 180         jmp      4f
 181         # copy dwords, aligned or not
 182 3:      rep;     smovl
 183 4:      movl     %eax,%ecx            # byte count
 184 5:      andl     $3,%ecx              # suffix byte count
 185         jz       7f                   # no suffix
 186         # copy suffix
 187         subl     %esi,%edi
 188         addl     $3,%esi
 189 6:      movb     (%esi),%dl
 190         movb     %dl,(%edi,%esi,1)
 191         subl     $1,%esi
 192         subl     $1,%ecx
 193         jnz      6b
 194 7:      cld
 195         popl     %edi
 196         popl     %esi
 197         ret
 198 
 199         # Support for void Copy::arrayof_conjoint_bytes(void* from,
 200         #                                               void* to,
 201         #                                               size_t count)
 202         #
 203         # Same as _Copy_conjoint_bytes, except no source alignment check.
 204         .p2align 4,,15
 205         ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
 206 SYMBOL(_Copy_arrayof_conjoint_bytes):
 207         pushl    %esi
 208         movl     4+12(%esp),%ecx      # count
 209         pushl    %edi
 210         movl     8+ 4(%esp),%esi      # from
 211         movl     8+ 8(%esp),%edi      # to
 212         cmpl     %esi,%edi
 213         leal     -1(%esi,%ecx),%eax   # from + count - 1
 214         jbe      acb_CopyRight
 215         cmpl     %eax,%edi
 216         jbe      acb_CopyLeft 
 217         # copy from low to high
 218 acb_CopyRight:
 219         cmpl     $3,%ecx
 220         jbe      5f
 221 1:      movl     %ecx,%eax
 222         shrl     $2,%ecx
 223         jz       4f
 224         cmpl     $32,%ecx
 225         ja       3f
 226         # copy aligned dwords
 227         subl     %esi,%edi
 228         .p2align 4,,15
 229 2:      movl     (%esi),%edx
 230         movl     %edx,(%edi,%esi,1)
 231         addl     $4,%esi
 232         subl     $1,%ecx
 233         jnz      2b
 234         addl     %esi,%edi
 235         jmp      4f
 236         # copy aligned dwords
 237 3:      rep;     smovl
 238 4:      movl     %eax,%ecx
 239 5:      andl     $3,%ecx
 240         jz       7f
 241         # copy suffix
 242         xorl     %eax,%eax
 243 6:      movb     (%esi,%eax,1),%dl
 244         movb     %dl,(%edi,%eax,1)
 245         addl     $1,%eax
 246         subl     $1,%ecx
 247         jnz      6b
 248 7:      popl     %edi
 249         popl     %esi
 250         ret
 251 acb_CopyLeft:
 252         std
 253         leal     -4(%edi,%ecx),%edi   # to + count - 4
 254         movl     %eax,%esi            # from + count - 1
 255         movl     %ecx,%eax
 256         subl     $3,%esi              # from + count - 4
 257         cmpl     $3,%ecx
 258         jbe      5f
 259 1:      shrl     $2,%ecx
 260         jz       4f
 261         cmpl     $32,%ecx
 262         jbe      2f                   # <= 32 dwords
 263         rep;     smovl
 264         jmp      4f
 265         .=.+8
 266 2:      subl     %esi,%edi
 267         .p2align 4,,15
 268 3:      movl     (%esi),%edx
 269         movl     %edx,(%edi,%esi,1)
 270         subl     $4,%esi
 271         subl     $1,%ecx
 272         jnz      3b
 273         addl     %esi,%edi
 274 4:      movl     %eax,%ecx
 275 5:      andl     $3,%ecx
 276         jz       7f
 277         subl     %esi,%edi
 278         addl     $3,%esi
 279 6:      movb     (%esi),%dl
 280         movb     %dl,(%edi,%esi,1)
 281         subl     $1,%esi
 282         subl     $1,%ecx
 283         jnz      6b
 284 7:      cld
 285         popl     %edi
 286         popl     %esi
 287         ret
 288 
 289         # Support for void Copy::conjoint_jshorts_atomic(void* from,
 290         #                                                void* to,
 291         #                                                size_t count)
 292         .p2align 4,,15
 293         ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
 294 SYMBOL(_Copy_conjoint_jshorts_atomic):
 295         pushl    %esi
 296         movl     4+12(%esp),%ecx      # count
 297         pushl    %edi
 298         movl     8+ 4(%esp),%esi      # from
 299         movl     8+ 8(%esp),%edi      # to
 300         cmpl     %esi,%edi
 301         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
 302         jbe      cs_CopyRight
 303         cmpl     %eax,%edi
 304         jbe      cs_CopyLeft 
 305         # copy from low to high
 306 cs_CopyRight:
 307         # align source address at dword address boundary
 308         movl     %esi,%eax            # original from
 309         andl     $3,%eax              # either 0 or 2
 310         jz       1f                   # no prefix
 311         # copy prefix
 312         subl     $1,%ecx
 313         jl       5f                   # zero count
 314         movw     (%esi),%dx
 315         movw     %dx,(%edi)
 316         addl     %eax,%esi            # %eax == 2
 317         addl     %eax,%edi
 318 1:      movl     %ecx,%eax            # word count less prefix
 319         sarl     %ecx                 # dword count
 320         jz       4f                   # no dwords to move
 321         cmpl     $32,%ecx
 322         jbe      2f                   # <= 32 dwords
 323         # copy aligned dwords
 324         rep;     smovl
 325         jmp      4f 
 326         # copy aligned dwords
 327 2:      subl     %esi,%edi
 328         .p2align 4,,15
 329 3:      movl     (%esi),%edx
 330         movl     %edx,(%edi,%esi,1)
 331         addl     $4,%esi
 332         subl     $1,%ecx
 333         jnz      3b
 334         addl     %esi,%edi
 335 4:      andl     $1,%eax              # suffix count
 336         jz       5f                   # no suffix
 337         # copy suffix
 338         movw     (%esi),%dx
 339         movw     %dx,(%edi)
 340 5:      popl     %edi
 341         popl     %esi
 342         ret
 343         # copy from high to low
 344 cs_CopyLeft:
 345         std
 346         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
 347         movl     %eax,%esi            # from + count*2 - 2
 348         movl     %ecx,%eax
 349         subl     $2,%esi              # from + count*2 - 4
 350 1:      sarl     %ecx                 # dword count
 351         jz       4f                   # no dwords to move
 352         cmpl     $32,%ecx
 353         ja       3f                   # > 32 dwords
 354         subl     %esi,%edi
 355         .p2align 4,,15
 356 2:      movl     (%esi),%edx
 357         movl     %edx,(%edi,%esi,1)
 358         subl     $4,%esi
 359         subl     $1,%ecx
 360         jnz      2b
 361         addl     %esi,%edi
 362         jmp      4f
 363 3:      rep;     smovl
 364 4:      andl     $1,%eax              # suffix count
 365         jz       5f                   # no suffix
 366         # copy suffix
 367         addl     $2,%esi
 368         addl     $2,%edi
 369         movw     (%esi),%dx
 370         movw     %dx,(%edi)
 371 5:      cld
 372         popl     %edi
 373         popl     %esi
 374         ret
 375 
 376         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 377         #                                                 void* to,
 378         #                                                 size_t count)
 379         .p2align 4,,15
 380         ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
 381 SYMBOL(_Copy_arrayof_conjoint_jshorts):
 382         pushl    %esi
 383         movl     4+12(%esp),%ecx      # count
 384         pushl    %edi
 385         movl     8+ 4(%esp),%esi      # from
 386         movl     8+ 8(%esp),%edi      # to
 387         cmpl     %esi,%edi
 388         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
 389         jbe      acs_CopyRight
 390         cmpl     %eax,%edi
 391         jbe      acs_CopyLeft 
 392 acs_CopyRight:
 393         movl     %ecx,%eax            # word count
 394         sarl     %ecx                 # dword count
 395         jz       4f                   # no dwords to move
 396         cmpl     $32,%ecx
 397         jbe      2f                   # <= 32 dwords
 398         # copy aligned dwords
 399         rep;     smovl
 400         jmp      4f 
 401         # copy aligned dwords
 402         .=.+5
 403 2:      subl     %esi,%edi 
 404         .p2align 4,,15
 405 3:      movl     (%esi),%edx
 406         movl     %edx,(%edi,%esi,1)
 407         addl     $4,%esi
 408         subl     $1,%ecx
 409         jnz      3b
 410         addl     %esi,%edi
 411 4:      andl     $1,%eax              # suffix count
 412         jz       5f                   # no suffix
 413         # copy suffix
 414         movw     (%esi),%dx
 415         movw     %dx,(%edi)
 416 5:      popl     %edi
 417         popl     %esi
 418         ret
 419 acs_CopyLeft:
 420         std
 421         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
 422         movl     %eax,%esi            # from + count*2 - 2
 423         movl     %ecx,%eax
 424         subl     $2,%esi              # from + count*2 - 4
 425         sarl     %ecx                 # dword count
 426         jz       4f                   # no dwords to move
 427         cmpl     $32,%ecx
 428         ja       3f                   # > 32 dwords
 429         subl     %esi,%edi
 430         .p2align 4,,15
 431 2:      movl     (%esi),%edx
 432         movl     %edx,(%edi,%esi,1)
 433         subl     $4,%esi
 434         subl     $1,%ecx
 435         jnz      2b
 436         addl     %esi,%edi
 437         jmp      4f
 438 3:      rep;     smovl
 439 4:      andl     $1,%eax              # suffix count
 440         jz       5f                   # no suffix
 441         # copy suffix
 442         addl     $2,%esi
 443         addl     $2,%edi
 444         movw     (%esi),%dx
 445         movw     %dx,(%edi)
 446 5:      cld
 447         popl     %edi
 448         popl     %esi
 449         ret
 450 
 451         # Support for void Copy::conjoint_jints_atomic(void* from,
 452         #                                              void* to,
 453         #                                              size_t count)
 454         # Equivalent to
 455         #   arrayof_conjoint_jints
 456         .p2align 4,,15
 457         ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
 458         ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
 459 SYMBOL(_Copy_conjoint_jints_atomic):
 460 SYMBOL(_Copy_arrayof_conjoint_jints):
 461         pushl    %esi
 462         movl     4+12(%esp),%ecx      # count
 463         pushl    %edi
 464         movl     8+ 4(%esp),%esi      # from
 465         movl     8+ 8(%esp),%edi      # to
 466         cmpl     %esi,%edi
 467         leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
 468         jbe      ci_CopyRight
 469         cmpl     %eax,%edi
 470         jbe      ci_CopyLeft 
 471 ci_CopyRight:
 472         cmpl     $32,%ecx
 473         jbe      2f                   # <= 32 dwords
 474         rep;     smovl
 475         popl     %edi
 476         popl     %esi
 477         ret
 478         .=.+10
 479 2:      subl     %esi,%edi
 480         jmp      4f
 481         .p2align 4,,15
 482 3:      movl     (%esi),%edx
 483         movl     %edx,(%edi,%esi,1)
 484         addl     $4,%esi
 485 4:      subl     $1,%ecx
 486         jge      3b
 487         popl     %edi
 488         popl     %esi
 489         ret
 490 ci_CopyLeft:
 491         std
 492         leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
 493         cmpl     $32,%ecx
 494         ja       4f                   # > 32 dwords
 495         subl     %eax,%edi            # eax == from + count*4 - 4
 496         jmp      3f
 497         .p2align 4,,15
 498 2:      movl     (%eax),%edx
 499         movl     %edx,(%edi,%eax,1)
 500         subl     $4,%eax
 501 3:      subl     $1,%ecx
 502         jge      2b
 503         cld
 504         popl     %edi
 505         popl     %esi
 506         ret
 507 4:      movl     %eax,%esi            # from + count*4 - 4
 508         rep;     smovl
 509         cld
 510         popl     %edi
 511         popl     %esi
 512         ret
 513         
 514         # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
 515         #                                               jlong* to,
 516         #                                               size_t count)
 517         #
 518         # 32-bit
 519         #
 520         # count treated as signed
 521         #
 522         # // if (from > to) {
 523         #   while (--count >= 0) {
 524         #     *to++ = *from++;
 525         #   }
 526         # } else {
 527         #   while (--count >= 0) {
 528         #     to[count] = from[count];
 529         #   }
 530         # }
 531         .p2align 4,,15
 532         ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
 533 SYMBOL(_Copy_conjoint_jlongs_atomic):
 534         movl     4+8(%esp),%ecx       # count
 535         movl     4+0(%esp),%eax       # from
 536         movl     4+4(%esp),%edx       # to
 537         cmpl     %eax,%edx
 538         jae      cla_CopyLeft
 539 cla_CopyRight:
 540         subl     %eax,%edx
 541         jmp      2f
 542         .p2align 4,,15
 543 1:      fildll   (%eax)
 544         fistpll  (%edx,%eax,1)
 545         addl     $8,%eax
 546 2:      subl     $1,%ecx
 547         jge      1b
 548         ret
 549         .p2align 4,,15
 550 3:      fildll   (%eax,%ecx,8)
 551         fistpll  (%edx,%ecx,8)
 552 cla_CopyLeft:
 553         subl     $1,%ecx
 554         jge      3b
 555         ret
 556 
 557         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 558         #                                                 void* to,
 559         #                                                 size_t count)
 560         .p2align 4,,15
 561         ELF_TYPE(_mmx_Copy_arrayof_conjoint_jshorts,@function)
 562 SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts):
 563         pushl    %esi
 564         movl     4+12(%esp),%ecx
 565         pushl    %edi
 566         movl     8+ 4(%esp),%esi
 567         movl     8+ 8(%esp),%edi
 568         cmpl     %esi,%edi
 569         leal     -2(%esi,%ecx,2),%eax
 570         jbe      mmx_acs_CopyRight
 571         cmpl     %eax,%edi
 572         jbe      mmx_acs_CopyLeft
 573 mmx_acs_CopyRight:
 574         movl     %ecx,%eax
 575         sarl     %ecx
 576         je       5f
 577         cmpl     $33,%ecx
 578         jae      3f
 579 1:      subl     %esi,%edi 
 580         .p2align 4,,15
 581 2:      movl     (%esi),%edx
 582         movl     %edx,(%edi,%esi,1)
 583         addl     $4,%esi
 584         subl     $1,%ecx
 585         jnz      2b
 586         addl     %esi,%edi
 587         jmp      5f 
 588 3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
 589         subl     $1,%ecx
 590 4:      .p2align 4,,15
 591         movq     0(%esi),%mm0
 592         addl     $64,%edi
 593         movq     8(%esi),%mm1
 594         subl     $16,%ecx
 595         movq     16(%esi),%mm2
 596         movq     %mm0,-64(%edi)
 597         movq     24(%esi),%mm0
 598         movq     %mm1,-56(%edi)
 599         movq     32(%esi),%mm1
 600         movq     %mm2,-48(%edi)
 601         movq     40(%esi),%mm2
 602         movq     %mm0,-40(%edi)
 603         movq     48(%esi),%mm0
 604         movq     %mm1,-32(%edi)
 605         movq     56(%esi),%mm1
 606         movq     %mm2,-24(%edi)
 607         movq     %mm0,-16(%edi)
 608         addl     $64,%esi
 609         movq     %mm1,-8(%edi)
 610         cmpl     $16,%ecx
 611         jge      4b
 612         emms
 613         testl    %ecx,%ecx
 614         ja       1b
 615 5:      andl     $1,%eax
 616         je       7f
 617 6:      movw     (%esi),%dx
 618         movw     %dx,(%edi)
 619 7:      popl     %edi
 620         popl     %esi
 621         ret
 622 mmx_acs_CopyLeft:
 623         std
 624         leal     -4(%edi,%ecx,2),%edi
 625         movl     %eax,%esi
 626         movl     %ecx,%eax
 627         subl     $2,%esi
 628         sarl     %ecx
 629         je       4f
 630         cmpl     $32,%ecx
 631         ja       3f
 632         subl     %esi,%edi
 633         .p2align 4,,15
 634 2:      movl     (%esi),%edx
 635         movl     %edx,(%edi,%esi,1)
 636         subl     $4,%esi
 637         subl     $1,%ecx
 638         jnz      2b
 639         addl     %esi,%edi
 640         jmp      4f
 641 3:      rep;     smovl
 642 4:      andl     $1,%eax
 643         je       6f
 644         addl     $2,%esi
 645         addl     $2,%edi
 646 5:      movw     (%esi),%dx
 647         movw     %dx,(%edi)
 648 6:      cld
 649         popl     %edi
 650         popl     %esi
 651         ret
 652 
 653 
 654         # Support for jlong Atomic::cmpxchg(jlong exchange_value,
 655         #                                   volatile jlong* dest,
 656         #                                   jlong compare_value,
 657         #                                   bool is_MP)
 658         #
 659         .p2align 4,,15
 660         ELF_TYPE(_Atomic_cmpxchg_long,@function)
 661 SYMBOL(_Atomic_cmpxchg_long):
 662                                    #  8(%esp) : return PC
 663         pushl    %ebx              #  4(%esp) : old %ebx
 664         pushl    %edi              #  0(%esp) : old %edi
 665         movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
 666         movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
 667         movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
 668         movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
 669         movl     20(%esp), %edi    # 20(%esp) : dest
 670         cmpl     $0, 32(%esp)      # 32(%esp) : is_MP
 671         je       1f
 672         lock
 673 1:      cmpxchg8b (%edi)
 674         popl     %edi
 675         popl     %ebx
 676         ret
 677 
 678 
 679         # Support for jlong Atomic::load and Atomic::store.
 680         # void _Atomic_move_long(volatile jlong* src, volatile jlong* dst)
 681         .p2align 4,,15
 682         ELF_TYPE(_Atomic_move_long,@function)
 683 SYMBOL(_Atomic_move_long):
 684         movl     4(%esp), %eax   # src
 685         fildll    (%eax)
 686         movl     8(%esp), %eax   # dest
 687         fistpll   (%eax)
 688         ret
 689