1 # 
   2 # Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
   3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 #
   5 # This code is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License version 2 only, as
   7 # published by the Free Software Foundation.
   8 #
   9 # This code is distributed in the hope that it will be useful, but WITHOUT
  10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 # version 2 for more details (a copy is included in the LICENSE file that
  13 # accompanied this code).
  14 #
  15 # You should have received a copy of the GNU General Public License version
  16 # 2 along with this work; if not, write to the Free Software Foundation,
  17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 #
  19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 # or visit www.oracle.com if you need additional information or have any
  21 # questions.
  22 # 
  23 
  24  
  25 #ifdef __APPLE__
  26 # Darwin uses _ prefixed global symbols
     # Here SYMBOL pastes a leading underscore onto its argument and ELF_TYPE
     # expands to nothing, so no .type directive is emitted.
  27 #define SYMBOL(s) _ ## s
  28 #define ELF_TYPE(name, description)
  29 #else
     # Elsewhere SYMBOL leaves the name unchanged and ELF_TYPE emits a .type
     # directive that records the requested type for the symbol.
  30 #define SYMBOL(s) s
  31 #define ELF_TYPE(name, description) .type name,description
  32 #endif
  33 
  34         .globl SYMBOL(fixcw)           # x87 control word setup routine
  35         
  36         # NOTE WELL!  The _Copy functions are called directly
  37         # from server-compiler-generated code via CallLeafNoFP,
  38         # which means that they *must* either not use floating
  39         # point or use it in the same manner as does the server
  40         # compiler.
  41         
             # Exported copy entry points.  Every name goes through the SYMBOL
             # macro so that it gets the platform spelling of a global symbol.
  42         .globl SYMBOL(_Copy_conjoint_bytes)
  43         .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
  44         .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
  45         .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
  46         .globl SYMBOL(_Copy_conjoint_jints_atomic)
  47         .globl SYMBOL(_Copy_arrayof_conjoint_jints)
  48         .globl SYMBOL(_Copy_conjoint_jlongs_atomic)
  49         .globl SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts)
  50 
             # Exported 64-bit atomic helpers.
  51         .globl SYMBOL(_Atomic_cmpxchg_long)
  52         .globl SYMBOL(_Atomic_move_long)
  53 
  54         .text                          # the routines below are assembled into the text section
  55 
  56 # Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp
  57 # Set fpu to 53 bit precision.  This happens too early to use a stub.
  58 # ported from solaris_x86_32.s
     #
     # fixcw takes no arguments.  It loads the x87 control word with 0x27f by
     # way of a temporary stack slot and returns; %eax is overwritten by the pop.
     # In 0x27f the six exception mask bits are set, the precision control
     # field is 10b (53-bit significand) and the rounding control field is 00b.
     # NOTE(review): the header above still names the Solaris caller because the
     # routine was ported; confirm which function calls it on this platform.
  59 #ifdef __APPLE__
     # NOTE(review): presumably the Apple assembler reads the operand as a power
     # of two, which would make both branches a 16-byte alignment - confirm.
  60         .align   4
  61 #else
  62         .align   16
  63 #endif
  64 SYMBOL(fixcw):
  65         pushl    $0x27f               # stage the new control word on the stack
  66         fldcw    0(%esp)              # load it into the x87 control register
  67         popl     %eax                 # release the stack slot (overwrites %eax)
  68         ret
  69 
  70 #ifdef __APPLE__
  71         .align   4
  72 #else
  73         .align   16
  74 #endif
  75 
  76         .globl  SYMBOL(SafeFetch32), SYMBOL(Fetch32PFI), SYMBOL(Fetch32Resume)
  77         .globl  SYMBOL(SafeFetchN)
  78         ## TODO: avoid exposing Fetch32PFI and Fetch32Resume.
  79         ## Instead, the signal handler would call a new SafeFetchTriage(FaultingEIP)
  80         ## routine to vet the address.  If the address is the faulting LD then
  81         ## SafeFetchTriage() would return the resume-at EIP, otherwise null.
             #
             # SafeFetch32 and SafeFetchN are two names for one body.
             # Stack arguments on entry:
             #   first,  at 4(%esp): the address to read
             #   second, at 8(%esp): a value preloaded into %eax before the read
             # The read at Fetch32PFI replaces %eax with the dword at the address.
             # Fetch32PFI and Fetch32Resume are exported, and the TODO above implies
             # that the signal handler compares a faulting EIP with Fetch32PFI and
             # resumes at Fetch32Resume, in which case %eax would still hold the
             # second argument.  The handler is not in this file - confirm there.
             # Overwrites %ecx.
  82         ELF_TYPE(SafeFetch32,@function)
  83         .p2align 4,,15
  84 SYMBOL(SafeFetch32):
  85 SYMBOL(SafeFetchN):
  86          movl    0x8(%esp), %eax      # %eax = second argument
  87          movl    0x4(%esp), %ecx      # %ecx = first argument, the address
  88 SYMBOL(Fetch32PFI):
  89          movl    (%ecx), %eax         # the only instruction here that dereferences the address
  90 SYMBOL(Fetch32Resume):
  91          ret
  92 
  93 
  94         .globl  SYMBOL(SpinPause)
             # SpinPause takes no arguments, executes one spin-wait hint and
             # returns 1 in %eax.
  95         ELF_TYPE(SpinPause,@function)
  96         .p2align 4,,15
  97 SYMBOL(SpinPause):
  98         rep                          # rep followed by nop is the encoding of the PAUSE hint
  99         nop
 100         movl    $1, %eax             # return value
 101         ret
 102 
 103         # Support for void Copy::conjoint_bytes(void* from,
 104         #                                       void* to,
 105         #                                       size_t count)
             #
             # Copies count bytes between regions that may overlap.  On entry the
             # arguments from, to and count sit at 4(%esp), 8(%esp) and 12(%esp).
             # %esi and %edi are saved and restored; %eax, %ecx, %edx and the
             # flags are overwritten.
             #
             # Direction: when to <= from, or to lies above the last source byte,
             # the copy runs upward (cb_CopyRight).  Otherwise it runs downward
             # (cb_CopyLeft) with the direction flag set; that path clears the
             # flag again before it returns.
 106         .p2align 4,,15
 107         ELF_TYPE(_Copy_conjoint_bytes,@function)
 108 SYMBOL(_Copy_conjoint_bytes):
 109         pushl    %esi                 # preserved for the caller
 110         movl     4+12(%esp),%ecx      # count
 111         pushl    %edi                 # preserved for the caller
 112         movl     8+ 4(%esp),%esi      # from
 113         movl     8+ 8(%esp),%edi      # to
 114         cmpl     %esi,%edi            # to vs from; leal below leaves the flags alone
 115         leal     -1(%esi,%ecx),%eax   # from + count - 1
 116         jbe      cb_CopyRight         # to <= from: copy upward
 117         cmpl     %eax,%edi
 118         jbe      cb_CopyLeft          # to <= last source byte: overlap, copy downward
 119         # copy from low to high
 120 cb_CopyRight:
 121         cmpl     $3,%ecx
 122         jbe      5f                   # <= 3 bytes
 123         # align source address at dword address boundary
 124         movl     %ecx,%eax            # original count
 125         movl     $4,%ecx
 126         subl     %esi,%ecx
 127         andl     $3,%ecx              # prefix byte count
 128         jz       1f                   # no prefix
 129         subl     %ecx,%eax            # byte count less prefix
 130         # copy prefix
 131         subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) addresses the destination
 132 0:      movb     (%esi),%dl
 133         movb     %dl,(%edi,%esi,1)
 134         addl     $1,%esi
 135         subl     $1,%ecx
 136         jnz      0b
 137         addl     %esi,%edi            # turn %edi back into a destination pointer
 138 1:      movl     %eax,%ecx            # byte count less prefix
 139         shrl     $2,%ecx              # dword count
 140         jz       4f                   # no dwords to move
 141         cmpl     $32,%ecx
 142         jbe      2f                   # <= 32 dwords
 143         # copy aligned dwords
 144         rep;     smovl
 145         jmp      4f
 146         # copy aligned dwords
 147 2:      subl     %esi,%edi            # %edi = to - from for the indexed stores below
 148         .p2align 4,,15
 149 3:      movl     (%esi),%edx
 150         movl     %edx,(%edi,%esi,1)
 151         addl     $4,%esi
 152         subl     $1,%ecx
 153         jnz      3b
 154         addl     %esi,%edi            # turn %edi back into a destination pointer
 155 4:      movl     %eax,%ecx            # byte count less prefix
 156 5:      andl     $3,%ecx              # suffix byte count
 157         jz       7f                   # no suffix
 158         # copy suffix
 159         xorl     %eax,%eax            # byte index shared by source and destination
 160 6:      movb     (%esi,%eax,1),%dl
 161         movb     %dl,(%edi,%eax,1)
 162         addl     $1,%eax
 163         subl     $1,%ecx
 164         jnz      6b
 165 7:      popl     %edi
 166         popl     %esi
 167         ret
 168         # copy from high to low
 169 cb_CopyLeft:
 170         std                           # string moves now step downward; undone by cld at 7
 171         leal     -4(%edi,%ecx),%edi   # to + count - 4
 172         movl     %eax,%esi            # from + count - 1
 173         movl     %ecx,%eax            # keep the byte count for the suffix step
 174         subl     $3,%esi              # from + count - 4
 175         cmpl     $3,%ecx
 176         jbe      5f                   # <= 3 bytes
 177 1:      shrl     $2,%ecx              # dword count
 178         jz       4f                   # no dwords to move
 179         cmpl     $32,%ecx
 180         ja       3f                   # > 32 dwords
 181         # copy dwords, aligned or not
 182         subl     %esi,%edi            # %edi = to - from for the indexed stores below
 183         .p2align 4,,15
 184 2:      movl     (%esi),%edx
 185         movl     %edx,(%edi,%esi,1)
 186         subl     $4,%esi
 187         subl     $1,%ecx
 188         jnz      2b
 189         addl     %esi,%edi            # turn %edi back into a destination pointer
 190         jmp      4f
 191         # copy dwords, aligned or not
 192 3:      rep;     smovl
 193 4:      movl     %eax,%ecx            # byte count
 194 5:      andl     $3,%ecx              # suffix byte count
 195         jz       7f                   # no suffix
 196         # copy suffix
             # going downward, the suffix is the lowest-addressed remainder
 197         subl     %esi,%edi            # %edi = to - from
 198         addl     $3,%esi              # highest byte not copied yet
 199 6:      movb     (%esi),%dl
 200         movb     %dl,(%edi,%esi,1)
 201         subl     $1,%esi
 202         subl     $1,%ecx
 203         jnz      6b
 204 7:      cld                           # restore the direction flag set by std
 205         popl     %edi
 206         popl     %esi
 207         ret
 208 
 209         # Support for void Copy::arrayof_conjoint_bytes(void* from,
 210         #                                               void* to,
 211         #                                               size_t count)
 212         #
 213         # Same as _Copy_conjoint_bytes, except no source alignment check.
             #
             # On entry from, to and count sit at 4(%esp), 8(%esp) and 12(%esp).
             # %esi and %edi are saved and restored; %eax, %ecx, %edx and the
             # flags are overwritten.  The upward path (acb_CopyRight) is taken
             # when to <= from or to lies above the last source byte; otherwise
             # the downward path (acb_CopyLeft) runs with the direction flag set
             # and clears it before returning.
 214         .p2align 4,,15
 215         ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
 216 SYMBOL(_Copy_arrayof_conjoint_bytes):
 217         pushl    %esi
 218         movl     4+12(%esp),%ecx      # count
 219         pushl    %edi
 220         movl     8+ 4(%esp),%esi      # from
 221         movl     8+ 8(%esp),%edi      # to
 222         cmpl     %esi,%edi            # to vs from; leal below leaves the flags alone
 223         leal     -1(%esi,%ecx),%eax   # from + count - 1
 224         jbe      acb_CopyRight        # to <= from: copy upward
 225         cmpl     %eax,%edi
 226         jbe      acb_CopyLeft         # to <= last source byte: overlap, copy downward
 227         # copy from low to high
 228 acb_CopyRight:
 229         cmpl     $3,%ecx
 230         jbe      5f                   # <= 3 bytes: only the byte loop runs
 231 1:      movl     %ecx,%eax            # keep the byte count for the suffix step
 232         shrl     $2,%ecx              # dword count
 233         jz       4f
 234         cmpl     $32,%ecx
 235         ja       3f                   # > 32 dwords
 236         # copy aligned dwords
 237         subl     %esi,%edi            # %edi = to - from for the indexed stores below
 238         .p2align 4,,15
 239 2:      movl     (%esi),%edx
 240         movl     %edx,(%edi,%esi,1)
 241         addl     $4,%esi
 242         subl     $1,%ecx
 243         jnz      2b
 244         addl     %esi,%edi            # turn %edi back into a destination pointer
 245         jmp      4f
 246         # copy aligned dwords
 247 3:      rep;     smovl
 248 4:      movl     %eax,%ecx            # byte count
 249 5:      andl     $3,%ecx              # suffix byte count
 250         jz       7f
 251         # copy suffix
 252         xorl     %eax,%eax            # byte index shared by source and destination
 253 6:      movb     (%esi,%eax,1),%dl
 254         movb     %dl,(%edi,%eax,1)
 255         addl     $1,%eax
 256         subl     $1,%ecx
 257         jnz      6b
 258 7:      popl     %edi
 259         popl     %esi
 260         ret
             # copy from high to low
 261 acb_CopyLeft:
 262         std                           # string moves now step downward; undone by cld at 7
 263         leal     -4(%edi,%ecx),%edi   # to + count - 4
 264         movl     %eax,%esi            # from + count - 1
 265         movl     %ecx,%eax            # keep the byte count for the suffix step
 266         subl     $3,%esi              # from + count - 4
 267         cmpl     $3,%ecx
 268         jbe      5f                   # <= 3 bytes
 269 1:      shrl     $2,%ecx              # dword count
 270         jz       4f
 271         cmpl     $32,%ecx
 272         jbe      2f                   # <= 32 dwords
 273         rep;     smovl
 274         jmp      4f
 275         .=.+8                         # skips 8 bytes, never executed (follows the jmp); reason not stated - presumably placement tuning
 276 2:      subl     %esi,%edi            # %edi = to - from for the indexed stores below
 277         .p2align 4,,15
 278 3:      movl     (%esi),%edx
 279         movl     %edx,(%edi,%esi,1)
 280         subl     $4,%esi
 281         subl     $1,%ecx
 282         jnz      3b
 283         addl     %esi,%edi            # turn %edi back into a destination pointer
 284 4:      movl     %eax,%ecx            # byte count
 285 5:      andl     $3,%ecx              # suffix byte count
 286         jz       7f
 287         subl     %esi,%edi            # %edi = to - from
 288         addl     $3,%esi              # highest byte not copied yet
 289 6:      movb     (%esi),%dl
 290         movb     %dl,(%edi,%esi,1)
 291         subl     $1,%esi
 292         subl     $1,%ecx
 293         jnz      6b
 294 7:      cld                           # restore the direction flag set by std
 295         popl     %edi
 296         popl     %esi
 297         ret
 298 
 299         # Support for void Copy::conjoint_jshorts_atomic(void* from,
 300         #                                                void* to,
 301         #                                                size_t count)
             #
             # Copies count 16-bit elements between regions that may overlap.
             # Data is moved only with 16-bit (movw) and 32-bit (movl, smovl)
             # instructions, never byte by byte.  On entry from, to and count sit
             # at 4(%esp), 8(%esp) and 12(%esp).  %esi and %edi are saved and
             # restored; %eax, %ecx, %edx and the flags are overwritten.
             # The upward path (cs_CopyRight) is taken when to <= from or to lies
             # above the last source element; otherwise cs_CopyLeft copies
             # downward with the direction flag set and clears it on exit.
 302         .p2align 4,,15
 303         ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
 304 SYMBOL(_Copy_conjoint_jshorts_atomic):
 305         pushl    %esi
 306         movl     4+12(%esp),%ecx      # count
 307         pushl    %edi
 308         movl     8+ 4(%esp),%esi      # from
 309         movl     8+ 8(%esp),%edi      # to
 310         cmpl     %esi,%edi            # to vs from; leal below leaves the flags alone
 311         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
 312         jbe      cs_CopyRight         # to <= from: copy upward
 313         cmpl     %eax,%edi
 314         jbe      cs_CopyLeft          # to <= last source element: overlap, copy downward
 315         # copy from low to high
 316 cs_CopyRight:
 317         # align source address at dword address boundary
 318         movl     %esi,%eax            # original from
 319         andl     $3,%eax              # either 0 or 2
 320         jz       1f                   # no prefix
 321         # copy prefix
 322         subl     $1,%ecx
 323         jl       5f                   # zero count
 324         movw     (%esi),%dx
 325         movw     %dx,(%edi)
 326         addl     %eax,%esi            # %eax == 2
 327         addl     %eax,%edi
 328 1:      movl     %ecx,%eax            # word count less prefix
 329         sarl     %ecx                 # dword count
 330         jz       4f                   # no dwords to move
 331         cmpl     $32,%ecx
 332         jbe      2f                   # <= 32 dwords
 333         # copy aligned dwords
 334         rep;     smovl
 335         jmp      4f 
 336         # copy aligned dwords
 337 2:      subl     %esi,%edi            # %edi = to - from for the indexed stores below
 338         .p2align 4,,15
 339 3:      movl     (%esi),%edx
 340         movl     %edx,(%edi,%esi,1)
 341         addl     $4,%esi
 342         subl     $1,%ecx
 343         jnz      3b
 344         addl     %esi,%edi            # turn %edi back into a destination pointer
 345 4:      andl     $1,%eax              # suffix count
 346         jz       5f                   # no suffix
 347         # copy suffix
 348         movw     (%esi),%dx
 349         movw     %dx,(%edi)
 350 5:      popl     %edi
 351         popl     %esi
 352         ret
 353         # copy from high to low
 354 cs_CopyLeft:
 355         std                           # string moves now step downward; undone by cld at 5
 356         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
 357         movl     %eax,%esi            # from + count*2 - 2
 358         movl     %ecx,%eax            # keep the element count for the suffix test
 359         subl     $2,%esi              # from + count*2 - 4
 360 1:      sarl     %ecx                 # dword count
 361         jz       4f                   # no dwords to move
 362         cmpl     $32,%ecx
 363         ja       3f                   # > 32 dwords
 364         subl     %esi,%edi            # %edi = to - from for the indexed stores below
 365         .p2align 4,,15
 366 2:      movl     (%esi),%edx
 367         movl     %edx,(%edi,%esi,1)
 368         subl     $4,%esi
 369         subl     $1,%ecx
 370         jnz      2b
 371         addl     %esi,%edi            # turn %edi back into a destination pointer
 372         jmp      4f
 373 3:      rep;     smovl
 374 4:      andl     $1,%eax              # suffix count
 375         jz       5f                   # no suffix
 376         # copy suffix
             # both pointers sit one dword below the remaining element; step up by 2
 377         addl     $2,%esi
 378         addl     $2,%edi
 379         movw     (%esi),%dx
 380         movw     %dx,(%edi)
 381 5:      cld                           # restore the direction flag set by std
 382         popl     %edi
 383         popl     %esi
 384         ret
 385 
 386         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 387         #                                                 void* to,
 388         #                                                 size_t count)
             #
             # Copies count 16-bit elements between regions that may overlap,
             # without the source alignment prefix used by
             # _Copy_conjoint_jshorts_atomic.  On entry from, to and count sit at
             # 4(%esp), 8(%esp) and 12(%esp).  %esi and %edi are saved and
             # restored; %eax, %ecx, %edx and the flags are overwritten.
             # Pairs of elements move as dwords; an odd last element moves as
             # one word.  acs_CopyLeft copies downward with the direction flag
             # set and clears it on exit.
 389         .p2align 4,,15
 390         ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
 391 SYMBOL(_Copy_arrayof_conjoint_jshorts):
 392         pushl    %esi
 393         movl     4+12(%esp),%ecx      # count
 394         pushl    %edi
 395         movl     8+ 4(%esp),%esi      # from
 396         movl     8+ 8(%esp),%edi      # to
 397         cmpl     %esi,%edi            # to vs from; leal below leaves the flags alone
 398         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
 399         jbe      acs_CopyRight        # to <= from: copy upward
 400         cmpl     %eax,%edi
 401         jbe      acs_CopyLeft         # to <= last source element: overlap, copy downward
             # copy from low to high
 402 acs_CopyRight:
 403         movl     %ecx,%eax            # word count
 404         sarl     %ecx                 # dword count
 405         jz       4f                   # no dwords to move
 406         cmpl     $32,%ecx
 407         jbe      2f                   # <= 32 dwords
 408         # copy aligned dwords
 409         rep;     smovl
 410         jmp      4f 
 411         # copy aligned dwords
 412         .=.+5                         # skips 5 bytes, never executed (follows the jmp); reason not stated - presumably placement tuning
 413 2:      subl     %esi,%edi            # %edi = to - from for the indexed stores below
 414         .p2align 4,,15
 415 3:      movl     (%esi),%edx
 416         movl     %edx,(%edi,%esi,1)
 417         addl     $4,%esi
 418         subl     $1,%ecx
 419         jnz      3b
 420         addl     %esi,%edi            # turn %edi back into a destination pointer
 421 4:      andl     $1,%eax              # suffix count
 422         jz       5f                   # no suffix
 423         # copy suffix
 424         movw     (%esi),%dx
 425         movw     %dx,(%edi)
 426 5:      popl     %edi
 427         popl     %esi
 428         ret
             # copy from high to low
 429 acs_CopyLeft:
 430         std                           # string moves now step downward; undone by cld at 5
 431         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
 432         movl     %eax,%esi            # from + count*2 - 2
 433         movl     %ecx,%eax            # keep the element count for the suffix test
 434         subl     $2,%esi              # from + count*2 - 4
 435         sarl     %ecx                 # dword count
 436         jz       4f                   # no dwords to move
 437         cmpl     $32,%ecx
 438         ja       3f                   # > 32 dwords
 439         subl     %esi,%edi            # %edi = to - from for the indexed stores below
 440         .p2align 4,,15
 441 2:      movl     (%esi),%edx
 442         movl     %edx,(%edi,%esi,1)
 443         subl     $4,%esi
 444         subl     $1,%ecx
 445         jnz      2b
 446         addl     %esi,%edi            # turn %edi back into a destination pointer
 447         jmp      4f
 448 3:      rep;     smovl
 449 4:      andl     $1,%eax              # suffix count
 450         jz       5f                   # no suffix
 451         # copy suffix
             # both pointers sit one dword below the remaining element; step up by 2
 452         addl     $2,%esi
 453         addl     $2,%edi
 454         movw     (%esi),%dx
 455         movw     %dx,(%edi)
 456 5:      cld                           # restore the direction flag set by std
 457         popl     %edi
 458         popl     %esi
 459         ret
 460 
 461         # Support for void Copy::conjoint_jints_atomic(void* from,
 462         #                                              void* to,
 463         #                                              size_t count)
 464         # Equivalent to
 465         #   arrayof_conjoint_jints
             #
             # One body serves both exported names.  Copies count 32-bit elements
             # between regions that may overlap, one dword per move.  On entry
             # from, to and count sit at 4(%esp), 8(%esp) and 12(%esp).  %esi and
             # %edi are saved and restored; %eax, %ecx, %edx and the flags are
             # overwritten.  Counts above 32 use the string move; smaller counts
             # use an explicit loop that tests the count before the first move,
             # so a zero count copies nothing.
 466         .p2align 4,,15
 467         ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
 468         ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
 469 SYMBOL(_Copy_conjoint_jints_atomic):
 470 SYMBOL(_Copy_arrayof_conjoint_jints):
 471         pushl    %esi
 472         movl     4+12(%esp),%ecx      # count
 473         pushl    %edi
 474         movl     8+ 4(%esp),%esi      # from
 475         movl     8+ 8(%esp),%edi      # to
 476         cmpl     %esi,%edi            # to vs from; leal below leaves the flags alone
 477         leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
 478         jbe      ci_CopyRight         # to <= from: copy upward
 479         cmpl     %eax,%edi
 480         jbe      ci_CopyLeft          # to <= last source element: overlap, copy downward
             # copy from low to high
 481 ci_CopyRight:
 482         cmpl     $32,%ecx
 483         jbe      2f                   # <= 32 dwords
 484         rep;     smovl
 485         popl     %edi
 486         popl     %esi
 487         ret
 488         .=.+10                        # skips 10 bytes, never executed (follows the ret); reason not stated - presumably placement tuning
 489 2:      subl     %esi,%edi            # %edi = to - from for the indexed stores below
 490         jmp      4f                   # enter the loop at its count test
 491         .p2align 4,,15
 492 3:      movl     (%esi),%edx
 493         movl     %edx,(%edi,%esi,1)
 494         addl     $4,%esi
 495 4:      subl     $1,%ecx
 496         jge      3b                   # signed test: loop while the count was still above zero
 497         popl     %edi
 498         popl     %esi
 499         ret
             # copy from high to low
 500 ci_CopyLeft:
 501         std                           # string moves now step downward; both exits below run cld
 502         leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
 503         cmpl     $32,%ecx
 504         ja       4f                   # > 32 dwords
 505         subl     %eax,%edi            # eax == from + count*4 - 4
 506         jmp      3f                   # enter the loop at its count test
 507         .p2align 4,,15
 508 2:      movl     (%eax),%edx          # %eax walks the source downward; %esi is not used here
 509         movl     %edx,(%edi,%eax,1)
 510         subl     $4,%eax
 511 3:      subl     $1,%ecx
 512         jge      2b
 513         cld                           # restore the direction flag set by std
 514         popl     %edi
 515         popl     %esi
 516         ret
 517 4:      movl     %eax,%esi            # from + count*4 - 4
 518         rep;     smovl
 519         cld                           # restore the direction flag set by std
 520         popl     %edi
 521         popl     %esi
 522         ret
 523         
 524         # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
 525         #                                               jlong* to,
 526         #                                               size_t count)
 527         #
 528         # 32-bit
 529         #
 530         # count treated as signed
 531         #
 532         # // if (from > to) {
 533         #   while (--count >= 0) {
 534         #     *to++ = *from++;
 535         #   }
 536         # } else {
 537         #   while (--count >= 0) {
 538         #     to[count] = from[count];
 539         #   }
 540         # }
             #
             # Each element is moved by one 64-bit x87 integer load (fildll)
             # followed by one 64-bit store-and-pop (fistpll), so every load
             # pushed on the x87 stack is popped again by its store.
             # Nothing is pushed on the CPU stack, so from, to and count are
             # read at 4(%esp), 8(%esp) and 12(%esp).  Overwrites %eax, %ecx,
             # %edx and the flags.
 541         .p2align 4,,15
 542         ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
 543 SYMBOL(_Copy_conjoint_jlongs_atomic):
 544         movl     4+8(%esp),%ecx       # count
 545         movl     4+0(%esp),%eax       # from
 546         movl     4+4(%esp),%edx       # to
 547         cmpl     %eax,%edx
 548         jae      cla_CopyLeft         # to >= from: copy from the highest index down
 549 cla_CopyRight:
 550         subl     %eax,%edx            # %edx = to - from, so (%edx,%eax) addresses the destination
 551         jmp      2f                   # enter the loop at its count test
 552         .p2align 4,,15
 553 1:      fildll   (%eax)
 554         fistpll  (%edx,%eax,1)
 555         addl     $8,%eax
 556 2:      subl     $1,%ecx
 557         jge      1b                   # signed test, matching the pseudocode above
 558         ret
 559         .p2align 4,,15
 560 3:      fildll   (%eax,%ecx,8)        # %ecx is the element index after the decrement
 561         fistpll  (%edx,%ecx,8)
 562 cla_CopyLeft:
 563         subl     $1,%ecx
 564         jge      3b
 565         ret
 566 
 567         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 568         #                                                 void* to,
 569         #                                                 size_t count)
             #
             # MMX variant, exported as _mmx_Copy_arrayof_conjoint_jshorts.
             # On entry from, to and count sit at 4(%esp), 8(%esp) and 12(%esp).
             # %esi and %edi are saved and restored; %eax, %ecx, %edx, the flags
             # and %mm0-%mm2 are overwritten.
             #
             # Upward path: up to 32 dwords use the plain dword loop.  With 33
             # or more, one dword is moved first, then 64-byte chunks go through
             # %mm0-%mm2 while at least 16 dwords remain, emms is executed, and
             # any leftover dwords go back through the dword loop.  An odd last
             # element moves as one word.
             # Downward path (mmx_acs_CopyLeft): uses no MMX instructions; it
             # copies with the direction flag set and clears it on exit.
 570         .p2align 4,,15
 571         ELF_TYPE(_mmx_Copy_arrayof_conjoint_jshorts,@function)
 572 SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts):
 573         pushl    %esi
 574         movl     4+12(%esp),%ecx      # count
 575         pushl    %edi
 576         movl     8+ 4(%esp),%esi      # from
 577         movl     8+ 8(%esp),%edi      # to
 578         cmpl     %esi,%edi            # to vs from; leal below leaves the flags alone
 579         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
 580         jbe      mmx_acs_CopyRight    # to <= from: copy upward
 581         cmpl     %eax,%edi
 582         jbe      mmx_acs_CopyLeft     # to <= last source element: overlap, copy downward
 583 mmx_acs_CopyRight:
 584         movl     %ecx,%eax            # word count, kept for the suffix test
 585         sarl     %ecx                 # dword count
 586         je       5f                   # no dwords to move
 587         cmpl     $33,%ecx
 588         jae      3f                   # >= 33 dwords: MMX path
 589 1:      subl     %esi,%edi            # %edi = to - from for the indexed stores below
 590         .p2align 4,,15
 591 2:      movl     (%esi),%edx
 592         movl     %edx,(%edi,%esi,1)
 593         addl     $4,%esi
 594         subl     $1,%ecx
 595         jnz      2b
 596         addl     %esi,%edi            # turn %edi back into a destination pointer
 597         jmp      5f 
 598 3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
 599         subl     $1,%ecx              # account for the dword just moved
 600 4:      .p2align 4,,15
 601         movq     0(%esi),%mm0
 602         addl     $64,%edi             # advance early; the stores below use negative offsets
 603         movq     8(%esi),%mm1
 604         subl     $16,%ecx             # 16 dwords are consumed per iteration
 605         movq     16(%esi),%mm2
 606         movq     %mm0,-64(%edi)
 607         movq     24(%esi),%mm0
 608         movq     %mm1,-56(%edi)
 609         movq     32(%esi),%mm1
 610         movq     %mm2,-48(%edi)
 611         movq     40(%esi),%mm2
 612         movq     %mm0,-40(%edi)
 613         movq     48(%esi),%mm0
 614         movq     %mm1,-32(%edi)
 615         movq     56(%esi),%mm1
 616         movq     %mm2,-24(%edi)
 617         movq     %mm0,-16(%edi)
 618         addl     $64,%esi
 619         movq     %mm1,-8(%edi)
 620         cmpl     $16,%ecx
 621         jge      4b                   # another full 64-byte chunk remains
 622         emms                          # leave MMX state before returning to x87 users
 623         testl    %ecx,%ecx
 624         ja       1b                   # testl clears CF, so this branches when %ecx is non-zero
 625 5:      andl     $1,%eax              # suffix count
 626         je       7f                   # no suffix
 627 6:      movw     (%esi),%dx
 628         movw     %dx,(%edi)
 629 7:      popl     %edi
 630         popl     %esi
 631         ret
 632 mmx_acs_CopyLeft:
 633         std                           # string moves now step downward; undone by cld at 6
 634         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
 635         movl     %eax,%esi            # from + count*2 - 2
 636         movl     %ecx,%eax            # keep the element count for the suffix test
 637         subl     $2,%esi              # from + count*2 - 4
 638         sarl     %ecx                 # dword count
 639         je       4f                   # no dwords to move
 640         cmpl     $32,%ecx
 641         ja       3f                   # > 32 dwords
 642         subl     %esi,%edi            # %edi = to - from for the indexed stores below
 643         .p2align 4,,15
 644 2:      movl     (%esi),%edx
 645         movl     %edx,(%edi,%esi,1)
 646         subl     $4,%esi
 647         subl     $1,%ecx
 648         jnz      2b
 649         addl     %esi,%edi            # turn %edi back into a destination pointer
 650         jmp      4f
 651 3:      rep;     smovl
 652 4:      andl     $1,%eax              # suffix count
 653         je       6f                   # no suffix
 654         addl     $2,%esi              # both pointers sit one dword below the remaining element
 655         addl     $2,%edi
 656 5:      movw     (%esi),%dx
 657         movw     %dx,(%edi)
 658 6:      cld                           # restore the direction flag set by std
 659         popl     %edi
 660         popl     %esi
 661         ret
 662 
 663 
 664         # Support for jlong Atomic::cmpxchg(jlong exchange_value,
 665         #                                   volatile jlong* dest,
 666         #                                   jlong compare_value,
 667         #                                   bool is_MP)
 668         #
             # Performs a 64-bit compare-and-exchange on the location dest with
             # cmpxchg8b and returns in %edx:%eax whatever the instruction leaves
             # there: the compare value when the exchange happened, otherwise
             # the value found at dest.  The lock prefix is applied only when
             # is_MP is non-zero.  %ebx and %edi are saved and restored; %ecx
             # and the flags are overwritten.  The offsets in the comments below
             # are relative to %esp after both pushes.
 669         .p2align 4,,15
 670         ELF_TYPE(_Atomic_cmpxchg_long,@function)
 671 SYMBOL(_Atomic_cmpxchg_long):
 672                                    #  8(%esp) : return PC
 673         pushl    %ebx              #  4(%esp) : old %ebx
 674         pushl    %edi              #  0(%esp) : old %edi
 675         movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
 676         movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
 677         movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
 678         movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
 679         movl     20(%esp), %edi    # 20(%esp) : dest
 680         cmpl     $0, 32(%esp)      # 32(%esp) : is_MP
 681         je       1f                # is_MP == 0: enter just past the lock prefix
 682         lock                       # prefix for the cmpxchg8b that follows
 683 1:      cmpxchg8b (%edi)
 684         popl     %edi
 685         popl     %ebx
 686         ret
 687 
 688 
 689         # Support for jlong Atomic::load and Atomic::store.
 690         # void _Atomic_move_long(volatile jlong* src, volatile jlong* dst)
             #
             # Moves one 64-bit value from src to dst using a single x87 integer
             # load (fildll) and a single store-and-pop (fistpll), so the value
             # pushed on the x87 stack is popped again.  src and dst are read
             # at 4(%esp) and 8(%esp).  Overwrites %eax.
 691         .p2align 4,,15
 692         ELF_TYPE(_Atomic_move_long,@function)
 693 SYMBOL(_Atomic_move_long):
 694         movl     4(%esp), %eax   # src
 695         fildll    (%eax)
 696         movl     8(%esp), %eax   # dest
 697         fistpll   (%eax)
 698         ret
 699