1 # 
   2 # Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved.
   3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 #
   5 # This code is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License version 2 only, as
   7 # published by the Free Software Foundation.
   8 #
   9 # This code is distributed in the hope that it will be useful, but WITHOUT
  10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 # version 2 for more details (a copy is included in the LICENSE file that
  13 # accompanied this code).
  14 #
  15 # You should have received a copy of the GNU General Public License version
  16 # 2 along with this work; if not, write to the Free Software Foundation,
  17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 #
  19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 # or visit www.oracle.com if you need additional information or have any
  21 # questions.
  22 # 
  23 
  24         
  25         # NOTE WELL!  The _Copy functions are called directly
  26         # from server-compiler-generated code via CallLeafNoFP,
  27         # which means that they *must* either not use floating
  28         # point or use it in the same manner as does the server
  29         # compiler.
  30         
  31         .globl _Copy_conjoint_bytes
  32         .globl _Copy_arrayof_conjoint_bytes
  33         .globl _Copy_conjoint_jshorts_atomic
  34         .globl _Copy_arrayof_conjoint_jshorts
  35         .globl _Copy_conjoint_jints_atomic
  36         .globl _Copy_arrayof_conjoint_jints
  37         .globl _Copy_conjoint_jlongs_atomic
  38         .globl _mmx_Copy_arrayof_conjoint_jshorts
  39 
  40         .globl _Atomic_cmpxchg_long
  41         .globl _Atomic_move_long
  42 
  43         .text
  44 
  45         .globl  SpinPause
  46         .type   SpinPause,@function
  47         .p2align 4,,15
  48 SpinPause:
  49         rep
  50         nop
  51         movl    $1, %eax
  52         ret
  53 
  54         # Support for void Copy::conjoint_bytes(void* from,
  55         #                                       void* to,
  56         #                                       size_t count)
  57         .p2align 4,,15
  58         .type    _Copy_conjoint_bytes,@function
  59 _Copy_conjoint_bytes:
  60         pushl    %esi
  61         movl     4+12(%esp),%ecx      # count
  62         pushl    %edi
  63         movl     8+ 4(%esp),%esi      # from
  64         movl     8+ 8(%esp),%edi      # to
  65         cmpl     %esi,%edi
  66         leal     -1(%esi,%ecx),%eax   # from + count - 1
  67         jbe      cb_CopyRight
  68         cmpl     %eax,%edi
  69         jbe      cb_CopyLeft
  70         # copy from low to high
  71 cb_CopyRight:
  72         cmpl     $3,%ecx
  73         jbe      5f                   # <= 3 bytes
  74         # align source address at dword address boundary
  75         movl     %ecx,%eax            # original count
  76         movl     $4,%ecx
  77         subl     %esi,%ecx
  78         andl     $3,%ecx              # prefix byte count
  79         jz       1f                   # no prefix
  80         subl     %ecx,%eax            # byte count less prefix
  81         # copy prefix
  82         subl     %esi,%edi
  83 0:      movb     (%esi),%dl
  84         movb     %dl,(%edi,%esi,1)
  85         addl     $1,%esi
  86         subl     $1,%ecx
  87         jnz      0b
  88         addl     %esi,%edi
  89 1:      movl     %eax,%ecx            # byte count less prefix
  90         shrl     $2,%ecx              # dword count
  91         jz       4f                   # no dwords to move
  92         cmpl     $32,%ecx
  93         jbe      2f                   # <= 32 dwords
  94         # copy aligned dwords
  95         rep;     smovl
  96         jmp      4f
  97         # copy aligned dwords
  98 2:      subl     %esi,%edi
  99         .p2align 4,,15
 100 3:      movl     (%esi),%edx
 101         movl     %edx,(%edi,%esi,1)
 102         addl     $4,%esi
 103         subl     $1,%ecx
 104         jnz      3b
 105         addl     %esi,%edi
 106 4:      movl     %eax,%ecx            # byte count less prefix
 107 5:      andl     $3,%ecx              # suffix byte count
 108         jz       7f                   # no suffix
 109         # copy suffix
 110         xorl     %eax,%eax
 111 6:      movb     (%esi,%eax,1),%dl
 112         movb     %dl,(%edi,%eax,1)
 113         addl     $1,%eax
 114         subl     $1,%ecx
 115         jnz      6b
 116 7:      popl     %edi
 117         popl     %esi
 118         ret
 119         # copy from high to low
 120 cb_CopyLeft:
 121         std
 122         leal     -4(%edi,%ecx),%edi   # to + count - 4
 123         movl     %eax,%esi            # from + count - 1
 124         movl     %ecx,%eax
 125         subl     $3,%esi              # from + count - 4
 126         cmpl     $3,%ecx
 127         jbe      5f                   # <= 3 bytes
 128 1:      shrl     $2,%ecx              # dword count
 129         jz       4f                   # no dwords to move
 130         cmpl     $32,%ecx
 131         ja       3f                   # > 32 dwords
 132         # copy dwords, aligned or not
 133         subl     %esi,%edi
 134         .p2align 4,,15
 135 2:      movl     (%esi),%edx
 136         movl     %edx,(%edi,%esi,1)
 137         subl     $4,%esi
 138         subl     $1,%ecx
 139         jnz      2b
 140         addl     %esi,%edi
 141         jmp      4f
 142         # copy dwords, aligned or not
 143 3:      rep;     smovl
 144 4:      movl     %eax,%ecx            # byte count
 145 5:      andl     $3,%ecx              # suffix byte count
 146         jz       7f                   # no suffix
 147         # copy suffix
 148         subl     %esi,%edi
 149         addl     $3,%esi
 150 6:      movb     (%esi),%dl
 151         movb     %dl,(%edi,%esi,1)
 152         subl     $1,%esi
 153         subl     $1,%ecx
 154         jnz      6b
 155 7:      cld
 156         popl     %edi
 157         popl     %esi
 158         ret
 159 
 160         # Support for void Copy::arrayof_conjoint_bytes(void* from,
 161         #                                               void* to,
 162         #                                               size_t count)
 163         #
 164         # Same as _Copy_conjoint_bytes, except no source alignment check.
 165         .p2align 4,,15
 166         .type    _Copy_arrayof_conjoint_bytes,@function
 167 _Copy_arrayof_conjoint_bytes:
 168         pushl    %esi
 169         movl     4+12(%esp),%ecx      # count
 170         pushl    %edi
 171         movl     8+ 4(%esp),%esi      # from
 172         movl     8+ 8(%esp),%edi      # to
 173         cmpl     %esi,%edi
 174         leal     -1(%esi,%ecx),%eax   # from + count - 1
 175         jbe      acb_CopyRight
 176         cmpl     %eax,%edi
 177         jbe      acb_CopyLeft 
 178         # copy from low to high
 179 acb_CopyRight:
 180         cmpl     $3,%ecx
 181         jbe      5f
 182 1:      movl     %ecx,%eax
 183         shrl     $2,%ecx
 184         jz       4f
 185         cmpl     $32,%ecx
 186         ja       3f
 187         # copy aligned dwords
 188         subl     %esi,%edi
 189         .p2align 4,,15
 190 2:      movl     (%esi),%edx
 191         movl     %edx,(%edi,%esi,1)
 192         addl     $4,%esi
 193         subl     $1,%ecx
 194         jnz      2b
 195         addl     %esi,%edi
 196         jmp      4f
 197         # copy aligned dwords
 198 3:      rep;     smovl
 199 4:      movl     %eax,%ecx
 200 5:      andl     $3,%ecx
 201         jz       7f
 202         # copy suffix
 203         xorl     %eax,%eax
 204 6:      movb     (%esi,%eax,1),%dl
 205         movb     %dl,(%edi,%eax,1)
 206         addl     $1,%eax
 207         subl     $1,%ecx
 208         jnz      6b
 209 7:      popl     %edi
 210         popl     %esi
 211         ret
 212 acb_CopyLeft:
 213         std
 214         leal     -4(%edi,%ecx),%edi   # to + count - 4
 215         movl     %eax,%esi            # from + count - 1
 216         movl     %ecx,%eax
 217         subl     $3,%esi              # from + count - 4
 218         cmpl     $3,%ecx
 219         jbe      5f
 220 1:      shrl     $2,%ecx
 221         jz       4f
 222         cmpl     $32,%ecx
 223         jbe      2f                   # <= 32 dwords
 224         rep;     smovl
 225         jmp      4f
 226         .space 8
 227 2:      subl     %esi,%edi
 228         .p2align 4,,15
 229 3:      movl     (%esi),%edx
 230         movl     %edx,(%edi,%esi,1)
 231         subl     $4,%esi
 232         subl     $1,%ecx
 233         jnz      3b
 234         addl     %esi,%edi
 235 4:      movl     %eax,%ecx
 236 5:      andl     $3,%ecx
 237         jz       7f
 238         subl     %esi,%edi
 239         addl     $3,%esi
 240 6:      movb     (%esi),%dl
 241         movb     %dl,(%edi,%esi,1)
 242         subl     $1,%esi
 243         subl     $1,%ecx
 244         jnz      6b
 245 7:      cld
 246         popl     %edi
 247         popl     %esi
 248         ret
 249 
 250         # Support for void Copy::conjoint_jshorts_atomic(void* from,
 251         #                                                void* to,
 252         #                                                size_t count)
 253         .p2align 4,,15
 254         .type    _Copy_conjoint_jshorts_atomic,@function
 255 _Copy_conjoint_jshorts_atomic:
 256         pushl    %esi
 257         movl     4+12(%esp),%ecx      # count
 258         pushl    %edi
 259         movl     8+ 4(%esp),%esi      # from
 260         movl     8+ 8(%esp),%edi      # to
 261         cmpl     %esi,%edi
 262         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
 263         jbe      cs_CopyRight
 264         cmpl     %eax,%edi
 265         jbe      cs_CopyLeft 
 266         # copy from low to high
 267 cs_CopyRight:
 268         # align source address at dword address boundary
 269         movl     %esi,%eax            # original from
 270         andl     $3,%eax              # either 0 or 2
 271         jz       1f                   # no prefix
 272         # copy prefix
 273         subl     $1,%ecx
 274         jl       5f                   # zero count
 275         movw     (%esi),%dx
 276         movw     %dx,(%edi)
 277         addl     %eax,%esi            # %eax == 2
 278         addl     %eax,%edi
 279 1:      movl     %ecx,%eax            # word count less prefix
 280         sarl     %ecx                 # dword count
 281         jz       4f                   # no dwords to move
 282         cmpl     $32,%ecx
 283         jbe      2f                   # <= 32 dwords
 284         # copy aligned dwords
 285         rep;     smovl
 286         jmp      4f 
 287         # copy aligned dwords
 288 2:      subl     %esi,%edi
 289         .p2align 4,,15
 290 3:      movl     (%esi),%edx
 291         movl     %edx,(%edi,%esi,1)
 292         addl     $4,%esi
 293         subl     $1,%ecx
 294         jnz      3b
 295         addl     %esi,%edi
 296 4:      andl     $1,%eax              # suffix count
 297         jz       5f                   # no suffix
 298         # copy suffix
 299         movw     (%esi),%dx
 300         movw     %dx,(%edi)
 301 5:      popl     %edi
 302         popl     %esi
 303         ret
 304         # copy from high to low
 305 cs_CopyLeft:
 306         std
 307         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
 308         movl     %eax,%esi            # from + count*2 - 2
 309         movl     %ecx,%eax
 310         subl     $2,%esi              # from + count*2 - 4
 311 1:      sarl     %ecx                 # dword count
 312         jz       4f                   # no dwords to move
 313         cmpl     $32,%ecx
 314         ja       3f                   # > 32 dwords
 315         subl     %esi,%edi
 316         .p2align 4,,15
 317 2:      movl     (%esi),%edx
 318         movl     %edx,(%edi,%esi,1)
 319         subl     $4,%esi
 320         subl     $1,%ecx
 321         jnz      2b
 322         addl     %esi,%edi
 323         jmp      4f
 324 3:      rep;     smovl
 325 4:      andl     $1,%eax              # suffix count
 326         jz       5f                   # no suffix
 327         # copy suffix
 328         addl     $2,%esi
 329         addl     $2,%edi
 330         movw     (%esi),%dx
 331         movw     %dx,(%edi)
 332 5:      cld
 333         popl     %edi
 334         popl     %esi
 335         ret
 336 
 337         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 338         #                                                 void* to,
 339         #                                                 size_t count)
 340         .p2align 4,,15
 341         .type    _Copy_arrayof_conjoint_jshorts,@function
 342 _Copy_arrayof_conjoint_jshorts:
 343         pushl    %esi
 344         movl     4+12(%esp),%ecx      # count
 345         pushl    %edi
 346         movl     8+ 4(%esp),%esi      # from
 347         movl     8+ 8(%esp),%edi      # to
 348         cmpl     %esi,%edi
 349         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
 350         jbe      acs_CopyRight
 351         cmpl     %eax,%edi
 352         jbe      acs_CopyLeft 
 353 acs_CopyRight:
 354         movl     %ecx,%eax            # word count
 355         sarl     %ecx                 # dword count
 356         jz       4f                   # no dwords to move
 357         cmpl     $32,%ecx
 358         jbe      2f                   # <= 32 dwords
 359         # copy aligned dwords
 360         rep;     smovl
 361         jmp      4f 
 362         # copy aligned dwords
 363         .space 5
 364 2:      subl     %esi,%edi 
 365         .p2align 4,,15
 366 3:      movl     (%esi),%edx
 367         movl     %edx,(%edi,%esi,1)
 368         addl     $4,%esi
 369         subl     $1,%ecx
 370         jnz      3b
 371         addl     %esi,%edi
 372 4:      andl     $1,%eax              # suffix count
 373         jz       5f                   # no suffix
 374         # copy suffix
 375         movw     (%esi),%dx
 376         movw     %dx,(%edi)
 377 5:      popl     %edi
 378         popl     %esi
 379         ret
 380 acs_CopyLeft:
 381         std
 382         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
 383         movl     %eax,%esi            # from + count*2 - 2
 384         movl     %ecx,%eax
 385         subl     $2,%esi              # from + count*2 - 4
 386         sarl     %ecx                 # dword count
 387         jz       4f                   # no dwords to move
 388         cmpl     $32,%ecx
 389         ja       3f                   # > 32 dwords
 390         subl     %esi,%edi
 391         .p2align 4,,15
 392 2:      movl     (%esi),%edx
 393         movl     %edx,(%edi,%esi,1)
 394         subl     $4,%esi
 395         subl     $1,%ecx
 396         jnz      2b
 397         addl     %esi,%edi
 398         jmp      4f
 399 3:      rep;     smovl
 400 4:      andl     $1,%eax              # suffix count
 401         jz       5f                   # no suffix
 402         # copy suffix
 403         addl     $2,%esi
 404         addl     $2,%edi
 405         movw     (%esi),%dx
 406         movw     %dx,(%edi)
 407 5:      cld
 408         popl     %edi
 409         popl     %esi
 410         ret
 411 
 412         # Support for void Copy::conjoint_jints_atomic(void* from,
 413         #                                              void* to,
 414         #                                              size_t count)
 415         # Equivalent to
 416         #   arrayof_conjoint_jints
 417         .p2align 4,,15
 418         .type    _Copy_conjoint_jints_atomic,@function
 419         .type    _Copy_arrayof_conjoint_jints,@function
 420 _Copy_conjoint_jints_atomic:
 421 _Copy_arrayof_conjoint_jints:
 422         pushl    %esi
 423         movl     4+12(%esp),%ecx      # count
 424         pushl    %edi
 425         movl     8+ 4(%esp),%esi      # from
 426         movl     8+ 8(%esp),%edi      # to
 427         cmpl     %esi,%edi
 428         leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
 429         jbe      ci_CopyRight
 430         cmpl     %eax,%edi
 431         jbe      ci_CopyLeft 
 432 ci_CopyRight:
 433         cmpl     $32,%ecx
 434         jbe      2f                   # <= 32 dwords
 435         rep;     smovl
 436         popl     %edi
 437         popl     %esi
 438         ret
 439         .space 10
 440 2:      subl     %esi,%edi
 441         jmp      4f
 442         .p2align 4,,15
 443 3:      movl     (%esi),%edx
 444         movl     %edx,(%edi,%esi,1)
 445         addl     $4,%esi
 446 4:      subl     $1,%ecx
 447         jge      3b
 448         popl     %edi
 449         popl     %esi
 450         ret
 451 ci_CopyLeft:
 452         std
 453         leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
 454         cmpl     $32,%ecx
 455         ja       4f                   # > 32 dwords
 456         subl     %eax,%edi            # eax == from + count*4 - 4
 457         jmp      3f
 458         .p2align 4,,15
 459 2:      movl     (%eax),%edx
 460         movl     %edx,(%edi,%eax,1)
 461         subl     $4,%eax
 462 3:      subl     $1,%ecx
 463         jge      2b
 464         cld
 465         popl     %edi
 466         popl     %esi
 467         ret
 468 4:      movl     %eax,%esi            # from + count*4 - 4
 469         rep;     smovl
 470         cld
 471         popl     %edi
 472         popl     %esi
 473         ret
 474         
 475         # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
 476         #                                               jlong* to,
 477         #                                               size_t count)
 478         #
 479         # 32-bit
 480         #
 481         # count treated as signed
 482         #
 483         # if (from > to) {
 484         #   while (--count >= 0) {
 485         #     *to++ = *from++;
 486         #   }
 487         # } else {
 488         #   while (--count >= 0) {
 489         #     to[count] = from[count];
 490         #   }
 491         # }
 492         .p2align 4,,15
 493         .type    _Copy_conjoint_jlongs_atomic,@function
 494 _Copy_conjoint_jlongs_atomic:
 495         movl     4+8(%esp),%ecx       # count
 496         movl     4+0(%esp),%eax       # from
 497         movl     4+4(%esp),%edx       # to
 498         cmpl     %eax,%edx
 499         jae      cla_CopyLeft
 500 cla_CopyRight:
 501         subl     %eax,%edx
 502         jmp      2f
 503         .p2align 4,,15
 504 1:      fildll   (%eax)
 505         fistpll  (%edx,%eax,1)
 506         addl     $8,%eax
 507 2:      subl     $1,%ecx
 508         jge      1b
 509         ret
 510         .p2align 4,,15
 511 3:      fildll   (%eax,%ecx,8)
 512         fistpll  (%edx,%ecx,8)
 513 cla_CopyLeft:
 514         subl     $1,%ecx
 515         jge      3b
 516         ret
 517 
 518         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 519         #                                                 void* to,
 520         #                                                 size_t count)
 521         .p2align 4,,15
 522         .type    _mmx_Copy_arrayof_conjoint_jshorts,@function
 523 _mmx_Copy_arrayof_conjoint_jshorts:
 524         pushl    %esi
 525         movl     4+12(%esp),%ecx
 526         pushl    %edi
 527         movl     8+ 4(%esp),%esi
 528         movl     8+ 8(%esp),%edi
 529         cmpl     %esi,%edi
 530         leal     -2(%esi,%ecx,2),%eax
 531         jbe      mmx_acs_CopyRight
 532         cmpl     %eax,%edi
 533         jbe      mmx_acs_CopyLeft
 534 mmx_acs_CopyRight:
 535         movl     %ecx,%eax
 536         sarl     %ecx
 537         je       5f
 538         cmpl     $33,%ecx
 539         jae      3f
 540 1:      subl     %esi,%edi 
 541         .p2align 4,,15
 542 2:      movl     (%esi),%edx
 543         movl     %edx,(%edi,%esi,1)
 544         addl     $4,%esi
 545         subl     $1,%ecx
 546         jnz      2b
 547         addl     %esi,%edi
 548         jmp      5f 
 549 3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
 550         subl     $1,%ecx
 551 4:      .p2align 4,,15
 552         movq     0(%esi),%mm0
 553         addl     $64,%edi
 554         movq     8(%esi),%mm1
 555         subl     $16,%ecx
 556         movq     16(%esi),%mm2
 557         movq     %mm0,-64(%edi)
 558         movq     24(%esi),%mm0
 559         movq     %mm1,-56(%edi)
 560         movq     32(%esi),%mm1
 561         movq     %mm2,-48(%edi)
 562         movq     40(%esi),%mm2
 563         movq     %mm0,-40(%edi)
 564         movq     48(%esi),%mm0
 565         movq     %mm1,-32(%edi)
 566         movq     56(%esi),%mm1
 567         movq     %mm2,-24(%edi)
 568         movq     %mm0,-16(%edi)
 569         addl     $64,%esi
 570         movq     %mm1,-8(%edi)
 571         cmpl     $16,%ecx
 572         jge      4b
 573         emms
 574         testl    %ecx,%ecx
 575         ja       1b
 576 5:      andl     $1,%eax
 577         je       7f
 578 6:      movw     (%esi),%dx
 579         movw     %dx,(%edi)
 580 7:      popl     %edi
 581         popl     %esi
 582         ret
 583 mmx_acs_CopyLeft:
 584         std
 585         leal     -4(%edi,%ecx,2),%edi
 586         movl     %eax,%esi
 587         movl     %ecx,%eax
 588         subl     $2,%esi
 589         sarl     %ecx
 590         je       4f
 591         cmpl     $32,%ecx
 592         ja       3f
 593         subl     %esi,%edi
 594         .p2align 4,,15
 595 2:      movl     (%esi),%edx
 596         movl     %edx,(%edi,%esi,1)
 597         subl     $4,%esi
 598         subl     $1,%ecx
 599         jnz      2b
 600         addl     %esi,%edi
 601         jmp      4f
 602 3:      rep;     smovl
 603 4:      andl     $1,%eax
 604         je       6f
 605         addl     $2,%esi
 606         addl     $2,%edi
 607 5:      movw     (%esi),%dx
 608         movw     %dx,(%edi)
 609 6:      cld
 610         popl     %edi
 611         popl     %esi
 612         ret
 613 
 614 
 615         # Support for jlong Atomic::cmpxchg(jlong exchange_value,
 616         #                                   volatile jlong* dest,
 617         #                                   jlong compare_value)
 618         #
 619         .p2align 4,,15
 620         .type    _Atomic_cmpxchg_long,@function
 621 _Atomic_cmpxchg_long:
 622                                    #  8(%esp) : return PC
 623         pushl    %ebx              #  4(%esp) : old %ebx
 624         pushl    %edi              #  0(%esp) : old %edi
 625         movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
 626         movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
 627         movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
 628         movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
 629         movl     20(%esp), %edi    # 20(%esp) : dest
 630         lock cmpxchg8b (%edi)
 631         popl     %edi
 632         popl     %ebx
 633         ret
 634 
 635 
 636         # Support for jlong Atomic::load and Atomic::store.
 637         # void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst)
 638         .p2align 4,,15
 639         .type    _Atomic_move_long,@function
 640 _Atomic_move_long:
 641         movl     4(%esp), %eax   # src
 642         fildll    (%eax)
 643         movl     8(%esp), %eax   # dest
 644         fistpll   (%eax)
 645         ret
 646