1 # 
   2 # Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved.
   3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 #
   5 # This code is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License version 2 only, as
   7 # published by the Free Software Foundation.
   8 #
   9 # This code is distributed in the hope that it will be useful, but WITHOUT
  10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 # version 2 for more details (a copy is included in the LICENSE file that
  13 # accompanied this code).
  14 #
  15 # You should have received a copy of the GNU General Public License version
  16 # 2 along with this work; if not, write to the Free Software Foundation,
  17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 #
  19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 # or visit www.oracle.com if you need additional information or have any
  21 # questions.
  22 # 
  23 
  24         
  25         # NOTE WELL!  The _Copy functions are called directly
  26         # from server-compiler-generated code via CallLeafNoFP,
  27         # which means that they *must* either not use floating
  28         # point or use it in the same manner as does the server
  29         # compiler.
  30         
  31         .globl _Copy_conjoint_bytes                 # exported: memory-copy entry points defined below
  32         .globl _Copy_arrayof_conjoint_bytes
  33         .globl _Copy_conjoint_jshorts_atomic
  34         .globl _Copy_arrayof_conjoint_jshorts
  35         .globl _Copy_conjoint_jints_atomic
  36         .globl _Copy_arrayof_conjoint_jints
  37         .globl _Copy_conjoint_jlongs_atomic
  38         .globl _mmx_Copy_arrayof_conjoint_jshorts   # jshort copy whose large forward path uses MMX registers
  39 
  40         .globl _Atomic_cmpxchg_long                 # exported: 64-bit atomic helpers defined below
  41         .globl _Atomic_move_long
  42 
  43         .text                                       # everything that follows is assembled into the code section
  44 
  45         # Support for void Copy::conjoint_bytes(void* from,
  46         #                                       void* to,
  47         #                                       size_t count)
             #
             # Copies 'count' bytes from 'from' to 'to'.  The two ranges may
             # overlap: the copy runs from low to high addresses unless
             # from < to <= from + count - 1, in which case it runs from high
             # to low so that source bytes are read before they are overwritten.
             #
             # Arguments are read from the stack above the return address in the
             # order from, to, count.  %esi and %edi are saved and restored;
             # %eax, %ecx, %edx and the flags are overwritten.  The direction
             # flag is set only on the high-to-low path and is cleared again
             # before returning.
             #
             # Low-to-high path: copy single bytes until the source is dword
             # aligned, then whole dwords, then the 0-3 remaining bytes.
             # High-to-low path: whole dwords (no alignment step), then the
             # 0-3 remaining bytes.
  48         .p2align 4,,15
  49         .type    _Copy_conjoint_bytes,@function
  50 _Copy_conjoint_bytes:
  51         pushl    %esi
  52         movl     4+12(%esp),%ecx      # count
  53         pushl    %edi
  54         movl     8+ 4(%esp),%esi      # from
  55         movl     8+ 8(%esp),%edi      # to
  56         cmpl     %esi,%edi            # compare to with from; leal below does not touch the flags
  57         leal     -1(%esi,%ecx),%eax   # from + count - 1
  58         jbe      cb_CopyRight         # to <= from: low-to-high is safe
  59         cmpl     %eax,%edi
  60         jbe      cb_CopyLeft          # from < to <= from + count - 1: ranges overlap, go high to low
  61         # copy from low to high
  62 cb_CopyRight:
  63         cmpl     $3,%ecx
  64         jbe      5f                   # <= 3 bytes: the byte loop at 5 copies them all
  65         # align source address at dword address boundary
  66         movl     %ecx,%eax            # original count
  67         movl     $4,%ecx
  68         subl     %esi,%ecx
  69         andl     $3,%ecx              # prefix byte count = (4 - from) & 3
  70         jz       1f                   # no prefix
  71         subl     %ecx,%eax            # byte count less prefix
  72         # copy prefix
  73         subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) addresses the destination
  74 0:      movb     (%esi),%dl
  75         movb     %dl,(%edi,%esi,1)
  76         addl     $1,%esi
  77         subl     $1,%ecx
  78         jnz      0b
  79         addl     %esi,%edi            # turn %edi back into an absolute destination pointer
  80 1:      movl     %eax,%ecx            # byte count less prefix
  81         shrl     $2,%ecx              # dword count
  82         jz       4f                   # no dwords to move
  83         cmpl     $32,%ecx
  84         jbe      2f                   # <= 32 dwords
  85         # > 32 dwords: copy aligned dwords with a rep-prefixed string move
  86         rep;     smovl
  87         jmp      4f
  88         # <= 32 dwords: copy aligned dwords with an explicit loop
  89 2:      subl     %esi,%edi            # %edi = to - from for the indexed store below
  90         .p2align 4,,15
  91 3:      movl     (%esi),%edx
  92         movl     %edx,(%edi,%esi,1)
  93         addl     $4,%esi
  94         subl     $1,%ecx
  95         jnz      3b
  96         addl     %esi,%edi            # restore the absolute destination pointer
  97 4:      movl     %eax,%ecx            # byte count less prefix
  98 5:      andl     $3,%ecx              # suffix byte count
  99         jz       7f                   # no suffix
 100         # copy suffix
 101         xorl     %eax,%eax            # %eax = byte index, counting up from 0
 102 6:      movb     (%esi,%eax,1),%dl
 103         movb     %dl,(%edi,%eax,1)
 104         addl     $1,%eax
 105         subl     $1,%ecx
 106         jnz      6b
 107 7:      popl     %edi
 108         popl     %esi
 109         ret
 110         # copy from high to low
 111 cb_CopyLeft:
 112         std                           # string moves now step downwards; undone by cld at 7
 113         leal     -4(%edi,%ecx),%edi   # to + count - 4
 114         movl     %eax,%esi            # from + count - 1
 115         movl     %ecx,%eax            # keep the byte count for the suffix computation
 116         subl     $3,%esi              # from + count - 4
 117         cmpl     $3,%ecx
 118         jbe      5f                   # <= 3 bytes
 119 1:      shrl     $2,%ecx              # dword count
 120         jz       4f                   # no dwords to move
 121         cmpl     $32,%ecx
 122         ja       3f                   # > 32 dwords
 123         # <= 32 dwords: copy dwords, aligned or not, with an explicit loop
 124         subl     %esi,%edi            # %edi = to - from for the indexed store below
 125         .p2align 4,,15
 126 2:      movl     (%esi),%edx
 127         movl     %edx,(%edi,%esi,1)
 128         subl     $4,%esi
 129         subl     $1,%ecx
 130         jnz      2b
 131         addl     %esi,%edi            # restore the absolute destination pointer
 132         jmp      4f
 133         # > 32 dwords: copy dwords, aligned or not, with a rep-prefixed string move
 134 3:      rep;     smovl
 135 4:      movl     %eax,%ecx            # byte count
 136 5:      andl     $3,%ecx              # suffix byte count
 137         jz       7f                   # no suffix
 138         # copy suffix
 139         subl     %esi,%edi            # %edi = to - from for the indexed store below
 140         addl     $3,%esi              # %esi was 4 below the last dword copied; +3 = highest byte still to copy
 141 6:      movb     (%esi),%dl
 142         movb     %dl,(%edi,%esi,1)
 143         subl     $1,%esi
 144         subl     $1,%ecx
 145         jnz      6b
 146 7:      cld                           # clear the direction flag set on entry to this path
 147         popl     %edi
 148         popl     %esi
 149         ret
 150 
 151         # Support for void Copy::arrayof_conjoint_bytes(void* from,
 152         #                                               void* to,
 153         #                                               size_t count)
 154         #
 155         # Same as _Copy_conjoint_bytes, except no source alignment check.
             #
             # Copies 'count' bytes; the ranges may overlap.  Low-to-high unless
             # from < to <= from + count - 1, in which case high-to-low.
             # In both directions: whole dwords first, then the 0-3 remaining
             # bytes.  %esi and %edi are saved and restored; %eax, %ecx, %edx
             # and the flags are overwritten.  The direction flag is set only on
             # the high-to-low path and cleared before returning.
 156         .p2align 4,,15
 157         .type    _Copy_arrayof_conjoint_bytes,@function
 158 _Copy_arrayof_conjoint_bytes:
 159         pushl    %esi
 160         movl     4+12(%esp),%ecx      # count
 161         pushl    %edi
 162         movl     8+ 4(%esp),%esi      # from
 163         movl     8+ 8(%esp),%edi      # to
 164         cmpl     %esi,%edi            # compare to with from; leal below does not touch the flags
 165         leal     -1(%esi,%ecx),%eax   # from + count - 1
 166         jbe      acb_CopyRight        # to <= from: low-to-high is safe
 167         cmpl     %eax,%edi
 168         jbe      acb_CopyLeft         # from < to <= from + count - 1: ranges overlap, go high to low
 169         # copy from low to high
 170 acb_CopyRight:
 171         cmpl     $3,%ecx
 172         jbe      5f                   # <= 3 bytes: the byte loop at 5 copies them all
 173 1:      movl     %ecx,%eax            # keep the byte count for the suffix computation
 174         shrl     $2,%ecx              # dword count
 175         jz       4f                   # no dwords to move
 176         cmpl     $32,%ecx
 177         ja       3f                   # > 32 dwords
 178         # <= 32 dwords: copy dwords with an explicit loop
 179         subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) addresses the destination
 180         .p2align 4,,15
 181 2:      movl     (%esi),%edx
 182         movl     %edx,(%edi,%esi,1)
 183         addl     $4,%esi
 184         subl     $1,%ecx
 185         jnz      2b
 186         addl     %esi,%edi            # restore the absolute destination pointer
 187         jmp      4f
 188         # > 32 dwords: copy dwords with a rep-prefixed string move
 189 3:      rep;     smovl
 190 4:      movl     %eax,%ecx            # byte count
 191 5:      andl     $3,%ecx              # suffix byte count
 192         jz       7f                   # no suffix
 193         # copy suffix
 194         xorl     %eax,%eax            # %eax = byte index, counting up from 0
 195 6:      movb     (%esi,%eax,1),%dl
 196         movb     %dl,(%edi,%eax,1)
 197         addl     $1,%eax
 198         subl     $1,%ecx
 199         jnz      6b
 200 7:      popl     %edi
 201         popl     %esi
 202         ret
             # copy from high to low
 203 acb_CopyLeft:
 204         std                           # string moves now step downwards; undone by cld at 7
 205         leal     -4(%edi,%ecx),%edi   # to + count - 4
 206         movl     %eax,%esi            # from + count - 1
 207         movl     %ecx,%eax            # keep the byte count for the suffix computation
 208         subl     $3,%esi              # from + count - 4
 209         cmpl     $3,%ecx
 210         jbe      5f                   # <= 3 bytes
 211 1:      shrl     $2,%ecx              # dword count
 212         jz       4f                   # no dwords to move
 213         cmpl     $32,%ecx
 214         jbe      2f                   # <= 32 dwords
 215         rep;     smovl                # > 32 dwords: rep-prefixed string move, descending
 216         jmp      4f
 217         .space 8                      # 8 bytes of padding; NOTE(review): presumably positions the loop below - confirm
 218 2:      subl     %esi,%edi            # %edi = to - from for the indexed store below
 219         .p2align 4,,15
 220 3:      movl     (%esi),%edx
 221         movl     %edx,(%edi,%esi,1)
 222         subl     $4,%esi
 223         subl     $1,%ecx
 224         jnz      3b
 225         addl     %esi,%edi            # restore the absolute destination pointer
 226 4:      movl     %eax,%ecx            # byte count
 227 5:      andl     $3,%ecx              # suffix byte count
 228         jz       7f                   # no suffix
 229         subl     %esi,%edi            # %edi = to - from for the indexed store below
 230         addl     $3,%esi              # %esi was 4 below the last dword copied; +3 = highest byte still to copy
 231 6:      movb     (%esi),%dl
 232         movb     %dl,(%edi,%esi,1)
 233         subl     $1,%esi
 234         subl     $1,%ecx
 235         jnz      6b
 236 7:      cld                           # clear the direction flag set on entry to this path
 237         popl     %edi
 238         popl     %esi
 239         ret
 240 
 241         # Support for void Copy::conjoint_jshorts_atomic(void* from,
 242         #                                                void* to,
 243         #                                                size_t count)
             #
             # Copies 'count' 2-byte elements; the ranges may overlap.
             # Low-to-high unless from < to <= from + count*2 - 2, in which
             # case high-to-low.  Data is moved only in 2-byte and 4-byte units.
             #
             # Low-to-high path: if the source is not dword aligned, copy one
             # leading word, then whole dwords, then one trailing word if the
             # remaining count is odd.
             # High-to-low path: whole dwords (no alignment step), then the one
             # leftover word at the low end if count is odd.
             #
             # %esi and %edi are saved and restored; %eax, %ecx, %edx and the
             # flags are overwritten.  The direction flag is set only on the
             # high-to-low path and cleared before returning.
 244         .p2align 4,,15
 245         .type    _Copy_conjoint_jshorts_atomic,@function
 246 _Copy_conjoint_jshorts_atomic:
 247         pushl    %esi
 248         movl     4+12(%esp),%ecx      # count
 249         pushl    %edi
 250         movl     8+ 4(%esp),%esi      # from
 251         movl     8+ 8(%esp),%edi      # to
 252         cmpl     %esi,%edi            # compare to with from; leal below does not touch the flags
 253         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
 254         jbe      cs_CopyRight         # to <= from: low-to-high is safe
 255         cmpl     %eax,%edi
 256         jbe      cs_CopyLeft          # from < to <= from + count*2 - 2: ranges overlap, go high to low
 257         # copy from low to high
 258 cs_CopyRight:
 259         # align source address at dword address boundary
 260         movl     %esi,%eax            # original from
 261         andl     $3,%eax              # either 0 or 2 (relies on from being 2-byte aligned)
 262         jz       1f                   # no prefix
 263         # copy prefix
 264         subl     $1,%ecx              # one word is consumed by the prefix
 265         jl       5f                   # zero count
 266         movw     (%esi),%dx
 267         movw     %dx,(%edi)
 268         addl     %eax,%esi            # %eax == 2
 269         addl     %eax,%edi
 270 1:      movl     %ecx,%eax            # word count less prefix
 271         sarl     %ecx                 # dword count
 272         jz       4f                   # no dwords to move
 273         cmpl     $32,%ecx
 274         jbe      2f                   # <= 32 dwords
 275         # > 32 dwords: copy aligned dwords with a rep-prefixed string move
 276         rep;     smovl
 277         jmp      4f 
 278         # <= 32 dwords: copy aligned dwords with an explicit loop
 279 2:      subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) addresses the destination
 280         .p2align 4,,15
 281 3:      movl     (%esi),%edx
 282         movl     %edx,(%edi,%esi,1)
 283         addl     $4,%esi
 284         subl     $1,%ecx
 285         jnz      3b
 286         addl     %esi,%edi            # restore the absolute destination pointer
 287 4:      andl     $1,%eax              # suffix count
 288         jz       5f                   # no suffix
 289         # copy suffix
 290         movw     (%esi),%dx
 291         movw     %dx,(%edi)
 292 5:      popl     %edi
 293         popl     %esi
 294         ret
 295         # copy from high to low
 296 cs_CopyLeft:
 297         std                           # string moves now step downwards; undone by cld at 5
 298         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
 299         movl     %eax,%esi            # from + count*2 - 2
 300         movl     %ecx,%eax            # keep the word count for the suffix test
 301         subl     $2,%esi              # from + count*2 - 4
 302 1:      sarl     %ecx                 # dword count
 303         jz       4f                   # no dwords to move
 304         cmpl     $32,%ecx
 305         ja       3f                   # > 32 dwords
 306         subl     %esi,%edi            # <= 32 dwords: %edi = to - from for the indexed store below
 307         .p2align 4,,15
 308 2:      movl     (%esi),%edx
 309         movl     %edx,(%edi,%esi,1)
 310         subl     $4,%esi
 311         subl     $1,%ecx
 312         jnz      2b
 313         addl     %esi,%edi            # restore the absolute destination pointer
 314         jmp      4f
 315 3:      rep;     smovl                # > 32 dwords: rep-prefixed string move, descending
 316 4:      andl     $1,%eax              # suffix count
 317         jz       5f                   # no suffix
 318         # copy suffix
 319         addl     $2,%esi              # both pointers sit 4 below the last dword copied;
 320         addl     $2,%edi              # +2 addresses the one word that is left
 321         movw     (%esi),%dx
 322         movw     %dx,(%edi)
 323 5:      cld                           # clear the direction flag set on entry to this path
 324         popl     %edi
 325         popl     %esi
 326         ret
 327 
 328         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 329         #                                                 void* to,
 330         #                                                 size_t count)
             #
             # Copies 'count' 2-byte elements; the ranges may overlap.
             # Low-to-high unless from < to <= from + count*2 - 2, in which
             # case high-to-low.  Unlike _Copy_conjoint_jshorts_atomic there is
             # no leading-word step to dword-align the source: both directions
             # copy whole dwords and then one leftover word if count is odd.
             #
             # %esi and %edi are saved and restored; %eax, %ecx, %edx and the
             # flags are overwritten.  The direction flag is set only on the
             # high-to-low path and cleared before returning.
 331         .p2align 4,,15
 332         .type    _Copy_arrayof_conjoint_jshorts,@function
 333 _Copy_arrayof_conjoint_jshorts:
 334         pushl    %esi
 335         movl     4+12(%esp),%ecx      # count
 336         pushl    %edi
 337         movl     8+ 4(%esp),%esi      # from
 338         movl     8+ 8(%esp),%edi      # to
 339         cmpl     %esi,%edi            # compare to with from; leal below does not touch the flags
 340         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
 341         jbe      acs_CopyRight        # to <= from: low-to-high is safe
 342         cmpl     %eax,%edi
 343         jbe      acs_CopyLeft         # from < to <= from + count*2 - 2: ranges overlap, go high to low
             # copy from low to high
 344 acs_CopyRight:
 345         movl     %ecx,%eax            # word count
 346         sarl     %ecx                 # dword count
 347         jz       4f                   # no dwords to move
 348         cmpl     $32,%ecx
 349         jbe      2f                   # <= 32 dwords
 350         # > 32 dwords: copy dwords with a rep-prefixed string move
 351         rep;     smovl
 352         jmp      4f 
 353         # <= 32 dwords: copy dwords with an explicit loop
 354         .space 5                      # 5 bytes of padding; NOTE(review): presumably positions the loop below - confirm
 355 2:      subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) addresses the destination
 356         .p2align 4,,15
 357 3:      movl     (%esi),%edx
 358         movl     %edx,(%edi,%esi,1)
 359         addl     $4,%esi
 360         subl     $1,%ecx
 361         jnz      3b
 362         addl     %esi,%edi            # restore the absolute destination pointer
 363 4:      andl     $1,%eax              # suffix count
 364         jz       5f                   # no suffix
 365         # copy suffix
 366         movw     (%esi),%dx
 367         movw     %dx,(%edi)
 368 5:      popl     %edi
 369         popl     %esi
 370         ret
             # copy from high to low
 371 acs_CopyLeft:
 372         std                           # string moves now step downwards; undone by cld at 5
 373         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
 374         movl     %eax,%esi            # from + count*2 - 2
 375         movl     %ecx,%eax            # keep the word count for the suffix test
 376         subl     $2,%esi              # from + count*2 - 4
 377         sarl     %ecx                 # dword count
 378         jz       4f                   # no dwords to move
 379         cmpl     $32,%ecx
 380         ja       3f                   # > 32 dwords
 381         subl     %esi,%edi            # <= 32 dwords: %edi = to - from for the indexed store below
 382         .p2align 4,,15
 383 2:      movl     (%esi),%edx
 384         movl     %edx,(%edi,%esi,1)
 385         subl     $4,%esi
 386         subl     $1,%ecx
 387         jnz      2b
 388         addl     %esi,%edi            # restore the absolute destination pointer
 389         jmp      4f
 390 3:      rep;     smovl                # > 32 dwords: rep-prefixed string move, descending
 391 4:      andl     $1,%eax              # suffix count
 392         jz       5f                   # no suffix
 393         # copy suffix
 394         addl     $2,%esi              # both pointers sit 4 below the last dword copied;
 395         addl     $2,%edi              # +2 addresses the one word that is left
 396         movw     (%esi),%dx
 397         movw     %dx,(%edi)
 398 5:      cld                           # clear the direction flag set on entry to this path
 399         popl     %edi
 400         popl     %esi
 401         ret
 402 
 403         # Support for void Copy::conjoint_jints_atomic(void* from,
 404         #                                              void* to,
 405         #                                              size_t count)
 406         # Equivalent to
 407         #   arrayof_conjoint_jints
             #
             # Both symbols label the same code.  Copies 'count' 4-byte elements,
             # one dword at a time; the ranges may overlap.  Low-to-high unless
             # from < to <= from + count*4 - 4, in which case high-to-low.
             # More than 32 elements are moved with a rep-prefixed string move,
             # 32 or fewer with an explicit loop whose test comes first, so a
             # count of zero copies nothing.
             #
             # %esi and %edi are saved and restored; %eax, %ecx, %edx and the
             # flags are overwritten.  The direction flag is set only on the
             # high-to-low path and cleared before returning.
 408         .p2align 4,,15
 409         .type    _Copy_conjoint_jints_atomic,@function
 410         .type    _Copy_arrayof_conjoint_jints,@function
 411 _Copy_conjoint_jints_atomic:
 412 _Copy_arrayof_conjoint_jints:
 413         pushl    %esi
 414         movl     4+12(%esp),%ecx      # count
 415         pushl    %edi
 416         movl     8+ 4(%esp),%esi      # from
 417         movl     8+ 8(%esp),%edi      # to
 418         cmpl     %esi,%edi            # compare to with from; leal below does not touch the flags
 419         leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
 420         jbe      ci_CopyRight         # to <= from: low-to-high is safe
 421         cmpl     %eax,%edi
 422         jbe      ci_CopyLeft          # from < to <= from + count*4 - 4: ranges overlap, go high to low
             # copy from low to high
 423 ci_CopyRight:
 424         cmpl     $32,%ecx
 425         jbe      2f                   # <= 32 dwords
 426         rep;     smovl                # > 32 dwords: rep-prefixed string move
 427         popl     %edi
 428         popl     %esi
 429         ret
 430         .space 10                     # 10 bytes of padding; NOTE(review): presumably positions the loop below - confirm
 431 2:      subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) addresses the destination
 432         jmp      4f                   # enter the loop at its test
 433         .p2align 4,,15
 434 3:      movl     (%esi),%edx
 435         movl     %edx,(%edi,%esi,1)
 436         addl     $4,%esi
 437 4:      subl     $1,%ecx
 438         jge      3b                   # loop while the decremented count is still >= 0
 439         popl     %edi
 440         popl     %esi
 441         ret
             # copy from high to low
 442 ci_CopyLeft:
 443         std                           # string moves now step downwards; undone by cld on both exits
 444         leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
 445         cmpl     $32,%ecx
 446         ja       4f                   # > 32 dwords
 447         subl     %eax,%edi            # eax == from + count*4 - 4
 448         jmp      3f                   # enter the loop at its test
 449         .p2align 4,,15
 450 2:      movl     (%eax),%edx          # %eax is the descending source pointer on this path
 451         movl     %edx,(%edi,%eax,1)   # %edi holds to - from, so this stores to the matching destination
 452         subl     $4,%eax
 453 3:      subl     $1,%ecx
 454         jge      2b                   # loop while the decremented count is still >= 0
 455         cld
 456         popl     %edi
 457         popl     %esi
 458         ret
 459 4:      movl     %eax,%esi            # from + count*4 - 4
 460         rep;     smovl                # > 32 dwords: rep-prefixed string move, descending
 461         cld
 462         popl     %edi
 463         popl     %esi
 464         ret
 465         
 466         # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
 467         #                                               jlong* to,
 468         #                                               size_t count)
 469         #
 470         # 32-bit
 471         #
 472         # count treated as signed
 473         #
 474         # if (from > to) {
 475         #   while (--count >= 0) {
 476         #     *to++ = *from++;
 477         #   }
 478         # } else {
 479         #   while (--count >= 0) {
 480         #     to[count] = from[count];
 481         #   }
 482         # }
             #
             # Each element is moved with one 64-bit x87 integer load (fildll)
             # followed by one 64-bit store-and-pop (fistpll), so the x87
             # register stack depth is the same on exit as on entry.
             # NOTE(review): presumably this is what makes each element copy
             # "atomic" (a single 8-byte access instead of two 4-byte halves) -
             # confirm against the processor's guarantees for aligned 64-bit
             # accesses.
             #
             # No registers are saved: only %eax, %ecx, %edx, the flags and the
             # top of the x87 stack are used.
 483         .p2align 4,,15
 484         .type    _Copy_conjoint_jlongs_atomic,@function
 485 _Copy_conjoint_jlongs_atomic:
 486         movl     4+8(%esp),%ecx       # count
 487         movl     4+0(%esp),%eax       # from
 488         movl     4+4(%esp),%edx       # to
 489         cmpl     %eax,%edx
 490         jae      cla_CopyLeft         # to >= from: copy from the highest index down
 491 cla_CopyRight:
 492         subl     %eax,%edx            # %edx = to - from, so (%edx,%eax) addresses the destination
 493         jmp      2f                   # enter the loop at its test
 494         .p2align 4,,15
 495 1:      fildll   (%eax)               # load 8 bytes from the source
 496         fistpll  (%edx,%eax,1)        # store them to the destination and pop
 497         addl     $8,%eax
 498 2:      subl     $1,%ecx
 499         jge      1b                   # while (--count >= 0)
 500         ret
 501         .p2align 4,,15
 502 3:      fildll   (%eax,%ecx,8)        # load from[count]
 503         fistpll  (%edx,%ecx,8)        # store to[count] and pop
 504 cla_CopyLeft:                         # entry point is the loop test, so count == 0 copies nothing
 505         subl     $1,%ecx
 506         jge      3b                   # while (--count >= 0)
 507         ret
 508 
 509         # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 510         #                                                 void* to,
 511         #                                                 size_t count)
             #
             # MMX variant of _Copy_arrayof_conjoint_jshorts: it reads the same
             # three stack arguments and uses the same overlap test.  The only
             # difference is the low-to-high path for 33 or more dwords, which
             # copies one dword and then moves 64 bytes per iteration through
             # %mm0-%mm2, finishing any remaining dwords with the plain dword
             # loop.  emms is executed after the MMX loop.  The high-to-low path
             # does not use MMX.
             #
             # %esi and %edi are saved and restored; %eax, %ecx, %edx, the
             # flags and (on the MMX path) %mm0-%mm2 are overwritten.  The
             # direction flag is set only on the high-to-low path and cleared
             # before returning.
 512         .p2align 4,,15
 513         .type    _mmx_Copy_arrayof_conjoint_jshorts,@function
 514 _mmx_Copy_arrayof_conjoint_jshorts:
 515         pushl    %esi
 516         movl     4+12(%esp),%ecx      # count
 517         pushl    %edi
 518         movl     8+ 4(%esp),%esi      # from
 519         movl     8+ 8(%esp),%edi      # to
 520         cmpl     %esi,%edi            # compare to with from; leal below does not touch the flags
 521         leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
 522         jbe      mmx_acs_CopyRight    # to <= from: low-to-high is safe
 523         cmpl     %eax,%edi
 524         jbe      mmx_acs_CopyLeft     # from < to <= from + count*2 - 2: ranges overlap, go high to low
             # copy from low to high
 525 mmx_acs_CopyRight:
 526         movl     %ecx,%eax            # word count, kept for the suffix test at 5
 527         sarl     %ecx                 # dword count
 528         je       5f                   # no dwords to move
 529         cmpl     $33,%ecx
 530         jae      3f                   # >= 33 dwords: MMX path
 531 1:      subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) addresses the destination
 532         .p2align 4,,15
 533 2:      movl     (%esi),%edx
 534         movl     %edx,(%edi,%esi,1)
 535         addl     $4,%esi
 536         subl     $1,%ecx
 537         jnz      2b
 538         addl     %esi,%edi            # restore the absolute destination pointer
 539         jmp      5f 
 540 3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
 541         subl     $1,%ecx              # account for the dword just copied; at least 32 dwords remain
 542 4:      .p2align 4,,15                # label 4 is bound before the alignment padding, so jge 4b re-enters at the padding
 543         movq     0(%esi),%mm0         # loads and stores are interleaved; %edi is advanced first,
 544         addl     $64,%edi             # hence the negative store offsets below
 545         movq     8(%esi),%mm1
 546         subl     $16,%ecx             # 64 bytes = 16 dwords per iteration
 547         movq     16(%esi),%mm2
 548         movq     %mm0,-64(%edi)
 549         movq     24(%esi),%mm0
 550         movq     %mm1,-56(%edi)
 551         movq     32(%esi),%mm1
 552         movq     %mm2,-48(%edi)
 553         movq     40(%esi),%mm2
 554         movq     %mm0,-40(%edi)
 555         movq     48(%esi),%mm0
 556         movq     %mm1,-32(%edi)
 557         movq     56(%esi),%mm1
 558         movq     %mm2,-24(%edi)
 559         movq     %mm0,-16(%edi)
 560         addl     $64,%esi
 561         movq     %mm1,-8(%edi)
 562         cmpl     $16,%ecx
 563         jge      4b                   # another full 16-dword chunk remains
 564         emms                          # leave MMX state before any further non-MMX code runs
 565         testl    %ecx,%ecx
 566         ja       1b                   # testl clears CF, so this is taken exactly when %ecx != 0: finish in the dword loop
 567 5:      andl     $1,%eax              # suffix count
 568         je       7f                   # no suffix
 569 6:      movw     (%esi),%dx           # copy the one trailing word
 570         movw     %dx,(%edi)
 571 7:      popl     %edi
 572         popl     %esi
 573         ret
             # copy from high to low (no MMX on this path)
 574 mmx_acs_CopyLeft:
 575         std                           # string moves now step downwards; undone by cld at 6
 576         leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
 577         movl     %eax,%esi            # from + count*2 - 2
 578         movl     %ecx,%eax            # keep the word count for the suffix test
 579         subl     $2,%esi              # from + count*2 - 4
 580         sarl     %ecx                 # dword count
 581         je       4f                   # no dwords to move
 582         cmpl     $32,%ecx
 583         ja       3f                   # > 32 dwords
 584         subl     %esi,%edi            # <= 32 dwords: %edi = to - from for the indexed store below
 585         .p2align 4,,15
 586 2:      movl     (%esi),%edx
 587         movl     %edx,(%edi,%esi,1)
 588         subl     $4,%esi
 589         subl     $1,%ecx
 590         jnz      2b
 591         addl     %esi,%edi            # restore the absolute destination pointer
 592         jmp      4f
 593 3:      rep;     smovl                # > 32 dwords: rep-prefixed string move, descending
 594 4:      andl     $1,%eax              # suffix count
 595         je       6f                   # no suffix
 596         addl     $2,%esi              # both pointers sit 4 below the last dword copied;
 597         addl     $2,%edi              # +2 addresses the one word that is left
 598 5:      movw     (%esi),%dx
 599         movw     %dx,(%edi)
 600 6:      cld                           # clear the direction flag set on entry to this path
 601         popl     %edi
 602         popl     %esi
 603         ret
 604 
 605 
 606         # Support for jlong Atomic::cmpxchg(jlong exchange_value,
 607         #                                   volatile jlong* dest,
 608         #                                   jlong compare_value)
 609         #
             # Performs a locked 64-bit compare-and-exchange on *dest using
             # cmpxchg8b: if *dest equals compare_value it is replaced by
             # exchange_value.  The routine returns with %edx:%eax exactly as
             # cmpxchg8b left them - compare_value when the exchange happened,
             # otherwise the value found at *dest.
             #
             # %ebx and %edi are saved and restored; %eax, %ecx, %edx and the
             # flags are overwritten.  The stack offsets in the comments below
             # are relative to %esp after the two pushes.
 610         .p2align 4,,15
 611         .type    _Atomic_cmpxchg_long,@function
 612 _Atomic_cmpxchg_long:
 613                                    #  8(%esp) : return PC
 614         pushl    %ebx              #  4(%esp) : old %ebx
 615         pushl    %edi              #  0(%esp) : old %edi
 616         movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
 617         movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
 618         movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
 619         movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
 620         movl     20(%esp), %edi    # 20(%esp) : dest
 621         lock cmpxchg8b (%edi)      # compare %edx:%eax with *dest; store %ecx:%ebx if equal, else load *dest into %edx:%eax
 622         popl     %edi
 623         popl     %ebx
 624         ret
 625 
 626 
 627         # Support for jlong Atomic::load and Atomic::store.
 628         # void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst)
             #
             # Copies the 8 bytes at *src to *dst with one 64-bit x87 integer load
             # (fildll) and one 64-bit store-and-pop (fistpll), so the x87
             # register stack depth is unchanged on return.  Only %eax, the top
             # of the x87 stack and the x87 status are touched.
             # NOTE(review): presumably chosen so that each side is a single
             # 8-byte memory access - confirm against the processor's guarantees
             # for aligned 64-bit accesses.
 629         .p2align 4,,15
 630         .type    _Atomic_move_long,@function
 631 _Atomic_move_long:
 632         movl     4(%esp), %eax   # src
 633         fildll    (%eax)         # load the 64-bit value onto the x87 stack
 634         movl     8(%esp), %eax   # dest
 635         fistpll   (%eax)         # store it to *dst and pop
 636         ret
 637