1 //
   2 // Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 
  24         .globl fixcw
  25         .globl sse_check
  26         .globl sse_unavailable
  27         .globl gs_load
  28         .globl gs_thread
  29         .globl _Atomic_cmpxchg_long_gcc
  30 
  31         // NOTE WELL!  The _Copy functions are called directly
  32         // from server-compiler-generated code via CallLeafNoFP,
  33         // which means that they *must* either not use floating
  34         // point or use it in the same manner as does the server
  35         // compiler.
  36 
  37         .globl _Copy_conjoint_bytes
  38         .globl _Copy_arrayof_conjoint_bytes
  39         .globl _Copy_conjoint_jshorts_atomic
  40         .globl _Copy_arrayof_conjoint_jshorts
  41         .globl _Copy_conjoint_jints_atomic
  42         .globl _Copy_arrayof_conjoint_jints
  43         .globl _Copy_conjoint_jlongs_atomic
  44         .globl _mmx_Copy_arrayof_conjoint_jshorts
  45 
  46         .section .text,"ax"
  47 
  48 / Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp
  49 / Set fpu to 53 bit precision.  This happens too early to use a stub.
     // Takes no stack arguments.  Loads the constant 0x27f into the x87
     // control word and returns.  %eax is overwritten (it receives the
     // scratch word when the temporary stack slot is popped).
  50         .align   16
  51 fixcw:
             // fldcw only takes a memory operand, so the constant is staged in
             // a temporary stack slot.
  52         pushl    $0x27f
  53         fldcw    0(%esp)
             // Release the temporary slot; this is what clobbers %eax.
  54         popl     %eax
  55         ret
  56 
  57         .align  16
             // SpinPause: reads no arguments, executes one spin-wait hint and
             // returns 1 in %eax.
             // NOTE(review): the C prototype is not visible in this file.
  58         .globl  SpinPause
  59 SpinPause:
             // "rep" followed by "nop" assembles to the bytes F3 90, which is
             // the encoding of the PAUSE instruction.  Presumably spelled this
             // way for assemblers that lack the "pause" mnemonic -- confirm.
  60         rep 
  61         nop
  62         movl    $1, %eax
  63         ret
  64 
  65 
  66 / Test SSE availability, used by os_solaris_i486.cpp
     // Takes no arguments.  Returns 1 in %eax when the SSE probe instruction
     // executes normally, and 0 when control is resumed at sse_unavailable.
     // Both labels are exported by the .globl directives at the top of the
     // file.  The probe zeroes %xmm0.
  67         .align   16
  68 sse_check:
  69         / Fault if SSE not available
  70         xorps %xmm0,%xmm0
  71         / No fault
  72         movl $1,%eax
  73         ret
  74         / Signal handler continues here if SSE is not available
             // sse_check pushes nothing before the probe, so the return address
             // is still on top of the stack and a plain ret goes back to the
             // caller of sse_check.
  75 sse_unavailable:
  76         xorl %eax,%eax
  77         ret
  78 
  79 / Fast thread accessors, used by threadLS_solaris_i486.cpp
     // gs_load: reads one stack argument (a byte offset) and returns in %eax
     // the 32-bit value stored at that offset within the %gs segment.
     // Clobbers %ecx.
  80         .align   16
  81 gs_load:
             // 0(%esp) holds the return address, so 4(%esp) is the argument.
  82         movl 4(%esp),%ecx
  83         movl %gs:(%ecx),%eax
  84         ret
  85 
  86         .align   16
             // gs_thread: takes no arguments; returns in %eax the 32-bit value
             // stored at offset 0 of the %gs segment.
             // NOTE(review): presumably that slot holds the current thread
             // pointer (going by the name) -- confirm against
             // threadLS_solaris_i486.cpp.
  87 gs_thread:
  88         movl %gs:0x0,%eax
  89         ret
  90 
  91         / Support for void Copy::conjoint_bytes(void* from,
  92         /                                       void* to,
  93         /                                       size_t count)
             //
             // Copies count bytes from 'from' to 'to'; the regions may overlap.
             // Arguments are read from the stack in the order from, to, count.
             // %esi and %edi are saved and restored; %eax, %ecx, %edx and the
             // flags are clobbered.  The direction flag is set only inside
             // cb_CopyLeft and is cleared again before that path returns.
             //
             // Direction selection:
             //   to <= from                  -> ascending copy  (cb_CopyRight)
             //   from < to <= from+count-1   -> descending copy (cb_CopyLeft)
             //   to  > from+count-1          -> ascending copy  (fall through)
  94         .align   16
  95 _Copy_conjoint_bytes:
  96         pushl    %esi
             // Displacements are written as <bytes pushed so far> + <offset of
             // the argument at entry> (return address at 0, from at 4, to at 8,
             // count at 12).
  97         movl     4+12(%esp),%ecx      / count
  98         pushl    %edi
  99         movl     8+ 4(%esp),%esi      / from
 100         movl     8+ 8(%esp),%edi      / to
             // leal does not modify flags, so the jbe below still tests the
             // result of this cmpl (to <= from, unsigned).
 101         cmpl     %esi,%edi
 102         leal     -1(%esi,%ecx),%eax   / from + count - 1
 103         jbe      cb_CopyRight
 104         cmpl     %eax,%edi
 105         jbe      cb_CopyLeft
 106         / copy from low to high
 107 cb_CopyRight:
 108         cmpl     $3,%ecx
 109         jbe      5f                   / <= 3 bytes
 110         / align source address at dword address boundary
 111         movl     %ecx,%eax            / original count
             // %ecx = (4 - from) & 3: bytes needed to bring the source pointer
             // up to a 4-byte boundary.
 112         movl     $4,%ecx
 113         subl     %esi,%ecx
 114         andl     $3,%ecx              / prefix byte count
 115         jz       1f                   / no prefix
 116         subl     %ecx,%eax            / byte count less prefix
 117         / copy prefix
             // %edi temporarily becomes (to - from), so (%edi,%esi,1) addresses
             // the destination byte matching (%esi) and only %esi has to be
             // advanced inside the loop.
 118         subl     %esi,%edi
 119 0:      movb     (%esi),%dl
 120         movb     %dl,(%edi,%esi,1)
 121         addl     $1,%esi
 122         subl     $1,%ecx
 123         jnz      0b
             // Turn %edi back into an absolute destination pointer.
 124         addl     %esi,%edi
 125 1:      movl     %eax,%ecx            / byte count less prefix
 126         shrl     $2,%ecx              / dword count
 127         jz       4f                   / no dwords to move
             // More than 32 dwords: use the string-move instruction;
             // otherwise use the explicit load/store loop at label 3.
 128         cmpl     $32,%ecx
 129         jbe      2f                   / <= 32 dwords
 130         / copy aligned dwords
 131         rep;     smovl
 132         jmp      4f
 133         / copy aligned dwords
             // Same (to - from) addressing trick as in the prefix loop.
 134 2:      subl     %esi,%edi
             // NOTE(review): this .align lies on the fall-through path from
             // label 2 into the loop, so any padding it emits is executed once
             // and must be no-op filler -- confirm for the assembler in use.
 135         .align   16
 136 3:      movl     (%esi),%edx
 137         movl     %edx,(%edi,%esi,1)
 138         addl     $4,%esi
 139         subl     $1,%ecx
 140         jnz      3b
 141         addl     %esi,%edi
             // Label 4 reloads the byte count that remained after the prefix.
             // Label 5 is also entered directly from the "<= 3 bytes" test
             // above with %ecx still holding the full count.
 142 4:      movl     %eax,%ecx            / byte count less prefix
 143 5:      andl     $3,%ecx              / suffix byte count
 144         jz       7f                   / no suffix
 145         / copy suffix
             // %eax is reused as the running byte index for the tail.
 146         xorl     %eax,%eax
 147 6:      movb     (%esi,%eax,1),%dl
 148         movb     %dl,(%edi,%eax,1)
 149         addl     $1,%eax
 150         subl     $1,%ecx
 151         jnz      6b
 152 7:      popl     %edi
 153         popl     %esi
 154         ret
 155         / copy from high to low
 156 cb_CopyLeft:
             // Set the direction flag so the string move below walks downwards;
             // it is cleared again at label 7 before returning.
 157         std
 158         leal     -4(%edi,%ecx),%edi   / to + count - 4
 159         movl     %eax,%esi            / from + count - 1
 160         movl     %ecx,%eax
 161         subl     $3,%esi              / from + count - 4
 162         cmpl     $3,%ecx
 163         jbe      5f                   / <= 3 bytes
 164 1:      shrl     $2,%ecx              / dword count
 165         jz       4f                   / no dwords to move
 166         cmpl     $32,%ecx
 167         ja       3f                   / > 32 dwords
 168         / copy dwords, aligned or not
             // %edi = (to - from) delta; the loop steps %esi downwards by 4.
 169         subl     %esi,%edi
 170         .align   16
 171 2:      movl     (%esi),%edx
 172         movl     %edx,(%edi,%esi,1)
 173         subl     $4,%esi
 174         subl     $1,%ecx
 175         jnz      2b
 176         addl     %esi,%edi
 177         jmp      4f
 178         / copy dwords, aligned or not
 179 3:      rep;     smovl
 180 4:      movl     %eax,%ecx            / byte count
 181 5:      andl     $3,%ecx              / suffix byte count
 182         jz       7f                   / no suffix
 183         / copy suffix
             // %esi/%edi address the dword slot just below the last dword
             // copied, so %esi + 3 is the highest byte not yet copied.  The
             // remaining (count & 3) bytes are copied downwards from there,
             // again using %edi as a (to - from) delta.
 184         subl     %esi,%edi
 185         addl     $3,%esi
 186 6:      movb     (%esi),%dl
 187         movb     %dl,(%edi,%esi,1)
 188         subl     $1,%esi
 189         subl     $1,%ecx
 190         jnz      6b
             // Restore the direction flag to "ascending" before returning.
 191 7:      cld
 192         popl     %edi
 193         popl     %esi
 194         ret
 195 
 196         / Support for void Copy::arrayof_conjoint_bytes(void* from,
 197         /                                               void* to,
 198         /                                               size_t count)
 199         /
 200         / Same as _Copy_conjoint_bytes, except no source alignment check.
             //
             // Arguments are read from the stack in the order from, to, count
             // (count in bytes).  %esi and %edi are saved and restored; %eax,
             // %ecx, %edx and the flags are clobbered.
             //
             // Direction selection:
             //   to <= from                  -> ascending copy  (acb_CopyRight)
             //   from < to <= from+count-1   -> descending copy (acb_CopyLeft)
             //   to  > from+count-1          -> ascending copy  (fall through)
 201         .align   16
 202 _Copy_arrayof_conjoint_bytes:
 203         pushl    %esi
             // Displacements are <bytes pushed so far> + <offset of the
             // argument at entry>.
 204         movl     4+12(%esp),%ecx      / count
 205         pushl    %edi
 206         movl     8+ 4(%esp),%esi      / from
 207         movl     8+ 8(%esp),%edi      / to
             // leal leaves the flags alone, so jbe tests the cmpl (to <= from).
 208         cmpl     %esi,%edi
 209         leal     -1(%esi,%ecx),%eax   / from + count - 1
 210         jbe      acb_CopyRight
 211         cmpl     %eax,%edi
 212         jbe      acb_CopyLeft 
 213         / copy from low to high
 214 acb_CopyRight:
             // 3 bytes or fewer: skip straight to the byte tail at label 5.
 215         cmpl     $3,%ecx
 216         jbe      5f
             // %eax keeps the byte count; %ecx becomes the dword count.
 217 1:      movl     %ecx,%eax
 218         shrl     $2,%ecx
 219         jz       4f
             // More than 32 dwords: string move at label 3; otherwise the
             // explicit loop at label 2.
 220         cmpl     $32,%ecx
 221         ja       3f
 222         / copy aligned dwords
             // %edi = (to - from), so (%edi,%esi,1) is the destination that
             // matches (%esi) and only %esi is stepped.
 223         subl     %esi,%edi
 224         .align   16
 225 2:      movl     (%esi),%edx
 226         movl     %edx,(%edi,%esi,1)
 227         addl     $4,%esi
 228         subl     $1,%ecx
 229         jnz      2b
             // Back to an absolute destination pointer.
 230         addl     %esi,%edi
 231         jmp      4f
 232         / copy aligned dwords
 233 3:      rep;     smovl
             // Tail: copy the remaining (count & 3) bytes, indexed by %eax.
 234 4:      movl     %eax,%ecx
 235 5:      andl     $3,%ecx
 236         jz       7f
 237         / copy suffix
 238         xorl     %eax,%eax
 239 6:      movb     (%esi,%eax,1),%dl
 240         movb     %dl,(%edi,%eax,1)
 241         addl     $1,%eax
 242         subl     $1,%ecx
 243         jnz      6b
 244 7:      popl     %edi
 245         popl     %esi
 246         ret
             // Descending copy for the overlapping case (to above from).
 247 acb_CopyLeft:
             // Direction flag set for the string move; cleared at label 7.
 248         std
 249         leal     -4(%edi,%ecx),%edi   / to + count - 4
 250         movl     %eax,%esi            / from + count - 1
 251         movl     %ecx,%eax
 252         subl     $3,%esi              / from + count - 4
 253         cmpl     $3,%ecx
 254         jbe      5f
 255 1:      shrl     $2,%ecx
 256         jz       4f
 257         cmpl     $32,%ecx
 258         jbe      2f                   / <= 32 dwords
 259         rep;     smovl
 260         jmp      4f
             // Advances the location counter by 8 bytes.  It follows an
             // unconditional jmp, so the skipped bytes are never executed.
             // NOTE(review): presumably hand-tuned padding that positions the
             // loop below -- confirm before changing.
 261         .=.+8
 262 2:      subl     %esi,%edi
 263         .align   16
 264 3:      movl     (%esi),%edx
 265         movl     %edx,(%edi,%esi,1)
 266         subl     $4,%esi
 267         subl     $1,%ecx
 268         jnz      3b
 269         addl     %esi,%edi
 270 4:      movl     %eax,%ecx
 271 5:      andl     $3,%ecx
 272         jz       7f
             // %esi + 3 is the highest byte not yet copied; copy the remaining
             // (count & 3) bytes downwards with %edi as a (to - from) delta.
 273         subl     %esi,%edi
 274         addl     $3,%esi
 275 6:      movb     (%esi),%dl
 276         movb     %dl,(%edi,%esi,1)
 277         subl     $1,%esi
 278         subl     $1,%ecx
 279         jnz      6b
             // Restore the direction flag before returning.
 280 7:      cld
 281         popl     %edi
 282         popl     %esi
 283         ret
 284 
 285         / Support for void Copy::conjoint_jshorts_atomic(void* from,
 286         /                                                void* to,
 287         /                                                size_t count)
             //
             // count is in 2-byte elements (the address arithmetic below scales
             // it by 2).  Arguments are read from the stack in the order from,
             // to, count.  %esi and %edi are saved and restored; %eax, %ecx,
             // %edx and the flags are clobbered.
             //
             // Direction selection:
             //   to <= from                    -> ascending  (cs_CopyRight)
             //   from < to <= from+count*2-2   -> descending (cs_CopyLeft)
             //   to  > from+count*2-2          -> ascending  (fall through)
 288         .align   16
 289 _Copy_conjoint_jshorts_atomic:
 290         pushl    %esi
             // Displacements are <bytes pushed so far> + <offset of the
             // argument at entry>.
 291         movl     4+12(%esp),%ecx      / count
 292         pushl    %edi
 293         movl     8+ 4(%esp),%esi      / from
 294         movl     8+ 8(%esp),%edi      / to
             // leal leaves the flags alone, so jbe tests the cmpl (to <= from).
 295         cmpl     %esi,%edi
 296         leal     -2(%esi,%ecx,2),%eax / from + count*2 - 2
 297         jbe      cs_CopyRight
 298         cmpl     %eax,%edi
 299         jbe      cs_CopyLeft 
 300         / copy from low to high
 301 cs_CopyRight:
 302         / align source address at dword address boundary
 303         movl     %esi,%eax            / original from
 304         andl     $3,%eax              / either 0 or 2
 305         jz       1f                   / no prefix
 306         / copy prefix
             // Copy one element so the source pointer reaches a 4-byte
             // boundary.  The decrement also catches count == 0 (goes negative).
 307         subl     $1,%ecx
 308         jl       5f                   / zero count
 309         movw     (%esi),%dx
 310         movw     %dx,(%edi)
 311         addl     %eax,%esi            / %eax == 2
 312         addl     %eax,%edi
 313 1:      movl     %ecx,%eax            / word count less prefix
             // One-operand sarl shifts by one bit: element count / 2.
 314         sarl     %ecx                 / dword count
 315         jz       4f                   / no dwords to move
 316         cmpl     $32,%ecx
 317         jbe      2f                   / <= 32 dwords
 318         / copy aligned dwords
 319         rep;     smovl
 320         jmp      4f 
 321         / copy aligned dwords
             // %edi = (to - from), so (%edi,%esi,1) is the destination that
             // matches (%esi) and only %esi is stepped.
 322 2:      subl     %esi,%edi
             // NOTE(review): this .align is on the fall-through path from
             // label 2, so its padding is executed once and must be no-op
             // filler -- confirm for the assembler in use.
 323         .align   16
 324 3:      movl     (%esi),%edx
 325         movl     %edx,(%edi,%esi,1)
 326         addl     $4,%esi
 327         subl     $1,%ecx
 328         jnz      3b
 329         addl     %esi,%edi
             // Low bit of the remaining element count: one trailing element.
 330 4:      andl     $1,%eax              / suffix count
 331         jz       5f                   / no suffix
 332         / copy suffix
 333         movw     (%esi),%dx
 334         movw     %dx,(%edi)
 335 5:      popl     %edi
 336         popl     %esi
 337         ret
 338         / copy from high to low
 339 cs_CopyLeft:
             // Direction flag set for the string move; cleared at label 5.
 340         std
 341         leal     -4(%edi,%ecx,2),%edi / to + count*2 - 4
 342         movl     %eax,%esi            / from + count*2 - 2
 343         movl     %ecx,%eax
 344         subl     $2,%esi              / from + count*2 - 4
 345 1:      sarl     %ecx                 / dword count
 346         jz       4f                   / no dwords to move
 347         cmpl     $32,%ecx
 348         ja       3f                   / > 32 dwords
             // %edi = (to - from) delta; the loop steps %esi downwards by 4.
 349         subl     %esi,%edi
 350         .align   16
 351 2:      movl     (%esi),%edx
 352         movl     %edx,(%edi,%esi,1)
 353         subl     $4,%esi
 354         subl     $1,%ecx
 355         jnz      2b
 356         addl     %esi,%edi
 357         jmp      4f
 358 3:      rep;     smovl
 359 4:      andl     $1,%eax              / suffix count
 360         jz       5f                   / no suffix
 361         / copy suffix
             // The pointers sit one dword below the last dword copied; the one
             // element left (odd count) is 2 bytes above that position.
 362         addl     $2,%esi
 363         addl     $2,%edi
 364         movw     (%esi),%dx
 365         movw     %dx,(%edi)
             // Restore the direction flag before returning.
 366 5:      cld
 367         popl     %edi
 368         popl     %esi
 369         ret
 370 
 371         / Support for void Copy::arrayof_conjoint_jshorts(void* from,
 372         /                                                 void* to,
 373         /                                                 size_t count)
             //
             // count is in 2-byte elements.  Unlike
             // _Copy_conjoint_jshorts_atomic, no prefix element is copied to
             // align the source before the dword loop.  Arguments are read from
             // the stack in the order from, to, count.  %esi and %edi are saved
             // and restored; %eax, %ecx, %edx and the flags are clobbered.
             //
             // Direction selection:
             //   to <= from                    -> ascending  (acs_CopyRight)
             //   from < to <= from+count*2-2   -> descending (acs_CopyLeft)
             //   to  > from+count*2-2          -> ascending  (fall through)
 374         .align   16
 375 _Copy_arrayof_conjoint_jshorts:
 376         pushl    %esi
             // Displacements are <bytes pushed so far> + <offset of the
             // argument at entry>.
 377         movl     4+12(%esp),%ecx      / count
 378         pushl    %edi
 379         movl     8+ 4(%esp),%esi      / from
 380         movl     8+ 8(%esp),%edi      / to
             // leal leaves the flags alone, so jbe tests the cmpl (to <= from).
 381         cmpl     %esi,%edi
 382         leal     -2(%esi,%ecx,2),%eax / from + count*2 - 2
 383         jbe      acs_CopyRight
 384         cmpl     %eax,%edi
 385         jbe      acs_CopyLeft 
 386 acs_CopyRight:
 387         movl     %ecx,%eax            / word count
             // One-operand sarl shifts by one bit: element count / 2.
 388         sarl     %ecx                 / dword count
 389         jz       4f                   / no dwords to move
 390         cmpl     $32,%ecx
 391         jbe      2f                   / <= 32 dwords
 392         / copy aligned dwords
 393         rep;     smovl   
 394         jmp      4f 
 395         / copy aligned dwords
             // Advances the location counter by 5 bytes.  It follows an
             // unconditional jmp, so the skipped bytes are never executed.
             // NOTE(review): presumably hand-tuned padding that positions the
             // loop below -- confirm before changing.
 396         .=.+5
             // %edi = (to - from), so (%edi,%esi,1) is the destination that
             // matches (%esi) and only %esi is stepped.
 397 2:      subl     %esi,%edi 
 398         .align   16     
 399 3:      movl     (%esi),%edx
 400         movl     %edx,(%edi,%esi,1)
 401         addl     $4,%esi
 402         subl     $1,%ecx
 403         jnz      3b
 404         addl     %esi,%edi
             // Low bit of the element count: one trailing element to copy.
 405 4:      andl     $1,%eax              / suffix count
 406         jz       5f                   / no suffix
 407         / copy suffix
 408         movw     (%esi),%dx
 409         movw     %dx,(%edi)
 410 5:      popl     %edi
 411         popl     %esi
 412         ret
             // Descending copy for the overlapping case (to above from).
 413 acs_CopyLeft:
             // Direction flag set for the string move; cleared at label 5.
 414         std
 415         leal     -4(%edi,%ecx,2),%edi / to + count*2 - 4
 416         movl     %eax,%esi            / from + count*2 - 2
 417         movl     %ecx,%eax
 418         subl     $2,%esi              / from + count*2 - 4
 419         sarl     %ecx                 / dword count
 420         jz       4f                   / no dwords to move
 421         cmpl     $32,%ecx
 422         ja       3f                   / > 32 dwords
             // %edi = (to - from) delta; the loop steps %esi downwards by 4.
 423         subl     %esi,%edi
 424         .align   16
 425 2:      movl     (%esi),%edx
 426         movl     %edx,(%edi,%esi,1)
 427         subl     $4,%esi
 428         subl     $1,%ecx
 429         jnz      2b
 430         addl     %esi,%edi
 431         jmp      4f
 432 3:      rep;     smovl
 433 4:      andl     $1,%eax              / suffix count
 434         jz       5f                   / no suffix
 435         / copy suffix
             // The pointers sit one dword below the last dword copied; the one
             // element left (odd count) is 2 bytes above that position.
 436         addl     $2,%esi
 437         addl     $2,%edi
 438         movw     (%esi),%dx
 439         movw     %dx,(%edi)
             // Restore the direction flag before returning.
 440 5:      cld
 441         popl     %edi
 442         popl     %esi
 443         ret
 444 
 445         / Support for void Copy::conjoint_jints_atomic(void* from,
 446         /                                              void* to,
 447         /                                              size_t count)
 448         / Equivalent to
 449         /   arrayof_conjoint_jints
             //
             // Both exported names below label the same entry point.  count is
             // in 4-byte elements (the address arithmetic scales it by 4), and
             // every transfer is a single 32-bit move.  Arguments are read from
             // the stack in the order from, to, count.  %esi and %edi are saved
             // and restored; %eax, %ecx, %edx and the flags are clobbered.
             //
             // Direction selection:
             //   to <= from                    -> ascending  (ci_CopyRight)
             //   from < to <= from+count*4-4   -> descending (ci_CopyLeft)
             //   to  > from+count*4-4          -> ascending  (fall through)
 450         .align   16
 451 _Copy_conjoint_jints_atomic:
 452 _Copy_arrayof_conjoint_jints:
 453         pushl    %esi
             // Displacements are <bytes pushed so far> + <offset of the
             // argument at entry>.
 454         movl     4+12(%esp),%ecx      / count
 455         pushl    %edi
 456         movl     8+ 4(%esp),%esi      / from
 457         movl     8+ 8(%esp),%edi      / to
             // leal leaves the flags alone, so jbe tests the cmpl (to <= from).
 458         cmpl     %esi,%edi
 459         leal     -4(%esi,%ecx,4),%eax / from + count*4 - 4
 460         jbe      ci_CopyRight
 461         cmpl     %eax,%edi
 462         jbe      ci_CopyLeft 
 463 ci_CopyRight:
 464         cmpl     $32,%ecx
 465         jbe      2f                   / <= 32 dwords
 466         rep;     smovl 
 467         popl     %edi
 468         popl     %esi
 469         ret
             // Advances the location counter by 10 bytes.  It follows a ret, so
             // the skipped bytes are never executed.
             // NOTE(review): presumably hand-tuned padding that positions the
             // loop below -- confirm before changing.
 470         .=.+10
             // %edi = (to - from), so (%edi,%esi,1) is the destination that
             // matches (%esi).  The loop is entered at its test (label 4) so a
             // count of zero copies nothing; the count is tested as signed.
 471 2:      subl     %esi,%edi
 472         jmp      4f
 473         .align   16
 474 3:      movl     (%esi),%edx
 475         movl     %edx,(%edi,%esi,1)
 476         addl     $4,%esi
 477 4:      subl     $1,%ecx
 478         jge      3b
 479         popl     %edi
 480         popl     %esi
 481         ret
             // Descending copy for the overlapping case (to above from).
 482 ci_CopyLeft:
             // Direction flag set for the string move at label 4; both exits
             // from this path clear it again.
 483         std
 484         leal     -4(%edi,%ecx,4),%edi / to + count*4 - 4
 485         cmpl     $32,%ecx
 486         ja       4f                   / > 32 dwords
             // Short case: %eax (address of the last source element) is used
             // directly as the source pointer and %edi becomes the (to - from)
             // delta; %esi is not needed on this path.
 487         subl     %eax,%edi            / eax == from + count*4 - 4
 488         jmp      3f
 489         .align   16
 490 2:      movl     (%eax),%edx
 491         movl     %edx,(%edi,%eax,1)
 492         subl     $4,%eax
 493 3:      subl     $1,%ecx
 494         jge      2b
 495         cld
 496         popl     %edi
 497         popl     %esi
 498         ret
 499 4:      movl     %eax,%esi            / from + count*4 - 4
 500         rep;     smovl
 501         cld
 502         popl     %edi
 503         popl     %esi
 504         ret
 505         
 506         / Support for void Copy::conjoint_jlongs_atomic(jlong* from,
 507         /                                               jlong* to,
 508         /                                               size_t count)
 509         /
 510         / 32-bit
 511         /
 512         / count treated as signed
 513         /
 514         / if (from > to) {
 515         /   while (--count >= 0) {
 516         /     *to++ = *from++;
 517         /   }
 518         / } else {
 519         /   while (--count >= 0) {
 520         /     to[count] = from[count];
 521         /   }
 522         / }
             //
             // Each 8-byte element is moved with one x87 64-bit integer load
             // (fildll) followed by one store-and-pop (fistpll).
             // No registers are saved: only %eax, %ecx, %edx, the flags and the
             // x87 stack are used.
             // NOTE(review): this routine uses x87 instructions; check it
             // against the NOTE WELL comment near the .globl list about
             // floating-point use in the _Copy functions.
 523         .align   16
 524 _Copy_conjoint_jlongs_atomic:
             // Nothing is pushed here, so the displacements are written as
             // <return address size> + <argument offset>.
 525         movl     4+8(%esp),%ecx       / count
 526         movl     4+0(%esp),%eax       / from
 527         movl     4+4(%esp),%edx       / to
             // to >= from (unsigned): copy from the highest element downwards.
 528         cmpl     %eax,%edx
 529         jae      cla_CopyLeft
 530 cla_CopyRight:
             // %edx = (to - from), so (%edx,%eax,1) is the destination that
             // matches (%eax).  The loop is entered at its test (label 2).
 531         subl     %eax,%edx
 532         jmp      2f
 533         .align   16
 534 1:      fildll   (%eax)
 535         fistpll  (%edx,%eax,1)
 536         addl     $8,%eax
 537 2:      subl     $1,%ecx
 538         jge      1b
 539         ret
 540         .align   16
             // Descending loop: %ecx is both the remaining count and the element
             // index (scaled by 8).  The entry label cla_CopyLeft sits on the
             // loop test, so the decrement happens before the first copy.
 541 3:      fildll   (%eax,%ecx,8)
 542         fistpll  (%edx,%ecx,8)
 543 cla_CopyLeft:
 544         subl     $1,%ecx
 545         jge      3b
 546         ret
 547 
 548         / Support for void Copy::arrayof_conjoint_jshorts(void* from,
 549         /                                                 void* to,
 550         /                                                 size_t count)
             //
             // MMX variant: ascending copies of 33 or more dwords go through a
             // 64-byte-per-iteration movq loop; everything else uses the same
             // dword loop / string move structure as the non-MMX routine.
             // count is in 2-byte elements.  Arguments are read from the stack
             // in the order from, to, count.  %esi and %edi are saved and
             // restored; %eax, %ecx, %edx, the flags and (on the MMX path)
             // %mm0-%mm2 are clobbered.  emms is executed after the MMX loop.
             //
             // Direction selection:
             //   to <= from                    -> ascending  (mmx_acs_CopyRight)
             //   from < to <= from+count*2-2   -> descending (mmx_acs_CopyLeft)
             //   to  > from+count*2-2          -> ascending  (fall through)
 551        .align   16
 552 _mmx_Copy_arrayof_conjoint_jshorts:
 553         pushl    %esi
             // Displacements are <bytes pushed so far> + <offset of the
             // argument at entry>: count, then from, then to.
 554         movl     4+12(%esp),%ecx
 555         pushl    %edi
 556         movl     8+ 4(%esp),%esi
 557         movl     8+ 8(%esp),%edi
             // leal leaves the flags alone, so jbe tests the cmpl (to <= from).
             // %eax = from + count*2 - 2, the address of the last element.
 558         cmpl     %esi,%edi
 559         leal     -2(%esi,%ecx,2),%eax
 560         jbe      mmx_acs_CopyRight
 561         cmpl     %eax,%edi
 562         jbe      mmx_acs_CopyLeft
 563 mmx_acs_CopyRight:
             // %eax keeps the element count; %ecx becomes the dword count
             // (one-operand sarl shifts by one bit).
 564         movl     %ecx,%eax
 565         sarl     %ecx
 566         je       5f
             // 33 or more dwords: take the MMX path at label 3.
 567         cmpl     $33,%ecx
 568         jae      3f
             // Plain dword loop.  %edi = (to - from), so (%edi,%esi,1) is the
             // destination matching (%esi).  Also re-entered from the MMX path
             // to copy whatever dwords are left over.
 569 1:      subl     %esi,%edi 
 570         .align   16
 571 2:      movl     (%esi),%edx
 572         movl     %edx,(%edi,%esi,1)
 573         addl     $4,%esi
 574         subl     $1,%ecx
 575         jnz      2b
 576         addl     %esi,%edi
 577         jmp      5f 
             // A single string move (no rep): copies one dword and advances
             // both pointers by 4; %ecx is adjusted by hand on the next line.
 578 3:      smovl / align to 8 bytes, we know we are 4 byte aligned to start
 579         subl     $1,%ecx
             // Main MMX loop: 64 bytes (16 dwords) per iteration through
             // %mm0-%mm2, with loads and stores interleaved.  %edi is advanced
             // first, which is why the stores use negative displacements.
             // NOTE(review): label 4 is defined ahead of the .align on the same
             // line, so the back-branch lands before any padding -- confirm
             // that is intended.
 580 4:      .align   16
 581         movq     0(%esi),%mm0
 582         addl     $64,%edi
 583         movq     8(%esi),%mm1
 584         subl     $16,%ecx
 585         movq     16(%esi),%mm2
 586         movq     %mm0,-64(%edi)
 587         movq     24(%esi),%mm0
 588         movq     %mm1,-56(%edi)
 589         movq     32(%esi),%mm1
 590         movq     %mm2,-48(%edi)
 591         movq     40(%esi),%mm2
 592         movq     %mm0,-40(%edi)
 593         movq     48(%esi),%mm0
 594         movq     %mm1,-32(%edi)
 595         movq     56(%esi),%mm1
 596         movq     %mm2,-24(%edi)
 597         movq     %mm0,-16(%edi)
 598         addl     $64,%esi
 599         movq     %mm1,-8(%edi)
             // Keep going while at least 16 dwords remain (signed compare).
 600         cmpl     $16,%ecx
 601         jge      4b
 602         emms
             // testl clears the carry flag, so "ja" here is taken exactly when
             // %ecx is non-zero: finish the leftover dwords in the loop at 1.
 603         testl    %ecx,%ecx
 604         ja       1b
             // Low bit of the element count: one trailing element to copy.
 605 5:      andl     $1,%eax
 606         je       7f
 607 6:      movw     (%esi),%dx
 608         movw     %dx,(%edi)
 609 7:      popl     %edi
 610         popl     %esi
 611         ret
             // Descending copy for the overlapping case (to above from); no MMX
             // instructions are used on this path.
 612 mmx_acs_CopyLeft:
             // Direction flag set for the string move; cleared at label 6.
 613         std
             // %edi = to + count*2 - 4, %esi = from + count*2 - 4 (highest
             // dword of each region); %eax keeps the element count.
 614         leal     -4(%edi,%ecx,2),%edi
 615         movl     %eax,%esi
 616         movl     %ecx,%eax
 617         subl     $2,%esi
 618         sarl     %ecx
 619         je       4f
             // More than 32 dwords: string move at label 3; otherwise the loop.
 620         cmpl     $32,%ecx
 621         ja       3f
 622         subl     %esi,%edi
 623         .align   16
 624 2:      movl     (%esi),%edx
 625         movl     %edx,(%edi,%esi,1)
 626         subl     $4,%esi
 627         subl     $1,%ecx
 628         jnz      2b
 629         addl     %esi,%edi
 630         jmp      4f
 631 3:      rep;     smovl
 632 4:      andl     $1,%eax
 633         je       6f
             // The pointers sit one dword below the last dword copied; the one
             // element left (odd count) is 2 bytes above that position.
 634         addl     $2,%esi
 635         addl     $2,%edi
 636 5:      movw     (%esi),%dx
 637         movw     %dx,(%edi)
             // Restore the direction flag before returning.
 638 6:      cld
 639         popl     %edi
 640         popl     %esi
 641         ret
 642 
 643 
 644         / Support for jlong Atomic::cmpxchg(jlong exchange_value,
 645         /                                   volatile jlong* dest,
 646         /                                   jlong compare_value,
 647         /                                   bool is_MP)
 648         / Used only for Solaris/gcc builds
             //
             // Performs a 64-bit compare-and-exchange on *dest with cmpxchg8b:
             // if *dest equals %edx:%eax (compare_value) it is replaced by
             // %ecx:%ebx (exchange_value); otherwise *dest is loaded into
             // %edx:%eax.  Either way %edx:%eax ends up holding the value *dest
             // had before the instruction, and that is what is returned.
             // %ebx and %edi are saved and restored; %ecx and the flags are
             // clobbered.
 649         .align 16
 650 _Atomic_cmpxchg_long_gcc:
 651                                    /  8(%esp) : return PC
 652         pushl    %ebx              /  4(%esp) : old %ebx
 653         pushl    %edi              /  0(%esp) : old %edi
 654         movl     12(%esp), %ebx    / 12(%esp) : exchange_value (low)
 655         movl     16(%esp), %ecx    / 16(%esp) : exchange_value (high)
 656         movl     24(%esp), %eax    / 24(%esp) : compare_value (low)
 657         movl     28(%esp), %edx    / 28(%esp) : compare_value (high)
 658         movl     20(%esp), %edi    / 20(%esp) : dest
             // The is_MP slot is compared as a full 32-bit word.
 659         cmpl     $0, 32(%esp)      / 32(%esp) : is_MP
             // When is_MP is zero, jump past the lock prefix: label 1 is placed
             // after the prefix, directly on the cmpxchg8b instruction.
 660         je       1f
 661         lock
 662 1:      cmpxchg8b (%edi)
 663         popl     %edi
 664         popl     %ebx
 665         ret