1 # 
   2 # Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
   3 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 #
   5 # This code is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License version 2 only, as
   7 # published by the Free Software Foundation.
   8 #
   9 # This code is distributed in the hope that it will be useful, but WITHOUT
  10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 # version 2 for more details (a copy is included in the LICENSE file that
  13 # accompanied this code).
  14 #
  15 # You should have received a copy of the GNU General Public License version
  16 # 2 along with this work; if not, write to the Free Software Foundation,
  17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 #
  19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 # or visit www.oracle.com if you need additional information or have any
  21 # questions.
  22 # 
  23 
  24         
  25         # NOTE WELL!  The _Copy functions are called directly
  26         # from server-compiler-generated code via CallLeafNoFP,
  27         # which means that they *must* either not use floating
  28         # point or use it in the same manner as does the server
  29         # compiler.
  30         
  # Copy entry points exported from this file.  Each symbol is made global
  # and typed as a function.
        .globl  _Copy_conjoint_bytes
        .type   _Copy_conjoint_bytes, %function

        .globl  _Copy_arrayof_conjoint_bytes
        .type   _Copy_arrayof_conjoint_bytes, %function

        .globl  _Copy_disjoint_words
        .type   _Copy_disjoint_words, %function

        .globl  _Copy_conjoint_words
        .type   _Copy_conjoint_words, %function

        .globl  _Copy_conjoint_jshorts_atomic
        .type   _Copy_conjoint_jshorts_atomic, %function

        .globl  _Copy_arrayof_conjoint_jshorts
        .type   _Copy_arrayof_conjoint_jshorts, %function

        .globl  _Copy_conjoint_jints_atomic
        .type   _Copy_conjoint_jints_atomic, %function

        .globl  _Copy_arrayof_conjoint_jints
        .type   _Copy_arrayof_conjoint_jints, %function

        .globl  _Copy_conjoint_jlongs_atomic
        .type   _Copy_conjoint_jlongs_atomic, %function

        .globl  _Copy_arrayof_conjoint_jlongs
        .type   _Copy_arrayof_conjoint_jlongs, %function
  51 
  # SpinPause
  #
  # Returns to the caller immediately.  No pause or yield hint is issued
  # and no register, r0 included, is modified.
        .text
        .globl  SpinPause
        .type   SpinPause, %function
SpinPause:
        bx      lr
  57 
  # Support for void Copy::conjoint_bytes(void* from,
  #                                       void* to,
  #                                       size_t count)
  #
  # No copy loop is provided: the body is a single supervisor call
  # (svc is the current spelling of swi) with no return instruction
  # after it.
  # NOTE(review): 0x9f0001 looks like the OABI encoding of the ARM-private
  # "breakpoint" system call, i.e. a deliberate trap for an unimplemented
  # routine -- confirm.
_Copy_conjoint_bytes:
        svc     0x9f0001
  63 
  # Support for void Copy::arrayof_conjoint_bytes(void* from,
  #                                               void* to,
  #                                               size_t count)
  #
  # No copy loop is provided: the body is a single supervisor call
  # (svc is the current spelling of swi) with no return instruction
  # after it.
  # NOTE(review): 0x9f0001 looks like the OABI encoding of the ARM-private
  # "breakpoint" system call, i.e. a deliberate trap for an unimplemented
  # routine -- confirm.
_Copy_arrayof_conjoint_bytes:
        svc     0x9f0001
  69 
  70 
  # Support for void Copy::disjoint_words(void* from,
  #                                       void* to,
  #                                       size_t count)
  #
  # Register roles, as used by the code below:
  #   r0 - address written to   (advanced as data is stored)
  #   r1 - address read from    (advanced as data is loaded)
  #   r2 - number of bytes still to copy
  # The registers r3-r9 and r12 carry the data; they are saved on entry
  # and restored before returning.  Data is always moved in ascending
  # address order.
  # NOTE(review): that is (to, from, byte count), which is neither the
  # argument order nor the unit suggested by the C++ signature above --
  # presumably the C++ wrapper swaps the pointers and scales the count;
  # confirm against the caller.
_Copy_disjoint_words:
        push    {r3-r9, r12}

        # A zero-length request only restores the registers and returns.
        cmp     r2, #0
        beq     .Ldw_exit

        pld     [r1]
        # Requests of 12 bytes or fewer go straight to the word-by-word tail.
        cmp     r2, #12
        ble     .Ldw_tail

        .p2align 3
.Ldw_copy32:
        # Move 32 bytes per pass.  Only the subs sets flags inside the loop,
        # so the closing bgt still tests the count left after this pass.
        subs    r2, r2, #32
        blt     .Ldw_copy32_end
        ldmia   r1!, {r3-r9, r12}
        nop
        pld     [r1]
        stmia   r0!, {r3-r9, r12}
        bgt     .Ldw_copy32
.Ldw_copy32_end:
        # When the count went negative, add the 32 back; the Z flag then
        # says whether anything is left.  When it was exactly zero the add
        # is skipped and Z is still set from the subs.
        addlts  r2, r2, #32
        beq     .Ldw_exit
        cmp     r2, #16
        blt     .Ldw_tail
        ldmia   r1!, {r3-r6}
        # No S suffix on subge: the beq below still reflects "r2 == 16".
        subge   r2, r2, #16
        stmia   r0!, {r3-r6}
        beq     .Ldw_exit
.Ldw_tail:
        # One word is always moved; a second when r2 >= 8 and a third when
        # r2 > 8.  All loads are issued before the first store.
        cmp     r2, #8
        ldr     r7, [r1], #4
        ldrge   r8, [r1], #4
        ldrgt   r9, [r1], #4
        str     r7, [r0], #4
        strge   r8, [r0], #4
        strgt   r9, [r0], #4

.Ldw_exit:
        pop     {r3-r9, r12}
        bx      lr
 114 
 115 
 # Support for void Copy::conjoint_words(void* from,
 #                                       void* to,
 #                                       size_t count)
 #
 # Register roles, as used by the code below:
 #   r0 - address written to, r1 - address read from,
 #   r2 - number of bytes still to copy
 # The registers r3-r9 and r12 carry the data; they are saved on entry
 # and restored before returning.
 # Data is moved in ascending order unless r0 lies above r1 by less than
 # the byte count, in which case it is moved in descending order.
 # Requests of 12 bytes or fewer always use the ascending tail, which
 # loads every word before storing any.
 # NOTE(review): that is (to, from, byte count), which is neither the
 # argument order nor the unit suggested by the C++ signature above --
 # presumably the C++ wrapper swaps the pointers and scales the count;
 # confirm against the caller.
_Copy_conjoint_words:
        push    {r3-r9, r12}

        # A zero-length request only restores the registers and returns.
        cmp     r2, #0
        beq     .Lcw_exit

        pld     [r1]
        cmp     r2, #12
        ble     .Lcw_fwd_tail

        # After the subs, hi means r0 is above r1 (unsigned).  Only then is
        # the byte count compared with that distance; a count larger than
        # the distance selects the descending copy.
        subs    r3, r0, r1
        cmphi   r2, r3
        bhi     .Lcw_bwd
        .p2align 3
.Lcw_fwd_32:
        # Ascending copy, 32 bytes per pass.  Only the subs sets flags in
        # the loop, so the closing bgt tests the count left after the pass.
        subs    r2, r2, #32
        blt     .Lcw_fwd_32_end
        ldmia   r1!, {r3-r9, r12}
        nop
        pld     [r1]
        stmia   r0!, {r3-r9, r12}
        bgt     .Lcw_fwd_32
.Lcw_fwd_32_end:
        # Undo the last subtraction when it went negative; Z set means
        # nothing is left.
        addlts  r2, r2, #32
        beq     .Lcw_exit
        cmp     r2, #16
        blt     .Lcw_fwd_tail
        ldmia   r1!, {r3-r6}
        # No S suffix on subge: the beq below still reflects "r2 == 16".
        subge   r2, r2, #16
        stmia   r0!, {r3-r6}
        beq     .Lcw_exit
.Lcw_fwd_tail:
        # One word always, a second when r2 >= 8, a third when r2 > 8.
        cmp     r2, #8
        ldr     r7, [r1], #4
        ldrge   r8, [r1], #4
        ldrgt   r9, [r1], #4
        str     r7, [r0], #4
        strge   r8, [r0], #4
        strgt   r9, [r0], #4
        b       .Lcw_exit

        # Source and destination overlap: copy in descending order,
        # starting from the end of both regions.
.Lcw_bwd:
        add     r1, r1, r2
        pld     [r1, #-32]
        add     r0, r0, r2
        .p2align 3
.Lcw_bwd_32:
        subs    r2, r2, #32
        blt     .Lcw_bwd_32_end
        ldmdb   r1!, {r3-r9, r12}
        nop
        pld     [r1, #-32]
        stmdb   r0!, {r3-r9, r12}
        bgt     .Lcw_bwd_32
.Lcw_bwd_32_end:
        addlts  r2, r2, #32
        beq     .Lcw_exit
        cmp     r2, #16
        blt     .Lcw_bwd_tail
        ldmdb   r1!, {r3-r6}
        # No S suffix on subge: the beq below still reflects "r2 == 16".
        subge   r2, r2, #16
        stmdb   r0!, {r3-r6}
        beq     .Lcw_exit
.Lcw_bwd_tail:
        # Same one/two/three word selection as the ascending tail, using
        # pre-decrement addressing.
        cmp     r2, #8
        ldr     r7, [r1, #-4]!
        ldrge   r8, [r1, #-4]!
        ldrgt   r9, [r1, #-4]!
        str     r7, [r0, #-4]!
        strge   r8, [r0, #-4]!
        strgt   r9, [r0, #-4]!

.Lcw_exit:
        pop     {r3-r9, r12}
        bx      lr
 195 
 # Support for void Copy::conjoint_jshorts_atomic(void* from,
 #                                                void* to,
 #                                                size_t count)
 #
 # Register roles, as used by the code below:
 #   r0 - address written to, r1 - address read from,
 #   r2 - number of bytes still to copy
 # The registers r3-r9 and r12 carry the data; they are saved on entry
 # and restored before returning.
 # Data is moved in ascending order unless r0 lies above r1 by less than
 # the byte count, in which case it is moved in descending order.  Each
 # direction has a path for word-aligned pointers and a path that merges
 # halfwords when only the destination can be word aligned.
 # NOTE(review): that is (to, from, byte count), which is neither the
 # argument order nor the unit suggested by the C++ signature above --
 # presumably the C++ wrapper swaps the pointers and scales the count;
 # confirm against the caller.
 # NOTE(review): the lsr/lsl #16 merging assumes the halfword at the lower
 # address sits in the low 16 bits of a loaded word (little-endian) --
 # confirm for the targets this file is built for.
_Copy_conjoint_jshorts_atomic:
        push    {r3-r9, r12}

        # A zero-length request only restores the registers and returns.
        cmp     r2, #0
        beq     .Lcs_exit

        # After the subs, hi means r0 is above r1 (unsigned).  Only then is
        # the byte count compared with that distance; a count larger than
        # the distance selects the descending copy.
        subs    r3, r0, r1
        cmphi   r2, r3
        bhi     .Lcs_bwd

        pld     [r1]

        # Pick the ascending path from the low two address bits.
        ands    r3, r0, #3
        bne     .Lcs_fwd_dst_unal
        ands    r3, r1, #3
        bne     .Lcs_fwd_src_unal

        # Both pointers word aligned: 32 bytes per pass.
        .p2align 3
.Lcs_fwd_32:
        subs    r2, r2, #32
        blt     .Lcs_fwd_32_end
        ldmia   r1!, {r3-r9, r12}
        nop
        pld     [r1]
        stmia   r0!, {r3-r9, r12}
        bgt     .Lcs_fwd_32
.Lcs_fwd_32_end:
        # Undo the last subtraction when it went negative; Z set means
        # nothing is left.
        addlts  r2, r2, #32
        beq     .Lcs_exit
        # Set r6 to the number of whole 8-byte groups left; Z is set when
        # there are none, which the beq at the loop head tests.
        movs    r6, r2, lsr #3
        .p2align 3
.Lcs_fwd_8:
        beq     .Lcs_fwd_tail
        ldmia   r1!, {r4-r5}
        subs    r6, r6, #1
        stmia   r0!, {r4-r5}
        bgt     .Lcs_fwd_8

.Lcs_fwd_tail:
        # Fewer than 8 bytes left: one halfword always, a second when
        # r2 >= 4, a third when r2 > 4.
        ands    r2, r2, #7
        beq     .Lcs_exit
        cmp     r2, #4
        ldrh    r3, [r1], #2
        ldrgeh  r4, [r1], #2
        ldrgth  r5, [r1], #2
        strh    r3, [r0], #2
        strgeh  r4, [r0], #2
        strgth  r5, [r0], #2
        b       .Lcs_exit

        # Destination not word aligned: move a single halfword first.
.Lcs_fwd_dst_unal:
        ldrh    r3, [r1], #2
        subs    r2, r2, #2
        strh    r3, [r0], #2
        beq     .Lcs_exit

        # Check whether the source is not aligned either.
        ands    r3, r1, #3
        beq     .Lcs_fwd_32

.Lcs_fwd_src_unal:
        cmp     r2, #16
        blt     .Lcs_fwd_unal_8

        # Round the source pointer down to a word boundary and load that
        # word; its upper halfword is the first one to be copied.
        bic     r1, r1, #3
        ldr     r7, [r1], #4

        # Destination aligned, source not.  Set r8 to the number of
        # 16-byte groups.  Every output word is the upper halfword of one
        # source word joined with the lower halfword of the next; r7
        # carries the pending upper halfword from pass to pass.
        mov     r8, r2, lsr #4
        .p2align 3
.Lcs_fwd_unal_16:
        mov     r3, r7, lsr #16
        ldmia   r1!, {r4-r7}
        orr     r3, r3, r4, lsl #16
        mov     r4, r4, lsr #16
        pld     [r1]
        orr     r4, r4, r5, lsl #16
        mov     r5, r5, lsr #16
        orr     r5, r5, r6, lsl #16
        mov     r6, r6, lsr #16
        orr     r6, r6, r7, lsl #16
        stmia   r0!, {r3-r6}
        subs    r8, r8, #1
        bgt     .Lcs_fwd_unal_16
        ands    r2, r2, #0xf
        beq     .Lcs_exit
        # Step back to the halfword still pending in the upper half of r7.
        sub     r1, r1, #2

.Lcs_fwd_unal_8:
        cmp     r2, #8
        blt     .Lcs_fwd_unal_tail
        # Read 8 bytes as halfword, word, halfword and merge them into two
        # words for the store.
        ldrh    r4, [r1], #2
        ldr     r5, [r1], #4
        ldrh    r6, [r1], #2
        orr     r4, r4, r5, lsl #16
        mov     r5, r5, lsr #16
        orr     r5, r5, r6, lsl #16
        subs    r2, r2, #8
        stmia   r0!, {r4-r5}
.Lcs_fwd_unal_tail:
        # Reached either from the subs above (Z set when nothing is left)
        # or from the blt, in which case Z is clear.
        beq     .Lcs_exit
        cmp     r2, #4
        ldrh    r3, [r1], #2
        ldrgeh  r4, [r1], #2
        ldrgth  r5, [r1], #2
        strh    r3, [r0], #2
        strgeh  r4, [r0], #2
        strgth  r5, [r0], #2
        b       .Lcs_exit

        # Source and destination overlap: copy in descending order,
        # starting from the end of both regions.
.Lcs_bwd:
        add     r1, r1, r2
        pld     [r1, #-32]
        add     r0, r0, r2

        ands    r3, r0, #3
        bne     .Lcs_bwd_dst_unal
        ands    r3, r1, #3
        bne     .Lcs_bwd_src_unal
        .p2align 3
.Lcs_bwd_32:
        subs    r2, r2, #32
        blt     .Lcs_bwd_32_end
        ldmdb   r1!, {r3-r9, r12}
        nop
        pld     [r1, #-32]
        stmdb   r0!, {r3-r9, r12}
        bgt     .Lcs_bwd_32
.Lcs_bwd_32_end:
        addlts  r2, r2, #32
        beq     .Lcs_exit
        # In the three steps below the sub has no S suffix, so each beq
        # still tests the preceding cmp (an exact 24, 16 or 8 bytes left).
        cmp     r2, #24
        blt     .Lcs_bwd_16
        ldmdb   r1!, {r3-r8}
        sub     r2, r2, #24
        stmdb   r0!, {r3-r8}
        beq     .Lcs_exit
.Lcs_bwd_16:
        cmp     r2, #16
        blt     .Lcs_bwd_8
        ldmdb   r1!, {r3-r6}
        sub     r2, r2, #16
        stmdb   r0!, {r3-r6}
        beq     .Lcs_exit
.Lcs_bwd_8:
        cmp     r2, #8
        blt     .Lcs_bwd_tail
        ldmdb   r1!, {r3-r4}
        sub     r2, r2, #8
        stmdb   r0!, {r3-r4}
        beq     .Lcs_exit

.Lcs_bwd_tail:
        # One halfword always, a second when r2 >= 4, a third when r2 > 4.
        cmp     r2, #4
        ldrh    r3, [r1, #-2]!
        ldrgeh  r4, [r1, #-2]!
        ldrgth  r5, [r1, #-2]!
        strh    r3, [r0, #-2]!
        strgeh  r4, [r0, #-2]!
        strgth  r5, [r0, #-2]!
        b       .Lcs_exit

        # Destination end not word aligned: move a single halfword first.
        # The count is not tested for zero at this point.
.Lcs_bwd_dst_unal:
        ldrh    r3, [r1, #-2]!
        strh    r3, [r0, #-2]!
        sub     r2, r2, #2
        # Check source alignment as well.
        ands    r3, r1, #3
        beq     .Lcs_bwd_32

        # Source end not word aligned.  Round r1 down so that it addresses
        # the word whose lower halfword is the next one to copy.
.Lcs_bwd_src_unal:
        bic     r1, r1, #3
        .p2align 3
.Lcs_bwd_unal_16:
        subs    r2, r2, #16
        blt     .Lcs_bwd_unal_16_end
        # Keep the word at r1 in r3 (its lower halfword completes the top
        # output word), then load the four words below it.
        ldr     r7, [r1]
        mov     r3, r7
        ldmdb   r1!, {r4-r7}
        mov     r4, r4, lsr #16
        orr     r4, r4, r5, lsl #16
        pld     [r1, #-32]
        mov     r5, r5, lsr #16
        orr     r5, r5, r6, lsl #16
        mov     r6, r6, lsr #16
        orr     r6, r6, r7, lsl #16
        mov     r7, r7, lsr #16
        orr     r7, r7, r3, lsl #16
        stmdb   r0!, {r4-r7}
        # Flags still come from the subs at the top of the loop.
        bgt     .Lcs_bwd_unal_16
        beq     .Lcs_exit
.Lcs_bwd_unal_16_end:
        addlts  r2, r2, #16
        ldr     r3, [r1]
        # With 10 or more bytes left, move 8 of them as two merged words.
        cmp     r2, #10
        blt     .Lcs_bwd_unal_2
        ldmdb   r1!, {r4-r5}
        mov     r6, r4, lsr #16
        orr     r6, r6, r5, lsl #16
        mov     r7, r5, lsr #16
        orr     r7, r7, r3, lsl #16
        stmdb   r0!, {r6-r7}
        sub     r2, r2, #8
        .p2align 3
.Lcs_bwd_unal_2:
        # Remaining halfwords one at a time.  The load is post-indexed
        # because r1 already addresses the halfword to copy.
        subs    r2, r2, #2
        ldrh    r3, [r1], #-2
        strh    r3, [r0, #-2]!
        bgt     .Lcs_bwd_unal_2

.Lcs_exit:
        pop     {r3-r9, r12}
        bx      lr
 418 
 419 
 # Support for void Copy::arrayof_conjoint_jshorts(void* from,
 #                                                 void* to,
 #                                                 size_t count)
 #
 # No copy loop is provided: the body is a single supervisor call
 # (svc is the current spelling of swi) with no return instruction
 # after it.
 # NOTE(review): 0x9f0001 looks like the OABI encoding of the ARM-private
 # "breakpoint" system call, i.e. a deliberate trap for an unimplemented
 # routine -- confirm.
_Copy_arrayof_conjoint_jshorts:
        svc     0x9f0001
 425 
 # Support for void Copy::conjoint_jints_atomic(void* from,
 #                                              void* to,
 #                                              size_t count)
 #
 # Both entry points below share one body.  No copy loop is provided: the
 # body is a single supervisor call (svc is the current spelling of swi)
 # with no return instruction after it.
 # NOTE(review): 0x9f0001 looks like the OABI encoding of the ARM-private
 # "breakpoint" system call, i.e. a deliberate trap for an unimplemented
 # routine -- confirm.
_Copy_conjoint_jints_atomic:
_Copy_arrayof_conjoint_jints:
        svc     0x9f0001
 432         
 # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
 #                                               jlong* to,
 #                                               size_t count)
 #
 # Both entry points below share one body.
 # Register roles, as used by the code below:
 #   r0 - address written to, r1 - address read from,
 #   r2 - number of bytes still to copy
 # The registers r3-r9 and r12 carry the data; they are saved on entry
 # and restored before returning.
 # Data is moved in ascending order unless r0 lies above r1 by less than
 # the byte count, in which case it is moved in descending order.
 # Requests of 24 bytes or fewer skip that test: they are moved with one
 # load-multiple followed by one store-multiple.
 # NOTE(review): that is (to, from, byte count), which is neither the
 # argument order nor the unit suggested by the C++ signature above --
 # presumably the C++ wrapper swaps the pointers and scales the count;
 # confirm against the caller.
_Copy_conjoint_jlongs_atomic:
_Copy_arrayof_conjoint_jlongs:
        push    {r3-r9, r12}

        # A zero-length request only restores the registers and returns.
        cmp     r2, #0
        beq     .Lcl_exit

        pld     [r1]
        cmp     r2, #24
        ble     .Lcl_small

        # After the subs, hi means r0 is above r1 (unsigned).  Only then is
        # the byte count compared with that distance; a count larger than
        # the distance selects the descending copy.
        subs    r3, r0, r1
        cmphi   r2, r3
        bhi     .Lcl_bwd
        .p2align 3
.Lcl_fwd_32:
        # Ascending copy, 32 bytes per pass.  Only the subs sets flags in
        # the loop, so the closing bgt tests the count left after the pass.
        subs    r2, r2, #32
        blt     .Lcl_fwd_32_end
        ldmia   r1!, {r3-r9, r12}
        nop
        pld     [r1]
        stmia   r0!, {r3-r9, r12}
        bgt     .Lcl_fwd_32
.Lcl_fwd_32_end:
        # Undo the last subtraction when it went negative; Z set means
        # nothing is left.
        addlts  r2, r2, #32
        beq     .Lcl_exit
.Lcl_small:
        # Below 16 bytes: one register pair.  Exactly 16: two pairs.
        # Above 16: three pairs.
        cmp     r2, #16
        blt     .Lcl_fwd_8
        bgt     .Lcl_fwd_24
        ldmia   r1!, {r3-r6}
        stmia   r0!, {r3-r6}
        b       .Lcl_exit
.Lcl_fwd_8:
        ldmia   r1!, {r3-r4}
        stmia   r0!, {r3-r4}
        b       .Lcl_exit
.Lcl_fwd_24:
        ldmia   r1!, {r3-r8}
        stmia   r0!, {r3-r8}
        b       .Lcl_exit

        # Source and destination overlap: copy in descending order,
        # starting from the end of both regions.
.Lcl_bwd:
        add     r1, r1, r2
        pld     [r1, #-32]
        add     r0, r0, r2
        .p2align 3
.Lcl_bwd_32:
        subs    r2, r2, #32
        blt     .Lcl_bwd_32_end
        ldmdb   r1!, {r3-r9, r12}
        nop
        # Prefetch the block below r1, which is the one the next pass
        # loads, as the other descending loops in this file do.
        pld     [r1, #-32]
        stmdb   r0!, {r3-r9, r12}
        bgt     .Lcl_bwd_32
.Lcl_bwd_32_end:
        addlts  r2, r2, #32
        beq     .Lcl_exit
        # Same one/two/three pair selection as the ascending case.
        cmp     r2, #16
        blt     .Lcl_bwd_8
        bgt     .Lcl_bwd_24
        ldmdb   r1!, {r3-r6}
        stmdb   r0!, {r3-r6}
        b       .Lcl_exit
.Lcl_bwd_8:
        ldmdb   r1!, {r3-r4}
        stmdb   r0!, {r3-r4}
        b       .Lcl_exit
.Lcl_bwd_24:
        ldmdb   r1!, {r3-r8}
        stmdb   r0!, {r3-r8}

.Lcl_exit:
        pop     {r3-r9, r12}
        bx      lr
 512 
 513