4 #
5 # This code is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License version 2 only, as
7 # published by the Free Software Foundation.
8 #
9 # This code is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 # version 2 for more details (a copy is included in the LICENSE file that
13 # accompanied this code).
14 #
15 # You should have received a copy of the GNU General Public License version
16 # 2 along with this work; if not, write to the Free Software Foundation,
17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 #
19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 # or visit www.oracle.com if you need additional information or have any
21 # questions.
22 #
23
24
25 # NOTE WELL! The _Copy functions are called directly
26 # from server-compiler-generated code via CallLeafNoFP,
27 # which means that they *must* either not use floating
28 # point or use it in the same manner as does the server
29 # compiler.
30
31 .globl _Copy_arrayof_conjoint_bytes
32 .globl _Copy_arrayof_conjoint_jshorts
33 .globl _Copy_conjoint_jshorts_atomic
34 .globl _Copy_arrayof_conjoint_jints
35 .globl _Copy_conjoint_jints_atomic
36 .globl _Copy_arrayof_conjoint_jlongs
37 .globl _Copy_conjoint_jlongs_atomic
38
39 .text
40
# int SafeFetch32(int* adr /* rdi */, int errValue /* esi */)
# Load a 32-bit value from an address that may be unmapped: the error
# value is preloaded into eax so that, if the load at Fetch32PFI faults,
# execution can resume at Fetch32Resume with errValue already in place.
# NOTE(review): Fetch32PFI/Fetch32Resume are exported via .globl,
# presumably so the VM's fault handler can match the faulting PC and
# redirect to the resume point -- confirm against the platform
# signal-handling code.
41 .globl SafeFetch32, Fetch32PFI, Fetch32Resume
42 .align 16
43 .type SafeFetch32,@function
44 // Prototype: int SafeFetch32 (int * Adr, int ErrValue)
45 SafeFetch32:
46 movl %esi, %eax # eax = errValue (default result)
47 Fetch32PFI:
48 movl (%rdi), %eax # the potentially-faulting load
49 Fetch32Resume:
50 ret
51
# intptr_t SafeFetchN(intptr_t* adr /* rdi */, intptr_t errValue /* rsi */)
# 64-bit companion to SafeFetch32: preload the error value, then perform
# the potentially-faulting load between FetchNPFI and FetchNResume.
# NOTE(review): FetchNPFI/FetchNResume are exported, presumably for the
# fault handler's resume logic -- confirm against the signal handler.
52 .globl SafeFetchN, FetchNPFI, FetchNResume
53 .align 16
54 .type SafeFetchN,@function
55 // Prototype: intptr_t SafeFetchN (intptr_t * Adr, intptr_t ErrValue)
56 SafeFetchN:
57 movq %rsi, %rax # rax = errValue (default result)
58 FetchNPFI:
59 movq (%rdi), %rax # the potentially-faulting load
60 FetchNResume:
61 ret
62
# int SpinPause()
# Spin-wait hint for busy loops: "rep; nop" is the encoding of the
# PAUSE instruction.  Always returns 1 in rax.
63 .globl SpinPause
64 .align 16
65 .type SpinPause,@function
66 SpinPause:
67 rep # "rep nop" encodes PAUSE
68 nop
69 movq $1, %rax # return value 1
70 ret
71
72 # Support for void Copy::arrayof_conjoint_bytes(void* from,
73 # void* to,
74 # size_t count)
75 # rdi - from
76 # rsi - to
77 # rdx - count, treated as ssize_t
78 #
79 .p2align 4,,15
80 .type _Copy_arrayof_conjoint_bytes,@function
# Conjoint (possibly overlapping) byte copy: rdi = from, rsi = to,
# rdx = byte count.  The copy direction is chosen from the pointer
# order so overlapping regions are handled; the bulk is moved as
# qwords with smaller tail copies.
81 _Copy_arrayof_conjoint_bytes:
82 movq %rdx,%r8 # byte count
83 shrq $3,%rdx # qword count
84 cmpq %rdi,%rsi
85 leaq -1(%rdi,%r8,1),%rax # from + bcount*1 - 1
86 jbe acb_CopyRight # to <= from: ascending copy is safe
87 cmpq %rax,%rsi
88 jbe acb_CopyLeft # to inside source range: copy descending
89 acb_CopyRight:
90 leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
91 leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
92 negq %rdx # rdx = -qcount, counts up toward 0
93 jmp 7f # NOTE(review): label 7 falls in the missing lines below
94 .p2align 4,,15
95 1: movq 8(%rax,%rdx,8),%rsi # ascending qword copy loop
96 movq %rsi,8(%rcx,%rdx,8)
97 addq $1,%rdx
98 jnz 1b
99 2: testq $4,%r8 # check for trailing dword
100 jz 3f
101 movl 8(%rax),%esi # copy trailing dword
# NOTE(review): original lines 102-161 are absent from this excerpt
# (the dword store, word/byte tails, acb_CopyLeft and labels 3..7 fall
# in the gap); the three lines below are the tail of a later loop.
162 addq $4,%rdx
163 jg 3b
164 ret
165
166 # Support for void Copy::arrayof_conjoint_jshorts(void* from,
167 # void* to,
168 # size_t count)
169 # Equivalent to
170 # conjoint_jshorts_atomic
171 #
172 # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
173 # let the hardware handle it. The two or four words within dwords
174 # or qwords that span cache line boundaries will still be loaded
175 # and stored atomically.
176 #
177 # rdi - from
178 # rsi - to
179 # rdx - count, treated as ssize_t
180 #
181 .p2align 4,,15
182 .type _Copy_arrayof_conjoint_jshorts,@function
183 .type _Copy_conjoint_jshorts_atomic,@function
# Conjoint jshort (16-bit element) copy: rdi = from, rsi = to,
# rdx = element count.  Direction is chosen from the pointer order;
# the bulk is moved as qwords so elements stay atomic (see the header
# comment preceding this routine in the original file).
184 _Copy_arrayof_conjoint_jshorts:
185 _Copy_conjoint_jshorts_atomic:
186 movq %rdx,%r8 # word count
187 shrq $2,%rdx # qword count
188 cmpq %rdi,%rsi
189 leaq -2(%rdi,%r8,2),%rax # from + wcount*2 - 2
190 jbe acs_CopyRight # to <= from: ascending copy is safe
191 cmpq %rax,%rsi
192 jbe acs_CopyLeft # to inside source range: copy descending
193 acs_CopyRight:
194 leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
195 leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
196 negq %rdx # rdx = -qcount, counts up toward 0
197 jmp 6f # NOTE(review): label 6 falls in the missing lines below
198 1: movq 8(%rax,%rdx,8),%rsi # ascending qword copy loop
199 movq %rsi,8(%rcx,%rdx,8)
200 addq $1,%rdx
201 jnz 1b
202 2: testq $2,%r8 # check for trailing dword
203 jz 3f
204 movl 8(%rax),%esi # copy trailing dword
205 movl %esi,8(%rcx)
# NOTE(review): original lines 206-251 are absent from this excerpt
# (trailing-word tail, acs_CopyLeft and labels 3..6 fall in the gap);
# the lines below are the tail of a later loop.
252 jge 3b
253 addq $4,%rdx
254 jg 2b
255 ret
256
257 # Support for void Copy::arrayof_conjoint_jints(jint* from,
258 # jint* to,
259 # size_t count)
260 # Equivalent to
261 # conjoint_jints_atomic
262 #
263 # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
264 # the hardware handle it. The two dwords within qwords that span
265 # cache line boundaries will still be loaded and stored atomically.
266 #
267 # rdi - from
268 # rsi - to
269 # rdx - count, treated as ssize_t
270 #
271 .p2align 4,,15
272 .type _Copy_arrayof_conjoint_jints,@function
273 .type _Copy_conjoint_jints_atomic,@function
# Conjoint jint (32-bit element) copy: rdi = from, rsi = to,
# rdx = element count.  Direction is chosen from the pointer order;
# the bulk is moved as qwords with a trailing dword.
274 _Copy_arrayof_conjoint_jints:
275 _Copy_conjoint_jints_atomic:
276 movq %rdx,%r8 # dword count
277 shrq %rdx # qword count
278 cmpq %rdi,%rsi
279 leaq -4(%rdi,%r8,4),%rax # from + dcount*4 - 4
280 jbe aci_CopyRight # to <= from: ascending copy is safe
281 cmpq %rax,%rsi
282 jbe aci_CopyLeft # to inside source range: copy descending
283 aci_CopyRight:
284 leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
285 leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
286 negq %rdx # rdx = -qcount, counts up toward 0
287 jmp 5f # NOTE(review): label 5 falls in the missing lines below
288 .p2align 4,,15
289 1: movq 8(%rax,%rdx,8),%rsi # ascending qword copy loop
290 movq %rsi,8(%rcx,%rdx,8)
291 addq $1,%rdx
292 jnz 1b
293 2: testq $1,%r8 # check for trailing dword
294 jz 3f
295 movl 8(%rax),%esi # copy trailing dword
# NOTE(review): original lines 296-330 are absent from this excerpt
# (dword store, aci_CopyLeft entry and labels 4/5 fall in the gap);
# the lines below belong to the descending-copy path.
331 movq %rcx,(%rsi,%rdx,8)
332 3: subq $4,%rdx
333 jge 2b
334 addq $4,%rdx
335 jg 1b
336 ret
337
338 # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
339 # jlong* to,
340 # size_t count)
341 # Equivalent to
342 # conjoint_jlongs_atomic
343 # arrayof_conjoint_oops
344 # conjoint_oops_atomic
345 #
346 # rdi - from
347 # rsi - to
348 # rdx - count, treated as ssize_t
349 #
350 .p2align 4,,15
351 .type _Copy_arrayof_conjoint_jlongs,@function
352 .type _Copy_conjoint_jlongs_atomic,@function
# Conjoint jlong/oop (64-bit element) copy: rdi = from, rsi = to,
# rdx = element count.  Each element is one qword move, so element
# stores are atomic.  Direction is chosen from the pointer order.
353 _Copy_arrayof_conjoint_jlongs:
354 _Copy_conjoint_jlongs_atomic:
355 cmpq %rdi,%rsi
356 leaq -8(%rdi,%rdx,8),%rax # from + count*8 - 8
357 jbe acl_CopyRight # to <= from: ascending copy is safe
358 cmpq %rax,%rsi
359 jbe acl_CopyLeft # to inside source range: copy descending
360 acl_CopyRight:
361 leaq -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
362 negq %rdx # rdx = -count, counts up toward 0
363 jmp 3f # NOTE(review): label 3 is not visible in this excerpt
364 1: movq 8(%rax,%rdx,8),%rsi # ascending qword copy loop
365 movq %rsi,8(%rcx,%rdx,8)
366 addq $1,%rdx
367 jnz 1b
368 ret
369 .p2align 4,,15
370 2: movq -24(%rax,%rdx,8),%rsi # unrolled copy loop body
371 movq %rsi,-24(%rcx,%rdx,8)
372 movq -16(%rax,%rdx,8),%rsi
373 movq %rsi,-16(%rcx,%rdx,8)
374 movq -8(%rax,%rdx,8),%rsi
# NOTE(review): the excerpt is cut off here, midway through the
# unrolled loop body; the remainder of this routine is not visible.
# NOTE(review): extraction artifact ("|") removed; a second, Darwin/cpp
# variant of this file follows below.
4 #
5 # This code is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License version 2 only, as
7 # published by the Free Software Foundation.
8 #
9 # This code is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 # version 2 for more details (a copy is included in the LICENSE file that
13 # accompanied this code).
14 #
15 # You should have received a copy of the GNU General Public License version
16 # 2 along with this work; if not, write to the Free Software Foundation,
17 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 #
19 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 # or visit www.oracle.com if you need additional information or have any
21 # questions.
22 #
23
24 #ifdef __APPLE__
25 # Darwin uses _ prefixed global symbols
# (Mach-O also lacks the ELF .type directive, so ELF_TYPE expands to
# nothing on Darwin and to ".type name,description" elsewhere)
26 #define SYMBOL(s) _ ## s
27 #define ELF_TYPE(name, description)
28 #else
29 #define SYMBOL(s) s
30 #define ELF_TYPE(name, description) .type name,description
31 #endif
32
33 # NOTE WELL! The _Copy functions are called directly
34 # from server-compiler-generated code via CallLeafNoFP,
35 # which means that they *must* either not use floating
36 # point or use it in the same manner as does the server
37 # compiler.
38
39 .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
40 .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
41 .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
42 .globl SYMBOL(_Copy_arrayof_conjoint_jints)
43 .globl SYMBOL(_Copy_conjoint_jints_atomic)
44 .globl SYMBOL(_Copy_arrayof_conjoint_jlongs)
45 .globl SYMBOL(_Copy_conjoint_jlongs_atomic)
46
47 .text
48
# int SafeFetch32(int* adr /* rdi */, int errValue /* esi */)
# Load a 32-bit value from an address that may be unmapped: the error
# value is preloaded into eax so that, if the load at Fetch32PFI faults,
# execution can resume at Fetch32Resume with errValue already in place.
# NOTE(review): the PFI/Resume labels are exported, presumably for the
# VM's fault handler -- confirm against the platform signal handler.
49 .globl SYMBOL(SafeFetch32), SYMBOL(Fetch32PFI), SYMBOL(Fetch32Resume)
50 #ifdef __APPLE__
51 .align 4 # Darwin .align takes a power-of-two exponent (2^4 = 16)
52 #else
53 .align 16
54 #endif
55 ELF_TYPE(SafeFetch32,@function)
56 // Prototype: int SafeFetch32 (int * Adr, int ErrValue)
57 SYMBOL(SafeFetch32):
58 movl %esi, %eax # eax = errValue (default result)
59 SYMBOL(Fetch32PFI):
60 movl (%rdi), %eax # the potentially-faulting load
61 SYMBOL(Fetch32Resume):
62 ret
63
# intptr_t SafeFetchN(intptr_t* adr /* rdi */, intptr_t errValue /* rsi */)
# 64-bit companion to SafeFetch32: preload the error value, then perform
# the potentially-faulting load between FetchNPFI and FetchNResume.
64 .globl SYMBOL(SafeFetchN), SYMBOL(FetchNPFI), SYMBOL(FetchNResume)
65 #ifdef __APPLE__
66 .align 4 # Darwin .align takes a power-of-two exponent (2^4 = 16)
67 #else
68 .align 16
69 #endif
70 ELF_TYPE(SafeFetchN,@function)
71 // Prototype: intptr_t SafeFetchN (intptr_t * Adr, intptr_t ErrValue)
72 SYMBOL(SafeFetchN):
73 movq %rsi, %rax # rax = errValue (default result)
74 SYMBOL(FetchNPFI):
75 movq (%rdi), %rax # the potentially-faulting load
76 SYMBOL(FetchNResume):
77 ret
78
# int SpinPause()
# Spin-wait hint for busy loops: "rep; nop" is the encoding of the
# PAUSE instruction.  Always returns 1 in rax.
79 .globl SYMBOL(SpinPause)
80 #ifdef __APPLE__
81 .align 4 # Darwin .align takes a power-of-two exponent (2^4 = 16)
82 #else
83 .align 16
84 #endif
85 ELF_TYPE(SpinPause,@function)
86 SYMBOL(SpinPause):
87 rep # "rep nop" encodes PAUSE
88 nop
89 movq $1, %rax # return value 1
90 ret
91
92 # Support for void Copy::arrayof_conjoint_bytes(void* from,
93 # void* to,
94 # size_t count)
95 # rdi - from
96 # rsi - to
97 # rdx - count, treated as ssize_t
98 #
99 .p2align 4,,15
100 ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
# Conjoint (possibly overlapping) byte copy: rdi = from, rsi = to,
# rdx = byte count.  The copy direction is chosen from the pointer
# order so overlapping regions are handled; the bulk is moved as
# qwords with smaller tail copies.
101 SYMBOL(_Copy_arrayof_conjoint_bytes):
102 movq %rdx,%r8 # byte count
103 shrq $3,%rdx # qword count
104 cmpq %rdi,%rsi
105 leaq -1(%rdi,%r8,1),%rax # from + bcount*1 - 1
106 jbe acb_CopyRight # to <= from: ascending copy is safe
107 cmpq %rax,%rsi
108 jbe acb_CopyLeft # to inside source range: copy descending
109 acb_CopyRight:
110 leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
111 leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
112 negq %rdx # rdx = -qcount, counts up toward 0
113 jmp 7f # NOTE(review): label 7 falls in the missing lines below
114 .p2align 4,,15
115 1: movq 8(%rax,%rdx,8),%rsi # ascending qword copy loop
116 movq %rsi,8(%rcx,%rdx,8)
117 addq $1,%rdx
118 jnz 1b
119 2: testq $4,%r8 # check for trailing dword
120 jz 3f
121 movl 8(%rax),%esi # copy trailing dword
# NOTE(review): original lines 122-181 are absent from this excerpt
# (the dword store, word/byte tails, acb_CopyLeft and labels 3..7 fall
# in the gap); the three lines below are the tail of a later loop.
182 addq $4,%rdx
183 jg 3b
184 ret
185
186 # Support for void Copy::arrayof_conjoint_jshorts(void* from,
187 # void* to,
188 # size_t count)
189 # Equivalent to
190 # conjoint_jshorts_atomic
191 #
192 # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
193 # let the hardware handle it. The two or four words within dwords
194 # or qwords that span cache line boundaries will still be loaded
195 # and stored atomically.
196 #
197 # rdi - from
198 # rsi - to
199 # rdx - count, treated as ssize_t
200 #
201 .p2align 4,,15
202 ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
203 ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
# Conjoint jshort (16-bit element) copy: rdi = from, rsi = to,
# rdx = element count.  Direction is chosen from the pointer order;
# the bulk is moved as qwords so elements stay atomic.
204 SYMBOL(_Copy_arrayof_conjoint_jshorts):
205 SYMBOL(_Copy_conjoint_jshorts_atomic):
206 movq %rdx,%r8 # word count
207 shrq $2,%rdx # qword count
208 cmpq %rdi,%rsi
209 leaq -2(%rdi,%r8,2),%rax # from + wcount*2 - 2
210 jbe acs_CopyRight # to <= from: ascending copy is safe
211 cmpq %rax,%rsi
212 jbe acs_CopyLeft # to inside source range: copy descending
213 acs_CopyRight:
214 leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
215 leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
216 negq %rdx # rdx = -qcount, counts up toward 0
217 jmp 6f # NOTE(review): label 6 falls in the missing lines below
218 1: movq 8(%rax,%rdx,8),%rsi # ascending qword copy loop
219 movq %rsi,8(%rcx,%rdx,8)
220 addq $1,%rdx
221 jnz 1b
222 2: testq $2,%r8 # check for trailing dword
223 jz 3f
224 movl 8(%rax),%esi # copy trailing dword
225 movl %esi,8(%rcx)
# NOTE(review): original lines 226-271 are absent from this excerpt
# (trailing-word tail, acs_CopyLeft and labels 3..6 fall in the gap);
# the lines below are the tail of a later loop.
272 jge 3b
273 addq $4,%rdx
274 jg 2b
275 ret
276
277 # Support for void Copy::arrayof_conjoint_jints(jint* from,
278 # jint* to,
279 # size_t count)
280 # Equivalent to
281 # conjoint_jints_atomic
282 #
283 # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
284 # the hardware handle it. The two dwords within qwords that span
285 # cache line boundaries will still be loaded and stored atomically.
286 #
287 # rdi - from
288 # rsi - to
289 # rdx - count, treated as ssize_t
290 #
291 .p2align 4,,15
292 ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
293 ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
# Conjoint jint (32-bit element) copy: rdi = from, rsi = to,
# rdx = element count.  Direction is chosen from the pointer order;
# the bulk is moved as qwords with a trailing dword.
294 SYMBOL(_Copy_arrayof_conjoint_jints):
295 SYMBOL(_Copy_conjoint_jints_atomic):
296 movq %rdx,%r8 # dword count
297 shrq %rdx # qword count
298 cmpq %rdi,%rsi
299 leaq -4(%rdi,%r8,4),%rax # from + dcount*4 - 4
300 jbe aci_CopyRight # to <= from: ascending copy is safe
301 cmpq %rax,%rsi
302 jbe aci_CopyLeft # to inside source range: copy descending
303 aci_CopyRight:
304 leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
305 leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
306 negq %rdx # rdx = -qcount, counts up toward 0
307 jmp 5f # NOTE(review): label 5 falls in the missing lines below
308 .p2align 4,,15
309 1: movq 8(%rax,%rdx,8),%rsi # ascending qword copy loop
310 movq %rsi,8(%rcx,%rdx,8)
311 addq $1,%rdx
312 jnz 1b
313 2: testq $1,%r8 # check for trailing dword
314 jz 3f
315 movl 8(%rax),%esi # copy trailing dword
# NOTE(review): original lines 316-350 are absent from this excerpt
# (dword store, aci_CopyLeft entry and labels 4/5 fall in the gap);
# the lines below belong to the descending-copy path.
351 movq %rcx,(%rsi,%rdx,8)
352 3: subq $4,%rdx
353 jge 2b
354 addq $4,%rdx
355 jg 1b
356 ret
357
358 # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
359 # jlong* to,
360 # size_t count)
361 # Equivalent to
362 # conjoint_jlongs_atomic
363 # arrayof_conjoint_oops
364 # conjoint_oops_atomic
365 #
366 # rdi - from
367 # rsi - to
368 # rdx - count, treated as ssize_t
369 #
370 .p2align 4,,15
371 ELF_TYPE(_Copy_arrayof_conjoint_jlongs,@function)
372 ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
# Conjoint jlong/oop (64-bit element) copy: rdi = from, rsi = to,
# rdx = element count.  Each element is one qword move, so element
# stores are atomic.  Direction is chosen from the pointer order.
373 SYMBOL(_Copy_arrayof_conjoint_jlongs):
374 SYMBOL(_Copy_conjoint_jlongs_atomic):
375 cmpq %rdi,%rsi
376 leaq -8(%rdi,%rdx,8),%rax # from + count*8 - 8
377 jbe acl_CopyRight # to <= from: ascending copy is safe
378 cmpq %rax,%rsi
379 jbe acl_CopyLeft # to inside source range: copy descending
380 acl_CopyRight:
381 leaq -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
382 negq %rdx # rdx = -count, counts up toward 0
383 jmp 3f # NOTE(review): label 3 is not visible in this excerpt
384 1: movq 8(%rax,%rdx,8),%rsi # ascending qword copy loop
385 movq %rsi,8(%rcx,%rdx,8)
386 addq $1,%rdx
387 jnz 1b
388 ret
389 .p2align 4,,15
390 2: movq -24(%rax,%rdx,8),%rsi # unrolled copy loop body
391 movq %rsi,-24(%rcx,%rdx,8)
392 movq -16(%rax,%rdx,8),%rsi
393 movq %rsi,-16(%rcx,%rdx,8)
394 movq -8(%rax,%rdx,8),%rsi
# NOTE(review): the excerpt is cut off here, midway through the
# unrolled loop body; the remainder of this routine is not visible.
# NOTE(review): extraction artifact ("|") removed; file is truncated here.