/*
 * Copyright (c) 2016, Linaro Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// ---------------------------------------------------------------------
// Word (8-byte) block copy routines for AArch64, GNU as syntax.
//
//   _Copy_disjoint_words   forward (ascending address) copy
//   _Copy_conjoint_words   overlap-safe copy: uses the forward routine
//                          when (d - s) >= count * 8 as an unsigned
//                          compare, otherwise copies from the top down
//
// In:     x0 (s)     = source address
//         x1 (d)     = destination address
//         x2 (count) = number of 8-byte words to copy
// Out:    nothing meaningful; s, d and count are consumed
// Clobb:  x0-x10 and the condition flags
// Stack:  not used (leaf routines, sp is never referenced)
//
// NOTE(review): the C-level prototype is presumably
//   void f(const void *from, void *to, size_t count)
// with word-aligned pointers -- confirm against the declaring header.
//
// Structure of every copy path:
//   1. If bit 3 of s is set, copy one word so s becomes 16-byte aligned.
//   2. If fewer than 8 words remain, jump straight to the tail table.
//   3. Otherwise run a software-pipelined loop that keeps 8 words in
//      t0-t7, storing the previous batch while loading the next.
//   4. Drain the 8 words still held in registers.
//   5. Copy the last 0..7 words through a computed branch into a table
//      of eight 32-byte slots.  At that point count == words_left - 8
//      (-8..-1) and the target is label 0 + count * 32, so every slot
//      must stay exactly 32 bytes (.align 5) and the 7-word slot, which
//      is 8 instructions long, falls through into the ret at label 0.
//
// When bit 3 of d is set the "unal_" variants are used; they arrange
// the stores as single word + pairs + single word so that every stp
// targets a 16-byte aligned address.
// ---------------------------------------------------------------------

        .global _Copy_conjoint_words
        .global _Copy_disjoint_words

s       .req    x0
d       .req    x1
count   .req    x2
t0      .req    x3
t1      .req    x4
t2      .req    x5
t3      .req    x6
t4      .req    x7
t5      .req    x8
t6      .req    x9
t7      .req    x10

        .align  6
_Copy_disjoint_words:
        // Nothing to do for an empty copy.  This must be tested before
        // the alignment fix-up below, which would otherwise copy a word
        // and wrap count around.
        cbz     count, fwd_copy_done

        // Ensure 2 word aligned
        tbz     s, #3, fwd_copy_aligned
        ldr     t0, [s], #8
        str     t0, [d], #8
        sub     count, count, #1

fwd_copy_aligned:
        // The pipelined path below unconditionally moves 8 words, so
        // shorter copies go directly to the tail tables.
        cmp     count, #8
        blo     fwd_copy_small

        ldp     t0, t1, [s, #0]
        ldp     t2, t3, [s, #16]
        ldp     t4, t5, [s, #32]
        ldp     t6, t7, [s, #48]!       // Source now biased by -16

        tbnz    d, #3, unal_fwd_copy
        sub     d, d, #16               // and bias dest

        subs    count, count, #16
        blo     fwd_copy_drain

fwd_copy_again:
        prfm    pldl1keep, [s, #256]
        stp     t0, t1, [d, #16]
        ldp     t0, t1, [s, #16]
        stp     t2, t3, [d, #32]
        ldp     t2, t3, [s, #32]
        stp     t4, t5, [d, #48]
        ldp     t4, t5, [s, #48]
        stp     t6, t7, [d, #64]!
        ldp     t6, t7, [s, #64]!
        subs    count, count, #8
        bhs     fwd_copy_again

fwd_copy_drain:
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        stp     t4, t5, [d, #48]
        stp     t6, t7, [d, #64]!

fwd_copy_tail:
        // count is now -8..-1 for 0..7 words to copy
        // Next source word is at [s, #16], next dest word at [d, #16].
        adr     t0, 0f
        add     t0, t0, count, lsl #5
        br      t0

        .align  5
        ret                             // -8 == 0 words
        .align  5
        ldr     t0, [s, #16]            // -7 == 1 word
        str     t0, [d, #16]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -6 = 2 words
        stp     t0, t1, [d, #16]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -5 = 3 words
        ldr     t2, [s, #32]
        stp     t0, t1, [d, #16]
        str     t2, [d, #32]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -4 = 4 words
        ldp     t2, t3, [s, #32]
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -3 = 5 words
        ldp     t2, t3, [s, #32]
        ldr     t4, [s, #48]
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        str     t4, [d, #48]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -2 = 6 words
        ldp     t2, t3, [s, #32]
        ldp     t4, t5, [s, #48]
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        stp     t4, t5, [d, #48]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -1 = 7 words
        ldp     t2, t3, [s, #32]
        ldp     t4, t5, [s, #48]
        ldr     t6, [s, #64]
        stp     t0, t1, [d, #16]
        stp     t2, t3, [d, #32]
        stp     t4, t5, [d, #48]
        str     t6, [d, #64]
        // Is always aligned here, code for 7 words is one instruction
        // too large so it just falls through.
        .align  5
0:
        ret

unal_fwd_copy:
        // Bias dest so we only pre index on the last copy
        sub     d, d, #8

        subs    count, count, #16
        blo     unal_fwd_copy_drain

unal_fwd_copy_again:
        prfm    pldl1keep, [s, #256]
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        ldp     t0, t1, [s, #16]
        stp     t3, t4, [d, #32]
        ldp     t2, t3, [s, #32]
        stp     t5, t6, [d, #48]
        ldp     t4, t5, [s, #48]
        str     t7, [d, #64]!
        ldp     t6, t7, [s, #64]!
        subs    count, count, #8
        bhs     unal_fwd_copy_again

unal_fwd_copy_drain:
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        stp     t3, t4, [d, #32]
        stp     t5, t6, [d, #48]
        str     t7, [d, #64]!

unal_fwd_copy_tail:
        // count is now -8..-1 for 0..7 words to copy
        // Next source word is at [s, #16], next dest word at [d, #8].
        adr     t0, 0f
        add     t0, t0, count, lsl #5
        br      t0

        .align  5
        ret                             // -8 == 0 words
        .align  5
        ldr     t0, [s, #16]            // -7 == 1 word
        str     t0, [d, #8]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -6 = 2 words
        str     t0, [d, #8]
        str     t1, [d, #16]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -5 = 3 words
        ldr     t2, [s, #32]
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -4 = 4 words
        ldp     t2, t3, [s, #32]
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        str     t3, [d, #32]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -3 = 5 words
        ldp     t2, t3, [s, #32]
        ldr     t4, [s, #48]
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        stp     t3, t4, [d, #32]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -2 = 6 words
        ldp     t2, t3, [s, #32]
        ldp     t4, t5, [s, #48]
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        stp     t3, t4, [d, #32]
        str     t5, [d, #48]
        ret
        .align  5
        ldp     t0, t1, [s, #16]        // -1 = 7 words
        ldp     t2, t3, [s, #32]
        ldp     t4, t5, [s, #48]
        ldr     t6, [s, #64]
        str     t0, [d, #8]
        stp     t1, t2, [d, #16]
        stp     t3, t4, [d, #32]
        stp     t5, t6, [d, #48]
        // Is always aligned here, code for 7 words is one instruction
        // too large so it just falls through.
        .align  5
0:
        ret

fwd_copy_small:
        // 0..7 words remain and nothing is held in registers.  Apply
        // the same pointer bias and count encoding the drain code
        // leaves behind, then reuse the matching tail table.
        sub     s, s, #16               // next source word at [s, #16]
        sub     count, count, #8        // -8..-1 for 0..7 words
        tbnz    d, #3, 1f
        sub     d, d, #16               // next dest word at [d, #16]
        b       fwd_copy_tail
1:
        sub     d, d, #8                // next dest word at [d, #8]
        b       unal_fwd_copy_tail

fwd_copy_done:
        ret

        .align  6
_Copy_conjoint_words:
        // A forward copy is safe unless d lies inside [s, s + count * 8).
        // The unsigned compare also sends d < s (difference wraps to a
        // large value) and count == 0 to the forward routine.
        sub     t0, d, s
        cmp     t0, count, lsl #3
        bhs     _Copy_disjoint_words

        // Copy downwards: point s and d one past the last word.
        add     s, s, count, lsl #3
        add     d, d, count, lsl #3

        // Ensure 2 word aligned
        tbz     s, #3, bwd_copy_aligned
        ldr     t0, [s, #-8]!
        str     t0, [d, #-8]!
        sub     count, count, #1

bwd_copy_aligned:
        // The pipelined path below unconditionally moves 8 words, so
        // shorter copies go directly to the tail tables.
        cmp     count, #8
        blo     bwd_copy_small

        ldp     t0, t1, [s, #-16]
        ldp     t2, t3, [s, #-32]
        ldp     t4, t5, [s, #-48]
        ldp     t6, t7, [s, #-64]!

        tbnz    d, #3, unal_bwd_copy

        subs    count, count, #16
        blo     bwd_copy_drain

bwd_copy_again:
        prfum   pldl1keep, [s, #-256]
        stp     t0, t1, [d, #-16]
        ldp     t0, t1, [s, #-16]
        stp     t2, t3, [d, #-32]
        ldp     t2, t3, [s, #-32]
        stp     t4, t5, [d, #-48]
        ldp     t4, t5, [s, #-48]
        stp     t6, t7, [d, #-64]!
        ldp     t6, t7, [s, #-64]!
        subs    count, count, #8
        bhs     bwd_copy_again

bwd_copy_drain:
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        stp     t4, t5, [d, #-48]
        stp     t6, t7, [d, #-64]!

bwd_copy_tail:
        // count is now -8..-1 for 0..7 words to copy
        // s and d point one past the next word to copy (no bias).
        adr     t0, 0f
        add     t0, t0, count, lsl #5
        br      t0

        .align  5
        ret                             // -8 == 0 words
        .align  5
        ldr     t0, [s, #-8]            // -7 == 1 word
        str     t0, [d, #-8]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -6 = 2 words
        stp     t0, t1, [d, #-16]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -5 = 3 words
        ldr     t2, [s, #-24]
        stp     t0, t1, [d, #-16]
        str     t2, [d, #-24]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -4 = 4 words
        ldp     t2, t3, [s, #-32]
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -3 = 5 words
        ldp     t2, t3, [s, #-32]
        ldr     t4, [s, #-40]
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        str     t4, [d, #-40]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -2 = 6 words
        ldp     t2, t3, [s, #-32]
        ldp     t4, t5, [s, #-48]
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        stp     t4, t5, [d, #-48]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -1 = 7 words
        ldp     t2, t3, [s, #-32]
        ldp     t4, t5, [s, #-48]
        ldr     t6, [s, #-56]
        stp     t0, t1, [d, #-16]
        stp     t2, t3, [d, #-32]
        stp     t4, t5, [d, #-48]
        str     t6, [d, #-56]
        // Is always aligned here, code for 7 words is one instruction
        // too large so it just falls through.
        .align  5
0:
        ret

unal_bwd_copy:
        subs    count, count, #16
        blo     unal_bwd_copy_drain

unal_bwd_copy_again:
        // Negative offsets only exist in the unscaled (prfum) form;
        // spelled explicitly to match bwd_copy_again.
        prfum   pldl1keep, [s, #-256]
        str     t1, [d, #-8]
        stp     t3, t0, [d, #-24]
        ldp     t0, t1, [s, #-16]
        stp     t5, t2, [d, #-40]
        ldp     t2, t3, [s, #-32]
        stp     t7, t4, [d, #-56]
        ldp     t4, t5, [s, #-48]
        str     t6, [d, #-64]!
        ldp     t6, t7, [s, #-64]!
        subs    count, count, #8
        bhs     unal_bwd_copy_again

unal_bwd_copy_drain:
        str     t1, [d, #-8]
        stp     t3, t0, [d, #-24]
        stp     t5, t2, [d, #-40]
        stp     t7, t4, [d, #-56]
        str     t6, [d, #-64]!

unal_bwd_copy_tail:
        // count is now -8..-1 for 0..7 words to copy
        // s and d point one past the next word to copy (no bias).
        adr     t0, 0f
        add     t0, t0, count, lsl #5
        br      t0

        .align  5
        ret                             // -8 == 0 words
        .align  5
        ldr     t0, [s, #-8]            // -7 == 1 word
        str     t0, [d, #-8]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -6 = 2 words
        str     t1, [d, #-8]
        str     t0, [d, #-16]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -5 = 3 words
        ldr     t2, [s, #-24]
        str     t1, [d, #-8]
        stp     t2, t0, [d, #-24]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -4 = 4 words
        ldp     t2, t3, [s, #-32]
        str     t1, [d, #-8]
        stp     t3, t0, [d, #-24]
        str     t2, [d, #-32]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -3 = 5 words
        ldp     t2, t3, [s, #-32]
        ldr     t4, [s, #-40]
        str     t1, [d, #-8]
        stp     t3, t0, [d, #-24]
        stp     t4, t2, [d, #-40]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -2 = 6 words
        ldp     t2, t3, [s, #-32]
        ldp     t4, t5, [s, #-48]
        str     t1, [d, #-8]
        stp     t3, t0, [d, #-24]
        stp     t5, t2, [d, #-40]
        str     t4, [d, #-48]
        ret
        .align  5
        ldp     t0, t1, [s, #-16]       // -1 = 7 words
        ldp     t2, t3, [s, #-32]
        ldp     t4, t5, [s, #-48]
        ldr     t6, [s, #-56]
        str     t1, [d, #-8]
        stp     t3, t0, [d, #-24]
        stp     t5, t2, [d, #-40]
        stp     t6, t4, [d, #-56]
        // Is always aligned here, code for 7 words is one instruction
        // too large so it just falls through.
        .align  5
0:
        ret

bwd_copy_small:
        // 0..7 words remain and nothing is held in registers.  The
        // backward tails use unbiased pointers, so only count needs
        // converting to the -8..-1 encoding.
        sub     count, count, #8
        tbnz    d, #3, unal_bwd_copy_tail
        b       bwd_copy_tail