/* * Copyright (c) 2007 Tim Kelly/Dialectronics * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to permit * persons to whom the Software is furnished to do so, subject to the * following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

/* this approach does not consider alignments */

/* void ppc_memcpy_4(void* src, void* dst, int size); */

/*
 * NOTE(review): the header names on the two #include lines below were
 * missing from the copy of this file under review.  <machine/asm.h>
 * (provider of ENTRY) and <assym.h> are the conventional BSD choices;
 * confirm against the tree before committing.
 */
#include <machine/asm.h>

#ifdef _KERNEL
#include <assym.h>
#endif

/*
 * Overlap-tolerant memory copy, assembled under one of three names:
 *
 *   MEMCOPY defined:   memcpy   dst = r3, src = r4, len = r5
 *   MEMMOVE defined:   memmove  dst = r3, src = r4, len = r5
 *   neither defined:   bcopy    src = r3, dst = r4, len = r5
 *
 * The copy direction is chosen from the relative position of the two
 * regions:
 *
 *   dst > src                   copy from the end   (Copy4FromEnd)
 *   dst < src, dst + len > src  copy from the start (Copy4FromStart)
 *   otherwise (no overlap)      copy from the end   (Copy4FromEnd)
 *
 * Each path first moves one byte if bit 0 of len is set, then one
 * halfword if bit 1 is set, then the remainder a word at a time.
 * No attempt is made to align the accesses.
 *
 * Register usage:
 *   r0      constant zero, used as the comparand
 *   r5      bytes still to copy (consumed)
 *   r6      data being moved
 *   r7      scratch (dst + len, then the len bit under test)
 *   r8, r9  forward path only: running src / dst pointers
 *
 * r3 and r4 are never written, so in the memcpy/memmove builds r3 still
 * holds dst when the routine returns.
 */
	.text
	.align 4

#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
ENTRY(memcpy)
#else
ENTRY(memmove)
#endif
#define	SRCREG	%r4
#define	DSTREG	%r3
#else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
ENTRY(bcopy)
#define	SRCREG	%r3
#define	DSTREG	%r4
#endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */

	/* r0 contains zero */
	xor	%r0, %r0, %r0

	/* make sure length > 0 (signed: a negative length copies nothing) */
	cmp	cr0, 0x0, %r5, %r0
	/* make sure src != dst */
	cmp	cr1, 0x0, SRCREG, DSTREG

	/* bail if length less than or equal to zero */
	ble-	cr0, done
	/* bail if src == dst */
	beq-	cr1, done

debug_33:
	/*
	 * Copying from the end is safe whenever dst is above src, and
	 * whenever the regions do not overlap at all.  Only when dst is
	 * below src and dst + len reaches into src must the copy run
	 * from the start, otherwise bytes not yet read would be
	 * overwritten.
	 */

	/* r7 = dst + len */
	add	%r7, %r5, DSTREG

	/*
	 * Addresses are unsigned quantities: use logical compares so that
	 * regions on either side of 0x80000000 are ordered correctly.
	 */
	/* compare dst to src */
	cmpl	cr0, 0, DSTREG, SRCREG
	/* compare dst + len to src */
	cmpl	cr1, 0, %r7, SRCREG

	/* dst > src? (copy from end) */
	bgt+	cr0, Copy4FromEnd
	/* src > dst: does dst + len run into src? (copy from start) */
	bgt-	cr1, Copy4FromStart

	/* the default is to copy backwards */
Copy4FromEnd:
	/*
	 * r5 serves as both the remaining count and the offset of the
	 * next element to move, so src and dst themselves stay untouched.
	 */

	/* prolog: copy up to three bytes */
get1_end:
	/* odd length: move the last byte */
	andi.	%r7, %r5, 0x01
	cmp	cr0, 0x0, %r7, %r0
	beq	cr0, get2_end
	sub	%r5, %r5, %r7
	/* load and store the byte */
	lbzx	%r6, %r5, SRCREG
	stbx	%r6, %r5, DSTREG

get2_end:
	/* bit 1 of the length set: move one halfword */
	andi.	%r7, %r5, 0x02
	cmp	cr0, 0x0, %r7, %r0
	beq	cr0, get4_end
	sub	%r5, %r5, %r7
	/* load and store the halfword */
	lhzx	%r6, %r5, SRCREG
	sthx	%r6, %r5, DSTREG

get4_end:
	/* make sure we haven't copied all of the bytes */
	cmp	cr0, 0x0, %r5, %r0
	beq-	cr0, done

	/* r5 is now a non-zero multiple of four: move words until it is 0 */
loop4_end:
	subi	%r5, %r5, 0x04
	cmp	cr0, 0x0, %r5, %r0
	/* load/store 4 bytes */
	lwzx	%r6, %r5, SRCREG
	stwx	%r6, %r5, DSTREG
	bgt+	cr0, loop4_end

	b	done

	/* this approach is to copy forwards by incrementing the addresses */
Copy4FromStart:
	/*
	 * Walk copies of the pointers rather than SRCREG/DSTREG themselves:
	 * in the memcpy/memmove builds DSTREG is r3, which must still hold
	 * dst on return.
	 */
	mr	%r8, SRCREG
	mr	%r9, DSTREG

	/* prolog: copy up to three bytes */
get1_st:
	/* odd length: move the first byte */
	andi.	%r7, %r5, 0x01
	cmp	cr0, 0x0, %r7, %r0
	beq	cr0, get2_st
	sub	%r5, %r5, %r7
	/* load and store the byte */
	lbz	%r6, 0(%r8)
	stb	%r6, 0(%r9)
	/* step past the byte just moved */
	addi	%r8, %r8, 0x01
	addi	%r9, %r9, 0x01

get2_st:
	/* bit 1 of the length set: move one halfword */
	andi.	%r7, %r5, 0x02
	cmp	cr0, 0x0, %r7, %r0
	beq	cr0, get4_st
	sub	%r5, %r5, %r7
	/* load and store the halfword */
	lhz	%r6, 0(%r8)
	sth	%r6, 0(%r9)
	/* step past the halfword just moved */
	addi	%r8, %r8, 0x02
	addi	%r9, %r9, 0x02

	/* now loop on four byte read/writes until nothing is left */
get4_st:
	cmp	cr0, 0x0, %r5, %r0
	beq	cr0, done
	subi	%r5, %r5, 0x04
	/* load and store the word */
	lwz	%r6, 0(%r8)
	stw	%r6, 0(%r9)
	/* step past the word just moved */
	addi	%r8, %r8, 0x04
	addi	%r9, %r9, 0x04
	b	get4_st

done:
	blr