root/arch/powerpc/lib/memcpy_mcsafe_64.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 /*
   3  * Copyright (C) IBM Corporation, 2011
   4  * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
   5  * Author - Balbir Singh <bsingharora@gmail.com>
   6  */
   7 #include <asm/ppc_asm.h>
   8 #include <asm/errno.h>
   9 #include <asm/export.h>
  10 /*
      * err1 - written as "err1; <insn>" in front of a load or store.
      * Drops local label 100 at the point of use (so it labels the instruction
      * that follows on the same line) and emits EX_TABLE(100b,.Ldo_err1).
      * EX_TABLE is defined outside this file; presumably it records an
      * exception-table entry so a fault at label 100 resumes at .Ldo_err1 -
      * confirm against asm/ppc_asm.h.
      * In this file err1 is used on accesses made while no extra stack frame
      * is live.
      */
  11         .macro err1
  12 100:
  13         EX_TABLE(100b,.Ldo_err1)
  14         .endm
  15 /*
      * err2 - same shape as err1 but uses local label 200 and names .Ldo_err2
      * as the fixup target. In this file it tags the accesses made while
      * r14-r22 are saved in the temporary stack frame; .Ldo_err2 reloads them
      * and pops the frame before continuing into .Ldo_err1.
      */
  16         .macro err2
  17 200:
  18         EX_TABLE(200b,.Ldo_err2)
  19         .endm
  20 /*
      * err3 - same shape as err1 but uses local label 300 and names .Ldone as
      * the fixup target. Only the byte-by-byte retry loop under .Ldo_err1 uses
      * it, so a second fault during the retry ends the copy at .Ldone.
      */
  21         .macro err3
  22 300:    EX_TABLE(300b,.Ldone)
  23         .endm
  24 /*
      * Fault fixup paths, named as targets by the err1/err2 macros. Both use
      * r3 (destination pointer), r4 (source pointer) and r7 (bytes still to
      * copy) exactly as memcpy_mcsafe left them when the fault was taken.
      *
      * .Ldo_err2: entered while r14-r22 are saved in the temporary stack
      *            frame; reloads them, pops the frame and falls through.
      * .Ldo_err1: retries the remaining r7 bytes one at a time. Returns
      *            r3 = 0 if the byte loop runs to completion; a fault inside
      *            the loop is routed to .Ldone by err3.
      */
  25 .Ldo_err2:
  26         ld      r22,STK_REG(R22)(r1)
  27         ld      r21,STK_REG(R21)(r1)
  28         ld      r20,STK_REG(R20)(r1)
  29         ld      r19,STK_REG(R19)(r1)
  30         ld      r18,STK_REG(R18)(r1)
  31         ld      r17,STK_REG(R17)(r1)
  32         ld      r16,STK_REG(R16)(r1)
  33         ld      r15,STK_REG(R15)(r1)
  34         ld      r14,STK_REG(R14)(r1)
  35         addi    r1,r1,STACKFRAMESIZE    /* pop the temporary frame */
  36 .Ldo_err1:
  37         /* Do a byte by byte copy to get the exact remaining size */
  38         mtctr   r7                      /* CTR = bytes still to copy */
  39 46:
  40 err3;   lbz     r0,0(r4)
  41         addi    r4,r4,1
  42 err3;   stb     r0,0(r3)
  43         addi    r3,r3,1
  44         bdnz    46b                     /* CTR drops only after a byte is stored */
  45         li      r3,0                    /* retry copied everything: return 0 */
  46         blr
  47 /*
      * .Ldone - fixup target of err3, i.e. reached when the byte-by-byte retry
      * loop under .Ldo_err1 itself faults. That loop loads CTR with the
      * remaining byte count and decrements it (bdnz) only after a byte has
      * been stored, so CTR holds the number of bytes not copied. Return it
      * in r3.
      */
  48 .Ldone:
  49         mfctr   r3
  50         blr
  51 
  52 /*
      * memcpy_mcsafe - copy r5 bytes from the address in r4 to the address in
      * r3, with every load and store covered by an err1/err2 exception-table
      * tag.
      *
      * In:   r3 = destination, r4 = source, r5 = length in bytes.
      *       (Presumably declared in C as
      *        int memcpy_mcsafe(void *to, const void *from, size_t n) -
      *        confirm against the declaring header.)
      * Out:  r3 = 0 when the whole copy completed. If an access faults, the
      *       fixup code at .Ldo_err1/.Ldo_err2 retries byte by byte and r3
      *       ends up as 0 or as the number of bytes that were not copied.
      * Uses: r0, r6, r8-r12 as scratch; r7 as the remaining-byte counter;
      *       r14-r22 (saved in a temporary stack frame and restored); CTR;
      *       CR0 and CR7.
      *
      * Invariant relied on by the fixup code: r3, r4 and r7 are advanced
      * together, and only after a chunk has been completely stored. A fault on
      * any load or store of a chunk therefore leaves all three describing the
      * start of that chunk, so the byte-by-byte retry reads from the source
      * offset that matches the destination offset.
      */
  53 _GLOBAL(memcpy_mcsafe)
  54         mr      r7,r5                   /* r7 = bytes still to copy */
  55         cmpldi  r5,16
  56         blt     .Lshort_copy
  57 
  58 .Lcopy:
  59         /* Get the source 8B aligned */
  60         neg     r6,r4
  61         mtocrf  0x01,r6                 /* CR7 = low 4 bits of -source */
  62         clrldi  r6,r6,(64-3)            /* r6 = bytes needed to reach 8B alignment */
  63 
  64         bf      cr7*4+3,1f              /* 1-byte step needed? */
  65 err1;   lbz     r0,0(r4)
  66 err1;   stb     r0,0(r3)
  67         addi    r4,r4,1
  68         addi    r3,r3,1
  69         subi    r7,r7,1
  70 
  71 1:      bf      cr7*4+2,2f              /* 2-byte step needed? */
  72 err1;   lhz     r0,0(r4)
  73 err1;   sth     r0,0(r3)
  74         addi    r4,r4,2
  75         addi    r3,r3,2
  76         subi    r7,r7,2
  77 
  78 2:      bf      cr7*4+1,3f              /* 4-byte step needed? */
  79 err1;   lwz     r0,0(r4)
  80 err1;   stw     r0,0(r3)
  81         addi    r4,r4,4
  82         addi    r3,r3,4
  83         subi    r7,r7,4
  84 
  85 3:      sub     r5,r5,r6                /* r5 = length left after alignment */
  86         cmpldi  r5,128                  /* CR0 is consumed by the blt below */
  87 
             /*
              * Open a temporary frame and save r14-r22 so they can be used as
              * copy registers. From here until the frame is popped at label 7,
              * accesses are tagged err2 so the fixup restores these registers.
              * LR is stored in the slot at STACKFRAMESIZE+16(r1); nothing in
              * this routine modifies LR or reloads it.
              */
  88         mflr    r0
  89         stdu    r1,-STACKFRAMESIZE(r1)
  90         std     r14,STK_REG(R14)(r1)
  91         std     r15,STK_REG(R15)(r1)
  92         std     r16,STK_REG(R16)(r1)
  93         std     r17,STK_REG(R17)(r1)
  94         std     r18,STK_REG(R18)(r1)
  95         std     r19,STK_REG(R19)(r1)
  96         std     r20,STK_REG(R20)(r1)
  97         std     r21,STK_REG(R21)(r1)
  98         std     r22,STK_REG(R22)(r1)
  99         std     r0,STACKFRAMESIZE+16(r1)
 100 
 101         blt     5f                      /* fewer than 128 bytes: skip the loop */
 102         srdi    r6,r5,7                 /* r6 = number of 128B blocks */
 103         mtctr   r6
 104 
 105         /* Now do cacheline (128B) sized loads and stores. */
 106         .align  5
 107 4:
 108 err2;   ld      r0,0(r4)
 109 err2;   ld      r6,8(r4)
 110 err2;   ld      r8,16(r4)
 111 err2;   ld      r9,24(r4)
 112 err2;   ld      r10,32(r4)
 113 err2;   ld      r11,40(r4)
 114 err2;   ld      r12,48(r4)
 115 err2;   ld      r14,56(r4)
 116 err2;   ld      r15,64(r4)
 117 err2;   ld      r16,72(r4)
 118 err2;   ld      r17,80(r4)
 119 err2;   ld      r18,88(r4)
 120 err2;   ld      r19,96(r4)
 121 err2;   ld      r20,104(r4)
 122 err2;   ld      r21,112(r4)
 123 err2;   ld      r22,120(r4)
 124 err2;   std     r0,0(r3)
 125 err2;   std     r6,8(r3)
 126 err2;   std     r8,16(r3)
 127 err2;   std     r9,24(r3)
 128 err2;   std     r10,32(r3)
 129 err2;   std     r11,40(r3)
 130 err2;   std     r12,48(r3)
 131 err2;   std     r14,56(r3)
 132 err2;   std     r15,64(r3)
 133 err2;   std     r16,72(r3)
 134 err2;   std     r17,80(r3)
 135 err2;   std     r18,88(r3)
 136 err2;   std     r19,96(r3)
 137 err2;   std     r20,104(r3)
 138 err2;   std     r21,112(r3)
 139 err2;   std     r22,120(r3)
 140         addi    r4,r4,128               /* advance only once the block is stored */
 141         addi    r3,r3,128
 142         subi    r7,r7,128
 143         bdnz    4b
 144 
 145         clrldi  r5,r5,(64-7)            /* r5 = length mod 128 */
 146 
 147         /* Up to 127B to go */
 148 5:      srdi    r6,r5,4
 149         mtocrf  0x01,r6                 /* CR7 = bits for the 128/64/32/16 byte steps */
 150 
 151 6:      bf      cr7*4+1,7f              /* 64-byte step needed? */
 152 err2;   ld      r0,0(r4)
 153 err2;   ld      r6,8(r4)
 154 err2;   ld      r8,16(r4)
 155 err2;   ld      r9,24(r4)
 156 err2;   ld      r10,32(r4)
 157 err2;   ld      r11,40(r4)
 158 err2;   ld      r12,48(r4)
 159 err2;   ld      r14,56(r4)
 160 err2;   std     r0,0(r3)
 161 err2;   std     r6,8(r3)
 162 err2;   std     r8,16(r3)
 163 err2;   std     r9,24(r3)
 164 err2;   std     r10,32(r3)
 165 err2;   std     r11,40(r3)
 166 err2;   std     r12,48(r3)
 167 err2;   std     r14,56(r3)
 168         addi    r4,r4,64
 169         addi    r3,r3,64
 170         subi    r7,r7,64
 171 
             /* Done with r14-r22: restore them and pop the frame (err1 from here on). */
 172 7:      ld      r14,STK_REG(R14)(r1)
 173         ld      r15,STK_REG(R15)(r1)
 174         ld      r16,STK_REG(R16)(r1)
 175         ld      r17,STK_REG(R17)(r1)
 176         ld      r18,STK_REG(R18)(r1)
 177         ld      r19,STK_REG(R19)(r1)
 178         ld      r20,STK_REG(R20)(r1)
 179         ld      r21,STK_REG(R21)(r1)
 180         ld      r22,STK_REG(R22)(r1)
 181         addi    r1,r1,STACKFRAMESIZE
 182 
 183         /* Up to 63B to go */
 184         bf      cr7*4+2,8f              /* 32-byte step needed? */
 185 err1;   ld      r0,0(r4)
 186 err1;   ld      r6,8(r4)
 187 err1;   ld      r8,16(r4)
 188 err1;   ld      r9,24(r4)
 189 err1;   std     r0,0(r3)
 190 err1;   std     r6,8(r3)
 191 err1;   std     r8,16(r3)
 192 err1;   std     r9,24(r3)
 193         addi    r4,r4,32
 194         addi    r3,r3,32
 195         subi    r7,r7,32
 196 
 197         /* Up to 31B to go */
 198 8:      bf      cr7*4+3,9f              /* 16-byte step needed? */
 199 err1;   ld      r0,0(r4)
 200 err1;   ld      r6,8(r4)
 201 err1;   std     r0,0(r3)
 202 err1;   std     r6,8(r3)
 203         addi    r4,r4,16
 204         addi    r3,r3,16
 205         subi    r7,r7,16
 206 
 207 9:      clrldi  r5,r5,(64-4)            /* r5 = length mod 16 */
 208 
 209         /* Up to 15B to go */
 210 .Lshort_copy:
 211         mtocrf  0x01,r5                 /* CR7 = bits for the 8/4/2/1 byte steps */
 212         bf      cr7*4+0,12f             /* 8-byte step needed? */
 213 err1;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
 214 err1;   lwz     r6,4(r4)
 215 err1;   stw     r0,0(r3)
 216 err1;   stw     r6,4(r3)
 217         addi    r4,r4,8
 218         addi    r3,r3,8
 219         subi    r7,r7,8
 220 
 221 12:     bf      cr7*4+1,13f             /* 4-byte step needed? */
 222 err1;   lwz     r0,0(r4)
 223 err1;   stw     r0,0(r3)
 224         addi    r4,r4,4
 225         addi    r3,r3,4
 226         subi    r7,r7,4
 227 
 228 13:     bf      cr7*4+2,14f             /* 2-byte step needed? */
 229 err1;   lhz     r0,0(r4)
 230 err1;   sth     r0,0(r3)
 231         addi    r4,r4,2
 232         addi    r3,r3,2
 233         subi    r7,r7,2
 234 
 235 14:     bf      cr7*4+3,15f             /* final single byte? */
 236 err1;   lbz     r0,0(r4)
 237 err1;   stb     r0,0(r3)
 238 
 239 15:     li      r3,0                    /* whole copy completed: return 0 */
 240         blr
 241 
 242 EXPORT_SYMBOL_GPL(memcpy_mcsafe);

/* [<][>][^][v][top][bottom][index][help] */