1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 #include <linux/linkage.h>
  14 #include <asm/assembler.h>
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 
  26 
  27 
  28 
  29 src1            .req    x0      /* read on entry: pointer to the first buffer (same register as result) */
  30 src2            .req    x1      /* read on entry: pointer to the second buffer */
  31 limit           .req    x2      /* read on entry: number of bytes to compare; later holds leftover-byte counts */
  32 result          .req    x0      /* value left in x0 just before each ret */
  33 
  34 /* Working registers used by memcmp below. */
  35 data1           .req    x3      /* byte or 8-byte word loaded from src1 */
  36 data1w          .req    w3      /* 32-bit view of data1, destination of ldrb */
  37 data2           .req    x4      /* byte or 8-byte word loaded from src2 */
  38 data2w          .req    w4      /* 32-bit view of data2, destination of ldrb */
  39 has_nul         .req    x5      /* not referenced by the code visible in this file */
  40 diff            .req    x6      /* data1 ^ data2; non-zero when the loaded words differ */
  41 endloop         .req    x7      /* non-zero when a word loop has to stop */
  42 tmp1            .req    x8      /* scratch */
  43 tmp2            .req    x9      /* scratch */
  44 tmp3            .req    x10     /* scratch; holds the alignment delta on the misaligned path */
  45 pos             .req    x11     /* byte countdown, negative load offset, or clz bit index */
  46 limit_wd        .req    x12     /* countdown of 8-byte words still to compare */
  47 mask            .req    x13     /* bit mask covering the bytes past limit in the final word */
  48 
  49 WEAK(memcmp)    /* x0 = 0 if the limit bytes at src1/src2 all match, else <0 or >0 per the first differing byte (unsigned) */
  50         cbz     limit, .Lret0           /* zero bytes requested: report equal */
  51         eor     tmp1, src1, src2
  52         tst     tmp1, #7                /* do both pointers have the same offset within 8 bytes? */
  53         b.ne    .Lmisaligned8
  54         ands    tmp1, src1, #7          /* tmp1 = that shared offset; zero means already aligned */
  55         b.ne    .Lmutual_align
  56         sub     limit_wd, limit, #1 /* limit != 0 here, so this cannot underflow */
  57         lsr     limit_wd, limit_wd, #3 /* limit_wd = (limit - 1) / 8, index of the last word */
  58         /* Both pointers are 8-byte aligned: compare one 8-byte word per iteration. */
  59 
  60 
  61 
  62 .Lloop_aligned:
  63         ldr     data1, [src1], #8
  64         ldr     data2, [src2], #8
  65 .Lstart_realigned:
  66         subs    limit_wd, limit_wd, #1
  67         eor     diff, data1, data2      /* non-zero if the two words differ */
  68         csinv   endloop, diff, xzr, cs  /* endloop = diff, or all-ones once the subs borrowed (last word) */
  69         cbz     endloop, .Lloop_aligned
  70 
  71         /* limit_wd did not go negative, so the loop stopped on a difference. */
  72         tbz     limit_wd, #63, .Lnot_limit
  73 
  74         /* Last word: if limit is a multiple of 8 the whole word is valid. */
  75         ands    limit, limit, #7
  76         b.eq    .Lnot_limit
  77         /* Otherwise only limit % 8 bytes of the last word count: clear the rest in both */
  78         /* words and set those bits in diff so the clz at .Lnot_limit stops at the end of */
  79         /* the valid data. */
  80 
  81         lsl     limit, limit, #3        /* bytes -> bits */
  82         mov     mask, #~0               /* NOTE(review): CPU_BE()/CPU_LE() come from the included headers; presumably they emit their argument only for that endianness - confirm */
  83 CPU_BE( lsr     mask, mask, limit )
  84 CPU_LE( lsl     mask, mask, limit )
  85         bic     data1, data1, mask
  86         bic     data2, data2, mask
  87 
  88         orr     diff, diff, mask
  89         b       .Lnot_limit
  90 
  91 .Lmutual_align:
  92         /* Same non-zero offset in both pointers: round both down to the 8-byte boundary, */
  93         /* load the containing words, and neutralise the bytes that precede the real start. */
  94 
  95 
  96 
  97         bic     src1, src1, #7
  98         bic     src2, src2, #7
  99         ldr     data1, [src1], #8
 100         ldr     data2, [src2], #8
 101         /* limit_wd = (limit - 1 + tmp1) / 8, computed from the quotient and remainder of */
 102         /* limit - 1 so that the sum limit + tmp1 is never formed before the division. */
 103 
 104 
 105         sub     limit_wd, limit, #1
 106         and     tmp3, limit_wd, #7
 107         lsr     limit_wd, limit_wd, #3
 108         add     tmp3, tmp3, tmp1
 109         add     limit_wd, limit_wd, tmp3, lsr #3
 110         add     limit, limit, tmp1      /* count the skipped leading bytes in limit as well */
 111 
 112         lsl     tmp1, tmp1, #3          /* leading byte offset -> bits */
 113         neg     tmp1, tmp1              /* register shifts use the amount mod 64, so this acts as 64 - bits */
 114         mov     tmp2, #~0
 115         /* tmp2 becomes a mask of ones over the bytes that lie before the start address. */
 116 CPU_BE( lsl     tmp2, tmp2, tmp1 )
 117         
 118 CPU_LE( lsr     tmp2, tmp2, tmp1 )
 119 
 120         orr     data1, data1, tmp2      /* force the leading bytes to the same value in both words */
 121         orr     data2, data2, tmp2
 122         b       .Lstart_realigned
 123 
 124         /* src1 and src2 have different offsets within an 8-byte word. */
 125 .Lmisaligned8:
 126         cmp     limit, #8
 127         b.lo    .Ltiny8proc /* fewer than 8 bytes: plain byte loop */
 128 
 129         and     tmp1, src1, #7
 130         neg     tmp1, tmp1
 131         add     tmp1, tmp1, #8          /* tmp1 = 8 - (src1 & 7), bytes up to src1's next boundary */
 132         and     tmp2, src2, #7
 133         neg     tmp2, tmp2
 134         add     tmp2, tmp2, #8          /* tmp2 = 8 - (src2 & 7), bytes up to src2's next boundary */
 135         subs    tmp3, tmp1, tmp2        /* tmp3 = tmp1 - tmp2, kept for the realignment below */
 136         csel    pos, tmp1, tmp2, hi /* pos = the larger of tmp1 and tmp2 */
 137 
 138         sub     limit, limit, pos       /* limit >= 8 >= pos, so no underflow */
 139         /* Compare the first pos bytes one at a time. */
 140 .Ltinycmp:
 141         ldrb    data1w, [src1], #1
 142         ldrb    data2w, [src2], #1
 143         subs    pos, pos, #1
 144         ccmp    data1w, data2w, #0, ne  /* compare only while pos != 0; otherwise NZCV = 0000, which reads as "ne" */
 145         b.eq    .Ltinycmp
 146         cbnz    pos, 1f /* left the loop early: a byte differed */
 147         cmp     data1w, data2w          /* pos hit 0: the final byte was not compared by ccmp */
 148         b.eq    .Lstart_align
 149 1:
 150         sub     result, data1, data2    /* ldrb zero-extends, so this is the unsigned byte difference */
 151         ret
 152 
 153 .Lstart_align:
 154         lsr     limit_wd, limit, #3
 155         cbz     limit_wd, .Lremain8     /* less than one full word left */
 156 
 157         ands    xzr, src1, #7
 158         b.eq    .Lrecal_offset          /* src1 already on an 8-byte boundary */
 159         /* src1 is unaligned here, which happens when pos was tmp2, so tmp3 is negative. */
 160         add     src1, src1, tmp3 /* step src1 back onto its 8-byte boundary */
 161         add     src2, src2, tmp3        /* keep src2 in step with src1 */
 162         sub     limit, limit, tmp3      /* the bytes stepped back over are counted again */
 163         lsr     limit_wd, limit, #3
 164         cbz     limit_wd, .Lremain8
 165         /* One word compare with src1 aligned and src2 unaligned. */
 166         ldr     data1, [src1], #8
 167         ldr     data2, [src2], #8
 168 
 169         subs    limit_wd, limit_wd, #1
 170         eor     diff, data1, data2  /* non-zero if the two words differ */
 171         csinv   endloop, diff, xzr, ne  /* endloop = diff, or all-ones when limit_wd reached 0 */
 172         cbnz    endloop, .Lunequal_proc
 173         /* tmp3 mod 8 = distance of src2 past the preceding 8-byte boundary. */
 174         and     tmp3, tmp3, #7
 175 
 176 .Lrecal_offset:
 177         neg     pos, tmp3               /* negative offset that puts the src2 load on an 8-byte boundary */
 178 .Lloopcmp_proc:
 179         /* Each iteration does two compares. First: words at [src1 + pos] and [src2 + pos], */
 180         /* where the src2 load is aligned; the first -pos bytes were already found equal. */
 181         /* Second: words at [src1] and [src2], where the src1 load is aligned. */
 182 
 183 
 184 
 185 
 186 
 187         ldr     data1, [src1,pos]
 188         ldr     data2, [src2,pos]
 189         eor     diff, data1, data2  /* non-zero if the two words differ */
 190         cbnz    diff, .Lnot_limit
 191 
 192         /* Second compare; this one advances the pointers and the word count. */
 193         ldr     data1, [src1], #8
 194         ldr     data2, [src2], #8
 195         eor     diff, data1, data2  /* non-zero if the two words differ */
 196         subs    limit_wd, limit_wd, #1
 197         csinv   endloop, diff, xzr, ne  /* endloop = diff, or all-ones when limit_wd reached 0 */
 198         cbz     endloop, .Lloopcmp_proc
 199 .Lunequal_proc:
 200         cbz     diff, .Lremain8         /* words ran out without a difference: check leftover bytes */
 201 
 202         /* Fall through: the most recent word compare found a difference. */
 203 .Lnot_limit:
 204         /* Byte-reverse on little-endian so the lowest-addressed byte sits in the most */
 205         /* significant position; clz then locates the first differing bit in memory order. */
 206 
 207 
 208 CPU_LE( rev     diff, diff )
 209 CPU_LE( rev     data1, data1 )
 210 CPU_LE( rev     data2, data2 )
 211 
 212         /* The highest set bit of diff is either the first differing bit or, when diff was */
 213         /* padded with mask, the end of the valid data. Shifting both words left by that */
 214         /* bit index brings the deciding bits to the top. */
 215 
 216 
 217 
 218         clz     pos, diff
 219         lsl     data1, data1, pos
 220         lsl     data2, data2, pos
 221         /* Keep only the top 8 bits of each word, zero-extended, and subtract them. */
 222 
 223 
 224 
 225         lsr     data1, data1, #56
 226         sub     result, data1, data2, lsr #56
 227         ret
 228 
 229 .Lremain8:
 230         /* All full words matched; limit % 8 bytes may remain. */
 231         ands    limit, limit, #7
 232         b.eq    .Lret0
 233 
 234 .Ltiny8proc:
 235         ldrb    data1w, [src1], #1
 236         ldrb    data2w, [src2], #1
 237         subs    limit, limit, #1
 238 
 239         ccmp    data1w, data2w, #0, ne  /* compare only while limit != 0; otherwise NZCV = 0000 ends the loop */
 240         b.eq    .Ltiny8proc
 241         sub     result, data1, data2    /* difference of the last bytes loaded; 0 if they matched */
 242         ret
 243 .Lret0:
 244         mov     result, #0
 245         ret
 246 ENDPIPROC(memcmp)       /* NOTE(review): macro defined outside this file; presumably closes the symbol opened by WEAK() - confirm */
 247 EXPORT_SYMBOL_NOKASAN(memcmp)   /* NOTE(review): macro defined outside this file; presumably exports the symbol - confirm */