err.no Git - linux-2.6/blob - arch/mips/lib/csum_partial.S

   1 /*
   2  * This file is subject to the terms and conditions of the GNU General Public
   3  * License.  See the file "COPYING" in the main directory of this archive
   4  * for more details.
   5  *
   6  * Quick'n'dirty IP checksum ...
   7  *
   8  * Copyright (C) 1998, 1999 Ralf Baechle
   9  * Copyright (C) 1999 Silicon Graphics, Inc.
  10  */
  11 #include <asm/asm.h>
  12 #include <asm/regdef.h>
  13
  14 #ifdef CONFIG_64BIT
  15 /*
  16  * This code base is shared with the mips32 tree, which uses the o32 ABI
  17  * register definitions.  The n64 ABI register names are therefore
  18  * redefined here to follow the o32 ABI register naming.
      *
      * With these definitions t0-t7 name $8-$15.  t0-t3 are #undef'ed first;
      * t4-t7 are defined without a preceding #undef -- presumably the n64
      * definitions in <asm/regdef.h> do not provide them (TODO confirm).
  19  */
  20 #undef t0
  21 #undef t1
  22 #undef t2
  23 #undef t3
  24 #define t0      $8
  25 #define t1      $9
  26 #define t2      $10
  27 #define t3      $11
  28 #define t4      $12
  29 #define t5      $13
  30 #define t6      $14
  31 #define t7      $15
  32 #endif /* CONFIG_64BIT */
  33
  /*
   * ADDC(sum, reg): sum += reg, then add the carry out of that addition
   * back into sum (end-around carry).
   *
   * After the first addu, sum is (unsigned) less than reg exactly when the
   * addition wrapped, so the sltu leaves that carry (0 or 1) in v1, and the
   * second addu adds it to sum.
   *
   * Overwrites v1.  reg is read again after sum has been updated, so reg
   * must be a different register from sum.
   */
  34 #define ADDC(sum,reg)                                           \
  35         addu    sum, reg;                                       \
  36         sltu    v1, sum, reg;                                   \
  37         addu    sum, v1
  38
  /*
   * CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3): accumulate the
   * eight 32-bit words (32 bytes) at src + offset .. src + offset + 0x1c
   * into sum with ADDC.
   *
   * The words are loaded with lw in two groups of four into _t0.._t3, each
   * group being fully loaded before its four ADDCs run.  src itself is not
   * modified; _t0.._t3 are overwritten, and v1 is overwritten by ADDC.
   *
   * The last line of the macro ends in a continuation backslash, so the
   * line that follows it is still part of the macro body.
   */
  39 #define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)     \
  40         lw      _t0, (offset + 0x00)(src);                      \
  41         lw      _t1, (offset + 0x04)(src);                      \
  42         lw      _t2, (offset + 0x08)(src);                      \
  43         lw      _t3, (offset + 0x0c)(src);                      \
  44         ADDC(sum, _t0);                                         \
  45         ADDC(sum, _t1);                                         \
  46         ADDC(sum, _t2);                                         \
  47         ADDC(sum, _t3);                                         \
  48         lw      _t0, (offset + 0x10)(src);                      \
  49         lw      _t1, (offset + 0x14)(src);                      \
  50         lw      _t2, (offset + 0x18)(src);                      \
  51         lw      _t3, (offset + 0x1c)(src);                      \
  52         ADDC(sum, _t0);                                         \
  53         ADDC(sum, _t1);                                         \
  54         ADDC(sum, _t2);                                         \
  55         ADDC(sum, _t3);                                         \
  56
  57 /*
  58  * a0: source address
  59  * a1: length of the area to checksum
  60  * a2: partial checksum
      *
      * csum_partial sums the a1 bytes at a0 using 32-bit additions whose
      * carry is added back in (ADDC), folds the 32-bit total down to 16 bits,
      * byte-swaps that value if the buffer started on an odd address, and
      * returns it plus a2 (again via ADDC) in v0.
      *
      * Register roles in the code below:
      *   v0 (sum)  running checksum and return value
      *   v1        carry scratch; overwritten by every ADDC and by the fold
      *   t7        1 if src was odd on entry (only computed when a1 >= 8),
      *             otherwise 0; selects the final byte swap
      *   t8, t2    alignment tests, loop counts and remaining-length values
      *             (t2 also receives the last odd byte in the tail code)
      *   t0-t4     temporaries for data loaded from the buffer
      *
      * Outline:
      *   1. a1 < 8: go straight to small_csumcpy with t2 = a1.
      *   2. Otherwise consume 1, 2, 4, 8 and 16 bytes as needed, testing one
      *      address bit of src per step and decrementing a1 each time.  If
      *      fewer than 56 bytes remain after the halfword step, the wider
      *      steps are skipped and do_end_words sums the whole words left.
      *   3. Sum a1 >> 7 chunks of 128 bytes, then one 64-byte and one 32-byte
      *      chunk according to bits 6 and 5 of a1, then (a1 & 0x1c) / 4
      *      words.  a1 is not decremented here; its bits are tested instead.
      *   4. small_csumcpy sums the last t2 bytes (at most 7) with unaligned
      *      loads, then the result is folded, optionally swapped, and a2 is
      *      added.
      *
      * The body is assembled under ".set noreorder": the instruction written
      * directly after each branch (indented by one extra space) occupies the
      * branch delay slot and executes whether or not the branch is taken.
      * Several delay slots pre-compute the value tested by the next branch,
      * so the statement order matters throughout.
  61  */
  62
  63 #define src a0
  64 #define sum v0
  65
  66         .text
  67         .set    noreorder
  68         .align  5
  69 LEAF(csum_partial)
  70         move    sum, zero
  71         move    t7, zero
  72
  73         sltiu   t8, a1, 0x8
  74         bnez    t8, small_csumcpy               /* < 8 bytes to copy */
  75          move   t2, a1                          /* delay slot: t2 = length for small_csumcpy */
  76
  77         andi    t7, src, 0x1                    /* odd buffer? */
  78

             /*
              * Leading byte, taken only when src is odd (t7 != 0).  On
              * little-endian builds the byte is shifted left by 8 before it
              * is added.  Both the delay slot and the last instruction of
              * this step leave t8 = src & 2 for word_align.
              */
  79 hword_align:
  80         beqz    t7, word_align
  81          andi   t8, src, 0x2
  82
  83         lbu     t0, (src)
  84         LONG_SUBU       a1, a1, 0x1
  85 #ifdef __MIPSEL__
  86         sll     t0, t0, 8
  87 #endif
  88         ADDC(sum, t0)
  89         PTR_ADDU        src, src, 0x1
  90         andi    t8, src, 0x2
  91

             /*
              * Consume one halfword if bit 1 of src is set.  On both paths
              * t8 ends up as (a1 < 56) for the test at dword_align.
              */
  92 word_align:
  93         beqz    t8, dword_align
  94          sltiu  t8, a1, 56
  95
  96         lhu     t0, (src)
  97         LONG_SUBU       a1, a1, 0x2
  98         ADDC(sum, t0)
  99         sltiu   t8, a1, 56
 100         PTR_ADDU        src, src, 0x2
 101

             /*
              * Fewer than 56 bytes left: skip the wider alignment steps and
              * branch to do_end_words with t8 = a1 (set in the delay slot),
              * which then sums a1 >> 2 words.  Otherwise consume one word if
              * bit 2 of src is set, leaving t8 = src & 8 for qword_align.
              */
 102 dword_align:
 103         bnez    t8, do_end_words
 104          move   t8, a1
 105
 106         andi    t8, src, 0x4
 107         beqz    t8, qword_align
 108          andi   t8, src, 0x8
 109
 110         lw      t0, 0x00(src)
 111         LONG_SUBU       a1, a1, 0x4
 112         ADDC(sum, t0)
 113         PTR_ADDU        src, src, 0x4
 114         andi    t8, src, 0x8
 115

             /*
              * Consume two words if bit 3 of src is set, leaving
              * t8 = src & 0x10 for oword_align.
              */
 116 qword_align:
 117         beqz    t8, oword_align
 118          andi   t8, src, 0x10
 119
 120         lw      t0, 0x00(src)
 121         lw      t1, 0x04(src)
 122         LONG_SUBU       a1, a1, 0x8
 123         ADDC(sum, t0)
 124         ADDC(sum, t1)
 125         PTR_ADDU        src, src, 0x8
 126         andi    t8, src, 0x10
 127

             /*
              * Consume four words if bit 4 of src is set.  After this step
              * the low five bits of src are clear and t8 = a1 >> 7, the
              * number of 128-byte chunks to sum.
              */
 128 oword_align:
 129         beqz    t8, begin_movement
 130          LONG_SRL       t8, a1, 0x7
 131
 132         lw      t3, 0x08(src)
 133         lw      t4, 0x0c(src)
 134         lw      t0, 0x00(src)
 135         lw      t1, 0x04(src)
 136         ADDC(sum, t3)
 137         ADDC(sum, t4)
 138         ADDC(sum, t0)
 139         ADDC(sum, t1)
 140         LONG_SUBU       a1, a1, 0x10
 141         PTR_ADDU        src, src, 0x10
 142         LONG_SRL        t8, a1, 0x7
 143

             /*
              * Skip the 128-byte loop when there are no such chunks.  The
              * delay slot sets t2 = a1 & 0x40, tested after the loop.
              */
 144 begin_movement:
 145         beqz    t8, 1f
 146          andi   t2, a1, 0x40
 147

             /*
              * Sum 128 bytes per iteration, t8 counting down to zero.  src
              * is advanced by 0x80 in the delay slot on every iteration,
              * including the last one.
              */
 148 move_128bytes:
 149         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 150         CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 151         CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
 152         CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
 153         LONG_SUBU       t8, t8, 0x01
 154         bnez    t8, move_128bytes
 155          PTR_ADDU       src, src, 0x80
 156

             /*
              * One 64-byte chunk if bit 6 of a1 is set (t2 from above).
              * Delay slot: t2 = a1 & 0x20 for the 32-byte test.
              */
 157 1:
 158         beqz    t2, 1f
 159          andi   t2, a1, 0x20
 160
 161 move_64bytes:
 162         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 163         CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 164         PTR_ADDU        src, src, 0x40
 165

             /*
              * One 32-byte chunk if bit 5 of a1 is set.  On both paths
              * t8 = a1 & 0x1c, the bytes in the remaining whole words.
              */
 166 1:
 167         beqz    t2, do_end_words
 168          andi   t8, a1, 0x1c
 169
 170 move_32bytes:
 171         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 172         andi    t8, a1, 0x1c
 173         PTR_ADDU        src, src, 0x20
 174

             /*
              * t8 holds a byte count here: a1 & 0x1c, or all of a1 when
              * entered from dword_align.  If it is zero there are no whole
              * words left.  The delay slot sets t2 = a1 & 3, the tail byte
              * count handed to small_csumcpy; t8 then becomes a word count.
              */
 175 do_end_words:
 176         beqz    t8, small_csumcpy
 177          andi   t2, a1, 0x3
 178         LONG_SRL        t8, t8, 0x2
 179

             /* Sum t8 words, advancing src by 4 in the delay slot each time. */
 180 end_words:
 181         lw      t0, (src)
 182         LONG_SUBU       t8, t8, 0x1
 183         ADDC(sum, t0)
 184         bnez    t8, end_words
 185          PTR_ADDU       src, src, 0x4
 186
 187 /* unknown src alignment and < 8 bytes to go  */
             /*
              * Entered with t2 = number of bytes still to sum.  Bits 2, 1
              * and 0 of that count select an optional word, halfword and
              * byte, read with ulw / ulhu / lbu.
              */
 188 small_csumcpy:
 189         move    a1, t2
 190
 191         andi    t0, a1, 4
 192         beqz    t0, 1f
 193          andi   t0, a1, 2
 194
 195         /* Still a full word to go  */
 196         ulw     t1, (src)
 197         PTR_ADDIU       src, 4
 198         ADDC(sum, t1)
 199

             /*
              * t1 collects the optional trailing halfword and byte and is
              * added to sum once, at the last "1:" below.  It starts at 0.
              */
 200 1:      move    t1, zero
 201         beqz    t0, 1f
 202          andi   t0, a1, 1
 203
 204         /* Still a halfword to go  */
 205         ulhu    t1, (src)
 206         PTR_ADDIU       src, 2
 207

             /*
              * The sll in the delay slot runs on both paths: it moves the
              * halfword (or 0) into bits 31:16 of t1, leaving the low half
              * free for the final byte.
              */
 208 1:      beqz    t0, 1f
 209          sll    t1, t1, 16
 210
 211         lbu     t2, (src)
 212          nop                                    /* presumably a load delay slot before t2 is used -- confirm */
 213
             /* On big-endian builds the last byte is shifted left by 8. */
 214 #ifdef __MIPSEB__
 215         sll     t2, t2, 8
 216 #endif
 217         or      t1, t2
 218
 219 1:      ADDC(sum, t1)
 220
 221         /* fold checksum */
             /*
              * Adding (sum << 16) to sum leaves low16 + high16 in the upper
              * half, and v1 catches the carry out of that addition.  The
              * upper half is then shifted down and the carry added, giving
              * a 16-bit result.
              */
 222         sll     v1, sum, 16
 223         addu    sum, v1
 224         sltu    v1, sum, v1
 225         srl     sum, sum, 16
 226         addu    sum, v1
 227
 228         /* odd buffer alignment? */
             /* If so (t7 != 0), swap the two bytes of the 16-bit result. */
 229         beqz    t7, 1f
 230          nop
 231         sll     v1, sum, 8
 232         srl     sum, sum, 8
 233         or      sum, v1
 234         andi    sum, 0xffff
 235 1:
             /*
              * Under ".set reorder" the assembler handles the delay slot of
              * the jr itself, so none is written out here.
              */
 236         .set    reorder
 237         /* Add the passed partial csum.  */
 238         ADDC(sum, a2)
 239         jr      ra
 240         .set    noreorder
 241         END(csum_partial)