]> WPIA git - cassiopeia.git/blob - lib/openssl/crypto/aes/asm/aes-mips.pl
upd: openssl to 1.1.0
[cassiopeia.git] / lib / openssl / crypto / aes / asm / aes-mips.pl
1 #! /usr/bin/env perl
2 # Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the OpenSSL license (the "License").  You may not use
5 # this file except in compliance with the License.  You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16
17 # AES for MIPS
18
19 # October 2010
20 #
21 # Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
22 # spends ~68 cycles per byte processed with 128-bit key. This is ~16%
23 # faster than gcc-generated code, which is not very impressive. But
24 # recall that compressed S-box requires extra processing, namely
25 # additional rotations. Rotations are implemented with lwl/lwr pairs,
26 # which are normally used for loading unaligned data. Another cool
27 # thing about this module is its endian neutrality, which means that
28 # it processes data without ever changing byte order...
29
30 # September 2012
31 #
32 # Add MIPS32R2 (~10% fewer instructions) and SmartMIPS ASE (further
33 # ~25% fewer instructions) code. Note that there is no run-time switch,
34 # instead, code path is chosen upon pre-process time, pass -mips32r2
35 # or/and -msmartmips.
36
37 ######################################################################
38 # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
39 # widely used. Then there is a new contender: NUBI. It appears that if
40 # one picks the latter, it's possible to arrange code in ABI neutral
41 # manner. Therefore let's stick to NUBI register layout:
42 #
43 ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));      # names bound to the strings $0,$1,$2,$24,$25
44 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));  # $a0-$a7 -> $4..$11
45 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));      # $s0-$s11 -> $12..$23
46 ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));   # $gp is $3 in this layout; $tp,$sp,$fp,$ra -> $28..$31
47 #
48 # The return value is placed in $a0. Following coding rules facilitate
49 # interoperability:
50 #
51 # - never ever touch $tp, "thread pointer", former $gp;
52 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
53 #   old code];
54 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
55 #
56 # For reference here is register layout for N32/64 MIPS ABIs:
57 #
58 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
59 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
60 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
61 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
62 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
63 #
64 $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
65
66 if ($flavour =~ /64|n32/i) {    # n32, 64, nubi64: doubleword mnemonics, 8-byte register slots
67         $PTR_LA="dla";
68         $PTR_ADD="dadd";        # incidentally works even on n32
69         $PTR_SUB="dsub";        # incidentally works even on n32
70         $PTR_INS="dins";
71         $REG_S="sd";
72         $REG_L="ld";
73         $PTR_SLL="dsll";        # incidentally works even on n32
74         $SZREG=8;
75 } else {                        # o32, nubi32: word mnemonics, 4-byte register slots
76         $PTR_LA="la";
77         $PTR_ADD="add";
78         $PTR_SUB="sub";
79         $PTR_INS="ins";
80         $REG_S="sw";
81         $REG_L="lw";
82         $PTR_SLL="sll";
83         $SZREG=4;
84 }
85 $pf = ($flavour =~ /nubi/i) ? $t0 : $t2;        # operand of .cpload/.cpsetup: $2 for nubi, $25 otherwise
86 #
87 # <appro@openssl.org>
88 #
89 ######################################################################
90
91 $big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});
92
93 # No compiler to ask: fall back to the byte order of the host running perl.
94 if (!defined($big_endian))
95 {    $big_endian=(unpack('L',pack('N',1))==1);   }
96
97 # Drop leading arguments until one looks like a file name ("name.ext").
98 while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
99 # Redirect STDOUT exactly once; with no file name output stays on stdout.
100 if ($output) { open(STDOUT,'>',$output) or die "can't open $output: $!"; }
101
102 my ($MSB,$LSB)=(0,3);   # automatically converted to little-endian
103
104 $code.=<<___;   # file preamble: .text, cpp feature macros, PIC option, .set noat
105 .text
106 #ifdef OPENSSL_FIPSCANISTER
107 # include <openssl/fipssyms.h>
108 #endif
109
110 #if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
111 #define _MIPS_ARCH_MIPS32R2
112 #endif
113
114 #if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
115 .option pic2
116 #endif
117 .set    noat
118 ___
119 \f
120 {{{
121 my $FRAMESIZE=16*$SZREG;        # frame size used by .frame and the \$sp adjustment: 16 register slots
122 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";      # operand of the .mask directive
123
124 my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);     # shadows global $s0-$s3: state words are $8..$11
125 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);        # table indices in $1,$2,$24,$25 (global $t0-$t2, read before the next line shadows them)
126 my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23)); # local temporaries $12..$23
127 my ($key0,$cnt)=($gp,$fp);      # round-key pointer in $3, round counter in $30
128
129 # instruction ordering is "stolen" from output from MIPSpro assembler
130 # invoked with -mips3 -O3 arguments...
131 $code.=<<___;   # emits _mips_AES_encrypt and the first directives of AES_encrypt
132 .align  5
133 .ent    _mips_AES_encrypt
134 _mips_AES_encrypt:
135         .frame  $sp,0,$ra
136         .set    reorder
137         lw      $t0,0($key)
138         lw      $t1,4($key)
139         lw      $t2,8($key)
140         lw      $t3,12($key)
141         lw      $cnt,240($key)
142         $PTR_ADD $key0,$key,16
143
144         xor     $s0,$t0
145         xor     $s1,$t1
146         xor     $s2,$t2
147         xor     $s3,$t3
148
149         sub     $cnt,1
150 #if defined(__mips_smartmips)
151         ext     $i0,$s1,16,8
152 .Loop_enc:
153         ext     $i1,$s2,16,8
154         ext     $i2,$s3,16,8
155         ext     $i3,$s0,16,8
156         lwxs    $t0,$i0($Tbl)           # Te1[s1>>16]
157         ext     $i0,$s2,8,8
158         lwxs    $t1,$i1($Tbl)           # Te1[s2>>16]
159         ext     $i1,$s3,8,8
160         lwxs    $t2,$i2($Tbl)           # Te1[s3>>16]
161         ext     $i2,$s0,8,8
162         lwxs    $t3,$i3($Tbl)           # Te1[s0>>16]
163         ext     $i3,$s1,8,8
164
165         lwxs    $t4,$i0($Tbl)           # Te2[s2>>8]
166         ext     $i0,$s3,0,8
167         lwxs    $t5,$i1($Tbl)           # Te2[s3>>8]
168         ext     $i1,$s0,0,8
169         lwxs    $t6,$i2($Tbl)           # Te2[s0>>8]
170         ext     $i2,$s1,0,8
171         lwxs    $t7,$i3($Tbl)           # Te2[s1>>8]
172         ext     $i3,$s2,0,8
173
174         lwxs    $t8,$i0($Tbl)           # Te3[s3]
175         ext     $i0,$s0,24,8
176         lwxs    $t9,$i1($Tbl)           # Te3[s0]
177         ext     $i1,$s1,24,8
178         lwxs    $t10,$i2($Tbl)          # Te3[s1]
179         ext     $i2,$s2,24,8
180         lwxs    $t11,$i3($Tbl)          # Te3[s2]
181         ext     $i3,$s3,24,8
182
183         rotr    $t0,$t0,8
184         rotr    $t1,$t1,8
185         rotr    $t2,$t2,8
186         rotr    $t3,$t3,8
187
188         rotr    $t4,$t4,16
189         rotr    $t5,$t5,16
190         rotr    $t6,$t6,16
191         rotr    $t7,$t7,16
192
193         xor     $t0,$t4
194         lwxs    $t4,$i0($Tbl)           # Te0[s0>>24]
195         xor     $t1,$t5
196         lwxs    $t5,$i1($Tbl)           # Te0[s1>>24]
197         xor     $t2,$t6
198         lwxs    $t6,$i2($Tbl)           # Te0[s2>>24]
199         xor     $t3,$t7
200         lwxs    $t7,$i3($Tbl)           # Te0[s3>>24]
201
202         rotr    $t8,$t8,24
203         lw      $s0,0($key0)
204         rotr    $t9,$t9,24
205         lw      $s1,4($key0)
206         rotr    $t10,$t10,24
207         lw      $s2,8($key0)
208         rotr    $t11,$t11,24
209         lw      $s3,12($key0)
210
211         xor     $t0,$t8
212         xor     $t1,$t9
213         xor     $t2,$t10
214         xor     $t3,$t11
215
216         xor     $t0,$t4
217         xor     $t1,$t5
218         xor     $t2,$t6
219         xor     $t3,$t7
220
221         sub     $cnt,1
222         $PTR_ADD $key0,16
223         xor     $s0,$t0
224         xor     $s1,$t1
225         xor     $s2,$t2
226         xor     $s3,$t3
227         .set    noreorder
228         bnez    $cnt,.Loop_enc
229         ext     $i0,$s1,16,8
230
231         _xtr    $i0,$s1,16-2
232 #else
233         _xtr    $i0,$s1,16-2
234 .Loop_enc:
235         _xtr    $i1,$s2,16-2
236         _xtr    $i2,$s3,16-2
237         _xtr    $i3,$s0,16-2
238         and     $i0,0x3fc
239         and     $i1,0x3fc
240         and     $i2,0x3fc
241         and     $i3,0x3fc
242         $PTR_ADD $i0,$Tbl
243         $PTR_ADD $i1,$Tbl
244         $PTR_ADD $i2,$Tbl
245         $PTR_ADD $i3,$Tbl
246 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
247         lw      $t0,0($i0)              # Te1[s1>>16]
248         _xtr    $i0,$s2,8-2
249         lw      $t1,0($i1)              # Te1[s2>>16]
250         _xtr    $i1,$s3,8-2
251         lw      $t2,0($i2)              # Te1[s3>>16]
252         _xtr    $i2,$s0,8-2
253         lw      $t3,0($i3)              # Te1[s0>>16]
254         _xtr    $i3,$s1,8-2
255 #else
256         lwl     $t0,3($i0)              # Te1[s1>>16]
257         lwl     $t1,3($i1)              # Te1[s2>>16]
258         lwl     $t2,3($i2)              # Te1[s3>>16]
259         lwl     $t3,3($i3)              # Te1[s0>>16]
260         lwr     $t0,2($i0)              # Te1[s1>>16]
261         _xtr    $i0,$s2,8-2
262         lwr     $t1,2($i1)              # Te1[s2>>16]
263         _xtr    $i1,$s3,8-2
264         lwr     $t2,2($i2)              # Te1[s3>>16]
265         _xtr    $i2,$s0,8-2
266         lwr     $t3,2($i3)              # Te1[s0>>16]
267         _xtr    $i3,$s1,8-2
268 #endif
269         and     $i0,0x3fc
270         and     $i1,0x3fc
271         and     $i2,0x3fc
272         and     $i3,0x3fc
273         $PTR_ADD $i0,$Tbl
274         $PTR_ADD $i1,$Tbl
275         $PTR_ADD $i2,$Tbl
276         $PTR_ADD $i3,$Tbl
277 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
278         rotr    $t0,$t0,8
279         rotr    $t1,$t1,8
280         rotr    $t2,$t2,8
281         rotr    $t3,$t3,8
282 # if defined(_MIPSEL)
283         lw      $t4,0($i0)              # Te2[s2>>8]
284         _xtr    $i0,$s3,0-2
285         lw      $t5,0($i1)              # Te2[s3>>8]
286         _xtr    $i1,$s0,0-2
287         lw      $t6,0($i2)              # Te2[s0>>8]
288         _xtr    $i2,$s1,0-2
289         lw      $t7,0($i3)              # Te2[s1>>8]
290         _xtr    $i3,$s2,0-2
291
292         and     $i0,0x3fc
293         and     $i1,0x3fc
294         and     $i2,0x3fc
295         and     $i3,0x3fc
296         $PTR_ADD $i0,$Tbl
297         $PTR_ADD $i1,$Tbl
298         $PTR_ADD $i2,$Tbl
299         $PTR_ADD $i3,$Tbl
300         lw      $t8,0($i0)              # Te3[s3]
301         $PTR_INS $i0,$s0,2,8
302         lw      $t9,0($i1)              # Te3[s0]
303         $PTR_INS $i1,$s1,2,8
304         lw      $t10,0($i2)             # Te3[s1]
305         $PTR_INS $i2,$s2,2,8
306         lw      $t11,0($i3)             # Te3[s2]
307         $PTR_INS $i3,$s3,2,8
308 # else
309         lw      $t4,0($i0)              # Te2[s2>>8]
310         $PTR_INS $i0,$s3,2,8
311         lw      $t5,0($i1)              # Te2[s3>>8]
312         $PTR_INS $i1,$s0,2,8
313         lw      $t6,0($i2)              # Te2[s0>>8]
314         $PTR_INS $i2,$s1,2,8
315         lw      $t7,0($i3)              # Te2[s1>>8]
316         $PTR_INS $i3,$s2,2,8
317
318         lw      $t8,0($i0)              # Te3[s3]
319         _xtr    $i0,$s0,24-2
320         lw      $t9,0($i1)              # Te3[s0]
321         _xtr    $i1,$s1,24-2
322         lw      $t10,0($i2)             # Te3[s1]
323         _xtr    $i2,$s2,24-2
324         lw      $t11,0($i3)             # Te3[s2]
325         _xtr    $i3,$s3,24-2
326
327         and     $i0,0x3fc
328         and     $i1,0x3fc
329         and     $i2,0x3fc
330         and     $i3,0x3fc
331         $PTR_ADD $i0,$Tbl
332         $PTR_ADD $i1,$Tbl
333         $PTR_ADD $i2,$Tbl
334         $PTR_ADD $i3,$Tbl
335 # endif
336         rotr    $t4,$t4,16
337         rotr    $t5,$t5,16
338         rotr    $t6,$t6,16
339         rotr    $t7,$t7,16
340
341         rotr    $t8,$t8,24
342         rotr    $t9,$t9,24
343         rotr    $t10,$t10,24
344         rotr    $t11,$t11,24
345 #else
346         lwl     $t4,2($i0)              # Te2[s2>>8]
347         lwl     $t5,2($i1)              # Te2[s3>>8]
348         lwl     $t6,2($i2)              # Te2[s0>>8]
349         lwl     $t7,2($i3)              # Te2[s1>>8]
350         lwr     $t4,1($i0)              # Te2[s2>>8]
351         _xtr    $i0,$s3,0-2
352         lwr     $t5,1($i1)              # Te2[s3>>8]
353         _xtr    $i1,$s0,0-2
354         lwr     $t6,1($i2)              # Te2[s0>>8]
355         _xtr    $i2,$s1,0-2
356         lwr     $t7,1($i3)              # Te2[s1>>8]
357         _xtr    $i3,$s2,0-2
358
359         and     $i0,0x3fc
360         and     $i1,0x3fc
361         and     $i2,0x3fc
362         and     $i3,0x3fc
363         $PTR_ADD $i0,$Tbl
364         $PTR_ADD $i1,$Tbl
365         $PTR_ADD $i2,$Tbl
366         $PTR_ADD $i3,$Tbl
367         lwl     $t8,1($i0)              # Te3[s3]
368         lwl     $t9,1($i1)              # Te3[s0]
369         lwl     $t10,1($i2)             # Te3[s1]
370         lwl     $t11,1($i3)             # Te3[s2]
371         lwr     $t8,0($i0)              # Te3[s3]
372         _xtr    $i0,$s0,24-2
373         lwr     $t9,0($i1)              # Te3[s0]
374         _xtr    $i1,$s1,24-2
375         lwr     $t10,0($i2)             # Te3[s1]
376         _xtr    $i2,$s2,24-2
377         lwr     $t11,0($i3)             # Te3[s2]
378         _xtr    $i3,$s3,24-2
379
380         and     $i0,0x3fc
381         and     $i1,0x3fc
382         and     $i2,0x3fc
383         and     $i3,0x3fc
384         $PTR_ADD $i0,$Tbl
385         $PTR_ADD $i1,$Tbl
386         $PTR_ADD $i2,$Tbl
387         $PTR_ADD $i3,$Tbl
388 #endif
389         xor     $t0,$t4
390         lw      $t4,0($i0)              # Te0[s0>>24]
391         xor     $t1,$t5
392         lw      $t5,0($i1)              # Te0[s1>>24]
393         xor     $t2,$t6
394         lw      $t6,0($i2)              # Te0[s2>>24]
395         xor     $t3,$t7
396         lw      $t7,0($i3)              # Te0[s3>>24]
397
398         xor     $t0,$t8
399         lw      $s0,0($key0)
400         xor     $t1,$t9
401         lw      $s1,4($key0)
402         xor     $t2,$t10
403         lw      $s2,8($key0)
404         xor     $t3,$t11
405         lw      $s3,12($key0)
406
407         xor     $t0,$t4
408         xor     $t1,$t5
409         xor     $t2,$t6
410         xor     $t3,$t7
411
412         sub     $cnt,1
413         $PTR_ADD $key0,16
414         xor     $s0,$t0
415         xor     $s1,$t1
416         xor     $s2,$t2
417         xor     $s3,$t3
418         .set    noreorder
419         bnez    $cnt,.Loop_enc
420         _xtr    $i0,$s1,16-2
421 #endif
422
423         .set    reorder
424         _xtr    $i1,$s2,16-2
425         _xtr    $i2,$s3,16-2
426         _xtr    $i3,$s0,16-2
427         and     $i0,0x3fc
428         and     $i1,0x3fc
429         and     $i2,0x3fc
430         and     $i3,0x3fc
431         $PTR_ADD $i0,$Tbl
432         $PTR_ADD $i1,$Tbl
433         $PTR_ADD $i2,$Tbl
434         $PTR_ADD $i3,$Tbl
435         lbu     $t0,2($i0)              # Te4[s1>>16]
436         _xtr    $i0,$s2,8-2
437         lbu     $t1,2($i1)              # Te4[s2>>16]
438         _xtr    $i1,$s3,8-2
439         lbu     $t2,2($i2)              # Te4[s3>>16]
440         _xtr    $i2,$s0,8-2
441         lbu     $t3,2($i3)              # Te4[s0>>16]
442         _xtr    $i3,$s1,8-2
443
444         and     $i0,0x3fc
445         and     $i1,0x3fc
446         and     $i2,0x3fc
447         and     $i3,0x3fc
448         $PTR_ADD $i0,$Tbl
449         $PTR_ADD $i1,$Tbl
450         $PTR_ADD $i2,$Tbl
451         $PTR_ADD $i3,$Tbl
452 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
453 # if defined(_MIPSEL)
454         lbu     $t4,2($i0)              # Te4[s2>>8]
455         $PTR_INS $i0,$s0,2,8
456         lbu     $t5,2($i1)              # Te4[s3>>8]
457         $PTR_INS $i1,$s1,2,8
458         lbu     $t6,2($i2)              # Te4[s0>>8]
459         $PTR_INS $i2,$s2,2,8
460         lbu     $t7,2($i3)              # Te4[s1>>8]
461         $PTR_INS $i3,$s3,2,8
462
463         lbu     $t8,2($i0)              # Te4[s0>>24]
464         _xtr    $i0,$s3,0-2
465         lbu     $t9,2($i1)              # Te4[s1>>24]
466         _xtr    $i1,$s0,0-2
467         lbu     $t10,2($i2)             # Te4[s2>>24]
468         _xtr    $i2,$s1,0-2
469         lbu     $t11,2($i3)             # Te4[s3>>24]
470         _xtr    $i3,$s2,0-2
471
472         and     $i0,0x3fc
473         and     $i1,0x3fc
474         and     $i2,0x3fc
475         and     $i3,0x3fc
476         $PTR_ADD $i0,$Tbl
477         $PTR_ADD $i1,$Tbl
478         $PTR_ADD $i2,$Tbl
479         $PTR_ADD $i3,$Tbl
480 # else
481         lbu     $t4,2($i0)              # Te4[s2>>8]
482         _xtr    $i0,$s0,24-2
483         lbu     $t5,2($i1)              # Te4[s3>>8]
484         _xtr    $i1,$s1,24-2
485         lbu     $t6,2($i2)              # Te4[s0>>8]
486         _xtr    $i2,$s2,24-2
487         lbu     $t7,2($i3)              # Te4[s1>>8]
488         _xtr    $i3,$s3,24-2
489
490         and     $i0,0x3fc
491         and     $i1,0x3fc
492         and     $i2,0x3fc
493         and     $i3,0x3fc
494         $PTR_ADD $i0,$Tbl
495         $PTR_ADD $i1,$Tbl
496         $PTR_ADD $i2,$Tbl
497         $PTR_ADD $i3,$Tbl
498         lbu     $t8,2($i0)              # Te4[s0>>24]
499         $PTR_INS $i0,$s3,2,8
500         lbu     $t9,2($i1)              # Te4[s1>>24]
501         $PTR_INS $i1,$s0,2,8
502         lbu     $t10,2($i2)             # Te4[s2>>24]
503         $PTR_INS $i2,$s1,2,8
504         lbu     $t11,2($i3)             # Te4[s3>>24]
505         $PTR_INS $i3,$s2,2,8
506 # endif
507         _ins    $t0,16
508         _ins    $t1,16
509         _ins    $t2,16
510         _ins    $t3,16
511
512         _ins2   $t0,$t4,8
513         lbu     $t4,2($i0)              # Te4[s3]
514         _ins2   $t1,$t5,8
515         lbu     $t5,2($i1)              # Te4[s0]
516         _ins2   $t2,$t6,8
517         lbu     $t6,2($i2)              # Te4[s1]
518         _ins2   $t3,$t7,8
519         lbu     $t7,2($i3)              # Te4[s2]
520
521         _ins2   $t0,$t8,24
522         lw      $s0,0($key0)
523         _ins2   $t1,$t9,24
524         lw      $s1,4($key0)
525         _ins2   $t2,$t10,24
526         lw      $s2,8($key0)
527         _ins2   $t3,$t11,24
528         lw      $s3,12($key0)
529
530         _ins2   $t0,$t4,0
531         _ins2   $t1,$t5,0
532         _ins2   $t2,$t6,0
533         _ins2   $t3,$t7,0
534 #else
535         lbu     $t4,2($i0)              # Te4[s2>>8]
536         _xtr    $i0,$s0,24-2
537         lbu     $t5,2($i1)              # Te4[s3>>8]
538         _xtr    $i1,$s1,24-2
539         lbu     $t6,2($i2)              # Te4[s0>>8]
540         _xtr    $i2,$s2,24-2
541         lbu     $t7,2($i3)              # Te4[s1>>8]
542         _xtr    $i3,$s3,24-2
543
544         and     $i0,0x3fc
545         and     $i1,0x3fc
546         and     $i2,0x3fc
547         and     $i3,0x3fc
548         $PTR_ADD $i0,$Tbl
549         $PTR_ADD $i1,$Tbl
550         $PTR_ADD $i2,$Tbl
551         $PTR_ADD $i3,$Tbl
552         lbu     $t8,2($i0)              # Te4[s0>>24]
553         _xtr    $i0,$s3,0-2
554         lbu     $t9,2($i1)              # Te4[s1>>24]
555         _xtr    $i1,$s0,0-2
556         lbu     $t10,2($i2)             # Te4[s2>>24]
557         _xtr    $i2,$s1,0-2
558         lbu     $t11,2($i3)             # Te4[s3>>24]
559         _xtr    $i3,$s2,0-2
560
561         and     $i0,0x3fc
562         and     $i1,0x3fc
563         and     $i2,0x3fc
564         and     $i3,0x3fc
565         $PTR_ADD $i0,$Tbl
566         $PTR_ADD $i1,$Tbl
567         $PTR_ADD $i2,$Tbl
568         $PTR_ADD $i3,$Tbl
569
570         _ins    $t0,16
571         _ins    $t1,16
572         _ins    $t2,16
573         _ins    $t3,16
574
575         _ins    $t4,8
576         _ins    $t5,8
577         _ins    $t6,8
578         _ins    $t7,8
579
580         xor     $t0,$t4
581         lbu     $t4,2($i0)              # Te4[s3]
582         xor     $t1,$t5
583         lbu     $t5,2($i1)              # Te4[s0]
584         xor     $t2,$t6
585         lbu     $t6,2($i2)              # Te4[s1]
586         xor     $t3,$t7
587         lbu     $t7,2($i3)              # Te4[s2]
588
589         _ins    $t8,24
590         lw      $s0,0($key0)
591         _ins    $t9,24
592         lw      $s1,4($key0)
593         _ins    $t10,24
594         lw      $s2,8($key0)
595         _ins    $t11,24
596         lw      $s3,12($key0)
597
598         xor     $t0,$t8
599         xor     $t1,$t9
600         xor     $t2,$t10
601         xor     $t3,$t11
602
603         _ins    $t4,0
604         _ins    $t5,0
605         _ins    $t6,0
606         _ins    $t7,0
607
608         xor     $t0,$t4
609         xor     $t1,$t5
610         xor     $t2,$t6
611         xor     $t3,$t7
612 #endif
613         xor     $s0,$t0
614         xor     $s1,$t1
615         xor     $s2,$t2
616         xor     $s3,$t3
617
618         jr      $ra
619 .end    _mips_AES_encrypt
620
621 .align  5
622 .globl  AES_encrypt
623 .ent    AES_encrypt
624 AES_encrypt:
625         .frame  $sp,$FRAMESIZE,$ra
626         .mask   $SAVED_REGS_MASK,-$SZREG
627         .set    noreorder
628 ___
629 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
630         .cpload $pf
631 ___
632 $code.=<<___;   # allocate the frame, then save $ra, $fp and $s4-$s11 ($16..$23)
633         $PTR_SUB $sp,$FRAMESIZE
634         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
635         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
636         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
637         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
638         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
639         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
640         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
641         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
642         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
643         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
644 ___
645 $code.=<<___ if ($flavour =~ /nubi/i);  # optimize non-nubi prologue
646         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
647         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
648         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
649         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
650         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
651 ___
652 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
653         .cplocal        $Tbl
654         .cpsetup        $pf,$zero,AES_encrypt
655 ___
656 $code.=<<___;   # load AES_Te address and the input block, call the core, store the result, restore registers
657         .set    reorder
658         $PTR_LA $Tbl,AES_Te             # PIC-ified 'load address'
659
660         lwl     $s0,0+$MSB($inp)
661         lwl     $s1,4+$MSB($inp)
662         lwl     $s2,8+$MSB($inp)
663         lwl     $s3,12+$MSB($inp)
664         lwr     $s0,0+$LSB($inp)
665         lwr     $s1,4+$LSB($inp)
666         lwr     $s2,8+$LSB($inp)
667         lwr     $s3,12+$LSB($inp)
668
669         bal     _mips_AES_encrypt
670
671         swr     $s0,0+$LSB($out)
672         swr     $s1,4+$LSB($out)
673         swr     $s2,8+$LSB($out)
674         swr     $s3,12+$LSB($out)
675         swl     $s0,0+$MSB($out)
676         swl     $s1,4+$MSB($out)
677         swl     $s2,8+$MSB($out)
678         swl     $s3,12+$MSB($out)
679
680         .set    noreorder
681         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
682         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
683         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
684         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
685         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
686         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
687         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
688         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
689         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
690         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
691 ___
692 $code.=<<___ if ($flavour =~ /nubi/i);  # nubi only: reload $12-$15 and $gp, mirroring the extra saves
693         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
694         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
695         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
696         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
697         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
698 ___
699 $code.=<<___;   # return; the frame is released by the instruction placed right after jr (noreorder is in effect)
700         jr      $ra
701         $PTR_ADD $sp,$FRAMESIZE
702 .end    AES_encrypt
703 ___
704 \f
705 $code.=<<___;
706 .align  5
707 .ent    _mips_AES_decrypt
708 _mips_AES_decrypt:
709         .frame  $sp,0,$ra
710         .set    reorder
711         lw      $t0,0($key)
712         lw      $t1,4($key)
713         lw      $t2,8($key)
714         lw      $t3,12($key)
715         lw      $cnt,240($key)
716         $PTR_ADD $key0,$key,16
717
718         xor     $s0,$t0
719         xor     $s1,$t1
720         xor     $s2,$t2
721         xor     $s3,$t3
722
723         sub     $cnt,1
724 #if defined(__mips_smartmips)
725         ext     $i0,$s3,16,8
726 .Loop_dec:
727         ext     $i1,$s0,16,8
728         ext     $i2,$s1,16,8
729         ext     $i3,$s2,16,8
730         lwxs    $t0,$i0($Tbl)           # Td1[s3>>16]
731         ext     $i0,$s2,8,8
732         lwxs    $t1,$i1($Tbl)           # Td1[s0>>16]
733         ext     $i1,$s3,8,8
734         lwxs    $t2,$i2($Tbl)           # Td1[s1>>16]
735         ext     $i2,$s0,8,8
736         lwxs    $t3,$i3($Tbl)           # Td1[s2>>16]
737         ext     $i3,$s1,8,8
738
739         lwxs    $t4,$i0($Tbl)           # Td2[s2>>8]
740         ext     $i0,$s1,0,8
741         lwxs    $t5,$i1($Tbl)           # Td2[s3>>8]
742         ext     $i1,$s2,0,8
743         lwxs    $t6,$i2($Tbl)           # Td2[s0>>8]
744         ext     $i2,$s3,0,8
745         lwxs    $t7,$i3($Tbl)           # Td2[s1>>8]
746         ext     $i3,$s0,0,8
747
748         lwxs    $t8,$i0($Tbl)           # Td3[s1]
749         ext     $i0,$s0,24,8
750         lwxs    $t9,$i1($Tbl)           # Td3[s2]
751         ext     $i1,$s1,24,8
752         lwxs    $t10,$i2($Tbl)          # Td3[s3]
753         ext     $i2,$s2,24,8
754         lwxs    $t11,$i3($Tbl)          # Td3[s0]
755         ext     $i3,$s3,24,8
756
757         rotr    $t0,$t0,8
758         rotr    $t1,$t1,8
759         rotr    $t2,$t2,8
760         rotr    $t3,$t3,8
761
762         rotr    $t4,$t4,16
763         rotr    $t5,$t5,16
764         rotr    $t6,$t6,16
765         rotr    $t7,$t7,16
766
767         xor     $t0,$t4
768         lwxs    $t4,$i0($Tbl)           # Td0[s0>>24]
769         xor     $t1,$t5
770         lwxs    $t5,$i1($Tbl)           # Td0[s1>>24]
771         xor     $t2,$t6
772         lwxs    $t6,$i2($Tbl)           # Td0[s2>>24]
773         xor     $t3,$t7
774         lwxs    $t7,$i3($Tbl)           # Td0[s3>>24]
775
776         rotr    $t8,$t8,24
777         lw      $s0,0($key0)
778         rotr    $t9,$t9,24
779         lw      $s1,4($key0)
780         rotr    $t10,$t10,24
781         lw      $s2,8($key0)
782         rotr    $t11,$t11,24
783         lw      $s3,12($key0)
784
785         xor     $t0,$t8
786         xor     $t1,$t9
787         xor     $t2,$t10
788         xor     $t3,$t11
789
790         xor     $t0,$t4
791         xor     $t1,$t5
792         xor     $t2,$t6
793         xor     $t3,$t7
794
795         sub     $cnt,1
796         $PTR_ADD $key0,16
797         xor     $s0,$t0
798         xor     $s1,$t1
799         xor     $s2,$t2
800         xor     $s3,$t3
801         .set    noreorder
802         bnez    $cnt,.Loop_dec
803         ext     $i0,$s3,16,8
804
805         _xtr    $i0,$s3,16-2
806 #else
807         _xtr    $i0,$s3,16-2
808 .Loop_dec:
809         _xtr    $i1,$s0,16-2
810         _xtr    $i2,$s1,16-2
811         _xtr    $i3,$s2,16-2
812         and     $i0,0x3fc
813         and     $i1,0x3fc
814         and     $i2,0x3fc
815         and     $i3,0x3fc
816         $PTR_ADD $i0,$Tbl
817         $PTR_ADD $i1,$Tbl
818         $PTR_ADD $i2,$Tbl
819         $PTR_ADD $i3,$Tbl
820 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
821         lw      $t0,0($i0)              # Td1[s3>>16]
822         _xtr    $i0,$s2,8-2
823         lw      $t1,0($i1)              # Td1[s0>>16]
824         _xtr    $i1,$s3,8-2
825         lw      $t2,0($i2)              # Td1[s1>>16]
826         _xtr    $i2,$s0,8-2
827         lw      $t3,0($i3)              # Td1[s2>>16]
828         _xtr    $i3,$s1,8-2
829 #else
830         lwl     $t0,3($i0)              # Td1[s3>>16]
831         lwl     $t1,3($i1)              # Td1[s0>>16]
832         lwl     $t2,3($i2)              # Td1[s1>>16]
833         lwl     $t3,3($i3)              # Td1[s2>>16]
834         lwr     $t0,2($i0)              # Td1[s3>>16]
835         _xtr    $i0,$s2,8-2
836         lwr     $t1,2($i1)              # Td1[s0>>16]
837         _xtr    $i1,$s3,8-2
838         lwr     $t2,2($i2)              # Td1[s1>>16]
839         _xtr    $i2,$s0,8-2
840         lwr     $t3,2($i3)              # Td1[s2>>16]
841         _xtr    $i3,$s1,8-2
842 #endif
843
844         and     $i0,0x3fc
845         and     $i1,0x3fc
846         and     $i2,0x3fc
847         and     $i3,0x3fc
848         $PTR_ADD $i0,$Tbl
849         $PTR_ADD $i1,$Tbl
850         $PTR_ADD $i2,$Tbl
851         $PTR_ADD $i3,$Tbl
852 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
853         rotr    $t0,$t0,8
854         rotr    $t1,$t1,8
855         rotr    $t2,$t2,8
856         rotr    $t3,$t3,8
857 # if defined(_MIPSEL)
858         lw      $t4,0($i0)              # Td2[s2>>8]
859         _xtr    $i0,$s1,0-2
860         lw      $t5,0($i1)              # Td2[s3>>8]
861         _xtr    $i1,$s2,0-2
862         lw      $t6,0($i2)              # Td2[s0>>8]
863         _xtr    $i2,$s3,0-2
864         lw      $t7,0($i3)              # Td2[s1>>8]
865         _xtr    $i3,$s0,0-2
866
867         and     $i0,0x3fc
868         and     $i1,0x3fc
869         and     $i2,0x3fc
870         and     $i3,0x3fc
871         $PTR_ADD $i0,$Tbl
872         $PTR_ADD $i1,$Tbl
873         $PTR_ADD $i2,$Tbl
874         $PTR_ADD $i3,$Tbl
875         lw      $t8,0($i0)              # Td3[s1]
876         $PTR_INS $i0,$s0,2,8
877         lw      $t9,0($i1)              # Td3[s2]
878         $PTR_INS $i1,$s1,2,8
879         lw      $t10,0($i2)             # Td3[s3]
880         $PTR_INS $i2,$s2,2,8
881         lw      $t11,0($i3)             # Td3[s0]
882         $PTR_INS $i3,$s3,2,8
883 #else
884         lw      $t4,0($i0)              # Td2[s2>>8]
885         $PTR_INS $i0,$s1,2,8
886         lw      $t5,0($i1)              # Td2[s3>>8]
887         $PTR_INS $i1,$s2,2,8
888         lw      $t6,0($i2)              # Td2[s0>>8]
889         $PTR_INS $i2,$s3,2,8
890         lw      $t7,0($i3)              # Td2[s1>>8]
891         $PTR_INS $i3,$s0,2,8
892
893         lw      $t8,0($i0)              # Td3[s1]
894         _xtr    $i0,$s0,24-2
895         lw      $t9,0($i1)              # Td3[s2]
896         _xtr    $i1,$s1,24-2
897         lw      $t10,0($i2)             # Td3[s3]
898         _xtr    $i2,$s2,24-2
899         lw      $t11,0($i3)             # Td3[s0]
900         _xtr    $i3,$s3,24-2
901
902         and     $i0,0x3fc
903         and     $i1,0x3fc
904         and     $i2,0x3fc
905         and     $i3,0x3fc
906         $PTR_ADD $i0,$Tbl
907         $PTR_ADD $i1,$Tbl
908         $PTR_ADD $i2,$Tbl
909         $PTR_ADD $i3,$Tbl
910 #endif
911         rotr    $t4,$t4,16
912         rotr    $t5,$t5,16
913         rotr    $t6,$t6,16
914         rotr    $t7,$t7,16
915
916         rotr    $t8,$t8,24
917         rotr    $t9,$t9,24
918         rotr    $t10,$t10,24
919         rotr    $t11,$t11,24
920 #else
921         lwl     $t4,2($i0)              # Td2[s2>>8]
922         lwl     $t5,2($i1)              # Td2[s3>>8]
923         lwl     $t6,2($i2)              # Td2[s0>>8]
924         lwl     $t7,2($i3)              # Td2[s1>>8]
925         lwr     $t4,1($i0)              # Td2[s2>>8]
926         _xtr    $i0,$s1,0-2
927         lwr     $t5,1($i1)              # Td2[s3>>8]
928         _xtr    $i1,$s2,0-2
929         lwr     $t6,1($i2)              # Td2[s0>>8]
930         _xtr    $i2,$s3,0-2
931         lwr     $t7,1($i3)              # Td2[s1>>8]
932         _xtr    $i3,$s0,0-2
933
934         and     $i0,0x3fc
935         and     $i1,0x3fc
936         and     $i2,0x3fc
937         and     $i3,0x3fc
938         $PTR_ADD $i0,$Tbl
939         $PTR_ADD $i1,$Tbl
940         $PTR_ADD $i2,$Tbl
941         $PTR_ADD $i3,$Tbl
942         lwl     $t8,1($i0)              # Td3[s1]
943         lwl     $t9,1($i1)              # Td3[s2]
944         lwl     $t10,1($i2)             # Td3[s3]
945         lwl     $t11,1($i3)             # Td3[s0]
946         lwr     $t8,0($i0)              # Td3[s1]
947         _xtr    $i0,$s0,24-2
948         lwr     $t9,0($i1)              # Td3[s2]
949         _xtr    $i1,$s1,24-2
950         lwr     $t10,0($i2)             # Td3[s3]
951         _xtr    $i2,$s2,24-2
952         lwr     $t11,0($i3)             # Td3[s0]
953         _xtr    $i3,$s3,24-2
954
955         and     $i0,0x3fc
956         and     $i1,0x3fc
957         and     $i2,0x3fc
958         and     $i3,0x3fc
959         $PTR_ADD $i0,$Tbl
960         $PTR_ADD $i1,$Tbl
961         $PTR_ADD $i2,$Tbl
962         $PTR_ADD $i3,$Tbl
963 #endif
964
965         xor     $t0,$t4
966         lw      $t4,0($i0)              # Td0[s0>>24]
967         xor     $t1,$t5
968         lw      $t5,0($i1)              # Td0[s1>>24]
969         xor     $t2,$t6
970         lw      $t6,0($i2)              # Td0[s2>>24]
971         xor     $t3,$t7
972         lw      $t7,0($i3)              # Td0[s3>>24]
973
974         xor     $t0,$t8
975         lw      $s0,0($key0)
976         xor     $t1,$t9
977         lw      $s1,4($key0)
978         xor     $t2,$t10
979         lw      $s2,8($key0)
980         xor     $t3,$t11
981         lw      $s3,12($key0)
982
983         xor     $t0,$t4
984         xor     $t1,$t5
985         xor     $t2,$t6
986         xor     $t3,$t7
987
988         sub     $cnt,1
989         $PTR_ADD $key0,16
990         xor     $s0,$t0
991         xor     $s1,$t1
992         xor     $s2,$t2
993         xor     $s3,$t3
994         .set    noreorder
995         bnez    $cnt,.Loop_dec
996         _xtr    $i0,$s3,16-2
997 #endif
998
999         .set    reorder
1000         lw      $t4,1024($Tbl)          # prefetch Td4
1001         _xtr    $i0,$s3,16
1002         lw      $t5,1024+32($Tbl)
1003         _xtr    $i1,$s0,16
1004         lw      $t6,1024+64($Tbl)
1005         _xtr    $i2,$s1,16
1006         lw      $t7,1024+96($Tbl)
1007         _xtr    $i3,$s2,16
1008         lw      $t8,1024+128($Tbl)
1009         and     $i0,0xff
1010         lw      $t9,1024+160($Tbl)
1011         and     $i1,0xff
1012         lw      $t10,1024+192($Tbl)
1013         and     $i2,0xff
1014         lw      $t11,1024+224($Tbl)
1015         and     $i3,0xff
1016
1017         $PTR_ADD $i0,$Tbl
1018         $PTR_ADD $i1,$Tbl
1019         $PTR_ADD $i2,$Tbl
1020         $PTR_ADD $i3,$Tbl
1021         lbu     $t0,1024($i0)           # Td4[s3>>16]
1022         _xtr    $i0,$s2,8
1023         lbu     $t1,1024($i1)           # Td4[s0>>16]
1024         _xtr    $i1,$s3,8
1025         lbu     $t2,1024($i2)           # Td4[s1>>16]
1026         _xtr    $i2,$s0,8
1027         lbu     $t3,1024($i3)           # Td4[s2>>16]
1028         _xtr    $i3,$s1,8
1029
1030         and     $i0,0xff
1031         and     $i1,0xff
1032         and     $i2,0xff
1033         and     $i3,0xff
1034         $PTR_ADD $i0,$Tbl
1035         $PTR_ADD $i1,$Tbl
1036         $PTR_ADD $i2,$Tbl
1037         $PTR_ADD $i3,$Tbl
1038 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1039 # if defined(_MIPSEL)
1040         lbu     $t4,1024($i0)           # Td4[s2>>8]
1041         $PTR_INS $i0,$s0,0,8
1042         lbu     $t5,1024($i1)           # Td4[s3>>8]
1043         $PTR_INS $i1,$s1,0,8
1044         lbu     $t6,1024($i2)           # Td4[s0>>8]
1045         $PTR_INS $i2,$s2,0,8
1046         lbu     $t7,1024($i3)           # Td4[s1>>8]
1047         $PTR_INS $i3,$s3,0,8
1048
1049         lbu     $t8,1024($i0)           # Td4[s0>>24]
1050         _xtr    $i0,$s1,0
1051         lbu     $t9,1024($i1)           # Td4[s1>>24]
1052         _xtr    $i1,$s2,0
1053         lbu     $t10,1024($i2)          # Td4[s2>>24]
1054         _xtr    $i2,$s3,0
1055         lbu     $t11,1024($i3)          # Td4[s3>>24]
1056         _xtr    $i3,$s0,0
1057
1058         $PTR_ADD $i0,$Tbl
1059         $PTR_ADD $i1,$Tbl
1060         $PTR_ADD $i2,$Tbl
1061         $PTR_ADD $i3,$Tbl
1062 # else
1063         lbu     $t4,1024($i0)           # Td4[s2>>8]
1064         _xtr    $i0,$s0,24
1065         lbu     $t5,1024($i1)           # Td4[s3>>8]
1066         _xtr    $i1,$s1,24
1067         lbu     $t6,1024($i2)           # Td4[s0>>8]
1068         _xtr    $i2,$s2,24
1069         lbu     $t7,1024($i3)           # Td4[s1>>8]
1070         _xtr    $i3,$s3,24
1071
1072         $PTR_ADD $i0,$Tbl
1073         $PTR_ADD $i1,$Tbl
1074         $PTR_ADD $i2,$Tbl
1075         $PTR_ADD $i3,$Tbl
1076         lbu     $t8,1024($i0)           # Td4[s0>>24]
1077         $PTR_INS $i0,$s1,0,8
1078         lbu     $t9,1024($i1)           # Td4[s1>>24]
1079         $PTR_INS $i1,$s2,0,8
1080         lbu     $t10,1024($i2)          # Td4[s2>>24]
1081         $PTR_INS $i2,$s3,0,8
1082         lbu     $t11,1024($i3)          # Td4[s3>>24]
1083         $PTR_INS $i3,$s0,0,8
1084 # endif
1085         _ins    $t0,16
1086         _ins    $t1,16
1087         _ins    $t2,16
1088         _ins    $t3,16
1089
1090         _ins2   $t0,$t4,8
1091         lbu     $t4,1024($i0)           # Td4[s1]
1092         _ins2   $t1,$t5,8
1093         lbu     $t5,1024($i1)           # Td4[s2]
1094         _ins2   $t2,$t6,8
1095         lbu     $t6,1024($i2)           # Td4[s3]
1096         _ins2   $t3,$t7,8
1097         lbu     $t7,1024($i3)           # Td4[s0]
1098
1099         _ins2   $t0,$t8,24
1100         lw      $s0,0($key0)
1101         _ins2   $t1,$t9,24
1102         lw      $s1,4($key0)
1103         _ins2   $t2,$t10,24
1104         lw      $s2,8($key0)
1105         _ins2   $t3,$t11,24
1106         lw      $s3,12($key0)
1107
1108         _ins2   $t0,$t4,0
1109         _ins2   $t1,$t5,0
1110         _ins2   $t2,$t6,0
1111         _ins2   $t3,$t7,0
1112 #else
1113         lbu     $t4,1024($i0)           # Td4[s2>>8]
1114         _xtr    $i0,$s0,24
1115         lbu     $t5,1024($i1)           # Td4[s3>>8]
1116         _xtr    $i1,$s1,24
1117         lbu     $t6,1024($i2)           # Td4[s0>>8]
1118         _xtr    $i2,$s2,24
1119         lbu     $t7,1024($i3)           # Td4[s1>>8]
1120         _xtr    $i3,$s3,24
1121
1122         $PTR_ADD $i0,$Tbl
1123         $PTR_ADD $i1,$Tbl
1124         $PTR_ADD $i2,$Tbl
1125         $PTR_ADD $i3,$Tbl
1126         lbu     $t8,1024($i0)           # Td4[s0>>24]
1127         _xtr    $i0,$s1,0
1128         lbu     $t9,1024($i1)           # Td4[s1>>24]
1129         _xtr    $i1,$s2,0
1130         lbu     $t10,1024($i2)          # Td4[s2>>24]
1131         _xtr    $i2,$s3,0
1132         lbu     $t11,1024($i3)          # Td4[s3>>24]
1133         _xtr    $i3,$s0,0
1134
1135         $PTR_ADD $i0,$Tbl
1136         $PTR_ADD $i1,$Tbl
1137         $PTR_ADD $i2,$Tbl
1138         $PTR_ADD $i3,$Tbl
1139
1140         _ins    $t0,16
1141         _ins    $t1,16
1142         _ins    $t2,16
1143         _ins    $t3,16
1144
1145         _ins    $t4,8
1146         _ins    $t5,8
1147         _ins    $t6,8
1148         _ins    $t7,8
1149
1150         xor     $t0,$t4
1151         lbu     $t4,1024($i0)           # Td4[s1]
1152         xor     $t1,$t5
1153         lbu     $t5,1024($i1)           # Td4[s2]
1154         xor     $t2,$t6
1155         lbu     $t6,1024($i2)           # Td4[s3]
1156         xor     $t3,$t7
1157         lbu     $t7,1024($i3)           # Td4[s0]
1158
1159         _ins    $t8,24
1160         lw      $s0,0($key0)
1161         _ins    $t9,24
1162         lw      $s1,4($key0)
1163         _ins    $t10,24
1164         lw      $s2,8($key0)
1165         _ins    $t11,24
1166         lw      $s3,12($key0)
1167
1168         xor     $t0,$t8
1169         xor     $t1,$t9
1170         xor     $t2,$t10
1171         xor     $t3,$t11
1172
1173         _ins    $t4,0
1174         _ins    $t5,0
1175         _ins    $t6,0
1176         _ins    $t7,0
1177
1178         xor     $t0,$t4
1179         xor     $t1,$t5
1180         xor     $t2,$t6
1181         xor     $t3,$t7
1182 #endif
1183
1184         xor     $s0,$t0
1185         xor     $s1,$t1
1186         xor     $s2,$t2
1187         xor     $s3,$t3
1188
1189         jr      $ra
1190 .end    _mips_AES_decrypt
1191
1192 .align  5
1193 .globl  AES_decrypt
1194 .ent    AES_decrypt
1195 AES_decrypt:
1196         .frame  $sp,$FRAMESIZE,$ra
1197         .mask   $SAVED_REGS_MASK,-$SZREG
1198         .set    noreorder
1199 ___
1200 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1201         .cpload $pf
1202 ___
1203 $code.=<<___;                           # prologue: reserve $FRAMESIZE bytes, then spill $ra, $fp and $s4..$s11
1204         $PTR_SUB $sp,$FRAMESIZE
1205         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1206         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1207         $REG_S  $s11,$FRAMESIZE-3*$SZREG($sp)
1208         $REG_S  $s10,$FRAMESIZE-4*$SZREG($sp)
1209         $REG_S  $s9,$FRAMESIZE-5*$SZREG($sp)
1210         $REG_S  $s8,$FRAMESIZE-6*$SZREG($sp)
1211         $REG_S  $s7,$FRAMESIZE-7*$SZREG($sp)
1212         $REG_S  $s6,$FRAMESIZE-8*$SZREG($sp)
1213         $REG_S  $s5,$FRAMESIZE-9*$SZREG($sp)
1214         $REG_S  $s4,$FRAMESIZE-10*$SZREG($sp)
1215 ___
1216 $code.=<<___ if ($flavour =~ /nubi/i);  # NUBI only: also spill $12..$15 and $gp (slots 11..15); other flavours skip this to keep the prologue short
1217         $REG_S  \$15,$FRAMESIZE-11*$SZREG($sp)
1218         $REG_S  \$14,$FRAMESIZE-12*$SZREG($sp)
1219         $REG_S  \$13,$FRAMESIZE-13*$SZREG($sp)
1220         $REG_S  \$12,$FRAMESIZE-14*$SZREG($sp)
1221         $REG_S  $gp,$FRAMESIZE-15*$SZREG($sp)
1222 ___
1223 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1224         .cplocal        $Tbl
1225         .cpsetup        $pf,$zero,AES_decrypt
1226 ___
1227 $code.=<<___;                           # body: point $Tbl at AES_Td, load 16 input bytes via lwl/lwr pairs, call the core, store 16 output bytes via swr/swl, then reload the registers spilled above (same slots)
1228         .set    reorder
1229         $PTR_LA $Tbl,AES_Td             # PIC-ified 'load address'
1230
1231         lwl     $s0,0+$MSB($inp)
1232         lwl     $s1,4+$MSB($inp)
1233         lwl     $s2,8+$MSB($inp)
1234         lwl     $s3,12+$MSB($inp)
1235         lwr     $s0,0+$LSB($inp)
1236         lwr     $s1,4+$LSB($inp)
1237         lwr     $s2,8+$LSB($inp)
1238         lwr     $s3,12+$LSB($inp)
1239
1240         bal     _mips_AES_decrypt
1241
1242         swr     $s0,0+$LSB($out)
1243         swr     $s1,4+$LSB($out)
1244         swr     $s2,8+$LSB($out)
1245         swr     $s3,12+$LSB($out)
1246         swl     $s0,0+$MSB($out)
1247         swl     $s1,4+$MSB($out)
1248         swl     $s2,8+$MSB($out)
1249         swl     $s3,12+$MSB($out)
1250
1251         .set    noreorder
1252         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1253         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1254         $REG_L  $s11,$FRAMESIZE-3*$SZREG($sp)
1255         $REG_L  $s10,$FRAMESIZE-4*$SZREG($sp)
1256         $REG_L  $s9,$FRAMESIZE-5*$SZREG($sp)
1257         $REG_L  $s8,$FRAMESIZE-6*$SZREG($sp)
1258         $REG_L  $s7,$FRAMESIZE-7*$SZREG($sp)
1259         $REG_L  $s6,$FRAMESIZE-8*$SZREG($sp)
1260         $REG_L  $s5,$FRAMESIZE-9*$SZREG($sp)
1261         $REG_L  $s4,$FRAMESIZE-10*$SZREG($sp)
1262 ___
1263 $code.=<<___ if ($flavour =~ /nubi/i);  # NUBI only: reload $12..$15 and $gp from the same slots (11..15) the prologue used
1264         $REG_L  \$15,$FRAMESIZE-11*$SZREG($sp)
1265         $REG_L  \$14,$FRAMESIZE-12*$SZREG($sp)
1266         $REG_L  \$13,$FRAMESIZE-13*$SZREG($sp)
1267         $REG_L  \$12,$FRAMESIZE-14*$SZREG($sp)
1268         $REG_L  $gp,$FRAMESIZE-15*$SZREG($sp)
1269 ___
1270 $code.=<<___;                           # return; still under .set noreorder, so the frame release sits in the jr delay slot
1271         jr      $ra
1272         $PTR_ADD $sp,$FRAMESIZE
1273 .end    AES_decrypt
1274 ___
1275 }}}
1276 \f
1277 {{{
1278 my $FRAMESIZE=8*$SZREG;                 # frame used by both key-setup wrappers below: eight register-sized slots
1279 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc000f008" : "0xc0000000";      # value emitted in the wrappers' .mask directive; the NUBI flavour spills more registers
1280
1281 my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);    # user key pointer, key length in bits, key schedule pointer, byte-lookup table base (wrappers load AES_Te4 here)
1282 my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);        # working key words: 128-bit keys use rk0-rk3, 192-bit rk0-rk5, 256-bit all eight
1283 my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);         # scratch for the four byte lookups; note $i1 is $t0, which also carries the status code at exit
1284 my ($rcon,$cnt)=($gp,$fp);                      # round-constant cursor (starts at table+256, steps by 4) and remaining-iteration counter
1285
1286 $code.=<<___;                                   # key expansion core. Status is left in $t0: 0 ok, -1 null $inp or $key, -2 unsupported bit length. On success $cnt holds 10/12/14 and $key is rewound to the start of the schedule
1287 .align  5
1288 .ent    _mips_AES_set_encrypt_key
1289 _mips_AES_set_encrypt_key:
1290         .frame  $sp,0,$ra
1291         .set    noreorder
1292         beqz    $inp,.Lekey_done
1293         li      $t0,-1
1294         beqz    $key,.Lekey_done
1295         $PTR_ADD $rcon,$Tbl,256
1296
1297         .set    reorder
1298         lwl     $rk0,0+$MSB($inp)       # load 128 bits
1299         lwl     $rk1,4+$MSB($inp)
1300         lwl     $rk2,8+$MSB($inp)
1301         lwl     $rk3,12+$MSB($inp)
1302         li      $at,128
1303         lwr     $rk0,0+$LSB($inp)
1304         lwr     $rk1,4+$LSB($inp)
1305         lwr     $rk2,8+$LSB($inp)
1306         lwr     $rk3,12+$LSB($inp)
1307         .set    noreorder
1308         beq     $bits,$at,.L128bits
1309         li      $cnt,10
1310
1311         .set    reorder
1312         lwl     $rk4,16+$MSB($inp)      # load 192 bits
1313         lwl     $rk5,20+$MSB($inp)
1314         li      $at,192
1315         lwr     $rk4,16+$LSB($inp)
1316         lwr     $rk5,20+$LSB($inp)
1317         .set    noreorder
1318         beq     $bits,$at,.L192bits
1319         li      $cnt,8
1320
1321         .set    reorder
1322         lwl     $rk6,24+$MSB($inp)      # load 256 bits
1323         lwl     $rk7,28+$MSB($inp)
1324         li      $at,256
1325         lwr     $rk6,24+$LSB($inp)
1326         lwr     $rk7,28+$LSB($inp)
1327         .set    noreorder
1328         beq     $bits,$at,.L256bits
1329         li      $cnt,7
1330
1331         b       .Lekey_done
1332         li      $t0,-2
1333
1334 .align  4
1335 .L128bits:
1336         .set    reorder
1337         srl     $i0,$rk3,16
1338         srl     $i1,$rk3,8
1339         and     $i0,0xff
1340         and     $i1,0xff
1341         and     $i2,$rk3,0xff
1342         srl     $i3,$rk3,24
1343         $PTR_ADD $i0,$Tbl
1344         $PTR_ADD $i1,$Tbl
1345         $PTR_ADD $i2,$Tbl
1346         $PTR_ADD $i3,$Tbl
1347         lbu     $i0,0($i0)
1348         lbu     $i1,0($i1)
1349         lbu     $i2,0($i2)
1350         lbu     $i3,0($i3)
1351
1352         sw      $rk0,0($key)
1353         sw      $rk1,4($key)
1354         sw      $rk2,8($key)
1355         sw      $rk3,12($key)
1356         sub     $cnt,1
1357         $PTR_ADD $key,16
1358
1359         _bias   $i0,24
1360         _bias   $i1,16
1361         _bias   $i2,8
1362         _bias   $i3,0
1363
1364         xor     $rk0,$i0
1365         lw      $i0,0($rcon)
1366         xor     $rk0,$i1
1367         xor     $rk0,$i2
1368         xor     $rk0,$i3
1369         xor     $rk0,$i0
1370
1371         xor     $rk1,$rk0
1372         xor     $rk2,$rk1
1373         xor     $rk3,$rk2
1374
1375         .set    noreorder
1376         bnez    $cnt,.L128bits
1377         $PTR_ADD $rcon,4
1378
1379         sw      $rk0,0($key)
1380         sw      $rk1,4($key)
1381         sw      $rk2,8($key)
1382         li      $cnt,10
1383         sw      $rk3,12($key)
1384         li      $t0,0
1385         sw      $cnt,80($key)
1386         b       .Lekey_done
1387         $PTR_SUB $key,10*16
1388
1389 .align  4
1390 .L192bits:
1391         .set    reorder
1392         srl     $i0,$rk5,16
1393         srl     $i1,$rk5,8
1394         and     $i0,0xff
1395         and     $i1,0xff
1396         and     $i2,$rk5,0xff
1397         srl     $i3,$rk5,24
1398         $PTR_ADD $i0,$Tbl
1399         $PTR_ADD $i1,$Tbl
1400         $PTR_ADD $i2,$Tbl
1401         $PTR_ADD $i3,$Tbl
1402         lbu     $i0,0($i0)
1403         lbu     $i1,0($i1)
1404         lbu     $i2,0($i2)
1405         lbu     $i3,0($i3)
1406
1407         sw      $rk0,0($key)
1408         sw      $rk1,4($key)
1409         sw      $rk2,8($key)
1410         sw      $rk3,12($key)
1411         sw      $rk4,16($key)
1412         sw      $rk5,20($key)
1413         sub     $cnt,1
1414         $PTR_ADD $key,24
1415
1416         _bias   $i0,24
1417         _bias   $i1,16
1418         _bias   $i2,8
1419         _bias   $i3,0
1420
1421         xor     $rk0,$i0
1422         lw      $i0,0($rcon)
1423         xor     $rk0,$i1
1424         xor     $rk0,$i2
1425         xor     $rk0,$i3
1426         xor     $rk0,$i0
1427
1428         xor     $rk1,$rk0
1429         xor     $rk2,$rk1
1430         xor     $rk3,$rk2
1431         xor     $rk4,$rk3
1432         xor     $rk5,$rk4
1433
1434         .set    noreorder
1435         bnez    $cnt,.L192bits
1436         $PTR_ADD $rcon,4
1437
1438         sw      $rk0,0($key)
1439         sw      $rk1,4($key)
1440         sw      $rk2,8($key)
1441         li      $cnt,12
1442         sw      $rk3,12($key)
1443         li      $t0,0
1444         sw      $cnt,48($key)
1445         b       .Lekey_done
1446         $PTR_SUB $key,12*16
1447
1448 .align  4
1449 .L256bits:
1450         .set    reorder
1451         srl     $i0,$rk7,16
1452         srl     $i1,$rk7,8
1453         and     $i0,0xff
1454         and     $i1,0xff
1455         and     $i2,$rk7,0xff
1456         srl     $i3,$rk7,24
1457         $PTR_ADD $i0,$Tbl
1458         $PTR_ADD $i1,$Tbl
1459         $PTR_ADD $i2,$Tbl
1460         $PTR_ADD $i3,$Tbl
1461         lbu     $i0,0($i0)
1462         lbu     $i1,0($i1)
1463         lbu     $i2,0($i2)
1464         lbu     $i3,0($i3)
1465
1466         sw      $rk0,0($key)
1467         sw      $rk1,4($key)
1468         sw      $rk2,8($key)
1469         sw      $rk3,12($key)
1470         sw      $rk4,16($key)
1471         sw      $rk5,20($key)
1472         sw      $rk6,24($key)
1473         sw      $rk7,28($key)
1474         sub     $cnt,1
1475
1476         _bias   $i0,24
1477         _bias   $i1,16
1478         _bias   $i2,8
1479         _bias   $i3,0
1480
1481         xor     $rk0,$i0
1482         lw      $i0,0($rcon)
1483         xor     $rk0,$i1
1484         xor     $rk0,$i2
1485         xor     $rk0,$i3
1486         xor     $rk0,$i0
1487
1488         xor     $rk1,$rk0
1489         xor     $rk2,$rk1
1490         xor     $rk3,$rk2
1491         beqz    $cnt,.L256bits_done
1492
1493         srl     $i0,$rk3,24
1494         srl     $i1,$rk3,16
1495         srl     $i2,$rk3,8
1496         and     $i3,$rk3,0xff
1497         and     $i1,0xff
1498         and     $i2,0xff
1499         $PTR_ADD $i0,$Tbl
1500         $PTR_ADD $i1,$Tbl
1501         $PTR_ADD $i2,$Tbl
1502         $PTR_ADD $i3,$Tbl
1503         lbu     $i0,0($i0)
1504         lbu     $i1,0($i1)
1505         lbu     $i2,0($i2)
1506         lbu     $i3,0($i3)
1507         sll     $i0,24
1508         sll     $i1,16
1509         sll     $i2,8
1510
1511         xor     $rk4,$i0
1512         xor     $rk4,$i1
1513         xor     $rk4,$i2
1514         xor     $rk4,$i3
1515
1516         xor     $rk5,$rk4
1517         xor     $rk6,$rk5
1518         xor     $rk7,$rk6
1519
1520         $PTR_ADD $key,32
1521         .set    noreorder
1522         b       .L256bits
1523         $PTR_ADD $rcon,4
1524
1525 .L256bits_done:
1526         sw      $rk0,32($key)
1527         sw      $rk1,36($key)
1528         sw      $rk2,40($key)
1529         li      $cnt,14
1530         sw      $rk3,44($key)
1531         li      $t0,0
1532         sw      $cnt,48($key)
1533         $PTR_SUB $key,12*16
1534
1535 .Lekey_done:
1536         jr      $ra
1537         nop
1538 .end    _mips_AES_set_encrypt_key
1539
1540 .globl  AES_set_encrypt_key
1541 .ent    AES_set_encrypt_key
1542 AES_set_encrypt_key:
1543         .frame  $sp,$FRAMESIZE,$ra
1544         .mask   $SAVED_REGS_MASK,-$SZREG
1545         .set    noreorder
1546 ___
1547 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1548         .cpload $pf
1549 ___
1550 $code.=<<___;                           # prologue: reserve $FRAMESIZE bytes, spill $ra and $fp (slots 1..2)
1551         $PTR_SUB $sp,$FRAMESIZE
1552         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1553         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1554 ___
1555 $code.=<<___ if ($flavour =~ /nubi/i);  # NUBI only: also spill $s3..$s0 and $gp (slots 3..7); other flavours skip this
1556         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1557         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1558         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1559         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1560         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1561 ___
1562 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1563         .cplocal        $Tbl
1564         .cpsetup        $pf,$zero,AES_set_encrypt_key
1565 ___
1566 $code.=<<___;                           # body: point $Tbl at AES_Te4, run the key-expansion core, copy its status from $t0 to $a0, reload $ra/$fp
1567         .set    reorder
1568         $PTR_LA $Tbl,AES_Te4            # PIC-ified 'load address'
1569
1570         bal     _mips_AES_set_encrypt_key
1571
1572         .set    noreorder
1573         move    $a0,$t0
1574         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1575         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1576 ___
1577 $code.=<<___ if ($flavour =~ /nubi/i);  # NUBI only: reload from slots 3..7, mirroring the prologue. Slots 11..15 belong to the larger AES_encrypt/AES_decrypt frame and would lie below $sp here
1578         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1579         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1580         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1581         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1582         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1583 ___
1584 $code.=<<___;                           # return; still under .set noreorder, so the frame release sits in the jr delay slot
1585         jr      $ra
1586         $PTR_ADD $sp,$FRAMESIZE
1587 .end    AES_set_encrypt_key
1588 ___
1589 \f
1590 my ($head,$tail)=($inp,$bits);          # cursors for reversing the round-key order; they reuse the $inp/$bits registers after key expansion has consumed them
1591 my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);        # GF(2^8) multiples of the current key word, named after the multiplier (x1, x2, x4, x8, x9, xb, xd, xe)
1592 my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);    # scratch mask plus the three byte-replicated constants used by the packed doubling step
1593 $code.=<<___;                           # entry label and frame description
1594 .align  5
1595 .globl  AES_set_decrypt_key
1596 .ent    AES_set_decrypt_key
1597 AES_set_decrypt_key:
1598         .frame  $sp,$FRAMESIZE,$ra
1599         .mask   $SAVED_REGS_MASK,-$SZREG
1600         .set    noreorder
1601 ___
1602 $code.=<<___ if ($flavour =~ /o32/i);   # o32 PIC-ification
1603         .cpload $pf
1604 ___
1605 $code.=<<___;                           # prologue: reserve $FRAMESIZE bytes, spill $ra and $fp (slots 1..2)
1606         $PTR_SUB $sp,$FRAMESIZE
1607         $REG_S  $ra,$FRAMESIZE-1*$SZREG($sp)
1608         $REG_S  $fp,$FRAMESIZE-2*$SZREG($sp)
1609 ___
1610 $code.=<<___ if ($flavour =~ /nubi/i);  # NUBI only: also spill $s3..$s0 and $gp (slots 3..7); other flavours skip this
1611         $REG_S  $s3,$FRAMESIZE-3*$SZREG($sp)
1612         $REG_S  $s2,$FRAMESIZE-4*$SZREG($sp)
1613         $REG_S  $s1,$FRAMESIZE-5*$SZREG($sp)
1614         $REG_S  $s0,$FRAMESIZE-6*$SZREG($sp)
1615         $REG_S  $gp,$FRAMESIZE-7*$SZREG($sp)
1616 ___
1617 $code.=<<___ if ($flavour !~ /o32/i);   # non-o32 PIC-ification
1618         .cplocal        $Tbl
1619         .cpsetup        $pf,$zero,AES_set_decrypt_key
1620 ___
1621 $code.=<<___;                           # body: build the encryption schedule, bail out on negative status, swap round keys end-for-end (.Lswap), then transform the words from key+16 onward, 4*(rounds-1) of them (.Lmix)
1622         .set    reorder
1623         $PTR_LA $Tbl,AES_Te4            # PIC-ified 'load address'
1624
1625         bal     _mips_AES_set_encrypt_key
1626
1627         bltz    $t0,.Ldkey_done
1628
1629         sll     $at,$cnt,4
1630         $PTR_ADD $head,$key,0
1631         $PTR_ADD $tail,$key,$at
1632 .align  4
1633 .Lswap:
1634         lw      $rk0,0($head)
1635         lw      $rk1,4($head)
1636         lw      $rk2,8($head)
1637         lw      $rk3,12($head)
1638         lw      $rk4,0($tail)
1639         lw      $rk5,4($tail)
1640         lw      $rk6,8($tail)
1641         lw      $rk7,12($tail)
1642         sw      $rk0,0($tail)
1643         sw      $rk1,4($tail)
1644         sw      $rk2,8($tail)
1645         sw      $rk3,12($tail)
1646         $PTR_ADD $head,16
1647         $PTR_SUB $tail,16
1648         sw      $rk4,-16($head)
1649         sw      $rk5,-12($head)
1650         sw      $rk6,-8($head)
1651         sw      $rk7,-4($head)
1652         bne     $head,$tail,.Lswap
1653
1654         lw      $tp1,16($key)           # modulo-scheduled
1655         lui     $x80808080,0x8080
1656         sub     $cnt,1
1657         or      $x80808080,0x8080
1658         sll     $cnt,2
1659         $PTR_ADD $key,16
1660         lui     $x1b1b1b1b,0x1b1b
1661         nor     $x7f7f7f7f,$zero,$x80808080
1662         or      $x1b1b1b1b,0x1b1b
1663 .align  4
1664 .Lmix:
1665         and     $m,$tp1,$x80808080
1666         and     $tp2,$tp1,$x7f7f7f7f
1667         srl     $tp4,$m,7
1668         addu    $tp2,$tp2               # tp2<<1
1669         subu    $m,$tp4
1670         and     $m,$x1b1b1b1b
1671         xor     $tp2,$m
1672
1673         and     $m,$tp2,$x80808080
1674         and     $tp4,$tp2,$x7f7f7f7f
1675         srl     $tp8,$m,7
1676         addu    $tp4,$tp4               # tp4<<1
1677         subu    $m,$tp8
1678         and     $m,$x1b1b1b1b
1679         xor     $tp4,$m
1680
1681         and     $m,$tp4,$x80808080
1682         and     $tp8,$tp4,$x7f7f7f7f
1683         srl     $tp9,$m,7
1684         addu    $tp8,$tp8               # tp8<<1
1685         subu    $m,$tp9
1686         and     $m,$x1b1b1b1b
1687         xor     $tp8,$m
1688
1689         xor     $tp9,$tp8,$tp1
1690         xor     $tpe,$tp8,$tp4
1691         xor     $tpb,$tp9,$tp2
1692         xor     $tpd,$tp9,$tp4
1693
1694 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1695         rotr    $tp1,$tpd,16
1696          xor    $tpe,$tp2
1697         rotr    $tp2,$tp9,8
1698         xor     $tpe,$tp1
1699         rotr    $tp4,$tpb,24
1700         xor     $tpe,$tp2
1701         lw      $tp1,4($key)            # modulo-scheduled
1702         xor     $tpe,$tp4
1703 #else
1704         _ror    $tp1,$tpd,16
1705          xor    $tpe,$tp2
1706         _ror    $tp2,$tpd,-16
1707         xor     $tpe,$tp1
1708         _ror    $tp1,$tp9,8
1709         xor     $tpe,$tp2
1710         _ror    $tp2,$tp9,-24
1711         xor     $tpe,$tp1
1712         _ror    $tp1,$tpb,24
1713         xor     $tpe,$tp2
1714         _ror    $tp2,$tpb,-8
1715         xor     $tpe,$tp1
1716         lw      $tp1,4($key)            # modulo-scheduled
1717         xor     $tpe,$tp2
1718 #endif
1719         sub     $cnt,1
1720         sw      $tpe,0($key)
1721         $PTR_ADD $key,4
1722         bnez    $cnt,.Lmix
1723
1724         li      $t0,0
1725 .Ldkey_done:
1726         .set    noreorder
1727         move    $a0,$t0
1728         $REG_L  $ra,$FRAMESIZE-1*$SZREG($sp)
1729         $REG_L  $fp,$FRAMESIZE-2*$SZREG($sp)
1730 ___
1731 $code.=<<___ if ($flavour =~ /nubi/i);  # NUBI only: reload from slots 3..7, mirroring the prologue. Slots 11..15 belong to the larger AES_encrypt/AES_decrypt frame and would lie below $sp here
1732         $REG_L  $s3,$FRAMESIZE-3*$SZREG($sp)
1733         $REG_L  $s2,$FRAMESIZE-4*$SZREG($sp)
1734         $REG_L  $s1,$FRAMESIZE-5*$SZREG($sp)
1735         $REG_L  $s0,$FRAMESIZE-6*$SZREG($sp)
1736         $REG_L  $gp,$FRAMESIZE-7*$SZREG($sp)
1737 ___
1738 $code.=<<___;                           # return; still under .set noreorder, so the frame release sits in the jr delay slot
1739         jr      $ra
1740         $PTR_ADD $sp,$FRAMESIZE
1741 .end    AES_set_decrypt_key
1742 ___
1743 }}}
1744
1745 ######################################################################
1746 # Tables are kept in endian-neutral manner
1747 $code.=<<___;
1748 .rdata
1749 .align  10
1750 AES_Te:
1751 .byte   0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84     # Te0
1752 .byte   0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
1753 .byte   0xff,0xf2,0xf2,0x0d,    0xd6,0x6b,0x6b,0xbd
1754 .byte   0xde,0x6f,0x6f,0xb1,    0x91,0xc5,0xc5,0x54
1755 .byte   0x60,0x30,0x30,0x50,    0x02,0x01,0x01,0x03
1756 .byte   0xce,0x67,0x67,0xa9,    0x56,0x2b,0x2b,0x7d
1757 .byte   0xe7,0xfe,0xfe,0x19,    0xb5,0xd7,0xd7,0x62
1758 .byte   0x4d,0xab,0xab,0xe6,    0xec,0x76,0x76,0x9a
1759 .byte   0x8f,0xca,0xca,0x45,    0x1f,0x82,0x82,0x9d
1760 .byte   0x89,0xc9,0xc9,0x40,    0xfa,0x7d,0x7d,0x87
1761 .byte   0xef,0xfa,0xfa,0x15,    0xb2,0x59,0x59,0xeb
1762 .byte   0x8e,0x47,0x47,0xc9,    0xfb,0xf0,0xf0,0x0b
1763 .byte   0x41,0xad,0xad,0xec,    0xb3,0xd4,0xd4,0x67
1764 .byte   0x5f,0xa2,0xa2,0xfd,    0x45,0xaf,0xaf,0xea
1765 .byte   0x23,0x9c,0x9c,0xbf,    0x53,0xa4,0xa4,0xf7
1766 .byte   0xe4,0x72,0x72,0x96,    0x9b,0xc0,0xc0,0x5b
1767 .byte   0x75,0xb7,0xb7,0xc2,    0xe1,0xfd,0xfd,0x1c
1768 .byte   0x3d,0x93,0x93,0xae,    0x4c,0x26,0x26,0x6a
1769 .byte   0x6c,0x36,0x36,0x5a,    0x7e,0x3f,0x3f,0x41
1770 .byte   0xf5,0xf7,0xf7,0x02,    0x83,0xcc,0xcc,0x4f
1771 .byte   0x68,0x34,0x34,0x5c,    0x51,0xa5,0xa5,0xf4
1772 .byte   0xd1,0xe5,0xe5,0x34,    0xf9,0xf1,0xf1,0x08
1773 .byte   0xe2,0x71,0x71,0x93,    0xab,0xd8,0xd8,0x73
1774 .byte   0x62,0x31,0x31,0x53,    0x2a,0x15,0x15,0x3f
1775 .byte   0x08,0x04,0x04,0x0c,    0x95,0xc7,0xc7,0x52
1776 .byte   0x46,0x23,0x23,0x65,    0x9d,0xc3,0xc3,0x5e
1777 .byte   0x30,0x18,0x18,0x28,    0x37,0x96,0x96,0xa1
1778 .byte   0x0a,0x05,0x05,0x0f,    0x2f,0x9a,0x9a,0xb5
1779 .byte   0x0e,0x07,0x07,0x09,    0x24,0x12,0x12,0x36
1780 .byte   0x1b,0x80,0x80,0x9b,    0xdf,0xe2,0xe2,0x3d
1781 .byte   0xcd,0xeb,0xeb,0x26,    0x4e,0x27,0x27,0x69
1782 .byte   0x7f,0xb2,0xb2,0xcd,    0xea,0x75,0x75,0x9f
1783 .byte   0x12,0x09,0x09,0x1b,    0x1d,0x83,0x83,0x9e
1784 .byte   0x58,0x2c,0x2c,0x74,    0x34,0x1a,0x1a,0x2e
1785 .byte   0x36,0x1b,0x1b,0x2d,    0xdc,0x6e,0x6e,0xb2
1786 .byte   0xb4,0x5a,0x5a,0xee,    0x5b,0xa0,0xa0,0xfb
1787 .byte   0xa4,0x52,0x52,0xf6,    0x76,0x3b,0x3b,0x4d
1788 .byte   0xb7,0xd6,0xd6,0x61,    0x7d,0xb3,0xb3,0xce
1789 .byte   0x52,0x29,0x29,0x7b,    0xdd,0xe3,0xe3,0x3e
1790 .byte   0x5e,0x2f,0x2f,0x71,    0x13,0x84,0x84,0x97
1791 .byte   0xa6,0x53,0x53,0xf5,    0xb9,0xd1,0xd1,0x68
1792 .byte   0x00,0x00,0x00,0x00,    0xc1,0xed,0xed,0x2c
1793 .byte   0x40,0x20,0x20,0x60,    0xe3,0xfc,0xfc,0x1f
1794 .byte   0x79,0xb1,0xb1,0xc8,    0xb6,0x5b,0x5b,0xed
1795 .byte   0xd4,0x6a,0x6a,0xbe,    0x8d,0xcb,0xcb,0x46
1796 .byte   0x67,0xbe,0xbe,0xd9,    0x72,0x39,0x39,0x4b
1797 .byte   0x94,0x4a,0x4a,0xde,    0x98,0x4c,0x4c,0xd4
1798 .byte   0xb0,0x58,0x58,0xe8,    0x85,0xcf,0xcf,0x4a
1799 .byte   0xbb,0xd0,0xd0,0x6b,    0xc5,0xef,0xef,0x2a
1800 .byte   0x4f,0xaa,0xaa,0xe5,    0xed,0xfb,0xfb,0x16
1801 .byte   0x86,0x43,0x43,0xc5,    0x9a,0x4d,0x4d,0xd7
1802 .byte   0x66,0x33,0x33,0x55,    0x11,0x85,0x85,0x94
1803 .byte   0x8a,0x45,0x45,0xcf,    0xe9,0xf9,0xf9,0x10
1804 .byte   0x04,0x02,0x02,0x06,    0xfe,0x7f,0x7f,0x81
1805 .byte   0xa0,0x50,0x50,0xf0,    0x78,0x3c,0x3c,0x44
1806 .byte   0x25,0x9f,0x9f,0xba,    0x4b,0xa8,0xa8,0xe3
1807 .byte   0xa2,0x51,0x51,0xf3,    0x5d,0xa3,0xa3,0xfe
1808 .byte   0x80,0x40,0x40,0xc0,    0x05,0x8f,0x8f,0x8a
1809 .byte   0x3f,0x92,0x92,0xad,    0x21,0x9d,0x9d,0xbc
1810 .byte   0x70,0x38,0x38,0x48,    0xf1,0xf5,0xf5,0x04
1811 .byte   0x63,0xbc,0xbc,0xdf,    0x77,0xb6,0xb6,0xc1
1812 .byte   0xaf,0xda,0xda,0x75,    0x42,0x21,0x21,0x63
1813 .byte   0x20,0x10,0x10,0x30,    0xe5,0xff,0xff,0x1a
1814 .byte   0xfd,0xf3,0xf3,0x0e,    0xbf,0xd2,0xd2,0x6d
1815 .byte   0x81,0xcd,0xcd,0x4c,    0x18,0x0c,0x0c,0x14
1816 .byte   0x26,0x13,0x13,0x35,    0xc3,0xec,0xec,0x2f
1817 .byte   0xbe,0x5f,0x5f,0xe1,    0x35,0x97,0x97,0xa2
1818 .byte   0x88,0x44,0x44,0xcc,    0x2e,0x17,0x17,0x39
1819 .byte   0x93,0xc4,0xc4,0x57,    0x55,0xa7,0xa7,0xf2
1820 .byte   0xfc,0x7e,0x7e,0x82,    0x7a,0x3d,0x3d,0x47
1821 .byte   0xc8,0x64,0x64,0xac,    0xba,0x5d,0x5d,0xe7
1822 .byte   0x32,0x19,0x19,0x2b,    0xe6,0x73,0x73,0x95
1823 .byte   0xc0,0x60,0x60,0xa0,    0x19,0x81,0x81,0x98
1824 .byte   0x9e,0x4f,0x4f,0xd1,    0xa3,0xdc,0xdc,0x7f
1825 .byte   0x44,0x22,0x22,0x66,    0x54,0x2a,0x2a,0x7e
1826 .byte   0x3b,0x90,0x90,0xab,    0x0b,0x88,0x88,0x83
1827 .byte   0x8c,0x46,0x46,0xca,    0xc7,0xee,0xee,0x29
1828 .byte   0x6b,0xb8,0xb8,0xd3,    0x28,0x14,0x14,0x3c
1829 .byte   0xa7,0xde,0xde,0x79,    0xbc,0x5e,0x5e,0xe2
1830 .byte   0x16,0x0b,0x0b,0x1d,    0xad,0xdb,0xdb,0x76
1831 .byte   0xdb,0xe0,0xe0,0x3b,    0x64,0x32,0x32,0x56
1832 .byte   0x74,0x3a,0x3a,0x4e,    0x14,0x0a,0x0a,0x1e
1833 .byte   0x92,0x49,0x49,0xdb,    0x0c,0x06,0x06,0x0a
1834 .byte   0x48,0x24,0x24,0x6c,    0xb8,0x5c,0x5c,0xe4
1835 .byte   0x9f,0xc2,0xc2,0x5d,    0xbd,0xd3,0xd3,0x6e
1836 .byte   0x43,0xac,0xac,0xef,    0xc4,0x62,0x62,0xa6
1837 .byte   0x39,0x91,0x91,0xa8,    0x31,0x95,0x95,0xa4
1838 .byte   0xd3,0xe4,0xe4,0x37,    0xf2,0x79,0x79,0x8b
1839 .byte   0xd5,0xe7,0xe7,0x32,    0x8b,0xc8,0xc8,0x43
1840 .byte   0x6e,0x37,0x37,0x59,    0xda,0x6d,0x6d,0xb7
1841 .byte   0x01,0x8d,0x8d,0x8c,    0xb1,0xd5,0xd5,0x64
1842 .byte   0x9c,0x4e,0x4e,0xd2,    0x49,0xa9,0xa9,0xe0
1843 .byte   0xd8,0x6c,0x6c,0xb4,    0xac,0x56,0x56,0xfa
1844 .byte   0xf3,0xf4,0xf4,0x07,    0xcf,0xea,0xea,0x25
1845 .byte   0xca,0x65,0x65,0xaf,    0xf4,0x7a,0x7a,0x8e
1846 .byte   0x47,0xae,0xae,0xe9,    0x10,0x08,0x08,0x18
1847 .byte   0x6f,0xba,0xba,0xd5,    0xf0,0x78,0x78,0x88
1848 .byte   0x4a,0x25,0x25,0x6f,    0x5c,0x2e,0x2e,0x72
1849 .byte   0x38,0x1c,0x1c,0x24,    0x57,0xa6,0xa6,0xf1
1850 .byte   0x73,0xb4,0xb4,0xc7,    0x97,0xc6,0xc6,0x51
1851 .byte   0xcb,0xe8,0xe8,0x23,    0xa1,0xdd,0xdd,0x7c
1852 .byte   0xe8,0x74,0x74,0x9c,    0x3e,0x1f,0x1f,0x21
1853 .byte   0x96,0x4b,0x4b,0xdd,    0x61,0xbd,0xbd,0xdc
1854 .byte   0x0d,0x8b,0x8b,0x86,    0x0f,0x8a,0x8a,0x85
1855 .byte   0xe0,0x70,0x70,0x90,    0x7c,0x3e,0x3e,0x42
1856 .byte   0x71,0xb5,0xb5,0xc4,    0xcc,0x66,0x66,0xaa
1857 .byte   0x90,0x48,0x48,0xd8,    0x06,0x03,0x03,0x05
1858 .byte   0xf7,0xf6,0xf6,0x01,    0x1c,0x0e,0x0e,0x12
1859 .byte   0xc2,0x61,0x61,0xa3,    0x6a,0x35,0x35,0x5f
1860 .byte   0xae,0x57,0x57,0xf9,    0x69,0xb9,0xb9,0xd0
1861 .byte   0x17,0x86,0x86,0x91,    0x99,0xc1,0xc1,0x58
1862 .byte   0x3a,0x1d,0x1d,0x27,    0x27,0x9e,0x9e,0xb9
1863 .byte   0xd9,0xe1,0xe1,0x38,    0xeb,0xf8,0xf8,0x13
1864 .byte   0x2b,0x98,0x98,0xb3,    0x22,0x11,0x11,0x33
1865 .byte   0xd2,0x69,0x69,0xbb,    0xa9,0xd9,0xd9,0x70
1866 .byte   0x07,0x8e,0x8e,0x89,    0x33,0x94,0x94,0xa7
1867 .byte   0x2d,0x9b,0x9b,0xb6,    0x3c,0x1e,0x1e,0x22
1868 .byte   0x15,0x87,0x87,0x92,    0xc9,0xe9,0xe9,0x20
1869 .byte   0x87,0xce,0xce,0x49,    0xaa,0x55,0x55,0xff
1870 .byte   0x50,0x28,0x28,0x78,    0xa5,0xdf,0xdf,0x7a
1871 .byte   0x03,0x8c,0x8c,0x8f,    0x59,0xa1,0xa1,0xf8
1872 .byte   0x09,0x89,0x89,0x80,    0x1a,0x0d,0x0d,0x17
1873 .byte   0x65,0xbf,0xbf,0xda,    0xd7,0xe6,0xe6,0x31
1874 .byte   0x84,0x42,0x42,0xc6,    0xd0,0x68,0x68,0xb8
1875 .byte   0x82,0x41,0x41,0xc3,    0x29,0x99,0x99,0xb0
1876 .byte   0x5a,0x2d,0x2d,0x77,    0x1e,0x0f,0x0f,0x11
1877 .byte   0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
1878 .byte   0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
1879
1880 AES_Td:
1881 .byte   0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53     # Td0
1882 .byte   0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
1883 .byte   0x3b,0xab,0x6b,0xcb,    0x1f,0x9d,0x45,0xf1
1884 .byte   0xac,0xfa,0x58,0xab,    0x4b,0xe3,0x03,0x93
1885 .byte   0x20,0x30,0xfa,0x55,    0xad,0x76,0x6d,0xf6
1886 .byte   0x88,0xcc,0x76,0x91,    0xf5,0x02,0x4c,0x25
1887 .byte   0x4f,0xe5,0xd7,0xfc,    0xc5,0x2a,0xcb,0xd7
1888 .byte   0x26,0x35,0x44,0x80,    0xb5,0x62,0xa3,0x8f
1889 .byte   0xde,0xb1,0x5a,0x49,    0x25,0xba,0x1b,0x67
1890 .byte   0x45,0xea,0x0e,0x98,    0x5d,0xfe,0xc0,0xe1
1891 .byte   0xc3,0x2f,0x75,0x02,    0x81,0x4c,0xf0,0x12
1892 .byte   0x8d,0x46,0x97,0xa3,    0x6b,0xd3,0xf9,0xc6
1893 .byte   0x03,0x8f,0x5f,0xe7,    0x15,0x92,0x9c,0x95
1894 .byte   0xbf,0x6d,0x7a,0xeb,    0x95,0x52,0x59,0xda
1895 .byte   0xd4,0xbe,0x83,0x2d,    0x58,0x74,0x21,0xd3
1896 .byte   0x49,0xe0,0x69,0x29,    0x8e,0xc9,0xc8,0x44
1897 .byte   0x75,0xc2,0x89,0x6a,    0xf4,0x8e,0x79,0x78
1898 .byte   0x99,0x58,0x3e,0x6b,    0x27,0xb9,0x71,0xdd
1899 .byte   0xbe,0xe1,0x4f,0xb6,    0xf0,0x88,0xad,0x17
1900 .byte   0xc9,0x20,0xac,0x66,    0x7d,0xce,0x3a,0xb4
1901 .byte   0x63,0xdf,0x4a,0x18,    0xe5,0x1a,0x31,0x82
1902 .byte   0x97,0x51,0x33,0x60,    0x62,0x53,0x7f,0x45
1903 .byte   0xb1,0x64,0x77,0xe0,    0xbb,0x6b,0xae,0x84
1904 .byte   0xfe,0x81,0xa0,0x1c,    0xf9,0x08,0x2b,0x94
1905 .byte   0x70,0x48,0x68,0x58,    0x8f,0x45,0xfd,0x19
1906 .byte   0x94,0xde,0x6c,0x87,    0x52,0x7b,0xf8,0xb7
1907 .byte   0xab,0x73,0xd3,0x23,    0x72,0x4b,0x02,0xe2
1908 .byte   0xe3,0x1f,0x8f,0x57,    0x66,0x55,0xab,0x2a
1909 .byte   0xb2,0xeb,0x28,0x07,    0x2f,0xb5,0xc2,0x03
1910 .byte   0x86,0xc5,0x7b,0x9a,    0xd3,0x37,0x08,0xa5
1911 .byte   0x30,0x28,0x87,0xf2,    0x23,0xbf,0xa5,0xb2
1912 .byte   0x02,0x03,0x6a,0xba,    0xed,0x16,0x82,0x5c
1913 .byte   0x8a,0xcf,0x1c,0x2b,    0xa7,0x79,0xb4,0x92
1914 .byte   0xf3,0x07,0xf2,0xf0,    0x4e,0x69,0xe2,0xa1
1915 .byte   0x65,0xda,0xf4,0xcd,    0x06,0x05,0xbe,0xd5
1916 .byte   0xd1,0x34,0x62,0x1f,    0xc4,0xa6,0xfe,0x8a
1917 .byte   0x34,0x2e,0x53,0x9d,    0xa2,0xf3,0x55,0xa0
1918 .byte   0x05,0x8a,0xe1,0x32,    0xa4,0xf6,0xeb,0x75
1919 .byte   0x0b,0x83,0xec,0x39,    0x40,0x60,0xef,0xaa
1920 .byte   0x5e,0x71,0x9f,0x06,    0xbd,0x6e,0x10,0x51
1921 .byte   0x3e,0x21,0x8a,0xf9,    0x96,0xdd,0x06,0x3d
1922 .byte   0xdd,0x3e,0x05,0xae,    0x4d,0xe6,0xbd,0x46
1923 .byte   0x91,0x54,0x8d,0xb5,    0x71,0xc4,0x5d,0x05
1924 .byte   0x04,0x06,0xd4,0x6f,    0x60,0x50,0x15,0xff
1925 .byte   0x19,0x98,0xfb,0x24,    0xd6,0xbd,0xe9,0x97
1926 .byte   0x89,0x40,0x43,0xcc,    0x67,0xd9,0x9e,0x77
1927 .byte   0xb0,0xe8,0x42,0xbd,    0x07,0x89,0x8b,0x88
1928 .byte   0xe7,0x19,0x5b,0x38,    0x79,0xc8,0xee,0xdb
1929 .byte   0xa1,0x7c,0x0a,0x47,    0x7c,0x42,0x0f,0xe9
1930 .byte   0xf8,0x84,0x1e,0xc9,    0x00,0x00,0x00,0x00
1931 .byte   0x09,0x80,0x86,0x83,    0x32,0x2b,0xed,0x48
1932 .byte   0x1e,0x11,0x70,0xac,    0x6c,0x5a,0x72,0x4e
1933 .byte   0xfd,0x0e,0xff,0xfb,    0x0f,0x85,0x38,0x56
1934 .byte   0x3d,0xae,0xd5,0x1e,    0x36,0x2d,0x39,0x27
1935 .byte   0x0a,0x0f,0xd9,0x64,    0x68,0x5c,0xa6,0x21
1936 .byte   0x9b,0x5b,0x54,0xd1,    0x24,0x36,0x2e,0x3a
1937 .byte   0x0c,0x0a,0x67,0xb1,    0x93,0x57,0xe7,0x0f
1938 .byte   0xb4,0xee,0x96,0xd2,    0x1b,0x9b,0x91,0x9e
1939 .byte   0x80,0xc0,0xc5,0x4f,    0x61,0xdc,0x20,0xa2
1940 .byte   0x5a,0x77,0x4b,0x69,    0x1c,0x12,0x1a,0x16
1941 .byte   0xe2,0x93,0xba,0x0a,    0xc0,0xa0,0x2a,0xe5
1942 .byte   0x3c,0x22,0xe0,0x43,    0x12,0x1b,0x17,0x1d
1943 .byte   0x0e,0x09,0x0d,0x0b,    0xf2,0x8b,0xc7,0xad
1944 .byte   0x2d,0xb6,0xa8,0xb9,    0x14,0x1e,0xa9,0xc8
1945 .byte   0x57,0xf1,0x19,0x85,    0xaf,0x75,0x07,0x4c
1946 .byte   0xee,0x99,0xdd,0xbb,    0xa3,0x7f,0x60,0xfd
1947 .byte   0xf7,0x01,0x26,0x9f,    0x5c,0x72,0xf5,0xbc
1948 .byte   0x44,0x66,0x3b,0xc5,    0x5b,0xfb,0x7e,0x34
1949 .byte   0x8b,0x43,0x29,0x76,    0xcb,0x23,0xc6,0xdc
1950 .byte   0xb6,0xed,0xfc,0x68,    0xb8,0xe4,0xf1,0x63
1951 .byte   0xd7,0x31,0xdc,0xca,    0x42,0x63,0x85,0x10
1952 .byte   0x13,0x97,0x22,0x40,    0x84,0xc6,0x11,0x20
1953 .byte   0x85,0x4a,0x24,0x7d,    0xd2,0xbb,0x3d,0xf8
1954 .byte   0xae,0xf9,0x32,0x11,    0xc7,0x29,0xa1,0x6d
1955 .byte   0x1d,0x9e,0x2f,0x4b,    0xdc,0xb2,0x30,0xf3
1956 .byte   0x0d,0x86,0x52,0xec,    0x77,0xc1,0xe3,0xd0
1957 .byte   0x2b,0xb3,0x16,0x6c,    0xa9,0x70,0xb9,0x99
1958 .byte   0x11,0x94,0x48,0xfa,    0x47,0xe9,0x64,0x22
1959 .byte   0xa8,0xfc,0x8c,0xc4,    0xa0,0xf0,0x3f,0x1a
1960 .byte   0x56,0x7d,0x2c,0xd8,    0x22,0x33,0x90,0xef
1961 .byte   0x87,0x49,0x4e,0xc7,    0xd9,0x38,0xd1,0xc1
1962 .byte   0x8c,0xca,0xa2,0xfe,    0x98,0xd4,0x0b,0x36
1963 .byte   0xa6,0xf5,0x81,0xcf,    0xa5,0x7a,0xde,0x28
1964 .byte   0xda,0xb7,0x8e,0x26,    0x3f,0xad,0xbf,0xa4
1965 .byte   0x2c,0x3a,0x9d,0xe4,    0x50,0x78,0x92,0x0d
1966 .byte   0x6a,0x5f,0xcc,0x9b,    0x54,0x7e,0x46,0x62
1967 .byte   0xf6,0x8d,0x13,0xc2,    0x90,0xd8,0xb8,0xe8
1968 .byte   0x2e,0x39,0xf7,0x5e,    0x82,0xc3,0xaf,0xf5
1969 .byte   0x9f,0x5d,0x80,0xbe,    0x69,0xd0,0x93,0x7c
1970 .byte   0x6f,0xd5,0x2d,0xa9,    0xcf,0x25,0x12,0xb3
1971 .byte   0xc8,0xac,0x99,0x3b,    0x10,0x18,0x7d,0xa7
1972 .byte   0xe8,0x9c,0x63,0x6e,    0xdb,0x3b,0xbb,0x7b
1973 .byte   0xcd,0x26,0x78,0x09,    0x6e,0x59,0x18,0xf4
1974 .byte   0xec,0x9a,0xb7,0x01,    0x83,0x4f,0x9a,0xa8
1975 .byte   0xe6,0x95,0x6e,0x65,    0xaa,0xff,0xe6,0x7e
1976 .byte   0x21,0xbc,0xcf,0x08,    0xef,0x15,0xe8,0xe6
1977 .byte   0xba,0xe7,0x9b,0xd9,    0x4a,0x6f,0x36,0xce
1978 .byte   0xea,0x9f,0x09,0xd4,    0x29,0xb0,0x7c,0xd6
1979 .byte   0x31,0xa4,0xb2,0xaf,    0x2a,0x3f,0x23,0x31
1980 .byte   0xc6,0xa5,0x94,0x30,    0x35,0xa2,0x66,0xc0
1981 .byte   0x74,0x4e,0xbc,0x37,    0xfc,0x82,0xca,0xa6
1982 .byte   0xe0,0x90,0xd0,0xb0,    0x33,0xa7,0xd8,0x15
1983 .byte   0xf1,0x04,0x98,0x4a,    0x41,0xec,0xda,0xf7
1984 .byte   0x7f,0xcd,0x50,0x0e,    0x17,0x91,0xf6,0x2f
1985 .byte   0x76,0x4d,0xd6,0x8d,    0x43,0xef,0xb0,0x4d
1986 .byte   0xcc,0xaa,0x4d,0x54,    0xe4,0x96,0x04,0xdf
1987 .byte   0x9e,0xd1,0xb5,0xe3,    0x4c,0x6a,0x88,0x1b
1988 .byte   0xc1,0x2c,0x1f,0xb8,    0x46,0x65,0x51,0x7f
1989 .byte   0x9d,0x5e,0xea,0x04,    0x01,0x8c,0x35,0x5d
1990 .byte   0xfa,0x87,0x74,0x73,    0xfb,0x0b,0x41,0x2e
1991 .byte   0xb3,0x67,0x1d,0x5a,    0x92,0xdb,0xd2,0x52
1992 .byte   0xe9,0x10,0x56,0x33,    0x6d,0xd6,0x47,0x13
1993 .byte   0x9a,0xd7,0x61,0x8c,    0x37,0xa1,0x0c,0x7a
1994 .byte   0x59,0xf8,0x14,0x8e,    0xeb,0x13,0x3c,0x89
1995 .byte   0xce,0xa9,0x27,0xee,    0xb7,0x61,0xc9,0x35
1996 .byte   0xe1,0x1c,0xe5,0xed,    0x7a,0x47,0xb1,0x3c
1997 .byte   0x9c,0xd2,0xdf,0x59,    0x55,0xf2,0x73,0x3f
1998 .byte   0x18,0x14,0xce,0x79,    0x73,0xc7,0x37,0xbf
1999 .byte   0x53,0xf7,0xcd,0xea,    0x5f,0xfd,0xaa,0x5b
2000 .byte   0xdf,0x3d,0x6f,0x14,    0x78,0x44,0xdb,0x86
2001 .byte   0xca,0xaf,0xf3,0x81,    0xb9,0x68,0xc4,0x3e
2002 .byte   0x38,0x24,0x34,0x2c,    0xc2,0xa3,0x40,0x5f
2003 .byte   0x16,0x1d,0xc3,0x72,    0xbc,0xe2,0x25,0x0c
2004 .byte   0x28,0x3c,0x49,0x8b,    0xff,0x0d,0x95,0x41
2005 .byte   0x39,0xa8,0x01,0x71,    0x08,0x0c,0xb3,0xde
2006 .byte   0xd8,0xb4,0xe4,0x9c,    0x64,0x56,0xc1,0x90
2007 .byte   0x7b,0xcb,0x84,0x61,    0xd5,0x32,0xb6,0x70
2008 .byte   0x48,0x6c,0x5c,0x74,    0xd0,0xb8,0x57,0x42
2009
2010 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38  # Td4
2011 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
2012 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
2013 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
2014 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
2015 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
2016 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
2017 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
2018 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
2019 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
2020 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
2021 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
2022 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
2023 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
2024 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
2025 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
2026 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
2027 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
2028 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
2029 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
2030 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
2031 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
2032 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
2033 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
2034 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
2035 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
2036 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
2037 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
2038 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
2039 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
2040 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
2041 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
2042
2043 AES_Te4:
2044 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5  # Te4
2045 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
2046 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
2047 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
2048 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
2049 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
2050 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
2051 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
2052 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
2053 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
2054 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
2055 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
2056 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
2057 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
2058 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
2059 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
2060 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
2061 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
2062 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
2063 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
2064 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
2065 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
2066 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
2067 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
2068 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
2069 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
2070 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
2071 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
2072 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
2073 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
2074 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
2075 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
2076
2077 .byte   0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00     # rcon
2078 .byte   0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
2079 .byte   0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
2080 .byte   0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
2081 .byte   0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
2082 ___
2083 \f
# Post-process the generated assembly held in $code, one line at a time,
# and print every resulting line.  Each line goes through, in this order:
# backtick expansion, translation of the byte-order-neutral pseudo-ops,
# and (when $big_endian is false) the little-endian fix-ups below.
2084 foreach (split("\n",$code)) {
             # replace every `...` span with the result of eval'ing its contents
2085         s/\`([^\`]*)\`/eval $1/ge;
2086
2087         # made-up _instructions (_xtr, _ins, _ins2, _ror and _bias) cope
2088         # with byte order dependencies...
2089         if (/^\s+_/) {
                 # two-operand shorthand "_op $r,X" is widened to "_op $r,$r,X"
                 # by repeating the register operand
2090             s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
2091
                 # Translate the pseudo-op; the substitutions are chained with
                 # "or", so only the first one that matches is applied
                 # (BE = $big_endian true, LE = $big_endian false):
                 #   _xtr  rd,rs,N -> srl rd,rs,N    (BE) | srl rd,rs,24-N      (LE)
                 #   _ins  rd,rs,N -> sll rd,rs,N    (BE) | sll rd,rs,24-N      (LE)
                 #   _ins2 rd,rs,N -> ins rd,rs,N,8  (BE) | ins rd,rs,24-N,8    (LE)
                 #   _ror  rd,rs,N -> srl rd,rs,N    (BE) | srl rd,rs,-N        (LE)
                 #   _bias rd,rs,N -> sll rd,rs,N    (BE) | sll rd,rs,(N-16)&31 (LE)
                 # The _xtr operand may also be written as "N-2"; it is eval'ed.
2092             s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
2093                 sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
2094                                         :               eval("24-$3"))/e or
2095             s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2096                 sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
2097                                         :               eval("24-$3"))/e or
2098             s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2099                 sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3)
2100                                         :               eval("24-$3"))/e or
2101             s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
2102                 sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
2103                                         :               eval("$3*-1"))/e or
2104             s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
2105                 sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
2106                                         :               eval("($3-16)&31"))/e;
2107
                 # Clean up the translated instruction (again, first match
                 # wins): an srl with a negative amount becomes an sll by the
                 # same amount without the sign, an srl by 0 becomes
                 # "and rd,rs,0xff", and an sll by 0 is commented out by
                 # prefixing it with '#'.
2108             s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
2109                 sprintf("sll\t$1,$2,$3")/e                              or
2110             s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
2111                 sprintf("and\t$1,$2,0xff")/e                            or
2112             s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
2113         }
2114
2115         # convert lwl/lwr and swr/swl to little-endian order
             # For lwl/swl the digit d right before "(" is replaced by (d-1)&3,
             # for lwr/swr by (d+1)&3; anything in front of that digit is kept.
             # NOTE(review): the greedy ".*" leaves only that single last digit
             # in $2, and "&" binds looser than "+"/"-", so the mask applies to
             # the whole sum.  Presumably offsets are always written so that
             # the last digit is the 0..3 byte index (e.g. "K+d") -- confirm
             # before using a bare multi-digit offset on these instructions.
2116         if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
2117             s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
2118                 sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e        or
2119             s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
2120                 sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
2121         }
2122
             # Little-endian only: mirror rotr amounts (N -> 32-N) and the
             # position operand of 8-bit-wide ext instructions (P -> 24-P).
2123         if (!$big_endian) {
2124             s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
2125             s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
2126         }
2127
             # emit the (possibly rewritten) line
2128         print $_,"\n";
2129 }
2130
# Close the output handle and check the result: buffered write errors
# (disk full, broken pipe, ...) are only reported at close time, so die
# here rather than exit successfully with a truncated assembly file.
2131 close STDOUT or die "error closing STDOUT: $!";