2 # Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
19 # The module implements bn_GF2m_mul_2x2 polynomial multiplication used
20 # in bn_gf2m.c. It's a kind of low-hanging mechanical port from C for
21 # the time being... except that it has two code paths: one suitable
22 # for all SPARCv9 processors and one for VIS3-capable ones. The former
23 # delivers ~25-45% more, more for longer keys, on the heaviest DH and
24 # DSA verify operations on the venerable UltraSPARC II. On T4 the VIS3
25 # code is ~100-230% faster than gcc-generated code and ~35-90% faster
26 # than the pure SPARCv9 code path.
# Script setup: point STDOUT at the file named by $output and give
# symbolic Perl names to the SPARC registers used by the assembly template.
29 open STDOUT,">$output"; # STDOUT now writes to $output; the return value of open is not checked
38 ($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5)); # six names bound to %o0..%o5, in that order
39 ($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo; # $hi shares %o3 with $a8; $a shares %g1 with $lo; $b is %o7
42 #include <sparc_arch.h>
45 .register %g2,#scratch ! declare %g2 to the assembler as a scratch register
46 .register %g3,#scratch ! likewise for %g3
53 .globl bn_GF2m_mul_2x2
! bn_GF2m_mul_2x2: GF(2^m) polynomial multiplication entry point.
! Two code paths are visible below: one built on the VIS3 xmulx/xmulxhi
! instructions and one driven by a 16-entry lookup table kept on the stack.
! The VIS3 bit of OPENSSL_sparcv9cap_P[0] is tested first; the branch that
! selects a path is not part of this excerpt.
56 SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) ! macro not defined in this excerpt (presumably from sparc_arch.h); next line reads through %g1
57 ld [%g1+0],%g1 ! OPENSSL_sparcv9cap_P[0]
59 andcc %g1, SPARCV9_VIS3, %g0 ! test the VIS3 bit; result goes to %g0, only condition codes are kept
67 .word 0x95b262ab ! xmulx %o1, %o3, %o2
68 .word 0x99b262cb ! xmulxhi %o1, %o3, %o4
69 srlx %o2, 32, %o1 ! 13 cycles later
79 save %sp,-STACK_FRAME-$locals,%sp ! new register window; frame is enlarged by the locals size (presumably the start of the generic SPARCv9 path)
85 srlx $a12,1,$a48 ! 0x7fff...
87 srlx $a12,2,$a12 ! 0x3fff...
88 add %sp,STACK_BIAS+STACK_FRAME,$tab ! table base = first byte of the locals area; used by the tab[] stores below
94 srax $a4,63,@i[1] ! broadcast 61st bit
95 and $a48,$a4,$a4 ! (a<<2)&0x7fff...
97 srax $a2,63,@i[0] ! broadcast 62nd bit
98 and $a12,$a2,$a2 ! (a<<1)&0x3fff...
99 srax $a1,63,$lo ! broadcast 63rd bit
100 and $a48,$a1,$a1 ! (a<<0)&0x1fff...
107 stx %g0,[$tab+0*8] ! tab[0]=0
109 stx $a1,[$tab+1*8] ! tab[1]=a1
110 stx $a2,[$tab+2*8] ! tab[2]=a2
112 stx $a12,[$tab+3*8] ! tab[3]=a1^a2
115 stx $a4,[$tab+4*8] ! tab[4]=a4
117 stx $a1,[$tab+5*8] ! tab[5]=a1^a4
119 stx $a2,[$tab+6*8] ! tab[6]=a2^a4
121 stx $a12,[$tab+7*8] ! tab[7]=a1^a2^a4
124 stx $a8,[$tab+8*8] ! tab[8]=a8
126 stx $a1,[$tab+9*8] ! tab[9]=a1^a8
128 stx $a2,[$tab+10*8] ! tab[10]=a2^a8
130 stx $a12,[$tab+11*8] ! tab[11]=a1^a2^a8
133 stx $a48,[$tab+12*8] ! tab[12]=a4^a8
135 stx $a1,[$tab+13*8] ! tab[13]=a1^a4^a8
137 stx $a2,[$tab+14*8] ! tab[14]=a2^a4^a8
139 stx $a12,[$tab+15*8] ! tab[15]=a1^a2^a4^a8
144 and @i[0],`0xf<<3`,@i[0] ! keep a 4-bit index already scaled by 8, i.e. a byte offset into tab
146 ldx [$tab+@i[0]],@i[0] ! replace the offset with the 64-bit table entry it selects
149 and @i[1],`0xf<<3`,@i[1] ! same masking for the second index register
151 ldx [$tab+@i[1]],@i[1] ! and the same table load
157 and @i[0],`0xf<<3`,@i[0] ! next index, masked the same way
159 for($n=1;$n<14;$n++) { # Perl-level unrolling: emits 13 rounds, n = 1..13
161 sllx @i[1],`$n*4`,@T[0] ! part of (entry << 4n) that stays within 64 bits
162 ldx [$tab+@i[0]],@i[0] ! load the entry for the index prepared in the previous round
163 srlx @i[1],`64-$n*4`,@T[1] ! bits of (entry << 4n) that spill past bit 63
165 srlx $b,`($n+2)*4`-3,@i[1] ! bring nibble n+2 of b down to bits 3..6 (scaled by 8)
167 and @i[1],`0xf<<3`,@i[1] ! isolate that nibble as a table byte offset
169 push(@i,shift(@i)); push(@T,shift(@T)); # rotate both register-name lists so the next round swaps their roles
172 sllx @i[1],`$n*4`,@T[0] ! n is 14 after the loop, so this shift is 56
173 ldx [$tab+@i[0]],@i[0] ! last table load
174 srlx @i[1],`64-$n*4`,@T[1] ! spill-over bits for the shift by 56
177 sllx @i[0],`($n+1)*4`,@T[0] ! final entry shifted by 60
179 srlx @i[0],`64-($n+1)*4`,@T[1] ! and its spill-over (shift right by 4)
192 .type bn_GF2m_mul_2x2,#function ! mark the symbol as a function
193 .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 ! size = current location minus the symbol address
194 .asciz "GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
198 $code =~ s/\`([^\`]*)\`/eval($1)/gem; # replace every `expr` in $code with the value of eval(expr), e.g. the shift counts