diff --git a/lib/openssl/crypto/bn/asm/x86_64-gf2m.pl b/lib/openssl/crypto/bn/asm/x86_64-gf2m.pl
index 226c66c..d962f62 100644
--- a/lib/openssl/crypto/bn/asm/x86_64-gf2m.pl
+++ b/lib/openssl/crypto/bn/asm/x86_64-gf2m.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
 #
 # ====================================================================
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -13,7 +20,7 @@
 # in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
 # the time being... Except that it has two code paths: code suitable
 # for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and
-# later. Improvement varies from one benchmark and µ-arch to another.
+# later. Improvement varies from one benchmark and µ-arch to another.
 # Vanilla code path is at most 20% faster than compiler-generated code
 # [not very impressive], while PCLMULQDQ - whole 85%-160% better on
 # 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that
@@ -31,7 +38,7 @@
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
 *STDOUT=*OUT;
 
 ($lo,$hi)=("%rax","%rdx");	$a=$lo;
@@ -184,13 +191,13 @@ ___
 $code.=<<___;
 	movdqa		%xmm0,%xmm4
 	movdqa		%xmm1,%xmm5
-	pclmulqdq	\$0,%xmm1,%xmm0	# a1·b1
+	pclmulqdq	\$0,%xmm1,%xmm0	# a1·b1
 	pxor		%xmm2,%xmm4
 	pxor		%xmm3,%xmm5
-	pclmulqdq	\$0,%xmm3,%xmm2	# a0·b0
-	pclmulqdq	\$0,%xmm5,%xmm4	# (a0+a1)·(b0+b1)
+	pclmulqdq	\$0,%xmm3,%xmm2	# a0·b0
+	pclmulqdq	\$0,%xmm5,%xmm4	# (a0+a1)·(b0+b1)
 	xorps		%xmm0,%xmm4
-	xorps		%xmm2,%xmm4	# (a0+a1)·(b0+b1)-a0·b0-a1·b1
+	xorps		%xmm2,%xmm4	# (a0+a1)·(b0+b1)-a0·b0-a1·b1
 	movdqa		%xmm4,%xmm5
 	pslldq		\$8,%xmm4
 	psrldq		\$8,%xmm5
@@ -225,13 +232,13 @@ $code.=<<___;
 	mov	\$0xf,$mask
 	mov	$a1,$a
 	mov	$b1,$b
-	call	_mul_1x1		# a1·b1
+	call	_mul_1x1		# a1·b1
 	mov	$lo,16(%rsp)
 	mov	$hi,24(%rsp)
 
 	mov	48(%rsp),$a
 	mov	64(%rsp),$b
-	call	_mul_1x1		# a0·b0
+	call	_mul_1x1		# a0·b0
 	mov	$lo,0(%rsp)
 	mov	$hi,8(%rsp)
 
@@ -239,7 +246,7 @@ $code.=<<___;
 	mov	56(%rsp),$b
 	xor	48(%rsp),$a
 	xor	64(%rsp),$b
-	call	_mul_1x1		# (a0+a1)·(b0+b1)
+	call	_mul_1x1		# (a0+a1)·(b0+b1)
 ___
 	@r=("%rbx","%rcx","%rdi","%rsi");
$code.=<<___;
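
The a1·b1, a0·b0 and (a0+a1)·(b0+b1) annotations in the hunks above are the
three half-width products of one Karatsuba step over GF(2)[x], where both
addition and subtraction are XOR, so the middle word comes out as
(a0+a1)·(b0+b1) ^ a0·b0 ^ a1·b1. The Perl sketch below is a minimal model of
that recombination, not OpenSSL code: the sub names mirror the assembly
labels, but it works on 32-bit halves (the assembly uses 64-bit halves and
128-bit intermediates) so every value fits a native 64-bit Perl integer.

#!/usr/bin/env perl
# Illustrative model of the Karatsuba structure used by _mul_2x2 above.
# Not OpenSSL code; assumes a perl built with 64-bit integers.
use strict;
use warnings;

# Schoolbook carry-less multiply in GF(2)[x]: 32-bit a,b -> 64-bit product.
sub mul_1x1 {
    my ($a, $b) = @_;
    my $r = 0;
    for my $i (0 .. 31) {
        $r ^= $b << $i if ($a >> $i) & 1;   # XOR instead of ADD: no carries
    }
    return $r;
}

# (a1*X + a0)·(b1*X + b0) = a1·b1*X^2
#                         + ((a0+a1)·(b0+b1) ^ a1·b1 ^ a0·b0)*X
#                         + a0·b0,   with X = x^32 here (x^64 in the asm).
sub mul_2x2 {
    my ($a1, $a0, $b1, $b0) = @_;
    my $hh  = mul_1x1($a1, $b1);                # a1·b1
    my $ll  = mul_1x1($a0, $b0);                # a0·b0
    my $mid = mul_1x1($a1 ^ $a0, $b1 ^ $b0);    # (a0+a1)·(b0+b1)
    $mid ^= $hh ^ $ll;                          # "subtraction" is XOR in GF(2)
    return ($hh ^ ($mid >> 32),                 # high 64 bits of the product
            $ll ^ (($mid & 0xffffffff) << 32)); # low 64 bits
}

# Squaring x^32 + x^7 + x^2 + x + 1: the cross terms cancel, so mid is 0.
my ($hi, $lo) = mul_2x2(1, 0x87, 1, 0x87);
printf "%016x %016x\n", $hi, $lo;   # 0000000000000001 0000000000004015

Because carries never propagate in GF(2)[x], the middle product costs only a
few XORs beyond its single multiplication, which is why both the scalar
_mul_1x1 path and the pclmulqdq path above get away with three
multiplications per 2x2 block instead of four.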