upd: openssl to 1.1.0

[cassiopeia.git] / lib / openssl / crypto / bn / asm / x86_64-gf2m.pl
diff --git a/lib/openssl/crypto/bn/asm/x86_64-gf2m.pl b/lib/openssl/crypto/bn/asm/x86_64-gf2m.pl

index 226c66c35e35570715d8f7cc41215ef4e6c42a05..d962f62033caaaec31d2672a39df23ad28655c33 100644 (file)
--- a/lib/openssl/crypto/bn/asm/x86_64-gf2m.pl
+++ b/lib/openssl/crypto/bn/asm/x86_64-gf2m.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
  #
  # ====================================================================
  # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -13,7 +20,7 @@
  # in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
  # the time being... Except that it has two code paths: code suitable
  # for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and
-# later. Improvement varies from one benchmark and �-arch to another.
+# later. Improvement varies from one benchmark and µ-arch to another.
  # Vanilla code path is at most 20% faster than compiler-generated code
  # [not very impressive], while PCLMULQDQ - whole 85%-160% better on
  # 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that
@@ -31,7 +38,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
  ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
  die "can't locate x86_64-xlate.pl";
  
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
  *STDOUT=*OUT;
  
  ($lo,$hi)=("%rax","%rdx");     $a=$lo;
@@ -184,13 +191,13 @@ ___
  $code.=<<___;
         movdqa          %xmm0,%xmm4
         movdqa          %xmm1,%xmm5
-       pclmulqdq       \$0,%xmm1,%xmm0 # a1�b1
+       pclmulqdq       \$0,%xmm1,%xmm0 # a1·b1
         pxor            %xmm2,%xmm4
         pxor            %xmm3,%xmm5
-       pclmulqdq       \$0,%xmm3,%xmm2 # a0�b0
-       pclmulqdq       \$0,%xmm5,%xmm4 # (a0+a1)�(b0+b1)
+       pclmulqdq       \$0,%xmm3,%xmm2 # a0·b0
+       pclmulqdq       \$0,%xmm5,%xmm4 # (a0+a1)·(b0+b1)
         xorps           %xmm0,%xmm4
-       xorps           %xmm2,%xmm4     # (a0+a1)�(b0+b1)-a0�b0-a1�b1
+       xorps           %xmm2,%xmm4     # (a0+a1)·(b0+b1)-a0·b0-a1·b1
         movdqa          %xmm4,%xmm5
         pslldq          \$8,%xmm4
         psrldq          \$8,%xmm5
@@ -225,13 +232,13 @@ $code.=<<___;
         mov     \$0xf,$mask
         mov     $a1,$a
         mov     $b1,$b
-       call    _mul_1x1                # a1�b1
+       call    _mul_1x1                # a1·b1
         mov     $lo,16(%rsp)
         mov     $hi,24(%rsp)
  
         mov     48(%rsp),$a
         mov     64(%rsp),$b
-       call    _mul_1x1                # a0�b0
+       call    _mul_1x1                # a0·b0
         mov     $lo,0(%rsp)
         mov     $hi,8(%rsp)
  
@@ -239,7 +246,7 @@ $code.=<<___;
         mov     56(%rsp),$b
         xor     48(%rsp),$a
         xor     64(%rsp),$b
-       call    _mul_1x1                # (a0+a1)�(b0+b1)
+       call    _mul_1x1                # (a0+a1)·(b0+b1)
  ___
         @r=("%rbx","%rcx","%rdi","%rsi");
  $code.=<<___;