-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... Except that it has two code paths: code suitable
# for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and
-# later. Improvement varies from one benchmark and �-arch to another.
+# later. Improvement varies from one benchmark and ��-arch to another.
# Vanilla code path is at most 20% faster than compiler-generated code
# [not very impressive], while PCLMULQDQ - whole 85%-160% better on
# 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
($lo,$hi)=("%rax","%rdx"); $a=$lo;
$code.=<<___;
movdqa %xmm0,%xmm4
movdqa %xmm1,%xmm5
- pclmulqdq \$0,%xmm1,%xmm0 # a1�b1
+ pclmulqdq \$0,%xmm1,%xmm0 # a1��b1
pxor %xmm2,%xmm4
pxor %xmm3,%xmm5
- pclmulqdq \$0,%xmm3,%xmm2 # a0�b0
- pclmulqdq \$0,%xmm5,%xmm4 # (a0+a1)�(b0+b1)
+ pclmulqdq \$0,%xmm3,%xmm2 # a0��b0
+ pclmulqdq \$0,%xmm5,%xmm4 # (a0+a1)��(b0+b1)
xorps %xmm0,%xmm4
- xorps %xmm2,%xmm4 # (a0+a1)�(b0+b1)-a0�b0-a1�b1
+ xorps %xmm2,%xmm4 # (a0+a1)·(b0+b1)-a0·b0-a1��b1
movdqa %xmm4,%xmm5
pslldq \$8,%xmm4
psrldq \$8,%xmm5
mov \$0xf,$mask
mov $a1,$a
mov $b1,$b
- call _mul_1x1 # a1�b1
+ call _mul_1x1 # a1��b1
mov $lo,16(%rsp)
mov $hi,24(%rsp)
mov 48(%rsp),$a
mov 64(%rsp),$b
- call _mul_1x1 # a0�b0
+ call _mul_1x1 # a0��b0
mov $lo,0(%rsp)
mov $hi,8(%rsp)
mov 56(%rsp),$b
xor 48(%rsp),$a
xor 64(%rsp),$b
- call _mul_1x1 # (a0+a1)�(b0+b1)
+ call _mul_1x1 # (a0+a1)��(b0+b1)
___
@r=("%rbx","%rcx","%rdi","%rsi");
$code.=<<___;