X-Git-Url: https://code.wpia.club/?a=blobdiff_plain;f=lib%2Fopenssl%2Fcrypto%2Frc4%2Fasm%2Frc4-586.pl;fp=lib%2Fopenssl%2Fcrypto%2Frc4%2Fasm%2Frc4-586.pl;h=7d6f97c59efba305caa7031771c1148c077faa88;hb=02ed66432c92de70694700164f986190aad3cbc5;hp=5c9ac6ad286e21dd3e53436165dac21ea49a1c92;hpb=89016837dcbf2775cd15dc8cbaba00dc6379f86e;p=cassiopeia.git diff --git a/lib/openssl/crypto/rc4/asm/rc4-586.pl b/lib/openssl/crypto/rc4/asm/rc4-586.pl index 5c9ac6a..7d6f97c 100644 --- a/lib/openssl/crypto/rc4/asm/rc4-586.pl +++ b/lib/openssl/crypto/rc4/asm/rc4-586.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # [Re]written by Andy Polyakov for the OpenSSL @@ -43,6 +50,9 @@ # Westmere 5.1/+94%(**) # Sandy Bridge 5.0/+8% # Atom 12.6/+6% +# VIA Nano 6.4/+9% +# Ivy Bridge 4.9/±0% +# Bulldozer 4.9/+15% # # (*) PIII can actually deliver 6.6 cycles per byte with MMX code, # but this specific code performs poorly on Core2. And vice @@ -60,7 +70,10 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; -&asm_init($ARGV[0],"rc4-586.pl"); +$output=pop; +open STDOUT,">$output"; + +&asm_init($ARGV[0],"rc4-586.pl",$x86only = $ARGV[$#ARGV] eq "386"); $xx="eax"; $yy="ebx"; @@ -144,7 +157,7 @@ if ($alt=0) { &movd ($i>0?"mm1":"mm2",&DWP(0,$dat,$ty,4)); # (*) This is the key to Core2 and Westmere performance. - # Whithout movz out-of-order execution logic confuses + # Without movz out-of-order execution logic confuses # itself and fails to reorder loads and stores. Problem # appears to be fixed in Sandy Bridge... } @@ -184,8 +197,11 @@ if ($alt=0) { &and ($ty,-4); # how many 4-byte chunks? &jz (&label("loop1")); - &test ($ty,-8); &mov (&wparam(3),$out); # $out as accumulator in these loops + if ($x86only) { + &jmp (&label("go4loop4")); + } else { + &test ($ty,-8); &jz (&label("go4loop4")); &picmeup($out,"OPENSSL_ia32cap_P"); @@ -228,6 +244,7 @@ if ($alt=0) { &cmp ($inp,&wparam(1)); # compare to input+len &je (&label("done")); &jmp (&label("loop1")); + } &set_label("go4loop4",16); &lea ($ty,&DWP(-4,$inp,$ty)); @@ -300,7 +317,7 @@ $ido="ecx"; $idx="edx"; # void RC4_set_key(RC4_KEY *key,int len,const unsigned char *data); -&function_begin("private_RC4_set_key"); +&function_begin("RC4_set_key"); &mov ($out,&wparam(0)); # load key &mov ($idi,&wparam(1)); # load len &mov ($inp,&wparam(2)); # load data @@ -378,7 +395,7 @@ $idx="edx"; &xor ("eax","eax"); &mov (&DWP(-8,$out),"eax"); # key->x=0; &mov (&DWP(-4,$out),"eax"); # key->y=0; -&function_end("private_RC4_set_key"); +&function_end("RC4_set_key"); # const char *RC4_options(void); &function_begin_B("RC4_options"); @@ -408,3 +425,4 @@ $idx="edx"; &asm_finish(); +close STDOUT;