+{
+my $out="%r10";
+my $cnt="%rcx";
+my $max="%r11";
+my $lasttick="%r8d";
+my $lastdiff="%r9d";
+my $redzone=win64?8:-8;
+
+print<<___;
+.globl OPENSSL_instrument_bus
+.type OPENSSL_instrument_bus,\@abi-omnipotent
+.align 16
+OPENSSL_instrument_bus:
+ mov $arg1,$out # tribute to Win64
+ mov $arg2,$cnt
+ mov $arg2,$max
+
+ rdtsc # collect 1st tick
+ mov %eax,$lasttick # lasttick = tick
+ mov \$0,$lastdiff # lastdiff = 0
+ clflush ($out)
+ .byte 0xf0 # lock
+ add $lastdiff,($out)
+ jmp .Loop
+.align 16
+.Loop: rdtsc
+ mov %eax,%edx
+ sub $lasttick,%eax
+ mov %edx,$lasttick
+ mov %eax,$lastdiff
+ clflush ($out)
+ .byte 0xf0 # lock
+ add %eax,($out)
+ lea 4($out),$out
+ sub \$1,$cnt
+ jnz .Loop
+
+ mov $max,%rax
+ ret
+.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
+
+.globl OPENSSL_instrument_bus2
+.type OPENSSL_instrument_bus2,\@abi-omnipotent
+.align 16
+OPENSSL_instrument_bus2:
+ mov $arg1,$out # tribute to Win64
+ mov $arg2,$cnt
+ mov $arg3,$max
+ mov $cnt,$redzone(%rsp)
+
+ rdtsc # collect 1st tick
+ mov %eax,$lasttick # lasttick = tick
+ mov \$0,$lastdiff # lastdiff = 0
+
+ clflush ($out)
+ .byte 0xf0 # lock
+ add $lastdiff,($out)
+
+ rdtsc # collect 1st diff
+ mov %eax,%edx
+ sub $lasttick,%eax # diff
+ mov %edx,$lasttick # lasttick = tick
+ mov %eax,$lastdiff # lastdiff = diff
+.Loop2:
+ clflush ($out)
+ .byte 0xf0 # lock
+ add %eax,($out) # accumulate diff
+
+ sub \$1,$max
+ jz .Ldone2
+
+ rdtsc
+ mov %eax,%edx
+ sub $lasttick,%eax # diff
+ mov %edx,$lasttick # lasttick = tick
+ cmp $lastdiff,%eax
+ mov %eax,$lastdiff # lastdiff = diff
+ mov \$0,%edx
+ setne %dl
+ sub %rdx,$cnt # conditional --$cnt
+ lea ($out,%rdx,4),$out # conditional ++$out
+ jnz .Loop2
+
+.Ldone2:
+ mov $redzone(%rsp),%rax
+ sub $cnt,%rax
+ ret
+.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
+___
+}