invoke-latency/0000755000175000017500000000000013511415126012107 5ustar aphaphinvoke-latency/main.c0000644000175000017500000000352713511412367013212 0ustar aphaph#include extern void tare_entry(void); extern void hash_entry(void); extern void vtable_entry(void); extern void polymorphic_hash_entry(void); extern void polymorphic_vtable_entry(void); extern void setup(void); extern unsigned long invocation_counter; #define ITERATIONS 8000000.0 unsigned long rdtscp() { unsigned long value; __asm__ __volatile__("rdtscp; " // serializing read of tsc "shl $32,%%rdx; " // shift higher 32 bits stored in rdx up "or %%rdx,%%rax" // and or onto rax : "=a"(value) : : "%rcx", "%rdx"); return value; } unsigned long timing(void (*entry)(void)) { unsigned long tsc0 = rdtscp(); entry(); unsigned long tsc1 = rdtscp(); return (tsc1 - tsc0); } int main(int argc, char **argv) { setup(); timing(hash_entry); double t_tare = timing(tare_entry)/ITERATIONS; double t_hash = timing(hash_entry)/ITERATIONS; double t_vtable = timing(vtable_entry)/ITERATIONS; double t_poly = timing(polymorphic_hash_entry)/ITERATIONS; double t_poly_vtable = timing(polymorphic_vtable_entry)/ITERATIONS; for (int i = 0; i < 9; i++) { t_tare += timing(tare_entry)/ITERATIONS; t_hash += timing(hash_entry)/ITERATIONS; t_vtable += timing(vtable_entry)/ITERATIONS; t_poly += timing(polymorphic_hash_entry)/ITERATIONS; t_poly_vtable += timing(polymorphic_vtable_entry)/ITERATIONS; } t_hash -= t_tare; t_poly -= t_tare; t_vtable -= t_tare; t_poly_vtable -= t_tare; printf(" monomorphic polymorphic\n"); printf(" vtable %5.1f %5.1f\n", t_vtable/10, t_poly_vtable/10); printf(" hash %5.1f %5.1f\n", t_hash/10, t_poly/10); printf("\n"); printf("tare (direct call, subtracted from above numbers) = %.1f\n", t_tare/10); return 0; } invoke-latency/.#Makefile0000777000175000017500000000000013511343016020675 2aph@zarquon.pink.117055:1562315229ustar aphaphinvoke-latency/Makefile0000644000175000017500000000005413511342755013555 0ustar aphaphlatency: main.c testcases.s cc -g -o $@ $^ invoke-latency/testcases.s0000644000175000017500000002012013511412512014260 0ustar aphaph code_pointer_shift=32 .intel_syntax noprefix .macro unpack_table_entry rmethod, rentry mov \rmethod, \rentry shr \rentry, code_pointer_shift .endm .macro itable_call selector, selector_low, rklass, itable_code_map_mask_offset, itable_code_map_table_offset # Compute primary bucket mov r10d, \selector_low and r10d, DWORD PTR [\rklass+\itable_code_map_mask_offset] mov r10, QWORD PTR [\rklass+r10*8+\itable_code_map_table_offset] # Check for primary bucket match cmp r10d, \selector_low je 0f # Compute secondary bucket mov r10d, \selector_low shr r10d, 16 and r10d, DWORD PTR [\rklass+\itable_code_map_mask_offset] mov r10, QWORD PTR [\rklass+r10*8+\itable_code_map_table_offset] # Check for secondary bucket match cmp r10d, \selector_low je 0f # Receiver doesn't implement the required interface; throw exception int 3 # Unpack and call primary bucket 0: unpack_table_entry rbx, r10 call r10 .endm .macro old_vtable_call selector, rklass, vtable_offset mov r10, QWORD PTR [\rklass+\selector*8+\vtable_offset] call r10 .endm .macro vtable_call selector, dummy1, rklass, dummy2, vtable_offset mov r10, QWORD PTR [\rklass+\selector*8+\vtable_offset] unpack_table_entry rbx, r10 call r10 .endm .macro spill_regs # Save a bunch of stuff into the stack frame # The purpose of this is to make a call site more-or-less # realistic. mov DWORD PTR [rsp], eax mov QWORD PTR [rsp+8], rcx mov QWORD PTR [rsp+16], rdx # Load args mov rcx, QWORD PTR [rsp+24] .endm .macro direct_call selector, selector_low, rklass, itable_code_map_mask_offset, itable_code_map_table_offset call target1 .endm .macro call_seq selector call table_offset spill_regs mov rdx, QWORD PTR [rsp+32] mov eax, \selector mov r11, QWORD PTR [rdx + 8] # klass ptr \call rax, eax, r11, 0x1f8, \table_offset # mov rcx, QWORD PTR [rsp+8] add rcx, rax .endm .macro bump_counter mov rcx, QWORD PTR invocation_counter[rip] add rcx, 1 mov QWORD PTR invocation_counter[rip], rcx .endm .macro down_counter mov rcx, QWORD PTR invocation_counter[rip] add rcx, 1 mov QWORD PTR invocation_counter[rip], rcx .endm target1: bump_counter add rcx, rdx ret target2: bump_counter sub rcx, rdx ret target3: down_counter sub rcx, rdx ret .data klass1: .skip 0x1f8 .quad 7 .quad 0, 0, 0, 0, 0, 0, target1, 0 .quad 0, 0, 0, 0, 0, 0, 0, target1 klass2: .skip 0x1f8 .quad 7 .quad 0, 0, 0, 0, 0, 0, target2, 0 .quad 0, 0, 0, 0, 0, 0, 0, target2 klass3: .skip 0x1f8 .quad 7 .quad 0, 0, target2, 0, 0, 0, 0, 0 .quad 0, 0, 0, 0, 0, 0, 0, target2 obj1: .quad 1 # lock word .quad klass1 obj2: .quad 1 # lock word .quad klass2 obj3: .quad 1 # lock word .quad klass3 .globl invocation_counter .align 8 invocation_counter: .quad 0 .text .macro call_site obj call_type table_offset mov rdx, OFFSET FLAT:\obj mov QWORD PTR [rsp+32], rdx call_seq 0x54a0fae, \call_type, \table_offset .endm .macro vtable_call_site obj call_type table_offset mov rdx, OFFSET FLAT:\obj mov QWORD PTR [rsp+32], rdx call_seq 7, \call_type, \table_offset .endm .macro setup_klass klass offset vtable_offset # itable entry mov rax, QWORD PTR [\klass+0x200+\offset*8] shl rax, code_pointer_shift or rax, 0x54a0fae mov QWORD PTR [\klass+0x200+\offset*8], rax # vtable entry mov rax, QWORD PTR [\klass+0x240+\vtable_offset*8] shl rax, code_pointer_shift or rax, 0x54a0fae mov QWORD PTR [\klass+0x240+\vtable_offset*8], rax .endm .globl setup setup: setup_klass klass1 6 7 setup_klass klass2 6 7 setup_klass klass3 2 7 mov rsi, 100000000 # warm up 1: sub rsi, 1 jne 1b ret polymorphic_hash_call_site: enter 64, 0 mov QWORD PTR [rsp+32], rdx spill_regs mov rdx, QWORD PTR [rsp+32] mov eax, 0x54a0fae mov r11, QWORD PTR [rdx + 8] # klass ptr itable_call rax, eax, r11, 0x1f8, 0x200 # mov rcx, QWORD PTR [rsp+8] add rcx, rax leave ret polymorphic_vtable_call_site: enter 64, 0 mov QWORD PTR [rsp+32], rdx spill_regs mov rdx, QWORD PTR [rsp+32] mov r11, QWORD PTR [rdx + 8] # klass ptr vtable_call 7, dummy, r11, dummy, 0x240 # mov rcx, QWORD PTR [rsp+8] add rcx, rax leave ret .macro polymorphic_hash_call obj mov rdx, OFFSET FLAT:\obj call polymorphic_hash_call_site .endm .macro polymorphic_vtable_call obj mov rdx, OFFSET FLAT:\obj call polymorphic_vtable_call_site .endm .globl tare_entry tare_entry: mov rsi, 1000000 enter 0x100, 0 .align 64 1: call_site obj2 direct_call call_site obj1 direct_call call_site obj2 direct_call call_site obj2 direct_call call_site obj3 direct_call call_site obj1 direct_call call_site obj3 direct_call call_site obj1 direct_call sub rsi, 1 jne 1b leave ret .text .globl hash_entry hash_entry: mov rsi, 1000000 enter 0x100, 0 .align 64 1: call_site obj2 itable_call 0x200 call_site obj1 itable_call 0x200 call_site obj2 itable_call 0x200 call_site obj2 itable_call 0x200 call_site obj3 itable_call 0x200 call_site obj1 itable_call 0x200 call_site obj3 itable_call 0x200 call_site obj1 itable_call 0x200 sub rsi, 1 jne 1b leave ret .globl vtable_entry vtable_entry: mov rsi, 1000000 enter 0x100, 0 .align 64 1: vtable_call_site obj2 vtable_call 0x240 vtable_call_site obj1 vtable_call 0x240 vtable_call_site obj2 vtable_call 0x240 vtable_call_site obj2 vtable_call 0x240 vtable_call_site obj3 vtable_call 0x240 vtable_call_site obj1 vtable_call 0x240 vtable_call_site obj3 vtable_call 0x240 vtable_call_site obj1 vtable_call 0x240 sub rsi, 1 jne 1b leave ret .globl polymorphic_hash_entry polymorphic_hash_entry: mov rsi, 1000000 enter 0x100, 0 .align 64 1: polymorphic_hash_call obj2 polymorphic_hash_call obj1 polymorphic_hash_call obj2 polymorphic_hash_call obj2 polymorphic_hash_call obj3 polymorphic_hash_call obj1 polymorphic_hash_call obj3 polymorphic_hash_call obj1 sub rsi, 1 jne 1b leave ret .globl polymorphic_vtable_entry polymorphic_vtable_entry: mov rsi, 1000000 enter 0x100, 0 .align 64 1: polymorphic_vtable_call obj2 polymorphic_vtable_call obj1 polymorphic_vtable_call obj2 polymorphic_vtable_call obj2 polymorphic_vtable_call obj3 polymorphic_vtable_call obj1 polymorphic_vtable_call obj3 polymorphic_vtable_call obj1 sub rsi, 1 jne 1b leave ret