September 28, 2023
ElleJay
EXTERN printf
EXTERN atoi
SECTION .data
prn: DB `suml: %ld\n\0`
SECTION .text
; param rdi: 64 bit integer
; returns rax: 64 bit integer
suml: MOV rax, rdi
.loop: CMP rdi, 0
JLE .end
SUB rdi, 1
ADD rax, rdi
JMP .loop
.end: RET
GLOBAL main
main: CMP rdi, 2
MOV rax, 1
JNE .end
MOV rdi, [rsi+8]
CALL atoi
MOV rdi, rax
CALL suml
MOV rdi, prn
MOV rsi, rax
MOV rax, 0
CALL printf
MOV rax, 0
.end: RET
SECTION .note.GNU-stack noalloc noexec nowrite progbits$ ./suml 1
suml: 1
$ ./suml 2
suml: 3
$ ./suml 3
suml: 6
EXTERN printf
EXTERN atoi
SECTION .data
prn: DB `sumr: %ld\n\0`
SECTION .text
sumr: CMP rdi, 0
CMOVLE rax, rdi
JLE .end
MOV rax, rdi
SUB rdi, 1
PUSH rax
CALL sumr
POP rdi
ADD rax, rdi
.end: RET
GLOBAL main
main: CMP rdi, 2
MOV rax, 1
JNE .end
MOV rdi, [rsi+8]
CALL atoi
MOV rdi, rax
CALL sumr
MOV rdi, prn
MOV rsi, rax
MOV rax, 0
CALL printf
MOV rax, 0
.end: RET
SECTION .note.GNU-stack noalloc noexec nowrite progbits$ ./sumr 1
sumr: 1
$ ./sumr 2
sumr: 3
$ ./sumr 3
sumr: 6
EXTERN printf
EXTERN atoi
SECTION .data
prn: DB `sumtr: %ld\n\0`
SECTION .text
sumtr: MOV rax, rdi
.rec: CMP rdi, 0
JLE .end
SUB rdi, 1
ADD rax, rdi
PUSH rdi
CALL .rec
;JMP .rec
POP rdi
.end: RET
GLOBAL main
main: CMP rdi, 2
MOV rax, 1
JNE .end
MOV rdi, [rsi+8]
CALL atoi
MOV rdi, rax
CALL sumtr
MOV rdi, prn
MOV rsi, rax
MOV rax, 0
CALL printf
MOV rax, 0
.end: RET
SECTION .note.GNU-stack noalloc noexec nowrite progbits$ ./sumtr 1
sumtr: 1
$ ./sumtr 2
sumtr: 3
$ ./sumtr 3
sumtr: 6
Recursion
sumr: 125000250000
real 0m0.007s
user 0m0.003s
sys 0m0.004s
Tail-Call Recursion
sumtr: 125000250000
real 0m0.007s
user 0m0.004s
sys 0m0.003s
Without PUSH/POP:
sumtr: 125000250000
real 0m0.005s
user 0m0.004s
sys 0m0.001s
Tail Call Eliminated:
sumtr: 125000250000
real 0m0.001s
user 0m0.001s
sys 0m0.000s
Loop
suml: 125000250000
real 0m0.001s
user 0m0.001s
sys 0m0.000s
FoldL
FoldR
foldl: CMP rdx, 0
CMOVLE rax, rsi
; rax: result
JBE .end
;
PUSH rdi
MOV rax, rdi
MOV rdi, rsi
MOV rsi, [rcx]
; rdi: init
; rsi: list[0]
; rax: fn
; rdx: n
; rcx: list
PUSH rdx
PUSH rcx
CALL rax
MOV rsi, rax
POP rcx
POP rdx
POP rdi
;
SUB rdx, 1
ADD rcx, 8
; rdi: fn
; rsi: fn init list[0]
; rdx: n-1
; rcx: list[1:]
CALL foldl
; rax: result
.end: RETfoldl: CMP rdx, 0
CMOVLE rax, rsi
; rax: result
JBE .end
;
PUSH rdi
MOV rax, rdi
MOV rdi, rsi
MOV rsi, [rcx]
; rdi: init
; rsi: list[0]
; rax: fn
; rdx: n
; rcx: list
PUSH rdx
PUSH rcx
CALL rax
MOV rsi, rax
POP rcx
POP rdx
POP rdi
;
SUB rdx, 1
ADD rcx, 8
; rdi: fn
; rsi: fn init list[0]
; rdx: n-1
; rcx: list[1:]
CALL foldl
; rax: result
.end: RETfoldl: CMP rdx, 0
CMOVLE rax, rsi
; rax: result
JBE .end
;
PUSH rdi
MOV rax, rdi
MOV rdi, rsi
MOV rsi, [rcx]
; rdi: init
; rsi: list[0]
; rax: fn
; rdx: n
; rcx: list
PUSH rdx
PUSH rcx
CALL rax
MOV rsi, rax
POP rcx
POP rdx
POP rdi
;
SUB rdx, 1
ADD rcx, 8
; rdi: fn
; rsi: fn init list[0]
; rdx: n-1
; rcx: list[1:]
CALL foldl
; rax: result
.end: RETfoldll: CMP rdx, 0
CMOVLE rax, rsi
; rax: result
JBE .end
MOV r8, rdi
;PUSH rdi
MOV rax, rdi
MOV rdi, rsi
MOV rsi, [rcx]
; rdi: init
; rsi: list[0]
; rax: fn
; rdx: n
; rcx: list
;PUSH rdx
;PUSH rcx
CALL rax
MOV rsi, rax
;POP rcx
;POP rdx
;POP rdi
MOV rdi, r8
SUB rdx, 1
ADD rcx, 8
; rdi: fn
; rsi: fn init list[0]
; rdx: n-1
; rcx: list[1:]
JMP foldll
; rax: result
.end: RETfoldr: 500000
real 0m0.000s
user 0m0.000s
sys 0m0.000s
foldl: 500000
real 0m0.000s
user 0m0.000s
sys 0m0.000s
foldll: 500000
real 0m0.000s
user 0m0.000s
sys 0m0.000s
Record perf(1) events:
perf record -o meow.syscap ./meow $(seq 1 120000) && \
perf report -s sym,srcline --stdio -i meow.syscap Radically reduced number of events:
| Version | # Event |
|---|---|
| Fold Right (Recursive) | 11689523 |
| Fold Left (Tail Recursive) | 10050866 |
| Fold Left (Tail Call Eliminated) | 6905606 |
| Overhead | Symbol | Source:Line |
|---|---|---|
| 12.62% | foldr |
foldr.asm:46 |
| 11.57% | __GI_strtoll |
|
| 11.42% | foldr |
foldr.asm:45 |
| 6.91% | __GI_____strtoll_l_internal |
|
| 6.90% | foldr |
foldr.asm:43 |
| 6.61% | 0xffffffff9b001917 |
|
| 6.61% | add |
foldr.asm:23 |
| 6.25% | main.fill |
foldr.asm:100 |
| 5.95% | __GI_____strtoll_l_internal |
|
| … | … |
| Overhead | Symbol | Source:Line |
|---|---|---|
| 28.79% | foldl.end |
foldl.asm:62 |
| 10.11% | __GI_strtoll |
|
| 9.92% | __GI_____strtoll_l_internal |
|
| 9.85% | __GI_____strtoll_l_internal |
|
| 9.68% | atoi@plt |
|
| 9.57% | atoi |
|
| 9.44% | __GI_____strtoll_l_internal |
|
| 9.44% | __GI_____strtoll_l_internal |
|
| 3.05% | __GI___tunables_init |
|
| … | … |
| Overhead | Symbol | Source:Line |
|---|---|---|
| 17.24% | __GI_strtoll |
|
| 16.52% | __GI_____strtoll_l_internal |
|
| 15.62% | atoi@plt |
|
| 15.25% | __GI_____strtoll_l_internal |
|
| 15.15% | __GI_____strtoll_l_internal |
|
| 15.04% | __GI_____strtoll_l_internal |
|
| 4.97% | __GI___tunables_init |
|
| 0.21% | _start |
|
| 0.01% | 0xffffffff9b001917 |
What happens to foldl when it evaluates lazily?