I have a special structure that's basically a vector where every element is a Tuple of the same length, and I want to recursively index into it. So for example, I want
recursive_getindex([(:a, :b, :c), (:d, :e, :f)], 4) == :d
I'm currently doing this with divrem
to find the outer index and the inner index, but I feel like there should be an efficient way to do it. My instinct would be to use reinterpret
, but I want to be able to support Tuples that might be padded or non-isbits.
Any suggestions for how to go about this?
divrem
is pretty optimal for this, since you're doing basically the same operation as for indexing a matrix linearly
Hm, I think I must be doing something wrong then because it's quite slow compared to linearly accessing a matrix:
julia> function recursive_getindex(v::Vector{NTuple{N, T}}, idx) where {N, T}
i,j = divrem(idx-1, N)
v[i+1][j+1]
end;
julia> let N = 10, M = 3
@btime sum(i -> recursive_getindex(v, i), 1:($M*$N)) setup=(v=[ntuple(_ ->rand(), $M) for _ ∈ 1:$N])
@btime sum(i -> getindex(v, i), 1:($M*$N)) setup=(v=rand($M, $N))
end
33.739 ns (0 allocations: 0 bytes)
12.827 ns (0 allocations: 0 bytes)
17.3642736448238
I guess what I should be doing is maybe interally just storing a Vector{T}
and then have a function for taking M
elements at a time as a Tuple
, since I know I can make that fast.
well it still has to load the tuple element first, seems like it doesn't want to fold that into one operation for some reason
e.g. with
Base.@propagate_inbounds function recursive_getindex(v::Vector{NTuple{N, T}}, idx) where {N, T}
@boundscheck checkbounds(1:length(v)*N, idx)
i, j = divrem(idx-1, N)
@inbounds begin
res = v[i+1][j+1]
end
return res
end;
I get
julia> code_native((a,b) -> @inbounds(recursive_getindex(a,b)), (Vector{NTuple{3,Float64}},Int); dump_module=false)
.text
; ┌ @ REPL[46]:1 within `#87`
push rbp
mov rbp, rsp
mov rax, qword ptr [r13 + 16]
; │┌ @ REPL[44]:3 within `recursive_getindex`
; ││┌ @ int.jl:86 within `-`
dec rsi
movabs rcx, 6148914691236517206
mov rax, qword ptr [rax + 16]
; │└└
mov rax, qword ptr [rax]
; │┌ @ REPL[44]:3 within `recursive_getindex`
; ││┌ @ div.jl:181 within `divrem` @ div.jl:203
; │││┌ @ int.jl:295 within `div`
mov rax, rsi
imul rcx
; ││└└
; ││ @ REPL[44]:5 within `recursive_getindex`
; ││┌ @ essentials.jl:907 within `getindex`
mov rcx, qword ptr [rdi]
; ││└
; ││ @ REPL[44]:3 within `recursive_getindex`
; ││┌ @ div.jl:181 within `divrem` @ div.jl:203
; │││┌ @ int.jl:295 within `div`
mov rax, rdx
shr rax, 63
add rax, rdx
; │││└
; │││ @ div.jl:181 within `divrem` @ div.jl:204
; │││┌ @ int.jl:88 within `*`
lea rax, [rax + 2*rax]
; ││└└
; ││ @ REPL[44]:5 within `recursive_getindex`
; ││┌ @ essentials.jl:907 within `getindex`
mov rdx, qword ptr [rcx + 8*rax + 16]
; │││ @ tuple.jl:31 within `getindex`
sub rsi, rax
; │││ @ essentials.jl:907 within `getindex`
mov qword ptr [rbp - 16], rdx
vmovups xmm0, xmmword ptr [rcx + 8*rax]
vmovaps xmmword ptr [rbp - 32], xmm0
; ││└
; ││ @ REPL[44]:7 within `recursive_getindex`
vmovsd xmm0, qword ptr [rbp + 8*rsi - 32] # xmm0 = mem[0],zero
pop rbp
ret
; └└
; ┌ @ REPL[44]:7 within `<invalid>`
nop word ptr cs:[rax + rax]
; └
while the Matrix
version can just load the value without having to do the intermediate divrem
:
julia> code_native((a,b) -> @inbounds(getindex(a,b)), (Matrix{Float64},Int); dump_module=false)
.text
; ┌ @ REPL[47]:1 within `#89`
push rbp
mov rbp, rsp
mov rax, qword ptr [r13 + 16]
mov rax, qword ptr [rax + 16]
mov rax, qword ptr [rax]
; │┌ @ essentials.jl:907 within `getindex`
mov rax, qword ptr [rdi]
vmovsd xmm0, qword ptr [rax + 8*rsi - 8] # xmm0 = mem[0],zero
pop rbp
ret
; └└
; ┌ @ essentials.jl:907 within `<invalid>`
nop word ptr [rax + rax]
; └
Mason Protter said:
I guess what I should be doing is maybe interally just storing a
Vector{T}
and then have a function for takingM
elements at a time as aTuple
, since I know I can make that fast.
yeah, that's a good case for abstracting this away
that being said, why not store this as a Matrix
internally, one tuple per column? the biggest difference time wise probably won't be the single element indexing anyway but the continued iteration & order that's being done in
Oh yeah, good idea with the storing a matrix internally!
I don't see why this function is called recursive_getindex
, isn't it more like flattened_getindex
?
Well, the real name is just getindex
, I'm writing a method for a specific struct
Last updated: Nov 06 2024 at 04:40 UTC