Add fields to struct without losing performance · helpdesk (published)

struct Test{T}
    dt
    v
    z
end

function Test(
        f::Vector,
        dt = zeros(Float32, length(f)),
        v = ones(Int64, length(f)),
        z = zeros(Float32, length(f) + 1)
    )

    Test{eltype(dt)}(dt, v, z)
end

And then use these fields in a downstream function without allocating arrays? e.g.

function transform(f::Vector{T}, tfm::Test) where {T}
    dt = tfm.dt
    v = tfm.v
    z = tfm.z
        ...
end

My original function which didn't utilize any custom types doesn't allocate anything but when trying to use my Test type I can't figure out how to avoid allocations

Dale Black (Jul 31 2021 at 01:08):

tfm = Test(f)
@benchmark transform($f, $tfm)

# returns
Memory estimate: 224 bytes, allocs estimate: 14.

@benchmark transform($f, $dt, $v, $z)

# returns
Memory estimate: 0 bytes, allocs estimate: 0.

Andrey Oskin (Jul 31 2021 at 05:04):

Dale Black (Jul 31 2021 at 18:23):

Thank you! That is useful but I don't think it solved my problem of allocating arrays. Any idea how I might pre-allocate the arrays in the Test function

struct Test{T <: AbstractArray} <: DistanceTransform
    dt::T
    v::T
    z::T
end

function Test(
        f::Vector,
        dt = zeros(Float32, length(f)),
        v = ones(Int64, length(f)),
        z = zeros(Float32, length(f) + 1)
    )

    Test{AbstractArray}(dt, v, z)
end

function transform(f::Vector{T}, tfm::Test) where {T}
    dt = tfm.dt
    v = tfm.v
    z = tfm.z
        ...
end

Jakob Nybo Nissen (Jul 31 2021 at 18:35):

Do you mean that you want to keep reusing the same existing arrays in the Test constructor instead of allocating new arrays for every instance of Test?

Dale Black (Jul 31 2021 at 18:36):

Dale Black (Jul 31 2021 at 18:37):

I don't really understand types yet so I might be confused, but yes I think that is what I want

Jakob Nybo Nissen (Jul 31 2021 at 18:38):

Hmm, I don't mean to be rude, but I suggest not doing optimizations until you know you have a problem. E.g. if you re-use the same arrays for different Test objects, you could run into some annoying bugs if the arrays are modified and you forget different Test objects share the same arrays

Jakob Nybo Nissen (Jul 31 2021 at 18:39):

But you said the transform function allocated. The part of the function you posted should not allocate, so if that function has a problem, it must be in the omitted part

Dale Black (Jul 31 2021 at 18:43):

That makes sense. I am attempting this in order to learn Julia better. It might also add some convenience in the future, but it might be too much of a headache.

f = [1, 0, 1, 0]
dt = zeros(Float32, length(f))
v = ones(Int64, length(f))
z = zeros(Float32, length(f) + 1)

function squared_euclidean_distance_transform(f::AbstractArray{T,1}, dt, v, z) where {T}
    n = length(f)
    k = 1
    z[1] = -Inf32
    z[2] = Inf32

    # Lower envelope operation
    for q in 2:n
        while true
            s = ((f[q] + q^2) - (f[v[k]] + v[k]^2)) / (2 * q - 2 * v[k])
            if s ≤ z[k]
                k -= 1
            else
                k += 1
                v[k] = q
                z[k] = s
                z[k + 1] = Inf32
                break
            end
        end
    end

    # Distance transform operation
    k = 1
    for q in 1:n
        while z[k + 1] < q
            k = k + 1
        end
        dt[q] = (q - v[k])^2 + f[v[k]]
    end
    return dt
end

@benchmark squared_euclidean_distance_transform($f, $dt, $v, $z)

# returns
Memory estimate: 0 bytes, allocs estimate: 0.

Jakob Nybo Nissen (Jul 31 2021 at 19:08):

Ahhh - the issue before was simply that the parameter of Test was AbstractArray - which means the fields were abstractly typed

Jakob Nybo Nissen (Jul 31 2021 at 19:09):

For the code to be efficient (and non-allocating), you need to be able to predict the type of each field only from the type of the Test object itself

Kyle Daruwalla (Jul 31 2021 at 19:15):

Like Jakob mentioned, you want to do Test{typeof(dt)}(dt, v, z) instead of Test{AbstractArray}(dt, v, z). Or even better just do Test(dt, v, z).

Dale Black (Jul 31 2021 at 19:29):

Does this work when you need each field to be a different type? I am getting some errors from both Test{typeof(dt)}(dt, v, z) and Test(dt, v, z)

Andrey Oskin (Jul 31 2021 at 19:46):

Probably that's because you defined all fields as having the same type.
You should do something like this

struct Test{T1 <: AbstractArray, T2 <: AbstractArray} <: DistanceTransform
    dt::T1
    v::T2
    z::T1
end

and use Test(dt, v, z) form to initialize it, since compiler can infer types on its own.

Andrey Oskin (Jul 31 2021 at 19:49):

By the way, you can use AbstractVector{T} alias instead of AbstractArray{T, 1}. It's more straightforward.

Dale Black (Jul 31 2021 at 20:31):

Thank you! One more thing, if I want to dispatch on the type of f how would I do that in the initialization function? The code below works, if I want to restrict f::Vector{Int64} or img::Matrix{Int64} but I run into errors when I try to allow more flexibility e.g. f::Vector{Number}

function Test(
        f::Vector{Int64},
        dt = zeros(Float32, length(f)),
        v = ones(Int64, length(f)),
        z = zeros(Float32, length(f) + 1)
    )

    Test(dt, v, z)
end

function Test(
        img::Matrix{Int64},
        dt = zeros(Float32, size(img)),
        v = ones(Int64, size(img)),
        z = zeros(Float32, size(img) .+ 1)
    )

    Test(dt, v, z)
end

Andrey Oskin (Jul 31 2021 at 20:39):

Dale Black (Jul 31 2021 at 20:51):

function Test(
        f::Vector,
        dt = zeros(Float32, length(f)),
        v = ones(Int64, length(f)),
        z = zeros(Float32, length(f) + 1)
    )

    Test(dt, v, z)
end

function Test(
        f::Vector{Number},
        dt = zeros(Float32, length(f)),
        v = ones(Int64, length(f)),
        z = zeros(Float32, length(f) + 1)
    )

    Test(dt, v, z)
end

f = [1, 0, 1, 0]
tfm = Test(f)

tfm
MethodError: no method matching Main.workspace134.Test(::Vector{Int64})

Closest candidates are:

Main.workspace134.Test(::T1, !Matched::T2, !Matched::T1) where {T1<:AbstractArray, T2<:AbstractArray} at /Users/daleblack/Google Drive/dev/julia/pluto notebooks/restructure_distance_transforms.jl#==#95897bab-7702-48e7-8e0d-b5c0c83d4d94:3

Main.workspace134.Test(!Matched::Vector{Number}) at /Users/daleblack/Google Drive/dev/julia/pluto notebooks/restructure_distance_transforms.jl#==#95897bab-7702-48e7-8e0d-b5c0c83d4d94:8

Main.workspace134.Test(!Matched::Vector{Number}, !Matched::Any) at /Users/daleblack/Google Drive/dev/julia/pluto notebooks/restructure_distance_transforms.jl#==#95897bab-7702-48e7-8e0d-b5c0c83d4d94:8

...

top-level scope@Local: 1[inlined]

Jakob Nybo Nissen (Jul 31 2021 at 21:04):

You hit a classical beginner mistake in Julia: Vector{Int} is not a subtype of Vector{Number}. That is, if A is a subtype of B, that does NOT mean that T{A} is a subtype of T{B}

Jakob Nybo Nissen (Jul 31 2021 at 21:05):

Dale Black (Jul 31 2021 at 23:51):

struct Test{T1 <: AbstractArray, T2 <: AbstractArray} <: DistanceTransform
    dt::T1
    v::T2
    z::T1
end

function Test(
        f::Vector{<: Real},
        dt = zeros(Float32, length(f)),
        v = ones(Int64, length(f)),
        z = zeros(Float32, length(f) + 1)
    )

    Test(dt, v, z)
end

function Test(
        img::Array{<: Real},
        dt = zeros(Float32, size(img)),
        v = ones(Int64, size(img)),
        z = zeros(Float32, size(img) .+ 1)
    )

    Test(dt, v, z)
end

tfm = SquaredEuclidean(f)

tfm
MethodError: Main.workspace162.Test(::Vector{Float32}, ::Vector{Int64}, ::Vector{Float32}) is ambiguous. Candidates:

Main.workspace162.Test(dt::T1, v::T2, z::T1) where {T1<:AbstractArray, T2<:AbstractArray} in Main.workspace162 at /Users/daleblack/Google Drive/dev/julia/pluto notebooks/restructure_distance_transforms.jl#==#95897bab-7702-48e7-8e0d-b5c0c83d4d94:3

Main.workspace162.Test(f::Vector{var"#s1108"} where var"#s1108"<:Real, dt, v) in Main.workspace162 at /Users/daleblack/Google Drive/dev/julia/pluto notebooks/restructure_distance_transforms.jl#==#95897bab-7702-48e7-8e0d-b5c0c83d4d94:8

Main.workspace162.Test(img::Array{var"#s1112", N} where {var"#s1112"<:Real, N}, dt, v) in Main.workspace162 at /Users/daleblack/Google Drive/dev/julia/pluto notebooks/restructure_distance_transforms.jl#==#95897bab-7702-48e7-8e0d-b5c0c83d4d94:18

Possible fix, define

Main.workspace162.Test(::T1, ::T2, ::T1) where {T1<:(Vector{var"#s1108"} where var"#s1108"<:Real), T2<:AbstractArray}

Main.workspace162.Test(::Vector{Float32}, ::Vector{Float32}, ::Vector{Int64}, ::Vector{Float32})@Other: 15
top-level scope@Local: 1[inlined]

Andrey Oskin (Aug 01 2021 at 04:57):

At this point you have introduced too many functions, best thing you can do is to restart Julia.

Andrey Oskin (Aug 01 2021 at 05:03):

But I can't understand, what is the difference between all these methods, they all create the same object. You do not use type information at all, so you can for example define

function Test(
  img::AbstractArray,
  dt = zeros(Float32, size(img)),
  v = ones(Int64, size(img)),
  z = zeros(Float32, size(img) .+ 1)
)
  Test(dt, v, z)
end

This definition will work for vectors and matrices, and for any type of elements.

Stream: helpdesk (published)

Topic: Add fields to struct without losing performance

Dale Black (Jul 31 2021 at 01:05):