diff --git a/.gitignore b/.gitignore
index 8c960ec..004bb7b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 *.jl.cov
 *.jl.*.cov
 *.jl.mem
+/Manifest.toml
\ No newline at end of file
diff --git a/src/HCubature.jl b/src/HCubature.jl
index 27a5200..89eeb37 100644
--- a/src/HCubature.jl
+++ b/src/HCubature.jl
@@ -20,7 +20,7 @@ module HCubature
 using StaticArrays, LinearAlgebra
 import Combinatorics, DataStructures, QuadGK
 
-export hcubature, hquadrature, hcubature_buffer, hcubature_count, hcubature_print
+export hcubature, hquadrature, hcubature_buffer, hcubature_count, hcubature_print, hcubature_return_evalbuf, hcubature_return_evalbuf_count, hcubature_return_evalbuf_print
 
 include("genz-malik.jl")
 include("gauss-kronrod.jl")
@@ -34,6 +34,12 @@ struct Box{n,T<:Real,TI,TE<:Real}
 end
 Base.isless(i::Box, j::Box) = isless(i.E, j.E)
 
+struct ReturnEvalBuffer{
+  S<:Union{Nothing,<:DataStructures.BinaryMaxHeap{<:Box}}}
+
+  buf::S # either a pre-allocateed partition buffer or nothing
+end
+
 cubrule(v::Val{n}, ::Type{T}) where {n,T} = GenzMalik(v, T)
 cubrule(::Val{1}, ::Type{T}) where {T} = GaussKronrod(T)
 
@@ -96,47 +102,88 @@ function hcubature_buffer_(f, a::Tuple{Vararg{Real,n}}, b::Tuple{Vararg{Real,n}}
     hcubature_buffer_(f, SVector{n}(float.(a)), SVector{n}(float.(b)), norm)
 end
 
-function hcubature_(f::F, a::SVector{n,T}, b::SVector{n,T}, norm, rtol_, atol, maxevals, initdiv, buf) where {F, n, T<:Real}
+function hcubature_(
+  f::F, a::SVector{n,T}, b::SVector{n,T}, 
+  norm, rtol_, atol, maxevals, initdiv, 
+  _buf::Union{Nothing,<:DataStructures.BinaryMaxHeap{<:Box},ReturnEvalBuffer},
+  eval_buf::Union{Nothing,<:DataStructures.BinaryMaxHeap{<:Box}}
+  ) where {F, n, T<:Real}
+
+    buf = _buf isa ReturnEvalBuffer ? _buf.buf : _buf
+
     rtol = rtol_ == 0 == atol ? sqrt(eps(T)) : rtol_
     (rtol < 0 || atol < 0) && throw(ArgumentError("invalid negative tolerance"))
     maxevals < 0 && throw(ArgumentError("invalid negative maxevals"))
     initdiv < 1 && throw(ArgumentError("initdiv must be positive"))
 
     rule = cubrule(Val{n}(), T)
-    numevals = evals_per_box = countevals(rule)
-
-    Δ = (b-a) / initdiv
-    b1 = initdiv == 1 ? b : a+Δ
-    I, E, kdiv = rule(f, a,b1, norm)
-    (n == 0 || iszero(prod(Δ))) && return I,E
-    firstbox = Box(a,b1, I,E,kdiv)
-    boxes = (buf===nothing) ? DataStructures.BinaryMaxHeap{typeof(firstbox)}() : (empty!(buf.valtree); buf)
-
-    push!(boxes, firstbox)
+    evals_per_box = countevals(rule)
 
     ma = Base.copymutable(a)
     mb = Base.copymutable(b)
 
-    if initdiv > 1 # initial box divided by initdiv along each dimension
+    if !isnothing(eval_buf)
+
+      isempty(eval_buf) && throw(ArgumentError("eval_buffer must be non-empty"))
+
+      eval_boxes = eval_buf.valtree
+      x = first(eval_boxes).a
+      y = first(eval_boxes).b
+      I, E, kdiv = rule(f, x,y, norm)
+      firstbox = Box(x, y, I, E, kdiv)
+
+      boxes = (buf===nothing) ? DataStructures.BinaryMaxHeap{typeof(firstbox)}() : (empty!(buf.valtree); buf)
+
+      push!(boxes, firstbox)
+
+      skip = true # skip the first box, which we already added
+      for box in eval_boxes
+        if skip; skip=false; continue; end
+        x = box.a
+        y = box.b
+        result_box = Box(x, y, rule(f, x,y, norm)...)
+        push!(boxes, result_box)
+        I += result_box.I
+        E += result_box.E
+      end
+
+      numevals = length(eval_buf.valtree) * evals_per_box
+    else
+      numevals = evals_per_box
+      Δ = (b-a) / initdiv
+      b1 = initdiv == 1 ? b : a+Δ
+      I, E, kdiv = rule(f, a,b1, norm)
+      (n == 0 || iszero(prod(Δ))) && return I,E
+      firstbox = Box(a,b1, I,E,kdiv)
+      boxes = (buf===nothing) ? DataStructures.BinaryMaxHeap{typeof(firstbox)}() : (empty!(buf.valtree); buf)
+
+      push!(boxes, firstbox)
+
+      if initdiv > 1 # initial box divided by initdiv along each dimension
         skip = true # skip the first box, which we already added
         @inbounds for c in CartesianIndices(ntuple(i->Base.OneTo(initdiv), Val{n}())) # Val ntuple loops are unrolled
-            if skip; skip=false; continue; end
-            for i = 1:n
-                ma[i] = a[i]+(c[i]-1)*Δ[i]
-                mb[i] = c[i]==initdiv ? b[i] : a[i]+c[i]*Δ[i]
-            end
-            x = SVector(ma)
-            y = SVector(mb)
-            # this is shorter and has unrolled loops, but somehow creates a type instability:
-            # x = SVector(ntuple(i -> a[i]+(c[i]-1)*Δ[i], Val{n}()))
-            # y = SVector(ntuple(i -> c[i]==initdiv ? b[i] : a[i]+c[i]*Δ[i], Val{n}()))
-            box = Box(x,y, rule(f, x,y, norm)...)
-            I += box.I; E += box.E; numevals += evals_per_box
-            push!(boxes, box)
+          if skip; skip=false; continue; end
+          for i = 1:n
+              ma[i] = a[i]+(c[i]-1)*Δ[i]
+              mb[i] = c[i]==initdiv ? b[i] : a[i]+c[i]*Δ[i]
+          end
+          x = SVector(ma)
+          y = SVector(mb)
+          # this is shorter and has unrolled loops, but somehow creates a type instability:
+          # x = SVector(ntuple(i -> a[i]+(c[i]-1)*Δ[i], Val{n}()))
+          # y = SVector(ntuple(i -> c[i]==initdiv ? b[i] : a[i]+c[i]*Δ[i], Val{n}()))
+          box = Box(x,y, rule(f, x,y, norm)...)
+          I += box.I; E += box.E; numevals += evals_per_box
+          push!(boxes, box)
         end
+      end
     end
 
-    (E ≤ max(rtol*norm(I), atol) || numevals ≥ maxevals) && return I,E
+    Inorm = norm(I)
+
+    if (E ≤ max(rtol*Inorm, atol) || numevals ≥ maxevals || !isfinite(Inorm))
+      return _buf isa ReturnEvalBuffer ? (I, E, boxes) : (I, E)
+    end
 
     @inbounds while true
         # get box with largest error
@@ -169,72 +216,90 @@ function hcubature_(f::F, a::SVector{n,T}, b::SVector{n,T}, norm, rtol_, atol, m
         I += boxes.valtree[i].I
         E += boxes.valtree[i].E
     end
-    return I,E
+
+    return _buf isa ReturnEvalBuffer ? (I, E, boxes) : (I, E)
 end
 
 function hcubature_(f, a::AbstractVector{T}, b::AbstractVector{S},
-                    norm, rtol, atol, maxevals, initdiv, buf) where {T<:Real, S<:Real}
+                    norm, rtol, atol, maxevals, initdiv, buf, eval_buf) where {T<:Real, S<:Real}
     length(a) == length(b) || throw(DimensionMismatch("endpoints $a and $b must have the same length"))
     F = float(promote_type(T, S))
-    return hcubature_(f, SVector{length(a),F}(a), SVector{length(a),F}(b), norm, rtol, atol, maxevals, initdiv, buf)
+    return hcubature_(f, SVector{length(a),F}(a), SVector{length(a),F}(b), norm, rtol, atol, maxevals, initdiv, buf, eval_buf)
 end
-function hcubature_(f, a::Tuple{Vararg{Real,n}}, b::Tuple{Vararg{Real,n}}, norm, rtol, atol, maxevals, initdiv, buf) where {n}
-    hcubature_(f, SVector{n}(float.(a)), SVector{n}(float.(b)), norm, rtol, atol, maxevals, initdiv, buf)
+function hcubature_(f, a::Tuple{Vararg{Real,n}}, b::Tuple{Vararg{Real,n}}, norm, rtol, atol, maxevals, initdiv, buf, eval_buf) where {n}
+    hcubature_(f, SVector{n}(float.(a)), SVector{n}(float.(b)), norm, rtol, atol, maxevals, initdiv, buf, eval_buf)
 end
 
 """
     hcubature(f, a, b; norm=norm, rtol=sqrt(eps), atol=0, maxevals=typemax(Int),
-    initdiv=1, buffer=nothing)
-
-Compute the n-dimensional integral of f(x), where `n == length(a) == length(b)`,
-over the hypercube whose corners are given by the vectors (or tuples) `a` and `b`.
-That is, dimension `x[i]` is integrated from `a[i]` to `b[i]`.  The
-return value of `hcubature` is a tuple `(I, E)` of the estimated integral
-`I` and an estimated error `E`.
-
-`f` should be a function `f(x)` that takes an n-dimensional vector `x`
-and returns the integrand at `x`.   The integrand can be any type that supports
-`+`, `-`, `*` real, and `norm` functions.  For example, the integrand
-can be real or complex numbers, vectors, matrices, etcetera.
-
-The integrand `f(x)` will be always be passed an `SVector{n,T}`,
-where `SVector` is an efficient vector type defined in the `StaticArrays`
-package and `T` is a floating-point type determined by promoting
-the endpoint `a` and `b` coordinates to a floating-point type.
-(Your integrand `f` should be type-stable: it should always return
-a value of the same type, given this type of `x`.)
+    initdiv=1, buffer=nothing, eval_buffer=nothing)
+
+Compute the n-dimensional integral of f(x), where `n == length(a) ==
+length(b)`, over the hypercube whose corners are given by the vectors (or
+tuples) `a` and `b`. That is, dimension `x[i]` is integrated from `a[i]` to
+`b[i]`.  The return value of `hcubature` is a tuple `(I, E)` of the estimated
+integral `I` and an estimated error `E`.
+
+`f` should be a function `f(x)` that takes an n-dimensional vector `x` and
+returns the integrand at `x`.   The integrand can be any type that supports
+`+`, `-`, `*` real, and `norm` functions.  For example, the integrand can be
+real or complex numbers, vectors, matrices, etcetera.
+
+The integrand `f(x)` will be always be passed an `SVector{n,T}`, where
+`SVector` is an efficient vector type defined in the `StaticArrays` package and
+`T` is a floating-point type determined by promoting the endpoint `a` and `b`
+coordinates to a floating-point type. (Your integrand `f` should be
+type-stable: it should always return a value of the same type, given this type
+of `x`.)
 
 The integrand will never be evaluated exactly at the boundaries of the
-integration volume.  (So, for example, it is possible to have an
-integrand that blows up at the boundaries, as long as the integral
-is finite, though such singularities will slow convergence.)
-
-The integration volume is adaptively subdivided, using a cubature
-rule due to Genz and Malik (1980), until the estimated error `E`
-satisfies `E ≤ max(rtol*norm(I), atol)`, i.e. `rtol` and `atol` are
-the relative and absolute tolerances requested, respectively.
-It also stops if the number of `f` evaluations exceeds `maxevals`.
-If neither `atol` nor `rtol` are specified, the
-default `rtol` is the square root of the precision `eps(T)`
-of the coordinate type `T` described above.
-Initially, the volume is divided into `initdiv` segments along each dimension.
+integration volume.  (So, for example, it is possible to have an integrand that
+blows up at the boundaries, as long as the integral is finite, though such
+singularities will slow convergence.)
+
+The integration volume is adaptively subdivided, using a cubature rule due to
+Genz and Malik (1980), until the estimated error `E` satisfies `E ≤
+max(rtol*norm(I), atol)`, i.e. `rtol` and `atol` are the relative and absolute
+tolerances requested, respectively. It also stops if the number of `f`
+evaluations exceeds `maxevals`. If neither `atol` nor `rtol` are specified, the
+default `rtol` is the square root of the precision `eps(T)` of the coordinate
+type `T` described above. Initially, the volume is divided into `initdiv`
+segments along each dimension.
 
 The error is estimated by `norm(I - I′)`, where `I′` is an alternative
-estimated integral (via an "embedded" lower-order cubature rule.)
-By default, the norm function used (for both this and the convergence
-test above) is `norm`, but you can pass an alternative norm by
-the `norm` keyword argument.  (This is especially useful when `f`
-returns a vector of integrands with different scalings.)
+estimated integral (via an "embedded" lower-order cubature rule.) By default,
+the norm function used (for both this and the convergence test above) is
+`norm`, but you can pass an alternative norm by the `norm` keyword argument.
+(This is especially useful when `f` returns a vector of integrands with
+different scalings.)
 
 In normal usage, `hcubature(...)` will allocate a buffer for internal
 computations. You can instead pass a preallocated buffer allocated using
-[`hcubature_buffer'](@ref) as the `buffer` argument. This buffer can be used across
-multiple calls to avoid repeated allocation.
+[`hcubature_buffer'](@ref) as the `buffer` argument. This buffer can be used
+across multiple calls to avoid repeated allocation.
+
+An existing partition of the domain can be reused by passing to `eval_buffer`
+kwarg, which then is used as the starting partition for the adaptive
+quadrature. Note that in such a case, `initdiv` is ignored and checks are not
+performed to make sure that the partition is non-overlapping and covers the
+domain of interest.  
 """
 hcubature(f, a, b; norm=norm, rtol::Real=0, atol::Real=0,
-                   maxevals::Integer=typemax(Int), initdiv::Integer=1, buffer=nothing) =
-    hcubature_(f, a, b, norm, rtol, atol, maxevals, initdiv, buffer)
+                   maxevals::Integer=typemax(Int), initdiv::Integer=1, buffer=nothing, eval_buffer=nothing) =
+    hcubature_(f, a, b, norm, rtol, atol, maxevals, initdiv, buffer, eval_buffer)
+
+"""
+    hcubature_return_evalbuf(f, a, b; kws...)
 
+Identical to [`hcubature`](@ref) but returns a triple `(I, E, buffer)` of 
+the estimated integral `I`, the estimated error bound `E`, and the partition
+`buffer` achieved during the integration process.
+
+The returned `buffer` can be reused through `eval_buffer` kwarg as the starting
+partition for the adaptive quadrature.
+"""
+hcubature_return_evalbuf(args...; buffer=nothing, kws...) =
+  hcubature(args...; buffer=ReturnEvalBuffer(buffer), kws...)
 
 """
     hcubature_count(f, a, b; kws...)
@@ -251,13 +316,24 @@ to improve the convergence rate.
 """
 function hcubature_count(f, a, b; kws...)
     count = Ref(0)
-    I, E = hcubature(a, b; kws...) do x
+    res = hcubature(a, b; kws...) do x
         count[] += 1
         f(x)
     end
-    return (I, E, count[])
+    return (res..., count[])
 end
 
+"""
+    hcubature_return_evalbuf_count(f, a, b; kws...)
+
+Identical to [`hcubature_return_evalbuf`](@ref) but returns `(I, E, buffer,
+count)` of the estimated integral `I`, the estimated error bound `E`, the
+partition `buffer` achieved during the integration process and a `count` of the
+number of times the integrand `f` was evaluated.
+"""
+hcubature_return_evalbuf_count(args...; buffer=nothing, kws...) = 
+  hcubature_count(args...; buffer=ReturnEvalBuffer(buffer), kws...)
+
 """
     hcubature_print([io], f, a, b; kws...)
 
@@ -281,6 +357,26 @@ hcubature_print(io::IO, f, a, b; kws...) = hcubature_count(a, b; kws...) do x
 end
 hcubature_print(f, a, b; kws...) = hcubature_print(stdout, f, a, b; kws...)
 
+"""
+    hcubature_return_evalbuf_print([io], f, a, b; kws...)
+
+Identical to [`hcubature_return_evalbuf`](@ref), but **prints** each integrand
+evaluation to the stream `io` (defaults to `stdout`) in the form:
+```
+f(x1) = y1
+f(x2) = y2
+...
+```
+which is useful for pedagogy and debugging.
+
+Also, like [`hcubature_return_evalbuf_count`](@ref), it returns a `(I, E,
+buffer, count)` of the estimated integral `I`, the estimated error bound `E`,
+the partition `buffer` achieved during the integration process and a `count` of
+the number of times the integrand `f` was evaluated.
+"""
+hcubature_return_evalbuf_print(args...; buffer=nothing, kws...) =
+  hcubature_print(args...; buffer=ReturnEvalBuffer(buffer), kws...)
+
 """
     hquadrature(f, a, b; norm=norm, rtol=sqrt(eps), atol=0, maxevals=typemax(Int), initdiv=1)
 
@@ -297,9 +393,9 @@ and call the [`quadgk`](@ref) function, which provides additional flexibility
 e.g. in choosing the order of the quadrature rule.
 """
 function hquadrature(f, a::T, b::S; norm=norm, rtol::Real=0, atol::Real=0,
-                     maxevals::Integer=typemax(Int), initdiv::Integer=1, buffer=nothing) where {T<:Real, S<:Real}
+                     maxevals::Integer=typemax(Int), initdiv::Integer=1, buffer=nothing, eval_buffer=nothing) where {T<:Real, S<:Real}
     F = float(promote_type(T, S))
-    hcubature_(x -> f(x[1]), SVector{1,F}(a), SVector{1,F}(b), norm, rtol, atol, maxevals, initdiv, buffer)
+    hcubature_(x -> f(x[1]), SVector{1,F}(a), SVector{1,F}(b), norm, rtol, atol, maxevals, initdiv, buffer, eval_buffer)
 end
 
 end # module
diff --git a/src/genz-malik.jl b/src/genz-malik.jl
index fa6b38a..fbdb198 100644
--- a/src/genz-malik.jl
+++ b/src/genz-malik.jl
@@ -70,7 +70,7 @@ const gmcache_lock = ReentrantLock() # thread-safety
 
 # internal code to construct n-dimensional Genz-Malik rule for coordinates of type `T`.
 function _GenzMalik(v::Val{n}, ::Type{T}) where {n, T<:Real}
-    n < 2 && throw(ArgumentError("invalid dimension $n: GenzMalik rule requires dimension > 2"))
+    n < 2 && throw(ArgumentError("invalid dimension $n: GenzMalik rule requires dimension ≥ 2"))
 
     λ₄ = sqrt(9/T(10))
     λ₂ = sqrt(9/T(70))
diff --git a/test/runtests.jl b/test/runtests.jl
index 5da3265..3dcde3a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -139,3 +139,42 @@ end
     @test hquadrature(x -> exp(-x^2), T(0), T(1); rtol = 1e-20)[1] ≈ 0.7468241328124270254
     @test hcubature(x -> exp(-x[1]^2), T.((0,0)), T.((1,1)); rtol = 1e-20)[1] ≈ 0.7468241328124270254
 end
+
+# wrappers to test hcubature_print API without printing anything
+hcubature_printnull(args...; kws...) = hcubature_print(devnull, args...; kws...)
+
+hcubature_return_evalbuf_printnull(args...; kws...) = hcubature_return_evalbuf_print(devnull, args...; kws...)
+
+@testset "eval_buffer" begin
+  osc(x) = sin(33*prod(x) + 5cos(20*sum(x))) # a highly oscillatory function
+  I, E, buf, count = hcubature_return_evalbuf_count(osc, [0.0,0.0], [1.0,1.0])
+  _I, _E, _count = hcubature_count(osc, [0.0,0.0], [1.0,1.0])
+  @test I == _I
+  @test E == _E
+  @test count == _count
+
+  for q in (hcubature, hcubature_count, hcubature_printnull, hcubature_return_evalbuf, hcubature_return_evalbuf_count, hcubature_return_evalbuf_printnull)
+    @test I ≈ q(osc, [0.0,0.0], [1.0,1.0], eval_buffer=buf, maxevals=0)[1] rtol=1e-15
+  end
+
+  I2, E2, buf2 = hcubature_return_evalbuf(x -> sin(x[1] + x[2]), [0.0,0.0], [1.0,1.0], maxevals=0) # 1-interval segbuf
+  @test buf2.valtree == [HCubature.Box(SVector((0.0,0.0)), SVector(1.0,1.0), I2, E2, only(buf2.valtree).kdiv)]
+
+  hcubature(osc, [0.0,0.0], [1.0,1.0], buffer=buf2)
+  @test buf.valtree == buf2.valtree
+
+  hcubature(x -> sin(x[1] + x[2]), [0.0,0.0], [1.0,1.0], buffer=buf2, maxevals=0)
+  @test buf2.valtree == [HCubature.Box(SVector((0.0,0.0)), SVector(1.0,1.0), I2, E2, only(buf2.valtree).kdiv)]
+
+  I3, E3, count3 =  hcubature_count(osc, [0.0,0.0], [1.0,1.0], buffer=buf2, eval_buffer=buf, maxevals=0)
+  @test buf2.valtree == buf.valtree && buf2 !== buf
+  @test I ≈ I3 rtol=1e-15
+  @test E ≈ E3 rtol=1e-15
+  @test count3 == length(buf)*17
+
+  # the partition should already meet the stopping criteria
+  @test hcubature_count(osc, [0.0,0.0], [1.0,1.0], eval_buffer=buf)[3] == length(buf)*17
+
+  # reusing for vector valued integral 
+  @test I ≈ hcubature_count(x -> [sin(100*sum(x)), osc(x)], [0.0,0.0], [1.0,1.0], eval_buffer=buf, maxevals=0)[1][2] rtol=1e-15
+end