BRTs for species distribution forecasting

using SimpleSDMLayers
using EvoTrees
using GBIF
using StatsBase
using StatsPlots

Justification for this use case: Boosted Regression Trees (BRTs) are a powerful way to predict the distribution of species. We will see how we can get information in and out of layers to use them, and how to use this model to predict a new distribution under a climate change scenario. This use-case assumes that you have read the manual pages for GBIF integration, future data, and pseudo-absences generation.

We will re-use the data from the pseudo-absences example:

sp = GBIF.taxon("Hypomyces lactifluorum")
observations = occurrences(
    sp, "hasCoordinate" => true, "limit" => 300, "country" => "CA", "country" => "US"
)
while length(observations) < size(observations)
    occurrences!(observations)
end

We will pick the entire BioClim layers at a 10 minutes resolution, and clip them to the observations (this adds a 5 degrees buffer).

layers = [
    clip(layer, observations) for layer in SimpleSDMPredictor(WorldClim, BioClim, 1:19)
];

To remove the sampling effect, we transform the presences to a grid, and generate pseudo-absences using the surface range envelope method.

presences = mask(layers[1], observations, Bool)
absences = rand(SurfaceRangeEnvelope, presences)
SDM response → 278×712 grid with 100109 Bool-valued cells
  Latitudes	24.666666666666668 ⇢ 71.0
  Longitudes	-169.0 ⇢ -50.33333333333333

The next step is to extract coordinates at which the species is present or pseudo-absent - we can rely on the replace method to empty any false values:

xy_presence = keys(replace(presences, false => nothing));
xy_absence = keys(replace(absences, false => nothing));
xy = vcat(xy_presence, xy_absence);

With the xy list of coordinates, we can get a predictor X, and a response y.

X = hcat([layer[xy] for layer in layers]...);
y = vcat(fill(1.0, length(xy_presence)), fill(0.0, length(xy_absence)));

To train the model, we will use a random subset representing 70% of the dataset:

train_size = floor(Int, 0.7 * length(y));
train_idx = sample(1:length(y), train_size; replace=false);
test_idx = setdiff(1:length(y), train_idx);

This gives use the training and testing (or evaluation) sets:

Xtrain, Xtest = X[train_idx, :], X[test_idx, :];
Ytrain, Ytest = y[train_idx], y[test_idx];

In order to fit the tree, we need to define a number of parameters. We will use a Gaussian maximum likelihood tree (from EvoTrees), which will give us a measure of the average prediction, but also the standard deviation. This is important in order to communicate uncertainty.

gaussian_tree_parameters = EvoTreeGaussian(;
    loss=:gaussian,
    metric=:gaussian,
    nrounds=100,
    nbins=100,
    λ=0.0,
    γ=0.0,
    η=0.1,
    max_depth=7,
    min_weight=1.0,
    rowsample=0.5,
    colsample=1.0,
)
EvoTrees.EvoTreeGaussian{Float64, EvoTrees.Gaussian, Int64}(EvoTrees.Gaussian(), 100, 0.0, 0.0, 0.1, 7, 1.0, 0.5, 1.0, 100, 0.5, :gaussian, MersenneTwister(123), "cpu")

We can now fit the BRT. This function has an additional print_every_n to update on the progress every n epochs, but we don't really need it here.

model = fit_evotree(gaussian_tree_parameters, Xtrain, Ytrain; X_eval=Xtest, Y_eval=Ytest)
EvoTrees.GBTree{Float64}(EvoTrees.Tree{Float64}[EvoTrees.Tree{Float64}([0], UInt8[0x00], [0.0], [0.0], [0.49948400412796695; -0.6928896485418512], Bool[0]), EvoTrees.Tree{Float64}([12, 1, 1, 7, 7, 15, 5, 8, 8, 19  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x21, 0x1b, 0x18, 0x52, 0x51, 0x19, 0x52, 0x2b, 0x2e, 0x27  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [646.4200000000001, 1.5581359839439393, 0.6853224754333492, 47.44285461425781, 47.15243927001954, 18.47151470184326, 29.316439170837402, 11.152385101318359, 11.86371458053589, 108.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [178.86371341310337, 3.7495181995774516, 50.38122952797035, 3.268496584496461e-13, 7.249448950578451, 1.1848262930062106, 38.473394477955, 4.973799150320701e-14, 1.1368683772161603e-13, 4.604811255984572  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … 0.05005159958720331 -0.033281733746130915; 0.0 0.0 … 7.73328933572838e-5 -9.463615199391286e-5], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([12, 8, 1, 2, 5, 10, 10, 17, 5, 12  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x22, 0x3f, 0x1c, 0x60, 0x50, 0x25, 0x55, 0x2c, 0x4f, 0x17  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [667.0, 15.694537239074707, 1.8183349895477297, 16.37946487426758, 28.76433334350586, 15.650710678100586, 21.921911907196044, 101.27999999999997, 28.475301685333253, 482.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [136.6752234557778, 8.02399661625212, 49.647029715974824, 1.1603951980521998, 10.35114364503951, 4.555323477117913, 31.01260400804891, 0.03981315184967116, 3.065785600055399, 6.445336216525142  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … -0.00046168594914633453 -0.04030119873308357; 0.0 0.0 … 0.004071006808477647 -0.004460066429456947], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([12, 1, 1, 5, 12, 15, 5, 8, 6, 18  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x20, 0x1b, 0x18, 0x2a, 0x12, 0x13, 0x50, 0x40, 0x14, 0x1e  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [628.3599999999997, 1.5581359839439393, 0.6853224754333492, 23.72797492980957, 421.65999999999997, 16.399472370147706, 28.76433334350586, 15.85595977783203, -24.66714973449707, 184.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [114.72323161345722, 2.523679768872441, 41.569243120254725, 0.09582199696694715, 4.696251295721442, 1.7413749664909872, 23.594621954542163, 0.0035133803473925695, 0.05615568304996099, 0.20682209403252827  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … 0.011267614485295487 -0.02380069257115437; 0.0 0.0 … -0.0022568682449835373 -0.004284849781826946], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([12, 18, 1, 3, 5, 10, 5, 8, 5, 3  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x24, 0x37, 0x19, 0x55, 0x24, 0x23, 0x5b, 0x40, 0x4f, 0x3e  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [696.0, 258.0, 1.101927012205124, 40.262601661682126, 23.063869857788085, 15.3910315990448, 31.93010467529297, 15.85595977783203, 28.475301685333253, 30.954730567932128  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [102.6866714387809, 12.363463879146963, 24.56274262843047, 2.5956253936903266, 8.225652898789125, 1.6927286344013588, 23.602539356528737, 0.8422531024640421, 6.136785677214025, 0.061009121952143364  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … -0.03788022228795828 -0.04036325718905595; 0.0 0.0 … -0.034960328798781984 -0.024604845701104112], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([12, 18, 1, 5, 10, 15, 5, 6, 10, 5  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x22, 0x37, 0x19, 0x29, 0x32, 0x13, 0x51, 0x39, 0x2c, 0x13  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [667.0, 258.0, 1.101927012205124, 23.639084892272948, 16.9425630569458, 16.399472370147706, 29.022843189239502, -12.348335351943971, 16.421430892944336, 20.297757968902587  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [90.65703946421408, 4.725102577250496, 22.84360785774831, 1.741291202260399, 3.889244476926633, 1.5172341394774733, 19.191428512479654, 0.0535930605633439, 3.8640772274473, 0.02270468602796516  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … -0.0052077286315047105 0.03415237038984266; 0.0 0.0 … -0.003215444974469303 -0.0033553654093828123], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([12, 16, 1, 8, 14, 10, 5, 4, 19, 5  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x22, 0x26, 0x1c, 0x40, 0x20, 0x19, 0x4c, 0x13, 0x2c, 0x4a  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [667.0, 268.0, 1.8183349895477297, 15.85595977783203, 19.0, 14.062718629837036, 27.819721298217782, 745.5309295654297, 134.0, 27.4436754989624  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [94.5261358396656, 4.824879389520888, 24.987058767088293, 1.287690070589008, 2.3354046438713123, 4.384722555869956, 14.922986564205345, 0.7104355750474554, 3.2980913460222006, 5.164900077075329  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … 0.02904369335915657 -0.025847130722191206; 0.0 0.0 … -0.0082019287747691 -0.003390865971454143], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([12, 8, 1, 19, 12, 18, 5, 8, 12, 10  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x22, 0x40, 0x17, 0x22, 0x18, 0x5a, 0x4c, 0x3d, 0x1a, 0x56  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [667.0, 15.85595977783203, 0.3868435603380202, 90.0, 496.0, 324.0, 27.819721298217782, 15.40221827507019, 520.0, 22.26404983520509  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [78.23085927197474, 5.478239330747172, 20.217022581670463, 0.9654293525805571, 4.690775064445397, 0.8176912282393971, 14.532308610142962, 0.10277421279049292, 0.6032358280413899, 1.49673193085777  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … -0.04340224733006348 0.05233443847789861; 0.0 0.0 … -0.011951023675821947 0.006481232270706919], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([12, 1, 1, 10, 12, 8, 15, 5, 0, 12  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x21, 0x1a, 0x18, 0x2f, 0x1f, 0x01, 0x37, 0x27, 0x00, 0x14  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [646.4200000000001, 1.2572163724899292, 0.6853224754333492, 16.705495643615723, 607.47, -5.157340440750122, 43.42528972625733, 23.423654708862305, 0.0, 446.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [77.81787338427361, 3.6056255618214124, 27.853354148791603, 0.1126642218831222, 3.7707706315643748, 2.42212612810798, 8.49358578435826, 0.07203486354359256, 0.0, 2.2969730759088662  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … -0.037773517933735366 -0.0499465708999822; 0.0 0.0 … -0.030062768618092175 0.00522081154572688], Bool[1, 1, 1, 1, 1, 1, 1, 1, 0, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([12, 1, 1, 5, 3, 10, 15, 15, 0, 7  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x24, 0x1b, 0x19, 0x2b, 0x25, 0x23, 0x47, 0x1d, 0x00, 0x57  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [696.0, 1.5581359839439393, 1.101927012205124, 23.854774551391603, 26.379132766723632, 15.3910315990448, 53.84332347869873, 19.854459896087647, 0.0, 48.545793228149414  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [78.87654331388, 5.083869841808593, 19.305063866362595, 0.526733973450078, 7.955509881710448, 3.4127820788991965, 10.803203312138812, 0.40389199761924033, 0.0, 1.4028076383231252  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … -0.03444144781068165 -0.03141753156219267; 0.0 0.0 … -0.04071241433051815 -0.054500852916002235], Bool[1, 1, 1, 1, 1, 1, 1, 1, 0, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])  …  EvoTrees.Tree{Float64}([7, 13, 19, 13, 0, 7, 6, 0, 0, 0  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x01, 0x62, 0x04, 0x5a, 0x00, 0x4b, 0x5e, 0x00, 0x00, 0x00  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [18.108540592193602, 368.55999999999995, 29.0, 201.49999999999977, 0.0, 45.33975028991699, 1.5676237344741772, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [9.679226929867191e9, 5.782146453857422, 7.319371895719696e9, 2.0766334533691406, 0.0, 20203.48120689392, 7.213040548251417e9, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … 9.502831451244613e-5 -0.0005870180228386459; 0.0 0.0 … 0.04999880812858822 0.04980092324995616], Bool[1, 1, 1, 1, 0, 1, 1, 0, 0, 0  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([18, 17, 5, 9, 12, 10, 15, 14, 10, 19  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x11, 0x2c, 0x15, 0x5b, 0x45, 0x1c, 0x0b, 0x1e, 0x49, 0x55  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [134.0, 101.27999999999997, 20.70873046875, 17.029303874969482, 1077.0, 14.443700294494631, 13.718454446792602, 18.0, 19.074093170166016, 351.45000000000005  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [7.282130557507481e9, 9.212043386392265e9, 5.660177225204886e8, 2.856631802228921e9, 3.0910662085850906e8, 3.065669645595677e8, 2.880982942627146e8, 4.7865224711609685e8, 3.802145001722374e8, 13.420347213745117  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … 0.00023348971778654477 0.0011264482828150645; 0.0 0.0 … 0.0499999396047661 0.04999987588281053], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([18, 14, 9, 17, 0, 7, 7, 2, 13, 0  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x07, 0x28, 0x57, 0x23, 0x00, 0x01, 0x08, 0x63, 0x5d, 0x00  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [94.0, 24.0, 15.76389796257019, 77.0, 0.0, 18.108540592193602, 23.444319915771484, 17.805028553009027, 273.4100000000001, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.1025534554884392e10, 2.4557679960960007e9, 8.321698187989653e9, 3.094157320168009e8, 0.0, 5.19479088824311e9, 5.777763525927414e9, 4.340478583990971e6, 158.96329307556152, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … 6.176522506077282e-5 0.0009313493858418715; 0.0 0.0 … 0.049999208549340514 0.036224070306125386], Bool[1, 1, 1, 1, 0, 1, 1, 1, 1, 0  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([15, 19, 16, 3, 7, 1, 10, 4, 15, 5  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x47, 0x54, 0x01, 0x4a, 0x07, 0x63, 0x39, 0x1f, 0x0b, 0x1d  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [53.84332347869873, 342.0799999999999, 77.0, 36.11287322998046, 22.95331506729126, 20.57730180740356, 17.490790939331056, 867.824755859375, 13.718454446792602, 21.992626972198487  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [8.042906402144733e9, 1.307887769769802e10, 3.5436205670493326e9, 3.758035015576842e8, 2.4800885183425903e7, 27986.16880130768, 2.0034744915666566e9, 5.402193797207618e7, 2.7143373283384204e7, 5.228498776145935e6  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … -0.00854885481909606 -0.002929843890487212; 0.0 0.0 … 0.04991387652722015 0.04995275238545588], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([9, 15, 15, 6, 9, 2, 2, 8, 10, 8  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x56, 0x0b, 0x45, 0x29, 0x55, 0x0c, 0x09, 0x12, 0x3b, 0x0e  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [15.487316608428959, 13.718454446792602, 52.79184650421142, -16.89569225311279, 15.325755834579468, 8.50373348236084, 8.107656898498535, 4.562804689407348, 17.6848392868042, 3.114600539207459  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.338976496041021e10, 1.4846910514540935e8, 2.1914501462173737e10, 24570.080493092537, 1.2402745372857791e8, 3.3286794392254944e9, 4.250761842700063e9, 12.360970139503479, 221.36282021142097, 6.6429663852876514e7  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … 7.87917081127021e-5 0.0016831401905774731; 0.0 0.0 … 0.04999742556722893 0.049999999890484516], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([9, 18, 10, 18, 15, 12, 6, 8, 10, 16  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x56, 0x3f, 0x27, 0x32, 0x55, 0x5c, 0x61, 0x0e, 0x10, 0x38  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [15.487316608428959, 275.0, 15.84905221939087, 248.0, 61.89698467254639, 1601.04, 3.2185150313377364, 3.114600539207459, 12.430464973449707, 303.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [6.696679269455334e9, 3.658485815252006e8, 1.7090722331701584e10, 1.733857640842545e8, 6.278009662521112e8, 12.3450927734375, 3.4520837855011854e9, 1.9491416632004833e8, 1.1373618517429411e8, 1.3296284791975103e7  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … 0.000312594577503334 0.0014334308839913626; 0.0 0.0 … 0.03162326867751198 0.04944230659414642], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([15, 4, 7, 0, 13, 1, 13, 0, 0, 2  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x45, 0x03, 0x01, 0x00, 0x55, 0x52, 0x01, 0x00, 0x00, 0x11  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [52.79184650421142, 435.6427667236328, 18.108540592193602, 0.0, 149.0, 10.52863712310791, 28.369999999999997, 0.0, 0.0, 9.166732997894288  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [2.172223514867563e10, 5.354568623201785e10, 2.893281969028908e9, 0.0, 4.715933555064901e9, 3.975693026123562e8, 1.508002293607286e9, 0.0, 0.0, 1.864157382643543e8  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … -0.02064189762505483 -0.00028084063224418764; 0.0 0.0 … 0.049978761648652135 0.049064482382899025], Bool[1, 1, 1, 0, 1, 1, 1, 0, 0, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([9, 8, 15, 8, 14, 2, 19, 15, 4, 17  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x54, 0x12, 0x4c, 0x11, 0x3e, 0x0f, 0x05, 0x57, 0x2a, 0x22  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [15.098091964721679, 4.562804689407348, 56.708344879150395, 4.152797250747681, 48.0, 8.927967977523803, 32.0, 62.87889995574952, 962.1202697753906, 73.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [4.2568733258641567e9, 1.4764977049244654e8, 8.223937888187996e9, 1.1600354934775472e8, 1.0066887062660855e8, 7.774612449025192e8, 1.4922452517005162e9, 1.0482167884488702e8, 30.06059405207634, 3.2648753624464594e7  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … 0.00022443977314957908 0.00019009414199865567; 0.0 0.0 … 0.04999994667759198 0.04999997624535229], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([7, 13, 1, 12, 19, 8, 5, 0, 0, 2  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x03, 0x45, 0x53, 0x1e, 0x62, 0x12, 0x47, 0x00, 0x00, 0x09  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [20.015707969665527, 116.0, 10.802053184509278, 587.2, 963.52, 4.562804689407348, 26.856770477294923, 0.0, 0.0, 8.107656898498535  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [2.00980254895793e10, 2.494990002927408e10, 1.2024545854344978e9, 1.39324951171875, 4.008164931791705e8, 1.0241451421571854e8, 2.9432810953949547e8, 0.0, 0.0, 7.697799388050143e6  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … 0.0007260340066759796 0.000255442184520386; 0.0 0.0 … 0.04975843610321414 -0.07093365483980194], Bool[1, 1, 1, 1, 1, 1, 1, 0, 0, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), EvoTrees.Tree{Float64}([2, 3, 9, 13, 15, 5, 19, 4, 15, 5  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0], UInt8[0x0a, 0x4e, 0x57, 0x49, 0x4c, 0x12, 0x59, 0x60, 0x17, 0x16  …  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], [8.225842475891113, 37.912544860839844, 15.76389796257019, 120.0, 56.708344879150395, 20.164384956359864, 438.92999999999984, 1562.091123046875, 17.76886869430542, 20.87046058654785  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [2.9235696109685413e10, 1.0101709382981491e9, 2.3236907692165546e9, 1.630024660731721e7, 7.115264892578125, 9.935329556541269e8, 2.408948458108185e9, 61.3726127649112, 452.85620748996735, 2.049896240234375  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0 0.0 … -0.0003489076420628494 0.00020985233381913285; 0.0 0.0 … 0.049999993110078876 0.049999990476903615], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])], EvoTrees.EvoTreeGaussian{Float64, EvoTrees.Gaussian, Int64}(EvoTrees.Gaussian(), 100, 0.0, 0.0, 0.1, 7, 1.0, 0.5, 1.0, 100, 0.5, :gaussian, MersenneTwister(123, (0, 99198, 98196, 604)), "cpu"), EvoTrees.Metric(16, -0.72379416f0), 2, nothing)

The next step is to gather all the values of all the layers in a matrix, in order to run the full spatial prediction:

all_values = hcat([layer[keys(layer)] for layer in layers]...);

If the matrix is too big, we could resort to a combination of clip, make the prediction on each tile, and then use hcat and vcat to combine them. This is not the case here, so we can predict directly:

pred = EvoTrees.predict(model, all_values);

Once the prediction is done, we can copy the values into a layer.

distribution = similar(layers[1], Float64)
distribution[keys(distribution)] = pred[:, 1]
distribution
SDM response → 278×712 grid with 100109 Float64-valued cells
  Latitudes	24.666666666666668 ⇢ 71.0
  Longitudes	-169.0 ⇢ -50.33333333333333

The BRT is able to calculate a measure of relative gain from the different variables:

top5_var = importance(model, collect(layernames(WorldClim, BioClim)))[1:5]
5-element Vector{Pair{String, Float64}}:
                    "Isothermality" => 0.3174587600615396
    "Precipitation of Driest Month" => 0.25752561560776804
        "Precipitation Seasonality" => 0.21789628730153954
 "Precipitation of Warmest Quarter" => 0.04372830487502854
          "Annual Mean Temperature" => 0.033672694833670275

This is an interesting alternative to VIF for variable selection. Let's examine how the most important variable relates to the predicted distribution score:

most_important_layer = findfirst(isequal(top5_var[1].first), collect(layernames(WorldClim, BioClim)))
histogram(
    layers[most_important_layer][xy_presence]; fill=(0, :teal, 0.2), lc=:teal, frame=:origin, lab="Present"
)
histogram!(
    layers[most_important_layer][xy_absence]; fill=(0, :white, 0.0), frame=:origin, lc=:grey, lab="Absent"
)
xaxis!(layernames(WorldClim, BioClim, most_important_layer))

It is interesting to notice that despite the importance of this predictor, the difference between the presence and absence locations are not as clear as we may expect!

We can similarly extract uncertainty:

uncertainty = similar(layers[1], Float64)
uncertainty[keys(uncertainty)] = pred[:, 2]
uncertainty
SDM response → 278×712 grid with 100109 Float64-valued cells
  Latitudes	24.666666666666668 ⇢ 71.0
  Longitudes	-169.0 ⇢ -50.33333333333333

And we can now visualize the prediction, which we force to be in [0,1].

p_dis = plot(rescale(distribution, (0, 1)); c=:bamako, frame=:box)
scatter!(xy_presence; lab="", c=:black, alpha=0.2, msw=0.0, ms=3)

We can do the same thing for the uncertainty

p_unc = plot(uncertainty; c=:tokyo, frame=:box)

Of course, this prediction is returing values for the entire range of the initial layer, so let's compare the distributions of the prediction score:

histogram(
    distribution[xy_presence]; fill=(0, :teal, 0.2), lc=:teal, frame=:origin, lab="Present"
)
histogram!(
    distribution[xy_absence]; fill=(0, :white, 0.0), frame=:origin, lc=:grey, lab="Absent"
)
xaxis!("Prediction score")

This looks like a good opportunity to do some thresholding. Note that the values are not moved back to the unit range, because we'll need the raw values for a little surprise later on. We will find the value of the score that optimizes Youden's J (Cohen's κ is also a suitable alternative):

cutoff = LinRange(extrema(distribution)..., 500);

obs = y .> 0

tp = zeros(Float64, length(cutoff));
fp = zeros(Float64, length(cutoff));
tn = zeros(Float64, length(cutoff));
fn = zeros(Float64, length(cutoff));

for (i, c) in enumerate(cutoff)
    prd = distribution[xy] .>= c
    tp[i] = sum(prd .& obs)
    tn[i] = sum(.!(prd) .& (.!obs))
    fp[i] = sum(prd .& (.!obs))
    fn[i] = sum(.!(prd) .& obs)
end

From this, we can calculate a number of validation measures:

tpr = tp ./ (tp .+ fn);
fpr = fp ./ (fp .+ tn);
J = (tp ./ (tp .+ fn)) + (tn ./ (tn .+ fp)) .- 1.0;
ppv = tp ./ (tp .+ fp);

The ROC-AUC is an overall measure of how good the fit is:

dx = [reverse(fpr)[i] - reverse(fpr)[i - 1] for i in 2:length(fpr)]
dy = [reverse(tpr)[i] + reverse(tpr)[i - 1] for i in 2:length(tpr)]
AUC = sum(dx .* (dy ./ 2.0))
0.9638736331765043

We can pick the value of the cutoff that maximizes J:

thr_index = last(findmax(J))
τ = cutoff[thr_index]
0.5312432697027681

Let's have a look at the ROC curve:

plot(fpr, tpr; aspectratio=1, frame=:box, lab="", dpi=600, size=(400, 400))
scatter!([fpr[thr_index]], [tpr[thr_index]]; lab="", c=:black)
plot!([0, 1], [0, 1]; c=:grey, ls=:dash, lab="")
xaxis!("False positive rate", (0, 1))
yaxis!("True positive rate", (0, 1))

And the precision-recall as well:

plot(tpr, ppv; aspectratio=1, frame=:box, lab="", dpi=600, size=(400, 400))
scatter!([tpr[thr_index]], [ppv[thr_index]]; lab="", c=:black)
plot!([0, 1], [1, 0]; c=:grey, ls=:dash, lab="")
xaxis!("True positive rate", (0, 1))
yaxis!("Positive predictive value", (0, 1))

We can now map the result using τ as a cutoff for the distribution data:

range_mask = broadcast(v -> v >= τ, distribution)
SDM response → 278×712 grid with 100109 Bool-valued cells
  Latitudes	24.666666666666668 ⇢ 71.0
  Longitudes	-169.0 ⇢ -50.33333333333333

And finally, plot the whole thing:

plot(distribution; c=:lightgrey, leg=false)
plot!(mask(range_mask, distribution); c=:darkgreen)
scatter!(xy_presence; lab="", c=:orange, alpha=0.5, msw=0.0, ms=2)