Preparing data for prediction

using SpeciesDistributionToolkit
using CairoMakie
# Bounding box of the study area. Its fields bound the `decimalLatitude` and
# `decimalLongitude` ranges of the occurrence query and are splatted as keyword
# arguments into the raster layer constructor further down.
spatial_extent = (; left = 5.0, bottom = 57.5, right = 10.0, top = 62.7)
(left = 5.0, bottom = 57.5, right = 10.0, top = 62.7)
# Look the taxon up by name; `strict = false` is forwarded to `taxon`
# (presumably allowing a non-exact name match — confirm against the package docs).
rangifer = taxon("Rangifer tarandus tarandus"; strict = false)

# Search filters: records marked as present, with coordinates, whose latitude and
# longitude fall inside `spatial_extent`, requesting 300 records per call.
query = [
    "occurrenceStatus" => "PRESENT",
    "hasCoordinate" => true,
    "decimalLatitude" => (spatial_extent.bottom, spatial_extent.top),
    "decimalLongitude" => (spatial_extent.left, spatial_extent.right),
    "limit" => 300,
]
presences = occurrences(rangifer, query...)

# Call `occurrences!` three more times on the same result object
# (presumably each call retrieves the next page of records — TODO confirm).
foreach(_ -> occurrences!(presences), 1:3)
# Raster data source built from the `CHELSA1` and `BioClim` identifiers.
dataprovider = RasterData(CHELSA1, BioClim)

# Dictionary mapping each layer code (e.g. "BIO1") to its textual description;
# the 19 entries are listed in the output below.
varnames = layerdescriptions(dataprovider)
Dict{String, String} with 19 entries:
  "BIO8"  => "Mean Temperature of Wettest Quarter"
  "BIO14" => "Precipitation of Driest Month"
  "BIO16" => "Precipitation of Wettest Quarter"
  "BIO18" => "Precipitation of Warmest Quarter"
  "BIO19" => "Precipitation of Coldest Quarter"
  "BIO10" => "Mean Temperature of Warmest Quarter"
  "BIO12" => "Annual Precipitation"
  "BIO13" => "Precipitation of Wettest Month"
  "BIO2"  => "Mean Diurnal Range (Mean of monthly (max temp - min temp))"
  "BIO11" => "Mean Temperature of Coldest Quarter"
  "BIO6"  => "Min Temperature of Coldest Month"
  "BIO4"  => "Temperature Seasonality (standard deviation ×100)"
  "BIO17" => "Precipitation of Driest Quarter"
  "BIO7"  => "Temperature Annual Range (BIO5-BIO6)"
  "BIO1"  => "Annual Mean Temperature"
  "BIO5"  => "Max Temperature of Warmest Month"
  "BIO9"  => "Mean Temperature of Driest Quarter"
  "BIO3"  => "Isothermality (BIO2/BIO7) (×100)"
  "BIO15" => "Precipitation Seasonality (Coefficient of Variation)"
# Build one layer per key of `varnames`: request the predictor for that layer key with
# the bounding box passed as keyword arguments, multiply it by 1.0 (presumably to
# promote the cell values to floating point — confirm), then convert the result to a
# `SimpleSDMResponse`.
# NOTE(review): the rendered output right after this step prints `layers` as a generic
# function, so the name seems to clash with a function from a loaded package — confirm
# that this assignment actually ran when the page was built.
layers = map(collect(keys(varnames))) do lname
    predictor = SimpleSDMPredictor(dataprovider; spatial_extent..., layer = lname)
    return convert(SimpleSDMResponse, 1.0 * predictor)
end

# Independent copy of the layers, taken before `layers` is edited in place below;
# these untouched copies are what `predictionstack` is built from.
originallayers = deepcopy(layers)
layers (generic function with 7 methods)
# Boolean layer derived from the first predictor layer and the occurrence records.
presenceonly = mask(first(layers), presences, Bool)

# Pseudo-absence mask computed with `SurfaceRangeEnvelope`, from which 250 cells
# are sampled.
absencecandidates = pseudoabsencemask(SurfaceRangeEnvelope, presenceonly)
absenceonly = SpeciesDistributionToolkit.sample(absencecandidates, 250)

# Replace every `false` cell by `nothing` in both layers, so that only `true`
# cells keep a value.
replace!(presenceonly, false => nothing)
replace!(absenceonly, false => nothing)

# Write each sampled pseudo-absence cell into `presenceonly` as `false`, so the layer
# ends up holding `true` (presence), `false` (pseudo-absence) or `nothing`.
for site in absenceonly
    presenceonly[site.longitude, site.latitude] = false
end

# In every predictor layer, set to `nothing` each cell whose key is not among the keys
# of `presenceonly`, leaving values only where a presence or pseudo-absence exists.
for layer in layers
    for k in setdiff(keys(layer), keys(presenceonly))
        layer[k] = nothing
    end
end

# Display the layers after the in-place masking above.
layers
# One `Ref` per layer: every predictor layer followed by the presence layer.
refs = Ref.([layers..., presenceonly])

# Training stack: the masked predictors named by their descriptions, plus "Presence".
# The names come from `values(varnames)` while the layers were built from
# `keys(varnames)`; this relies on both iterating the dictionary in the same order.
datastack = SimpleSDMStack([values(varnames)..., "Presence"], refs)

# Prediction stack: the unmasked copies of the predictors, with the same names.
predictionstack = SimpleSDMStack([values(varnames)...], Ref.(originallayers))
using DataFrames
# Show the training stack as a table.
DataFrame(datastack)
using MLJ

# Training table without the coordinate columns.
trainingtable = select(DataFrame(datastack), Not([:longitude, :latitude]));

# Split off the `Presence` column as the target `y`; the remaining columns form `X`.
y, X = unpack(trainingtable, ==(:Presence));

# Treat the target as a continuous variable.
y = coerce(y, Continuous)

# Load the two model types, then chain them: standardize, then linear regression.
Standardizer = @load Standardizer pkg = MLJModels add = true verbosity = 0
LM = @load LinearRegressor pkg = MLJLinearModels add = true verbosity = 0
model = Standardizer() |> LM()
DeterministicPipeline(
  standardizer = Standardizer(
        features = Symbol[], 
        ignore = false, 
        ordered_factor = false, 
        count = false), 
  linear_regressor = LinearRegressor(
        fit_intercept = true, 
        solver = nothing), 
  cache = true)
# `Xoshiro` is defined in the Random standard library and is not exported by Base, so
# bring it into scope explicitly instead of relying on another package re-exporting it.
using Random: Xoshiro

# Bind the pipeline to the training data and fit it.
mach = machine(model, X, y) |> fit!

# Measures reported by the cross-validation.
# NOTE(review): `y` was coerced to `Continuous` and the pipeline ends in a
# `LinearRegressor`, while these look like classification measures — confirm that they
# accept continuous predictions, or threshold the predictions first.
perf_measures = [mcc, f1score, accuracy, balanced_accuracy]

# 3-fold cross-validation with shuffling; the fixed seed makes the folds reproducible.
evaluate!(
    mach;
    resampling = CV(; nfolds = 3, shuffle = true, rng = Xoshiro(234)),
    measure = perf_measures,
)
# Convert the prediction stack to a table once and reuse it for both the predictors
# and the coordinates (the original performed this conversion twice).
predictiontable = DataFrame(predictionstack);

# Predict from every column except the coordinates.
value = predict(mach, select(predictiontable, Not([:longitude, :latitude])));
nothing #hide

# Coordinates of each cell, with the predicted value attached as a third column.
# This relies on `select` returning a new table, so adding the column leaves
# `predictiontable` untouched.
prediction = select(predictiontable, [:longitude, :latitude]);
prediction.value = value;
nothing #hide

# Turn the (longitude, latitude, value) table back into a layer.
output = Tables.materializer(SimpleSDMResponse)(prediction)

# Plot the predicted layer and overlay the occurrence records.
heatmap(sprinkle(output)...; colormap = :viridis)
scatter!(longitudes(presences), latitudes(presences))
current_figure()

This page was generated using Literate.jl.