%% stochastic_dominance_test takes latitutdes of samples, x, at an early period and latitudes of individuals, y
% at a later period, with respective abundances at those locations ax and ay, and tests for stochastic dominance of x to y (e.g. if
% F(z) is the df of x and G(z) is the df of y, is F>=G for all z
% last input, n_bs, is the number of bootstraps that are desired
function [p] = stochastic_dominance_test(x,y,ax,ay,n_bs)
nx = length(x);
ny = length(y);
%% Calculate empirical distribution functions
[fx,locs] = edf(x,y,ax);
[fy,~] = edf(y,x,ay);
%% Observed test statistic calulation
temp1 = fy-fx;
tOBS = max(temp1); % one-side Kolmogorov-Smirnov
%% Bootstrapping
fxO = fx; % copies empirical df, which is subsequently ordered
fyO = fy;
for j = 1:length(fx)
if fx(j) < fy(j)
fxO(j) = (fx(j) +fy(j))/2;
fyO(j) = fxO(j);
end
end
temp = zeros(1,n_bs); % pre-allocation
for i = 1:n_bs
pp = fxO(2:end) - [0; fxO(2:end-1)]; % gets the probability density for each lat associated with x
r = mnrnd(nx,pp); % r gives how many times each of those numbers was chosen
[F,~] = edf(locs(1:end-1),locs(1:end-1),r);
pp = fyO(2:end) - [0; fyO(2:end-1)]; % gets the probability density for each lat associated with y
r = mnrnd(ny,pp); % r gives how many times each of those numbers was chosen
[G,~] = edf(locs(1:end-1),locs(1:end-1),r);
temp1 = G - F;
temp(i) = max(temp1); % calculates Kolmogorov-Smirnov (KS) statistic for bootstrapped data
end
p = sum(temp>tOBS)/i; % calculates what proportion of times the bootstrapped test statistic exceeds the observed
p = p + .5*sum(temp==tOBS)/i; % adds 0.5 times the proportion of times the test statstic equals the observed
%% Nested Empirical CDF Calculation
function [EDF,xEDF] = edf(data_locs,desired_locs,abundance) % calculates the empirical cdf of the data (data_locs)...
% at locations (desired_locs) for abundance data with
% correspond with data_locs
locs_total = [data_locs; desired_locs]; % union of all locations
tracker = zeros(size(locs_total));
tracker(1:length(data_locs)) = abundance; % sets a tracker which assigns the abundance value to those which are "data"
[locs_sort,ix] = sort(locs_total); % sorts locations and tracker locs_total(ix) = locs_sort
tracker_sort = tracker(ix);
EDF = cumsum(tracker_sort)/sum(tracker_sort);
% only want unique values (for CvM stat, later)
[xEDF,ia] = unique(locs_sort,'last');
EDF = EDF(ia);
end
end