# ---- Packages ----
library(tidyverse)
library(lubridate)
library(sf)
library(terra)
library(rnaturalearth)
library(elevatr)
library(tidyterra)

# ---- 1) Download and parse the Nelchina Herd GPS data ----
# Source: USGS ScienceBase item 67a287e3d34e05baae1c8710 (1999–2002)
csv_url <- "https://www.sciencebase.gov/catalog/file/get/67a287e3d34e05baae1c8710?f=__disk__b7%2Fc1%2Fab%2Fb7c1ab6bebebabaa2ec5fa0cd2b07a7d8a848064"
csv_path <- file.path('_data/nelchina.csv')
download.file(csv_url, csv_path, mode = "wb")
raw <- readr::read_csv(csv_path, show_col_types = FALSE)
glimpse(raw)

# Parse timestamp; rename columns; drop fixes with missing or infinite coordinates
traj <- raw |>
  mutate(.time = parse_date_time(Location_Timestamp_AK,
                                 orders = c("ymd_HMS", "ymd_HM", "ymd"),
                                 tz = "UTC", quiet = TRUE)) |>
  filter(!is.na(.time)) |>
  rename(.id = Animal_ID, .lon = Longitude, .lat = Latitude) |>
  filter(is.finite(.lon), is.finite(.lat))

# 5 most-observed animals
top_ids <- traj |>
  count(.id) |>
  slice_max(n, n = 5) |>
  pull(.id)

# ---- 2) Build basemap and plot all 5 trajectories ----
trk_xlim <- range(traj$.lon[traj$.id %in% top_ids]) + 2*c(-0.5, 0.5)
trk_ylim <- range(traj$.lat[traj$.id %in% top_ids]) + 2*c(-0.3, 0.3)

# Alaska land boundary
ak <- ne_states(country = "united states of america", returnclass = "sf") |>
  filter(name == "Alaska")

# Major rivers and lakes for interior geographic context
rivers <- ne_download(scale = 50, type = "rivers_lake_centerlines",
                      category = "physical", returnclass = "sf")
lakes  <- ne_download(scale = 50, type = "lakes",
                      category = "physical", returnclass = "sf")

# DEM at zoom 7 (~4.9 km/pixel); compute hillshade from slope and aspect
bbox_sf <- st_bbox(c(xmin = trk_xlim[1], xmax = trk_xlim[2],
                     ymin = trk_ylim[1], ymax = trk_ylim[2]),
                   crs = st_crs(4326)) |>
  st_as_sfc() |>
  st_as_sf()

dem   <- get_elev_raster(bbox_sf, z = 7, src = "aws", clip = "bbox")
dem_r <- rast(dem)
hill  <- shade(
  terrain(dem_r, "slope",  unit = "radians"),
  terrain(dem_r, "aspect", unit = "radians"),
  angle = 40, direction = 315
)

# Clip rivers and lakes to the DEM extent
# Clip to actual DEM extent (elevatr expands bbox to tile boundaries)
rivers <- st_crop(rivers, st_bbox(dem_r))
# lakes  <- st_crop(lakes,  st_bbox(dem_r))

# Reusable layer list: hillshade + land outline + hydrography
basemap <- list(
  geom_spatraster(data = hill, aes(fill = hillshade)),
  scale_fill_distiller(palette = "Greys", na.value = NA, guide = "none"),
  # geom_sf(data = ak,     fill = NA,           color = "grey50",    linewidth = 0.3),
  geom_sf(data = lakes,  fill = "aliceblue",  color = "steelblue", linewidth = 0.2),
  geom_sf(data = rivers, color = "steelblue", linewidth = 0.3, alpha = 0.7)
)

# Plot: trajectories for all 5 top animals
ggplot() +
  basemap +
  geom_path(data = filter(traj, .id %in% top_ids),
            aes(.lon, .lat, group = factor(.id), color = factor(.id)),
            linewidth = 0.5, alpha = 0.9) +
  coord_sf(xlim = trk_xlim, ylim = trk_ylim, crs = 4326) +
  labs(x = NULL, y = NULL, color = "Animal") +
  theme_minimal() +
  theme(panel.grid = element_blank())

# Convert top-animal tracks to sf linestrings for correct reprojection in overview map
traj_lines <- traj |>
  filter(.id %in% top_ids) |>
  arrange(.id, .time) |>
  st_as_sf(coords = c(".lon", ".lat"), crs = 4326) |>
  group_by(.id) |>
  summarise(do_union = FALSE) |>
  st_cast("LINESTRING")

# Plot: Alaska overview with study area bounding box and centroid
ggplot() +
  geom_sf(data = ak, fill = "grey90", color = "grey50", linewidth = 0.3) +
  geom_sf(data = traj_lines, aes(group = factor(.id)),
          linewidth = 0.3, alpha = 0.6) +
  geom_sf(data = bbox_sf, fill = NA, color = "firebrick", linewidth = 0.7) +
  geom_sf(data = st_centroid(bbox_sf), shape = 3, size = 3,
          color = "firebrick", stroke = 1.2) +
  coord_sf(crs = 3338) +
  theme_void() +
  theme(legend.position = "none")

# ---- 3) Pick one individual for analysis ----
one_id <- top_ids[1]

# Single-animal track, sorted by time
trk <- traj |>
  filter(.id == one_id) |>
  arrange(.time)

# Project to Alaska Albers (EPSG:3338) and store absolute coordinates in km
coords <- trk |>
  st_as_sf(coords = c(".lon", ".lat"), crs = 4326) |>
  st_transform(3338) |>
  st_coordinates()

max_gap_days <- 3  # fixes more than this apart start a new segment

trk_proj <- trk |>
  mutate(
    abs_x   = coords[, 1] / 1000,
    abs_y   = coords[, 2] / 1000,
    day     = as.numeric(difftime(.time, .time[1], units = "days")),
    segment = cumsum(c(0, diff(day)) > max_gap_days)
  )

# Plot: full single-animal track on the basemap
ggplot() +
  basemap +
  geom_path(data = trk_proj, aes(.lon, .lat, group = segment), linewidth = 0.4, alpha = 0.7) +
  geom_point(data = slice(trk_proj, 1), aes(.lon, .lat),
             size = 3, shape = 21, fill = "white") +
  coord_sf(xlim = trk_xlim, ylim = trk_ylim, crs = 4326) +
  labs(x = NULL, y = NULL) +
  theme_minimal()

# ---- 4) Global BM estimate — naive fit over the full track ----
# Time and coordinate increments between successive fixes
dt_g <- diff(trk_proj$day)
dx_g <- diff(trk_proj$abs_x)
dy_g <- diff(trk_proj$abs_y)
T_g  <- sum(dt_g)
n_g  <- length(dt_g)

# MLE: drift = total displacement / total time; diffusion from residual increments
mu_x_g    <- sum(dx_g) / T_g
mu_y_g    <- sum(dy_g) / T_g
resid_x_g <- dx_g - mu_x_g * dt_g
resid_y_g <- dy_g - mu_y_g * dt_g
sigma2_g  <- sum(resid_x_g^2 / dt_g + resid_y_g^2 / dt_g) / (2 * n_g)
se_mu_g   <- sqrt(sigma2_g / T_g)

tibble(
  parameter = c("mu_x (km/day)", "mu_y (km/day)", "sigma2 (km2/day)"),
  estimate  = c(mu_x_g, mu_y_g, sigma2_g),
  se        = c(se_mu_g, se_mu_g, sigma2_g * sqrt(2 / (2 * n_g)))
)

# Plot: distance from first fix over time — cyclic pattern argues against a single BM
trk_proj |>
  mutate(dist_first = sqrt((abs_x - abs_x[1])^2 + (abs_y - abs_y[1])^2)) |>
  ggplot(aes(.time, dist_first)) +
  geom_line(linewidth = 0.4) +
  labs(x = NULL, y = "Distance from first fix (km)")

# ---- 5) Origin detection — centroid of pre-departure cluster ----
origin_r    <- 30  # km: search radius around first fix
origin_days <- 50  # days: time window before departure

# Distance from first fix, used only to seed the cluster membership test
dist0 <- sqrt((trk_proj$abs_x - trk_proj$abs_x[1])^2 +
              (trk_proj$abs_y - trk_proj$abs_y[1])^2)

# Average position of fixes near home before the outbound migration starts
origin <- trk_proj |>
  filter(dist0 < origin_r, day < origin_days) |>
  summarise(x = mean(abs_x), y = mean(abs_y),
            lon = mean(.lon), lat = mean(.lat))

# Re-express all coordinates relative to the detected origin
trk_proj <- trk_proj |>
  mutate(
    x_km = abs_x - origin$x,
    y_km = abs_y - origin$y,
    dist = sqrt(x_km^2 + y_km^2)
  )

# Plot: full track with detected origin marked (red cross)
ggplot() +
  basemap +
  geom_path(data = trk_proj, aes(.lon, .lat), linewidth = 0.4, alpha = 0.6) +
  geom_point(data = slice(trk_proj, 1), aes(.lon, .lat),
             size = 3, shape = 21, fill = "white") +
  annotate("point", x = origin$lon, y = origin$lat,
           shape = 3, size = 4, color = "firebrick") +
  coord_sf(xlim = trk_xlim, ylim = trk_ylim, crs = 4326) +
  labs(x = NULL, y = NULL) +
  theme_bw()

# ---- 6) Isolate first complete migration cycle ----
# Cycle boundaries defined by a horizontal threshold on the distance plot:
# start_idx: first fix above threshold (outbound departure)
# end_idx:   first fix back below threshold after the peak (return arrival)
# Both endpoints are at the same distance from origin by construction.
threshold <- 15   # km — adjust to move the cycle boundaries earlier or later

above     <- trk_proj$dist > threshold
peak_idx  <- which.max(trk_proj$dist[trk_proj$day < 200])
post_peak <- which(trk_proj$day > trk_proj$day[peak_idx])

start_idx <- which(above)[1]
end_idx   <- post_peak[which(!above[post_peak])[1]]

# Plot: distance from origin — horizontal threshold + vlines mark cycle boundaries
ggplot(trk_proj, aes(.time, dist)) +
  geom_line(linewidth = 0.4) +
  geom_hline(yintercept = threshold, linetype = "dashed", color = "steelblue") +
  geom_vline(xintercept = trk_proj$.time[start_idx],
             linetype = "dashed", color = "steelblue") +
  geom_vline(xintercept = trk_proj$.time[end_idx],
             linetype = "dashed", color = "steelblue") +
  labs(x = NULL, y = "Distance from origin (km)")

# Subset to the isolated cycle and label each fix as outbound or return
trk1 <- trk_proj[start_idx:end_idx, ] |>
  mutate(leg = if_else(row_number() <= (peak_idx - start_idx + 1),
                       "outbound", "return"))

# Plot: one cycle colored by leg; white = start, grey = end, cross = origin
trk1_xlim <- range(trk1$.lon) + c(-0.3, 0.3)
trk1_ylim <- range(trk1$.lat) + c(-0.2, 0.2)

ggplot() +
  basemap +
  geom_path(data = trk1, aes(.lon, .lat, color = leg, group = 1),
            linewidth = 0.5, alpha = 0.9) +
  geom_point(data = slice(trk1, 1),   aes(.lon, .lat),
             color = "black", size = 3, shape = 21, fill = "white") +
  geom_point(data = slice(trk1, n()), aes(.lon, .lat),
             color = "black", size = 3, shape = 21, fill = "grey60") +
  annotate("point", x = origin$lon, y = origin$lat,
           shape = 3, size = 4, color = "firebrick") +
  coord_sf(xlim = trk1_xlim, ylim = trk1_ylim, crs = 4326) +
  labs(x = NULL, y = NULL, color = "Leg") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90))

# ---- 7) BM estimation per leg ----
# MLE for drift (mu) and diffusion (sigma2) under isotropic 2D BM
bm_leg <- function(df) {
  t_  <- as.numeric(difftime(df$.time, df$.time[1], units = "days"))
  dt_ <- diff(t_)
  dx_ <- diff(df$x_km)
  dy_ <- diff(df$y_km)
  n_  <- length(dt_)
  T_  <- sum(dt_)
  mu_x_ <- sum(dx_) / T_
  mu_y_ <- sum(dy_) / T_
  resid_x_ <- dx_ - mu_x_ * dt_
  resid_y_ <- dy_ - mu_y_ * dt_
  sig2_  <- sum(resid_x_^2 / dt_ + resid_y_^2 / dt_) / (2 * n_)
  se_mu_ <- sqrt(sig2_ / T_)
  tibble(
    mu_x   = mu_x_,  se_mu_x = se_mu_,
    mu_y   = mu_y_,  se_mu_y = se_mu_,
    sigma2 = sig2_,  se_sig2 = sig2_ * sqrt(2 / (2 * n_))
  )
}

# Fit per leg; results printed below
bm_estimates <- trk1 |>
  group_by(leg) |>
  group_modify(~ bm_leg(.x)) |>
  ungroup()

bm_estimates

# ---- 8) Overlay drift arrows and diffusion circles ----
# Arrow: ref_dist km in drift direction, rooted at each leg's start
# Circle: 1σ spread over t* = ref_dist / |μ| days (same window as the arrow)
ref_dist <- 20

# Starting position (km, origin-relative) for each leg
leg_origins <- trk1 |>
  group_by(leg) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(leg, x0 = x_km, y0 = y_km)

# Arrow tips and circle radii from BM estimates
arrows_df <- bm_estimates |>
  left_join(leg_origins, by = "leg") |>
  mutate(
    mag   = sqrt(mu_x^2 + mu_y^2),
    t_ref = ref_dist / mag,
    x1    = x0 + ref_dist * mu_x / mag,
    y1    = y0 + ref_dist * mu_y / mag,
    r     = sqrt(sigma2 * t_ref)
  )

# Diffusion circles as point sequences
circles_df <- arrows_df |>
  rowwise() |>
  reframe(
    leg = leg,
    cx  = x1 + r * cos(seq(0, 2 * pi, length.out = 120)),
    cy  = y1 + r * sin(seq(0, 2 * pi, length.out = 120))
  )

# Plot: trajectory with drift arrows and diffusion circles
ggplot(trk1, aes(x_km, y_km, color = leg, group = 1)) +
  geom_path(linewidth = 0.5, alpha = 0.7) +
  geom_point(data = slice(trk1, 1), color = "black", size = 3, shape = 21, fill = "white") +
  geom_path(data = circles_df, aes(cx, cy, color = leg, group = leg), linewidth = 0.8) +
  geom_segment(
    data = arrows_df,
    aes(x = x0, y = y0, xend = x1, yend = y1, color = leg),
    arrow = arrow(length = unit(0.2, "cm"), type = "closed"),
    linewidth = 1
  ) +
  annotate("text", x = arrows_df$x1[1] + 1, y = arrows_df$y1[1] - 4,
           label = paste0("drift (", ref_dist, " km) + \u03c3 over t*"),
           size = 2.8, hjust = 0, color = "grey30") +
  coord_equal() +
  labs(x = "Easting from origin (km)", y = "Northing from origin (km)", color = "Leg")

# ---- 9) Simplified BBMM — diffusion estimate and utilization distribution ----
# Estimate sigma2 under the BB model: expected increment at each step is
# the straight-line guidance toward the endpoint, not zero drift as in BM.
t_bb    <- as.numeric(difftime(trk1$.time, trk1$.time[1], units = "days"))
T_bb    <- tail(t_bb, 1)
dt_bb   <- diff(t_bb)
dx_bb   <- diff(trk1$x_km)
dy_bb   <- diff(trk1$y_km)
t_start <- t_bb[-length(t_bb)]
t_end   <- t_bb[-1]
x_end   <- tail(trk1$x_km, 1)
y_end   <- tail(trk1$y_km, 1)

# BB guided drift toward endpoint at each step
mu_x_bb  <- (x_end - trk1$x_km[-nrow(trk1)]) / (T_bb - t_start) * dt_bb
mu_y_bb  <- (y_end - trk1$y_km[-nrow(trk1)]) / (T_bb - t_start) * dt_bb

# BB step variance weight: (T - t_i) / (T - t_{i+1}); exclude steps near endpoint
keep     <- (T_bb - t_start) > 0.1 & (T_bb - t_end) > 0.1
vw       <- (T_bb - t_start) / (T_bb - t_end)
resid_bb <- (dx_bb - mu_x_bb)^2 + (dy_bb - mu_y_bb)^2

sigma2_bb <- sum(resid_bb[keep] * vw[keep] / dt_bb[keep]) / (2 * sum(keep))
cat("BB sigma2 estimate:", round(sigma2_bb, 2), "km²/day\n")

# Step midpoints and BB midpoint variances
steps_bb <- trk1 |>
  arrange(.time) |>
  mutate(
    dt  = c(diff(as.numeric(difftime(.time, .time[1], units = "days"))), NA),
    mx  = (x_km + lead(x_km)) / 2,
    my  = (y_km + lead(y_km)) / 2,
    var = sigma2_bb * dt / 4
  ) |>
  filter(!is.na(dt))

# Evaluate UD on a grid; only the running sum is held in memory
pad <- 20  # km padding around cycle extent
gx  <- seq(min(trk1$x_km) - pad, max(trk1$x_km) + pad, length.out = 150)
gy  <- seq(min(trk1$y_km) - pad, max(trk1$y_km) + pad, length.out = 150)

ud <- Reduce("+", lapply(seq_len(nrow(steps_bb)), function(i) {
  v <- steps_bb$var[i]
  outer(
    exp(-(gx - steps_bb$mx[i])^2 / (2 * v)),
    exp(-(gy - steps_bb$my[i])^2 / (2 * v))
  ) / (2 * pi * v)
}))

ud <- ud / sum(ud)

# Density thresholds for 50% and 95% isopleths
ud_sorted <- sort(ud, decreasing = TRUE)
cum_ud    <- cumsum(ud_sorted)
thresh_50 <- ud_sorted[which(cum_ud >= 0.50)[1]]
thresh_95 <- ud_sorted[which(cum_ud >= 0.95)[1]]

ud_df <- expand.grid(x = gx, y = gy) |>
  mutate(p = as.vector(ud))

# Range area from the UD grid
cell_area <- (gx[2] - gx[1]) * (gy[2] - gy[1])
cat("95% UD range:", round(sum(ud > thresh_95) * cell_area), "km²\n")
cat("50% UD range:", round(sum(ud > thresh_50) * cell_area), "km²\n")

# Plot: single-animal BBMM utilization distribution with 50% and 95% isopleths
ggplot() +
  geom_raster(data = ud_df, aes(x, y, fill = p)) +
  scale_fill_viridis_c(option = "A", guide = "none") +
  geom_contour(data = ud_df, aes(x, y, z = p),
               breaks = c(thresh_50, thresh_95),
               color = "white", linewidth = 0.5) +
  geom_path(data = trk1, aes(x_km, y_km),
            linewidth = 0.3, alpha = 0.5, color = "white") +
  annotate("point", x = 0, y = 0, shape = 3, size = 4, color = "firebrick") +
  coord_equal() +
  labs(x = "Easting from origin (km)", y = "Northing from origin (km)")

