library(httr)
library(jsonlite)
library(spatstat.geom)
library(mgcv)
library(hawkesbow)

## --- data ---

# pull USGS earthquake data near The Geysers (CA) over 2025-01-01 to 2026-02-01
base <- "https://earthquake.usgs.gov/fdsnws/event/1/query"
q <- list(
  format = "geojson",
  starttime = "2025-01-01",
  endtime   = "2026-02-01",
  latitude  = 38.78,
  longitude = -122.75,
  maxradiuskm = 30,
  minmagnitude = 2.0,
  orderby = "time-asc",
  limit = 20000
)
h <- handle(base)
res <- GET(base, query = q, handle = h)
stop_for_status(res)
x <- content(res, as = "text", encoding = "UTF-8")
gj <- fromJSON(x, simplifyVector = FALSE)

# flatten features
feat <- gj$features
df <- data.frame(
  time = as.POSIXct(vapply(feat, \(f) f$properties$time, numeric(1)) / 1000,
                    origin = "1970-01-01", tz = "UTC"),
  mag  = vapply(feat, \(f) f$properties$mag,  numeric(1)),
  place= vapply(feat, \(f) f$properties$place, character(1)),
  lon  = vapply(feat, \(f) f$geometry$coordinates[[1]], numeric(1)),
  lat  = vapply(feat, \(f) f$geometry$coordinates[[2]], numeric(1)),
  depth_km = vapply(feat, \(f) f$geometry$coordinates[[3]], numeric(1))
)

# raster plot
t <- sort(df$time)
plot(range(t), c(0, 1), type = "n",
     yaxt = "n", ylab = "", xlab = "Time",
     main = "Event raster")
abline(v = t, col = "grey40", lwd = 0.5)

## --- homogeneous poisson process ---

df <- df[order(df$time), ]

# count process
plot(df$time, seq_len(nrow(df)), type = "s",
     xlab = "Time", ylab = expression(N[t]))

# arrival and interarrival times
t <- sort(as.numeric(df$time))
dt <- diff(t)

# MLE for intensity
lambda_hat <- 1 / mean(dt)

# histogram + fitted exponential density
hist(dt, breaks = 50, freq = FALSE,
     main = "Interarrival times", xlab = "dt (seconds)")
curve(dexp(x, rate = lambda_hat), add = TRUE, lty = 2)

# Q-Q plot relative to exponential distribution (log scale): see "bursts"
qqplot(qexp(ppoints(length(dt)), rate = lambda_hat), dt, log = "xy",
       xlab = "Fitted", ylab = "Observed",
       main = "Q-Q vs Exp (log-log)")
abline(0, 1, lty = 2)

## --- nonhomogeneous poisson process ---

# rescale (seconds -> days) and reindex
t0 <- min(t)
t_days <- (t - t0) / 86400
t_event <- t_days

# bin time
breaks <- seq(0, max(t_days), length.out = 201)
y <- hist(t_days, breaks = breaks, plot = FALSE)$counts
dt_bin <- diff(breaks)
t_mid <- (breaks[-1] + breaks[-length(breaks)]) / 2

# data for modeling
dfb <- data.frame(y = y, t = t_mid, dt = dt_bin)

# observed count process on bin edges
N_obs <- findInterval(breaks, t_event)

# fit via Poisson GAM
fit_gam <- gam(y ~ s(t, bs = "cr", k = 60) + offset(log(dt)),
               family = poisson(),
               method = "REML",
               sp = 2,
               data = dfb)

# intensity function
mu_hat_gam <- predict(fit_gam, type = "response")  # counts/bin
lambda_hat_gam <- mu_hat_gam / dfb$dt              # events/day
plot(t_mid, lambda_hat_gam, type = "l",
     xlab = "t (days)", ylab = expression(hat(lambda)(t)),
     main = "Intensity (GAM fit)")

# cumulative intensity
Lambda_hat_gam <- c(0, cumsum(mu_hat_gam))
plot(breaks, Lambda_hat_gam, type = "s", col = "blue",
     xlab = "t (days)", ylab = expression(hat(Lambda)(t)),
     main = "Cumulative intensity (GAM fit)")
lines(breaks, N_obs, type = "s", lwd = 1)

# rescaled interarrival times
u <- diff(approx(breaks, Lambda_hat_gam,
                 xout = t_event, rule = 2, ties = "ordered")$y)

# still see "bursts"
qqplot(qexp(ppoints(length(u)), rate = 1), u, log = "xy",
       xlab = "Expected", ylab = "Observed",
       main = "Q-Q: rescaled interarrivals (GAM fit)")
abline(0, 1, lty = 2)

## --- fit power law kernel hawkes model ---

# model inputs
times <- sort(t_days)
Tmax  <- max(times)

# compute mle
opts <- list(
  xtol_rel = 1e-8,
  ftol_rel = 1e-10,
  maxeval  = 5000,
  print_level = 0
)
fit_h <- mle(times, end = Tmax, kern = "PowerLaw", opts = opts)

# parameter estimates
theta_hat <- fit_h$par  # (eta, mu, shape, scale)

# plot conditional intensity
grid <- seq(0, Tmax, length.out = 2000)
lam_h <- intensity(times, grid,
                   fun = theta_hat[2], repr = theta_hat[1],
                   family = "powerlaw", shape = theta_hat[3], scale = theta_hat[4])
plot(grid, log(lam_h), type = "l",
     xlab = "t (days since start)",
     ylab = "log conditional intensity")
rug(times)

# zoom in a little: can see the decay in intensity after events
plot(grid, log(lam_h), type = "l",
     xlim = c(100, 150),
     ylim = c(0, 0.6),
     xlab = "t (days since start)",
     ylab = "log conditional intensity")
rug(times)

# check diagnostics on rescaled interarrivals (takes a minute)
u <- diff(compensator(times, times,
                      fun = theta_hat[2], repr = theta_hat[1],
                      family = "powerlaw", shape = theta_hat[3], scale = theta_hat[4]))

# better!
c(obs = mean(u < 0.1), exp = pexp(0.1))
qqplot(qexp(ppoints(length(u))), u, log = "xy",
       xlab = "Expected", ylab = "Observed",
       main = "Q-Q Plot (Power-law Hawkes)")
abline(0, 1, lty = 2)
