source('https://ruizt.quarto.pub/stat545/scripts/long-run-sim-fns.R')

## Part 1: two-state chain -----------------------------------------------------

P_twostate <- function(p, q) {
  matrix(c(p, 1 - p,
           1 - q, q),
         nrow = 2, byrow = TRUE)
}

## Case 1 (strong limit): fast-mixing two-state chain
##
## Prompt: Choose p and q so the chain is NOT deterministic and NOT “sticky”.
## In particular, avoid p or q equal to 0 or 1. (Example: p = 0.4, q = 0.8.)
## Then run the plots and notice that:
## - empirical marginals (across simulations at fixed n) stabilize as n increases
## - time averages also stabilize to the same value

P <- P_twostate(p = 0.6, q = 0.7)

paths <- sim_mc_many(P,
                     nsim = 10000,
                     nstep = 1000,
                     init = c(0.5, 0.5))

# (1) A few raw paths
plot_paths(paths,
           n_max = 100,
           jitter = 0.01)

# (2) Empirical marginals over time: proportion of simulations in each state at time n
plot_empirical_marginals(paths,
                         states = 1,
                         n_max = 200,
                         alpha = 0.8)

# (3) Per-path running time averages (state 1)
plot_timeavgs(paths,
              states = 1,
              n_max = 400,
              jitter = 0)

# (4) Average running time averages across paths (state 1)
plot_avg_timeavgs(paths,
                  states = 1,
                  n_max = 400)

## Discussion questions for case 1:
## Q1. What is each plot showing (paths vs marginals vs time averages)? Which one is estimating the limiting distribution?
## Q2. Do the empirical marginals and the average running time average agree near n_max?
## Q3. Further explorations (optional): increase n_max, show both states (states = c(1, 2)), vary the initial distribution.


## Case 2 (no limit): oscillating two-state chain
##
## Prompt: Choose p and q so the chain alternates deterministically between states:
##   P(1 -> 2) = 1 and P(2 -> 1) = 1.
## In terms of p = P(1->1) and q = P(2->2), what values make that happen?
## Then inspect the same plots as before and notice/discuss what changes.

P_osc <- P_twostate(p = ..., q = ...)

paths_osc <- sim_mc_many(P_osc,
                         nsim = 10000,
                         nstep = 500,
                         init = c(0.3, 0.7))

# (1) A few raw paths
plot_paths(paths_osc,
           n_max = 100,
           jitter = 0.01)

# (2) Empirical marginals over time (will oscillate)
plot_empirical_marginals(paths_osc,
                         states = c(1, 2),
                         n_max = 20,
                         alpha = 0.8)

# (3) Per-path running time averages (state 1)
plot_timeavgs(paths_osc,
              states = 1,
              n_max = 200,
              jitter = 0)

# (4) Average running time averages across paths (state 1)
plot_avg_timeavgs(paths_osc,
                  states = 1,
                  n_max = 200)

## Discussion question (Case 2)
## Q1. As n increases, which plots still stabilize and which do not?


## Part 2: Random walks on a cycle graph with 4 states -------------------------

# transition matrix
# Graph: 1--2--3--4--1
P_rw <- matrix(c(
  0,   1/2, 0,   1/2,
  1/2, 0,   1/2, 0,
  0,   1/2, 0,   1/2,
  1/2, 0,   1/2, 0
), nrow = 4, byrow = TRUE)


## Case 1: (non-lazy random walk)
##
## Prompt: Inspect the plots below. The chain alternates between the two classes
## {1,3} and {2,4}, so empirical marginals at time n can keep oscillating.

paths_rw <- sim_mc_many(P_rw,
                        nsim = 10000,
                        nstep = 500,
                        x0 = 1)

# (1) A few raw paths
plot_paths(paths_rw,
           n_max = 100,
           jitter = 0.01)

# (2) Empirical marginals over time
plot_empirical_marginals(paths_rw,
                         states = 1:4,
                         n_max = 20,
                         alpha = 0.6)

# (3) Per-path running time averages
plot_timeavgs(paths_rw,
              states = 1:4,
              n_max = 400,
              jitter = 0)

# (4) Average running time averages across paths
plot_avg_timeavgs(paths_rw,
                  states = 1:4,
                  n_max = 100,
                  alpha = 0.6)

## Discussion question (Case 1)
## Q1. As n increases, which plots stabilize and which do not?


## Case 2: lazy random walk
##
## Prompt: Modify the walk so it stays put with probability 1/2 at each step.
## Inspect the same plots as before and compare.

P_lazy <- 0.5 * diag(4) + 0.5 * P_rw

paths_lazy <- sim_mc_many(P_lazy,
                          nsim = 10000,
                          nstep = 500,
                          x0 = 1)

# (1) A few raw paths
plot_paths(paths_lazy,
           n_max = 100,
           jitter = 0.01)

# (2) Empirical marginals over time (should stabilize now)
plot_empirical_marginals(paths_lazy,
                         states = 1:4,
                         n_max = 20,
                         alpha = 0.8)

# (3) Per-path running time averages
plot_timeavgs(paths_lazy,
              states = 1:4,
              n_max = 400,
              jitter = 0)

# (4) Average running time averages across paths
plot_avg_timeavgs(paths_lazy,
                  states = 1:4,
                  n_max = 100)

## Discussion question (Case 2)
## Q1. Compared to the non-lazy walk, which plots change behavior the most, and what is different?