MDP.InducedMarkovChain

@[simp]

theorem MDP.P'_get {State : Type u_1} {Act : Type u_2} {s : State} {α : Act} {s' : State} {M : MDP State Act} {h : (M.P' s α).isSome = true} :

((M.P' s α).get h) s' = M.P s α s'

source

@[simp]

theorem MDP.MScheduler.P'_isSome {State : Type u_1} {Act : Type u_2} {s : State} {M : MDP State Act} (ℒ : 𝔏[M]) :

(M.P' s (ℒ {s})).isSome = true

source

@[simp]

theorem MDP.Scheduler.P'_isSome {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) (π : M.Path) :

(M.P' π.last (𝒮 π)).isSome = true

source

def MDP.inducedMC {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (ℒ : 𝔏[M]) (ι : State) :

MarkovChain State

Equations

M.inducedMC ℒ ι = { ι := ι, P := fun (s : State) => (M.P' s (ℒ {s})).get ⋯ }

Instances For

source

@[simp]

theorem MDP.inducedMC_P {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {ℒ : 𝔏[M]} {ι s s' : State} :

((M.inducedMC ℒ ι).P s) s' = M.P s (ℒ {s}) s'

source

def MDP.Path.toMC {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {ℒ : 𝔏[M]} {ι : State} (π : M.Path) (h : Prob (↑ℒ) π ≠ 0) (h' : π[0] = ι := by rfl) :

(M.inducedMC ℒ ι).Path

Equations

π.toMC h h' = { states := π.states, length_pos := ⋯, initial := ⋯, property := ⋯ }

Instances For

source

theorem MDP.inducedMC_cyl {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {ℒ : 𝔏[M]} (π : M.Path) (h' : Path.Prob (↑ℒ) π ≠ 0) :

MarkovChain.Pr (π.toMC h' ⋯).Cyl = Path.Prob (↑ℒ) π

If the path π has non-zero probability under the memoryless scheduler ℒ, then that probability equals the measure of the cylinder set generated by π, where π is viewed (via `Path.toMC`) as a path of the Markov chain induced by ℒ.

source

noncomputable def MDP.Path.pmf {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (π : M.Path) (𝒮 : 𝔖[M]) :

PMF M.Path

Equations

π.pmf 𝒮 = ((M.P' π.last (𝒮 π)).get ⋯).bindOnSupport fun (s : State) (hs : s ∈ ((M.P' π.last (𝒮 π)).get ⋯).support) => PMF.pure (π.extend ⟨s, ⋯⟩)

Instances For

source

noncomputable def MDP.inducedMC' {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (𝒮 : 𝔖[M]) (ι : State) :

MarkovChain M.Path

Equations

M.inducedMC' 𝒮 ι = { ι := {ι}, P := fun (π : M.Path) => π.pmf 𝒮 }

Instances For

source

@[simp]

theorem MDP.inducedMC'_P {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {ι : State} {𝒮 : 𝔖[M]} {s s' : M.Path} :

((M.inducedMC' 𝒮 ι).P s) s' = (s.pmf 𝒮) s'

source

def MDP.Path.toMC' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {ι : State} {𝒮 : 𝔖[M]} (π : M.Path) (h : Prob 𝒮 π ≠ 0) (h' : π[0] = ι := by rfl) :

(M.inducedMC' 𝒮 ι).Path

Equations

π.toMC' h h' = { states := List.ofFn fun (x : Fin ‖π‖) => π.take ↑x, length_pos := ⋯, initial := ⋯, property := ⋯ }

Instances For

source

theorem MDP.inducedMC'_cyl {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {𝒮 : 𝔖[M]} (π : M.Path) (h' : Path.Prob 𝒮 π ≠ 0) :

MarkovChain.Pr (π.toMC' h' ⋯).Cyl = Path.Prob 𝒮 π

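If the path π has non-zero probability under the scheduler 𝒮, then that probability equals the measure of the cylinder set generated by π, where π is viewed (via `Path.toMC'`) as a path of the Markov chain induced by 𝒮, whose states are the paths of M.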