Documentation

MDP.InducedMarkovChain

@[simp]
theorem MDP.P'_get {State : Type u_1} {Act : Type u_2} {s : State} {α : Act} {s' : State} {M : MDP State Act} {h : (M.P' s α).isSome = true} :
((M.P' s α).get h) s' = M.P s α s'
@[simp]
theorem MDP.MScheduler.P'_isSome {State : Type u_1} {Act : Type u_2} {s : State} {M : MDP State Act} ( : 𝔏[M]) :
(M.P' s ( {s})).isSome = true
@[simp]
theorem MDP.Scheduler.P'_isSome {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) (π : M.Path) :
(M.P' π.last (𝒮 π)).isSome = true
def MDP.inducedMC {State : Type u_1} {Act : Type u_2} (M : MDP State Act) ( : 𝔏[M]) (ι : State) :
Equations
Instances For
    @[simp]
    theorem MDP.inducedMC_P {State : Type u_1} {Act : Type u_2} {M : MDP State Act} { : 𝔏[M]} {ι s s' : State} :
    ((M.inducedMC ι).P s) s' = M.P s ( {s}) s'
    def MDP.Path.toMC {State : Type u_1} {Act : Type u_2} {M : MDP State Act} { : 𝔏[M]} {ι : State} (π : M.Path) (h : Prob (↑) π 0) (h' : π[0] = ι := by rfl) :
    (M.inducedMC ι).Path
    Equations
    • π.toMC h h' = { states := π.states, length_pos := , initial := , property := }
    Instances For
      theorem MDP.inducedMC_cyl {State : Type u_1} {Act : Type u_2} {M : MDP State Act} { : 𝔏[M]} (π : M.Path) (h' : Path.Prob (↑) π 0) :
      MarkovChain.Pr (π.toMC h' ).Cyl = Path.Prob (↑) π

      The probability of a path π with memoryless scheduler is the measure of the cylinder set generated by π on the induced Markov Chain using .

      noncomputable def MDP.Path.pmf {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (π : M.Path) (𝒮 : 𝔖[M]) :
      Equations
      Instances For
        noncomputable def MDP.inducedMC' {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (𝒮 : 𝔖[M]) (ι : State) :
        Equations
        Instances For
          @[simp]
          theorem MDP.inducedMC'_P {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {ι : State} {𝒮 : 𝔖[M]} {s s' : M.Path} :
          ((M.inducedMC' 𝒮 ι).P s) s' = (s.pmf 𝒮) s'
          def MDP.Path.toMC' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {ι : State} {𝒮 : 𝔖[M]} (π : M.Path) (h : Prob 𝒮 π 0) (h' : π[0] = ι := by rfl) :
          (M.inducedMC' 𝒮 ι).Path
          Equations
          Instances For
            theorem MDP.inducedMC'_cyl {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {𝒮 : 𝔖[M]} (π : M.Path) (h' : Path.Prob 𝒮 π 0) :
            MarkovChain.Pr (π.toMC' h' ).Cyl = Path.Prob 𝒮 π

            The probability of a path π with scheduler 𝒮 is the measure of the cylinder set generated by π on the induced Markov Chain using 𝒮.