MDP.Bellman

theorem iSup_iterate_succ' {α : Type u_1} [CompleteLattice α] (f : α → α) :
⨆ (n : ℕ), f^[n + 1] ⊥ = ⨆ (n : ℕ), f^[n] ⊥
theorem iSup_iterate_succ {α : Type u_1} [CompleteLattice α] (f : α → α) :
⨆ (n : ℕ), f^[n + 1] ⊥ = ⨆ (n : ℕ), f^[n] ⊥
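These two lemmas say that, over a complete lattice, shifting the iteration index does not change the supremum of the iterates of `f` applied at `⊥`: the only term appearing on one side and not the other is `f^[0] ⊥ = ⊥`, which lies below everything. A self-contained replay of that argument (a sketch against Mathlib, not the library's proof):

```lean
import Mathlib

-- Dropping or adding the `n = 0` term does not change the supremum of the
-- iterates at `⊥`, because `f^[0] ⊥ = ⊥` is below every element.
example {α : Type*} [CompleteLattice α] (f : α → α) :
    (⨆ n : ℕ, f^[n + 1] ⊥) = ⨆ n : ℕ, f^[n] ⊥ := by
  apply le_antisymm
  -- Every shifted term `f^[n + 1] ⊥` also occurs on the right-hand side.
  · exact iSup_le fun n => le_iSup (fun m => f^[m] ⊥) (n + 1)
  -- Conversely: the `n = 0` term is `⊥`; every later term occurs on the left.
  · refine iSup_le fun n => ?_
    cases n with
    | zero => exact bot_le
    | succ n => exact le_iSup (fun m => f^[m + 1] ⊥) n
```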
noncomputable def MDP.Φf {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (s : State) (α : Act) :
M.Costs →o ENNReal
noncomputable def MDP.Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (c : M.Costs) :
M.Costs →o M.Costs

The Bellman operator.

Equations
• MDP.Φ c = { toFun := fun (v : M.Costs) (s : State) => c s + ⨅ (α : ↥(M.act s)), (MDP.Φf s α) v, monotone' := ⋯ }
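Per the equation above, `Φ c` sends a value vector `v` to `fun s => c s + ⨅ α, Φf s α v`: the cost at `s` plus the demonic (infimum) one-step continuation. The `monotone'` field amounts to the pointwise inequality below, replayed self-containedly with a hypothetical `g` standing in for `Φf` and costs assumed to live in `ENNReal`:

```lean
import Mathlib

-- Pointwise monotonicity of a Bellman-shaped operator. `g` is a hypothetical
-- stand-in for `MDP.Φf`, and `S → ENNReal` stands in for `M.Costs`.
example {S A : Type*} (c : S → ENNReal) (g : S → A → ((S → ENNReal) →o ENNReal))
    {v w : S → ENNReal} (hvw : v ≤ w) (s : S) :
    c s + (⨅ a : A, g s a v) ≤ c s + ⨅ a : A, g s a w :=
  -- The cost term is fixed; the infima are compared term by term through `g s a`.
  add_le_add le_rfl (le_iInf fun a => iInf_le_of_le a ((g s a).mono hvw))
```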
noncomputable def MDP.Φℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (ℒ : 𝔏[M]) (c : M.Costs) :
M.Costs →o M.Costs

The Bellman operator with a fixed scheduler (necessarily Markovian).
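By analogy with `Φ` above (an assumption for orientation, not an equation stated in this file), fixing a Markovian scheduler should replace the demonic infimum over enabled actions by the scheduler's choice in the current state:

```lean
-- Assumed shape only, where `ℒ s` denotes the action the Markovian scheduler
-- `ℒ` picks in state `s`:
--   (Φℒ ℒ c) v s = c s + (Φf s (ℒ s)) v
```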
theorem MDP.Φ.monotone' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :
theorem MDP.Φ_le_Φℒ {State✝ : Type u_3} {Act✝ : Type u_4} {M✝ : MDP State✝ Act✝} {ℒ : 𝔏[M✝]} :
Φ ≤ Φℒ ℒ
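The comparison `Φ ≤ Φℒ ℒ` comes down to one lattice fact: an infimum over all enabled actions is bounded by the value at the single action the scheduler picks. Self-contained, again taking `ENNReal`-valued costs as an assumption:

```lean
import Mathlib

-- The one-line fact behind `Φ_le_Φℒ`: an infimum is below each of its terms.
example {A : Type*} (h : A → ENNReal) (a : A) : (⨅ b, h b) ≤ h a :=
  iInf_le h a
```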
noncomputable def MDP.lfp_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :
M.Costs → M.Costs
theorem MDP.iSup_succ_Φ_eq_iSup_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (c : M.Costs) :
⨆ (n : ℕ), (⇑(Φ c))^[n + 1] ⊥ = ⨆ (n : ℕ), (⇑(Φ c))^[n] ⊥
theorem MDP.map_lfp_Φ {State✝ : Type u_3} {Act✝ : Type u_4} {x✝ : MDP State✝ Act✝} {c : x✝.Costs} :
(Φ c) (lfp_Φ c) = lfp_Φ c
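`map_lfp_Φ` says `lfp_Φ c` is a genuine fixed point of `Φ c`. Since `Φ c` is a monotone self-map of the complete lattice `M.Costs`, this is Knaster–Tarski; assuming `lfp_Φ` is built on Mathlib's `OrderHom.lfp`, the general fact reads:

```lean
import Mathlib

-- Knaster–Tarski: the least fixed point of a monotone self-map of a complete
-- lattice is a fixed point. `MDP.map_lfp_Φ` is this, specialized to `Φ c`.
example {α : Type*} [CompleteLattice α] (f : α →o α) :
    f (OrderHom.lfp f) = OrderHom.lfp f :=
  f.map_lfp
```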
noncomputable def MDP.lfp_Φℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (ℒ : 𝔏[M]) :
M.Costs → M.Costs
theorem MDP.map_lfp_Φℒ {State✝ : Type u_3} {Act✝ : Type u_4} {M✝ : MDP State✝ Act✝} {c : 𝔏[M✝]} {𝒮 : M✝.Costs} :
(Φℒ c 𝒮) (lfp_Φℒ c 𝒮) = lfp_Φℒ c 𝒮
theorem MDP.Φf_ωScottContinuous {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {a : Act} :
ωScottContinuous ⇑(Φf s a)
theorem MDP.Φℒ_ωScottContinuous {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {ℒ : 𝔏[M]} {c : M.Costs} :
ωScottContinuous ⇑(Φℒ ℒ c)
theorem MDP.lfp_Φℒ_eq_iSup_Φℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :
lfp_Φℒ = fun (ℒ : 𝔏[M]) (c : M.Costs) => ⨆ (n : ℕ), (⇑(Φℒ ℒ c))^[n] ⊥
theorem MDP.lfp_Φℒ_eq_iSup_succ_Φℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :
lfp_Φℒ = fun (ℒ : 𝔏[M]) (c : M.Costs) => ⨆ (n : ℕ), (⇑(Φℒ ℒ c))^[n + 1] ⊥
theorem MDP.Φ_ωScottContinuous {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [M.FiniteBranching] {c : M.Costs} :
ωScottContinuous ⇑(Φ c)
theorem MDP.lfp_Φ_eq_iSup_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [M.FiniteBranching] :
lfp_Φ = fun (c : M.Costs) => ⨆ (n : ℕ), (⇑(Φ c))^[n] ⊥
theorem MDP.lfp_Φ_eq_iSup_succ_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [M.FiniteBranching] :
lfp_Φ = fun (c : M.Costs) => ⨆ (n : ℕ), (⇑(Φ c))^[n + 1] ⊥
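These last characterizations are Kleene-style fixed-point iterations: under `M.FiniteBranching`, `Φ c` is ω-Scott continuous, and its least fixed point is the supremum of its iterates from `⊥` (value iteration). The direction that needs no continuity at all can be replayed self-containedly for any monotone map:

```lean
import Mathlib

-- The continuity-free half of the Kleene characterization: every iterate from
-- `⊥` stays below the least fixed point, hence so does the supremum. ω-Scott
-- continuity (here supplied by `FiniteBranching`) is needed for the converse.
example {α : Type*} [CompleteLattice α] (f : α →o α) :
    (⨆ n : ℕ, (⇑f)^[n] ⊥) ≤ OrderHom.lfp f := by
  refine iSup_le fun n => ?_
  induction n with
  | zero => exact bot_le
  | succ n ih =>
    calc (⇑f)^[n + 1] ⊥ = f ((⇑f)^[n] ⊥) := Function.iterate_succ_apply' (⇑f) n ⊥
      _ ≤ f (OrderHom.lfp f) := f.mono ih
      _ = OrderHom.lfp f := f.map_lfp
```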