MDP.Bellman

theorem iSup_iterate_succ' {α : Type u_1} [CompleteLattice α] (f : α → α) :
⨆ (n : ℕ), f^[n + 1] ⊥ = ⨆ (n : ℕ), f^[n] ⊥
theorem iSup_iterate_succ {α : Type u_1} [CompleteLattice α] (f : α → α) :
⨆ (n : ℕ), f^[n + 1] ⊥ = ⨆ (n : ℕ), f^[n] ⊥
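These two lemmas say that, over a complete lattice, shifting the iteration index does not change the supremum of the iterates of `f` applied at `⊥`: the only term appearing on one side and not the other is `f^[0] ⊥ = ⊥`, which lies below everything. A self-contained replay of that argument (a sketch against Mathlib, not the library's proof):

```lean
import Mathlib

-- Dropping or adding the `n = 0` term does not change the supremum of the
-- iterates at `⊥`, because `f^[0] ⊥ = ⊥` is below every element.
example {α : Type*} [CompleteLattice α] (f : α → α) :
    (⨆ n : ℕ, f^[n + 1] ⊥) = ⨆ n : ℕ, f^[n] ⊥ := by
  apply le_antisymm
  -- Every shifted term `f^[n + 1] ⊥` also occurs on the right-hand side.
  · exact iSup_le fun n => le_iSup (fun m => f^[m] ⊥) (n + 1)
  -- Conversely: the `n = 0` term is `⊥`; every later term occurs on the left.
  · refine iSup_le fun n => ?_
    cases n with
    | zero => exact bot_le
    | succ n => exact le_iSup (fun m => f^[m + 1] ⊥) n
```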
noncomputable def MDP.Φf {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (s : State) (α : Act) :
M.Costs →o ENNReal
noncomputable def MDP.Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (c : M.Costs) :
M.Costs →o M.Costs

The Bellman operator.

Equations
• MDP.Φ c = { toFun := fun (v : M.Costs) (s : State) => c s + ⨅ (α : ↥(M.act s)), (MDP.Φf s α) v, monotone' := ⋯ }
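Per the equation above, `Φ c` sends a value vector `v` to `fun s => c s + ⨅ α, Φf s α v`: the cost at `s` plus the demonic (infimum) one-step continuation. The `monotone'` field amounts to the pointwise inequality below, replayed self-containedly with a hypothetical `g` standing in for `Φf` and costs assumed to live in `ENNReal`:

```lean
import Mathlib

-- Pointwise monotonicity of a Bellman-shaped operator. `g` is a hypothetical
-- stand-in for `MDP.Φf`, and `S → ENNReal` stands in for `M.Costs`.
example {S A : Type*} (c : S → ENNReal) (g : S → A → ((S → ENNReal) →o ENNReal))
    {v w : S → ENNReal} (hvw : v ≤ w) (s : S) :
    c s + (⨅ a : A, g s a v) ≤ c s + ⨅ a : A, g s a w :=
  -- The cost term is fixed; the infima are compared term by term through `g s a`.
  add_le_add le_rfl (le_iInf fun a => iInf_le_of_le a ((g s a).mono hvw))
```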
noncomputable def MDP.Φℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (ℒ : 𝔏[M]) (c : M.Costs) :
M.Costs →o M.Costs

The Bellman operator with a fixed scheduler (necessarily Markovian).
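By analogy with `Φ` above (an assumption for orientation, not an equation stated in this file), fixing a Markovian scheduler should replace the demonic infimum over enabled actions by the scheduler's choice in the current state:

```lean
-- Assumed shape only, where `ℒ s` denotes the action the Markovian scheduler
-- `ℒ` picks in state `s`:
--   (Φℒ ℒ c) v s = c s + (Φf s (ℒ s)) v
```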
theorem MDP.Φ.monotone' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :
theorem MDP.Φ_le_Φℒ {State✝ : Type u_3} {Act✝ : Type u_4} {M✝ : MDP State✝ Act✝} {ℒ : 𝔏[M✝]} :
Φ ≤ Φℒ ℒ
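The comparison `Φ ≤ Φℒ ℒ` comes down to one lattice fact: an infimum over all enabled actions is bounded by the value at the single action the scheduler picks. Self-contained, again taking `ENNReal`-valued costs as an assumption:

```lean
import Mathlib

-- The one-line fact behind `Φ_le_Φℒ`: an infimum is below each of its terms.
example {A : Type*} (h : A → ENNReal) (a : A) : (⨅ b, h b) ≤ h a :=
  iInf_le h a
```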
noncomputable def MDP.lfp_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :
M.Costs → M.Costs
theorem MDP.iSup_succ_Φ_eq_iSup_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (c : M.Costs) :
⨆ (n : ℕ), (⇑(Φ c))^[n + 1] ⊥ = ⨆ (n : ℕ), (⇑(Φ c))^[n] ⊥
theorem MDP.map_lfp_Φ {State✝ : Type u_3} {Act✝ : Type u_4} {x✝ : MDP State✝ Act✝} {c : x✝.Costs} :
(Φ c) (lfp_Φ c) = lfp_Φ c
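`map_lfp_Φ` says `lfp_Φ c` is a genuine fixed point of `Φ c`. Since `Φ c` is a monotone self-map of the complete lattice `M.Costs`, this is Knaster–Tarski; assuming `lfp_Φ` is built on Mathlib's `OrderHom.lfp`, the general fact reads:

```lean
import Mathlib

-- Knaster–Tarski: the least fixed point of a monotone self-map of a complete
-- lattice is a fixed point. `MDP.map_lfp_Φ` is this, specialized to `Φ c`.
example {α : Type*} [CompleteLattice α] (f : α →o α) :
    f (OrderHom.lfp f) = OrderHom.lfp f :=
  f.map_lfp
```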
noncomputable def MDP.lfp_Φℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (ℒ : 𝔏[M]) :
M.Costs → M.Costs
theorem MDP.map_lfp_Φℒ {State✝ : Type u_3} {Act✝ : Type u_4} {M✝ : MDP State✝ Act✝} {c : 𝔏[M✝]} {𝒮 : M✝.Costs} :
(Φℒ c 𝒮) (lfp_Φℒ c 𝒮) = lfp_Φℒ c 𝒮
theorem MDP.Φf_ωScottContinuous {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {a : Act} :
ωScottContinuous ⇑(Φf s a)
theorem MDP.Φℒ_ωScottContinuous {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {ℒ : 𝔏[M]} {c : M.Costs} :
ωScottContinuous ⇑(Φℒ ℒ c)
theorem MDP.lfp_Φℒ_eq_iSup_Φℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :
lfp_Φℒ = fun (ℒ : 𝔏[M]) (c : M.Costs) => ⨆ (n : ℕ), (⇑(Φℒ ℒ c))^[n] ⊥
theorem MDP.lfp_Φℒ_eq_iSup_succ_Φℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :
lfp_Φℒ = fun (ℒ : 𝔏[M]) (c : M.Costs) => ⨆ (n : ℕ), (⇑(Φℒ ℒ c))^[n + 1] ⊥
theorem MDP.Φ_ωScottContinuous {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [M.FiniteBranching] {c : M.Costs} :
ωScottContinuous ⇑(Φ c)
theorem MDP.lfp_Φ_eq_iSup_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [M.FiniteBranching] :
lfp_Φ = fun (c : M.Costs) => ⨆ (n : ℕ), (⇑(Φ c))^[n] ⊥
theorem MDP.lfp_Φ_eq_iSup_succ_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [M.FiniteBranching] :
lfp_Φ = fun (c : M.Costs) => ⨆ (n : ℕ), (⇑(Φ c))^[n + 1] ⊥
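These last characterizations are Kleene-style fixed-point iterations: under `M.FiniteBranching`, `Φ c` is ω-Scott continuous, and its least fixed point is the supremum of its iterates from `⊥` (value iteration). The direction that needs no continuity at all can be replayed self-containedly for any monotone map:

```lean
import Mathlib

-- The continuity-free half of the Kleene characterization: every iterate from
-- `⊥` stays below the least fixed point, hence so does the supremum. ω-Scott
-- continuity (here supplied by `FiniteBranching`) is needed for the converse.
example {α : Type*} [CompleteLattice α] (f : α →o α) :
    (⨆ n : ℕ, (⇑f)^[n] ⊥) ≤ OrderHom.lfp f := by
  refine iSup_le fun n => ?_
  induction n with
  | zero => exact bot_le
  | succ n ih =>
    calc (⇑f)^[n + 1] ⊥ = f ((⇑f)^[n] ⊥) := Function.iterate_succ_apply' (⇑f) n ⊥
      _ ≤ f (OrderHom.lfp f) := f.mono ih
      _ = OrderHom.lfp f := f.map_lfp
```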