MDP.Bellman

theorem iSup_iterate_succ' {α : Type u_1} [CompleteLattice α] (f : α → α) :

⨆ (n : ℕ), f^[n + 1] ⊥ = ⨆ (n : ℕ), f^[n] ⊥

theorem iSup_iterate_succ {α : Type u_1} [CompleteLattice α] (f : α → α) :

⨆ (n : ℕ), f^[n + 1] ⊥ = ⨆ (n : ℕ), f^[n] ⊥

theorem fixedPoints.lfp_eq_sSup_succ_iterate {α : Type u_1} [CompleteLattice α] (f : α →o α) (h : OmegaCompletePartialOrder.ωScottContinuous ⇑f) :

OrderHom.lfp f = ⨆ (n : ℕ), (⇑f)^[n + 1] ⊥

source

noncomputable def MDP.Φf {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (s : State) (α : Act) :

M.Costs →o ENNReal

Equations

MDP.Φf s α = { toFun := fun (v : M.Costs) => ∑' (s' : ↑(M.succs_univ s)), M.P s α ↑s' * v ↑s', monotone' := ⋯ }

Instances For

source

noncomputable def MDP.Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (O : Optimization) (c : M.Costs) :

M.Costs →o M.Costs

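The Bellman operator. Given an optimization mode `O` and a cost function `c`, it maps a value function `v` to the function sending each state `s` to `c s` plus the result of applying `O.sOpt (M.act s)` to `fun α => Φf s α v`.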

Equations

MDP.Φ O c = { toFun := fun (v : M.Costs) (s : State) => c s + (O.sOpt (M.act s)) fun (α : Act) => (MDP.Φf s α) v, monotone' := ⋯ }

Instances For

source

@[reducible, inline, deprecated "Φ 𝒟" (since := "2025-09-15")]

noncomputable abbrev MDP.dΦ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (c : M.Costs) :

M.Costs →o M.Costs

The demonic Bellman operator, i.e. `Φ Optimization.Demonic c`. Deprecated since 2025-09-15; use `Φ 𝒟` instead.

Equations

MDP.dΦ c = MDP.Φ Optimization.Demonic c

Instances For

source

@[reducible, inline, deprecated "Φ 𝒜" (since := "2025-09-15")]

noncomputable abbrev MDP.aΦ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (c : M.Costs) :

M.Costs →o M.Costs

The angelic Bellman operator, i.e. `Φ Optimization.Angelic c`. Deprecated since 2025-09-15; use `Φ 𝒜` instead.

Equations

MDP.aΦ c = MDP.Φ Optimization.Angelic c

Instances For

source

noncomputable def MDP.Φℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (ℒ : 𝔏[M]) (c : M.Costs) :

M.Costs →o M.Costs

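The Bellman operator with a fixed scheduler `ℒ` (necessarily Markovian): rather than optimizing over the available actions, it uses the action `ℒ {s}` in each state `s`.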

Equations

MDP.Φℒ ℒ c = { toFun := fun (v : M.Costs) (s : State) => c s + (MDP.Φf s (ℒ {s})) v, monotone' := ⋯ }

Instances For

source

theorem MDP.Φ.monotone' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {O : Optimization} :

Monotone (Φ O)

source

theorem MDP.dΦ_le_Φℒ {State✝ : Type u_3} {Act✝ : Type u_4} {M✝ : MDP State✝ Act✝} {ℒ : 𝔏[M✝]} :

Φ Optimization.Demonic ≤ Φℒ ℒ

source

@[deprecated "lfp (M.Φ O)" (since := "2025-09-15")]

noncomputable def MDP.lfp_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :

M.Costs → M.Costs

Equations

MDP.lfp_Φ = ⇑OrderHom.lfp ∘ MDP.Φ Optimization.Demonic

Instances For

source

theorem MDP.iSup_succ_Φ_eq_iSup_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (O : Optimization) (c : M.Costs) :

⨆ (n : ℕ), (⇑(Φ O c))^[n + 1] ⊥ = ⨆ (n : ℕ), (⇑(Φ O c))^[n] ⊥

source

theorem MDP.iSup_succ_Φ_eq_iSup_Φ_apply {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {x : State} (O : Optimization) (c : M.Costs) :

⨆ (n : ℕ), (⇑(Φ O c))^[n + 1] ⊥ x = ⨆ (n : ℕ), (⇑(Φ O c))^[n] ⊥ x

source

noncomputable def MDP.lfp_Φℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (ℒ : 𝔏[M]) :

M.Costs → M.Costs

Equations

MDP.lfp_Φℒ ℒ = ⇑OrderHom.lfp ∘ MDP.Φℒ ℒ

Instances For

source

theorem MDP.map_lfp_Φℒ {State✝ : Type u_3} {Act✝ : Type u_4} {M✝ : MDP State✝ Act✝} {c : 𝔏[M✝]} {𝒮 : M✝.Costs} :

(Φℒ c 𝒮) (lfp_Φℒ c 𝒮) = lfp_Φℒ c 𝒮

source

theorem MDP.Φf_ωScottContinuous {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {a : Act} :

OmegaCompletePartialOrder.ωScottContinuous ⇑(Φf s a)

source

theorem MDP.Φℒ_ωScottContinuous {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {c : 𝔏[M]} {ℒ : M.Costs} :

OmegaCompletePartialOrder.ωScottContinuous ⇑(Φℒ c ℒ)

source

theorem MDP.lfp_Φℒ_eq_iSup_Φℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :

lfp_Φℒ = fun (c : 𝔏[M]) (ℒ : M.Costs) => ⨆ (n : ℕ), (⇑(Φℒ c ℒ))^[n] ⊥

source

theorem MDP.lfp_Φℒ_eq_iSup_succ_Φℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :

lfp_Φℒ = fun (c : 𝔏[M]) (ℒ : M.Costs) => ⨆ (n : ℕ), (⇑(Φℒ c ℒ))^[n + 1] ⊥

source

class Optimization.ΦContinuous {S : Type u_3} {A : Type u_4} (O : Optimization) (M : MDP S A) :

Prop

Φ_continuous (c : M.Costs) : OmegaCompletePartialOrder.ωScottContinuous ⇑(MDP.Φ O c)

Instances

source

theorem MDP.lfp_Φ_eq_iSup_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {c : M.Costs} {O : Optimization} [i : O.ΦContinuous M] :

OrderHom.lfp (Φ O c) = ⨆ (n : ℕ), (⇑(Φ O c))^[n] ⊥

source

theorem MDP.lfp_Φ_eq_iSup_succ_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {c : M.Costs} {O : Optimization} [i : O.ΦContinuous M] :

OrderHom.lfp (Φ O c) = ⨆ (n : ℕ), (⇑(Φ O c))^[n + 1] ⊥

source

theorem MDP.Φ_𝒜_ωScottContinuous {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {c : M.Costs} :

OmegaCompletePartialOrder.ωScottContinuous ⇑(Φ Optimization.Angelic c)

source

instance MDP.instΦContinuousAngelic {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :

Optimization.Angelic.ΦContinuous M

source

theorem MDP.Φ_𝒟_ωScottContinuous {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [M.FiniteBranching] {c : M.Costs} :

OmegaCompletePartialOrder.ωScottContinuous ⇑(Φ Optimization.Demonic c)

source

instance MDP.instΦContinuousDemonic {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [M.FiniteBranching] :

Optimization.Demonic.ΦContinuous M

source

instance MDP.instΦContinuous {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [M.FiniteBranching] {O : Optimization} :

O.ΦContinuous M