MDP.OptimalCost

source

noncomputable def MDP.EC {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (c : M.Costs) (𝒮 : 𝔖[M]) (n : ℕ) (s : State) :

ENNReal

Equations

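MDP.EC c 𝒮 n s = ∑' (π : ↑Path[M,s,=n]), MDP.Path.ECost c 𝒮 ↑π

In words: `MDP.EC c 𝒮 n s` is the sum, over every `π` in `Path[M,s,=n]`, of `MDP.Path.ECost c 𝒮 π`.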

Instances For

source

@[reducible, inline]

noncomputable abbrev MDP.OEC {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (c : M.Costs) (s : State) :

ENNReal

Equations

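MDP.OEC c s = ⨅ (𝒮 : 𝔖[M]), ⨆ (n : ℕ), MDP.EC c 𝒮 n s

In words: `MDP.OEC c s` is the infimum over all `𝒮 : 𝔖[M]` of the supremum over `n : ℕ` of `MDP.EC c 𝒮 n s`.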

Instances For

source

@[simp]

theorem MDP.EC_zero {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {c : M.Costs} {𝒮 : 𝔖[M]} :

EC c 𝒮 0 = 0

source

@[simp]

theorem MDP.EC_one {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {c : M.Costs} {𝒮 : 𝔖[M]} :

EC c 𝒮 1 = c

source

@[simp]

theorem MDP.EC_one' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {c : M.Costs} {𝒮 : 𝔖[M]} {s : State} :

EC c 𝒮 1 s = c s

source

theorem MDP.EC_le_succ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {c : M.Costs} {𝒮 : 𝔖[M]} {n : ℕ} {s : State} [DecidableEq State] :

EC c 𝒮 n s ≤ EC c 𝒮 (n + 1) s

source

theorem MDP.EC_monotone {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {c : M.Costs} {𝒮 : 𝔖[M]} {s : State} [DecidableEq State] :

Monotone fun (x : ℕ) => EC c 𝒮 x s

source

theorem MDP.EC_succ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {c : M.Costs} {n : ℕ} [DecidableEq State] (𝒮 : 𝔖[M]) :

EC c 𝒮 (n + 1) = c + fun (s : State) => ∑' (s' : ↑(M.succs_univ s)), M.P s (𝒮 {s}) ↑s' * EC c (𝒮.specialize s s') n ↑s'

source

theorem MDP.EC_eq {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {n : ℕ} {s : State} {𝒮 𝒮' : 𝔖[M]} {c : M.Costs} (h : ∀ π ∈ Path[M,s,≤n], 𝒮 π = 𝒮' π) :

EC c 𝒮 n s = EC c 𝒮' n s

source

theorem MDP.EC_le {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {n : ℕ} {s : State} {𝒮 𝒮' : 𝔖[M]} {c : M.Costs} (h : ∀ π ∈ Path[M,s,≤n], 𝒮 π = 𝒮' π) :

EC c 𝒮 n s ≤ EC c 𝒮' n s

source

@[simp]

theorem MDP.EC_markovian_scheduler_specialize {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {c : M.Costs} {s₀ : State} {s : ↑(M.succs_univ s₀)} {n : ℕ} {𝒮 : 𝔖[M]} [𝒮.Markovian] :

EC c (𝒮.specialize s₀ s) n ↑s = EC c 𝒮 n ↑s

source

theorem MDP.bound_EC_succ_eq_bound_EC {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {n : ℕ} {c : M.Costs} (s : State) (s' : ↑(M.succs_univ s)) :

⨅ (ℬ : 𝔖[M,s,≤n + 1]), EC c (↑(ℬ.specialize s s')) n ↑s' = ⨅ (ℬ : 𝔖[M,↑s',≤n]), EC c (↑ℬ) n ↑s'

source

theorem MDP.iInf_EC_specialized_eq_bounded {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {c : M.Costs} {n : ℕ} (s : State) (s' : ↑(M.succs_univ s)) :

⨅ (𝒮 : 𝔖[M]), EC c (𝒮.specialize s s') n ↑s' = ⨅ (ℬ : 𝔖[M,s,≤n + 1]), EC c (↑(ℬ.specialize s s')) n ↑s'

source

theorem MDP.iInf_scheduler_eq_iInf_act_iInf_scheduler {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {c : M.Costs} {n : ℕ} :

⨅ (𝒮 : 𝔖[M]), ∑' (s' : ↑(M.succs_univ s)), M.P s (𝒮 {s}) ↑s' * EC c (𝒮.specialize s s') n ↑s' = ⨅ (α : ↑(M.act s)), ⨅ (𝒮 : 𝔖[M]), ∑' (s' : ↑(M.succs_univ s)), M.P s ↑α ↑s' * EC c (𝒮.specialize s s') n ↑s'

source

theorem MDP.tsum_iInf_bounded_comm {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] [M.FiniteBranching] {s : State} {n : ℕ} (f : (s' : ↑(M.succs_univ s)) → 𝔖[M,↑s',≤n] → ENNReal) :

∑' (s' : ↑(M.succs_univ s)), ⨅ (ℬ : 𝔖[M,↑s',≤n]), f s' ℬ = ⨅ (ℬ : 𝔖[M,s,≤n + 1]), ∑' (s' : ↑(M.succs_univ s)), f s' (ℬ.specialize s s')

source

theorem MDP.tsum_iInf_EC_comm {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] [M.FiniteBranching] {s : State} {α : Act} {c : M.Costs} {n : ℕ} :

∑' (s' : ↑(M.succs_univ s)), ⨅ (𝒮 : 𝔖[M]), M.P s α ↑s' * EC c (𝒮.specialize s s') n ↑s' = ⨅ (𝒮 : 𝔖[M]), ∑' (s' : ↑(M.succs_univ s)), M.P s α ↑s' * EC c (𝒮.specialize s s') n ↑s'

source

theorem MDP.iInf_EC_eq_specialized {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {c : M.Costs} {n : ℕ} (s : State) (s' : ↑(M.succs_univ s)) :

⨅ (𝒮 : 𝔖[M]), EC c 𝒮 n ↑s' = ⨅ (𝒮 : 𝔖[M]), EC c (𝒮.specialize s s') n ↑s'

source

theorem MDP.iInf_EC_succ_eq_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {c : M.Costs} {n : ℕ} [M.FiniteBranching] :

⨅ (𝒮 : 𝔖[M]), EC c 𝒮 (n + 1) = (Φ c) (⨅ (𝒮 : 𝔖[M]), EC c 𝒮 n)

source

theorem MDP.iInf_EC_eq_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {c : M.Costs} {n : ℕ} [M.FiniteBranching] :

⨅ (𝒮 : 𝔖[M]), EC c 𝒮 n = (⇑(Φ c))^[n] ⊥

source

theorem MDP.iSup_iInf_EC_eq_iSup_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {c : M.Costs} [M.FiniteBranching] :

⨆ (n : ℕ), ⨅ (𝒮 : 𝔖[M]), EC c 𝒮 n = ⨆ (n : ℕ), (⇑(Φ c))^[n] ⊥

source

theorem MDP.iSup_iInf_EC_eq_lfp_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {c : M.Costs} [M.FiniteBranching] :

⨆ (n : ℕ), ⨅ (𝒮 : 𝔖[M]), EC c 𝒮 n = lfp_Φ c

source

theorem MDP.Φℒ_step_ECℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {n : ℕ} (c : M.Costs) (ℒ : 𝔏[M]) :

EC c (↑ℒ) (n + 1) = (Φℒ ℒ c) (EC c (↑ℒ) n)

source

theorem MDP.iSup_ECℒ_eq_lfp_Φℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {c : M.Costs} (ℒ : 𝔏[M]) [M.FiniteBranching] :

⨆ (n : ℕ), EC c (↑ℒ) n = lfp_Φℒ ℒ c

source

noncomputable def MDP.ℒ' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [M.FiniteBranching] (c : M.Costs) :

𝔏[M]

Equations

MDP.ℒ' c = ⟨{ toFun := fun (π : M.Path) => (M.act π.last).toFinset.argmin ⋯ fun (x : Act) => (MDP.Φf π.last x) (MDP.lfp_Φ c), property := ⋯ }, ⋯⟩

Instances For

source

noncomputable def MDP.ℒ'_spec {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [M.FiniteBranching] (c : M.Costs) (s : State) :

⨅ (α : ↑(M.act s)), (Φf s ↑α) (lfp_Φ c) = (fun (x : Act) => (Φf s x) (lfp_Φ c)) ((ℒ' c) {s})

Equations

⋯ = ⋯

Instances For

source

theorem MDP.lfp_Φℒ_eq_lfp_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {c : M.Costs} [M.FiniteBranching] :

lfp_Φℒ (ℒ' c) c = lfp_Φ c

source

theorem MDP.iSup_iInf_EC_eq_iInf_iSup_EC {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {c : M.Costs} [M.FiniteBranching] :

⨆ (n : ℕ), ⨅ (𝒮 : 𝔖[M]), EC c 𝒮 n = ⨅ (𝒮 : 𝔖[M]), ⨆ (n : ℕ), EC c 𝒮 n

source

theorem MDP.iInf_iSup_EC_eq_iInf_iSup_ECℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {c : M.Costs} [M.FiniteBranching] :

⨅ (𝒮 : 𝔖[M]), ⨆ (n : ℕ), EC c 𝒮 n = ⨅ (ℒ : 𝔏[M]), ⨆ (n : ℕ), EC c (↑ℒ) n

source

theorem MDP.iSup_iInf_EC_le_iSup_iInf_ECℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {c : M.Costs} :

⨆ (n : ℕ), ⨅ (𝒮 : 𝔖[M]), EC c 𝒮 n ≤ ⨆ (n : ℕ), ⨅ (ℒ : 𝔏[M]), EC c (↑ℒ) n

source

theorem MDP.iSup_iInf_ECℒ_eq_iInf_iSup_ECℒ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {c : M.Costs} [M.FiniteBranching] :

⨆ (n : ℕ), ⨅ (ℒ : 𝔏[M]), EC c (↑ℒ) n = ⨅ (ℒ : 𝔏[M]), ⨆ (n : ℕ), EC c (↑ℒ) n

source

theorem MDP.iInf_iSup_EC_eq_lfp_Φ {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {c : M.Costs} [M.FiniteBranching] :

⨅ (𝒮 : 𝔖[M]), ⨆ (n : ℕ), EC c 𝒮 n = lfp_Φ c