Documentation

MDP.Paths.Cost

@[reducible, inline]

abbrev MDP.Costs {State : Type u_1} {Act : Type u_2} :

MDP State Act → Type u_1

Equations

x✝.Costs = (State → ENNReal)

Instances For

noncomputable def MDP.Path.Cost {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (c : M.Costs) (π : M.Path) :

Equations

MDP.Path.Cost c π = (List.map c π.states).sum

Instances For

noncomputable def MDP.Path.ECost {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (c : M.Costs) (𝒮 : 𝔖[M]) (π : M.Path) :

Equations

MDP.Path.ECost c 𝒮 π = MDP.Path.Cost c π * MDP.Path.Prob 𝒮 π

Instances For

theorem MDP.Path.prepend_Cost {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (π : M.Path) (c : M.Costs) (s : ↑(M.prev_univ π[0])) :

Cost c (π.prepend s) = c ↑s + Cost c π

theorem MDP.Path.extend_Cost {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (π : M.Path) (c : M.Costs) (s : ↑(M.succs_univ π.last)) :

Cost c (π.extend s) = Cost c π + c ↑s

theorem MDP.Path.Cost_tail {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (π : M.Path) (h : 1 < ‖π‖) (c : M.Costs) :

Cost c π = c π[0] + Cost c π.tail

theorem MDP.Path.ECost_tail {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (π : M.Path) [DecidableEq State] (𝒮 : 𝔖[M]) (c : M.Costs) (h : 1 < ‖π‖) :

ECost c 𝒮 π = M.P π[0] (𝒮 {π[0]}) π[1] * (c π[0] * Prob (𝒮.specialize π[0] ⟨π[1], ⋯⟩) π.tail + ECost c (𝒮.specialize π[0] ⟨π[1], ⋯⟩) π.tail)

theorem MDP.Path.prepend_ECost {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (π : M.Path) {s : ↑(M.prev_univ π[0])} [DecidableEq State] (𝒮 : 𝔖[M]) (c : M.Costs) :

ECost c 𝒮 (π.prepend s) = M.P (↑s) (𝒮 {↑s}) π[0] * (c ↑s * Prob (𝒮.specialize ↑s ⟨π[0], ⋯⟩) π + ECost c (𝒮.specialize ↑s ⟨π[0], ⋯⟩) π)