Documentation

MDP.Paths.Cost

@[reducible, inline]
abbrev MDP.Costs {State : Type u_1} {Act : Type u_2} :
MDP State ActType u_1
Equations
Instances For
    noncomputable def MDP.Path.Cost {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (c : M.Costs) (π : M.Path) :
    Equations
    Instances For
      noncomputable def MDP.Path.ECost {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (c : M.Costs) (𝒮 : 𝔖[M]) (π : M.Path) :
      Equations
      Instances For
        theorem MDP.Path.prepend_Cost {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (π : M.Path) (c : M.Costs) (s : (M.prev_univ π[0])) :
        Cost c (π.prepend s) = c s + Cost c π
        theorem MDP.Path.extend_Cost {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (π : M.Path) (c : M.Costs) (s : (M.succs_univ π.last)) :
        Cost c (π.extend s) = Cost c π + c s
        theorem MDP.Path.Cost_tail {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (π : M.Path) (h : 1 < π) (c : M.Costs) :
        Cost c π = c π[0] + Cost c π.tail
        theorem MDP.Path.ECost_tail {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (π : M.Path) [DecidableEq State] (𝒮 : 𝔖[M]) (c : M.Costs) (h : 1 < π) :
        ECost c 𝒮 π = M.P π[0] (𝒮 {π[0]}) π[1] * (c π[0] * Prob (𝒮.specialize π[0] π[1], ) π.tail + ECost c (𝒮.specialize π[0] π[1], ) π.tail)
        theorem MDP.Path.prepend_ECost {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (π : M.Path) {s : (M.prev_univ π[0])} [DecidableEq State] (𝒮 : 𝔖[M]) (c : M.Costs) :
        ECost c 𝒮 (π.prepend s) = M.P (↑s) (𝒮 {s}) π[0] * (c s * Prob (𝒮.specialize s π[0], ) π + ECost c (𝒮.specialize s π[0], ) π)