Documentation

MDP.Basic

Markov Decision Process (MDP) #

MDP's are probabilistic transition systems consisting of sets of (possibly infinite) states and actions, equipped with a probability function.

Main definitions #

structure MDP (State : Type u_1) (Act : Type u_2) :
Type (max u_1 u_2)
  • P' : StateActOption (PMF State)
  • exists_P'_isSome (s : State) : ∃ (α : Act), (self.P' s α).isSome = true
Instances For
    def MDP.P {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (s : State) (α : Act) (s' : State) :

    The transition probability of going from s to s' using action α

    Equations
    • M.P s α s' = match M.P' s α with | some pmf => pmf s' | x => 0
    Instances For
      def MDP.act {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (s : State) :
      Set Act

      An action is enabled in state s iff choosing this action gives a positive probability to any other state.

      Equations
      Instances For
        def MDP.succs {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (α : Act) (s : State) :
        Set State

        α-successors of s are states s' which have a positive probability for a given action α, such that 0 < M.P s α s'.

        Equations
        Instances For
          def MDP.prev {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (α : Act) (s' : State) :
          Set State

          α-predecessors of s are states s' such that s' ∈ M.succs α s.

          Equations
          Instances For
            def MDP.succs_univ {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (s : State) :
            Set State

            Successors of s are those s' with s' ∈ M.succs α for some α.

            Equations
            Instances For
              def MDP.prev_univ {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (s : State) :
              Set State

              Predecessors of s are those s' with s' ∈ M.prev α for some α.

              Equations
              Instances For
                def MDP.mem_prev_univ_iff_mem_succs_univ {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (s s' : State) :
                Equations
                • =
                Instances For
                  class MDP.FiniteBranching {State : Type u_1} {Act : Type u_2} (M : MDP State Act) :
                  Instances
                    noncomputable def MDP.ofP {State : Type u_1} {Act : Type u_2} (P : StateActStateENNReal) (h₁ : ∀ (s : State) (α : Act), ∑' (s' : State), P s α s' = 0 ∑' (s' : State), P s α s' = 1) (h₂ : ∀ (s : State), ∃ (α : Act) (s' : State), 0 < P s α s') :
                    MDP State Act
                    Equations
                    • One or more equations did not get rendered due to their size.
                    Instances For
                      @[simp]
                      theorem MDP.ofP_P {State : Type u_1} {Act : Type u_2} (P : StateActStateENNReal) (h₁ : ∀ (s : State) (α : Act), ∑' (s' : State), P s α s' = 0 ∑' (s' : State), P s α s' = 1) (h₂ : ∀ (s : State), ∃ (α : Act) (s' : State), 0 < P s α s') :
                      (ofP P h₁ h₂).P = P
                      @[simp]
                      theorem MDP.P_le_one {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (s : State) (α : Act) (s' : State) :
                      M.P s α s' 1
                      @[simp]
                      theorem MDP.P_ne_top {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (s : State) (α : Act) (s' : State) :
                      M.P s α s'
                      instance MDP.instFiniteElemActOfFintype {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} [Fintype Act] :
                      Finite (M.act s)
                      noncomputable instance MDP.instFintypeAct {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} [Fintype Act] :
                      Fintype (M.act s)
                      Equations
                      instance MDP.instNonemptyAct {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} :
                      Nonempty (M.act s)
                      noncomputable def MDP.default_act {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (s : State) :
                      Act
                      Equations
                      Instances For
                        @[simp]
                        theorem MDP.default_act_spec {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (s : State) :
                        instance MDP.instFiniteElemActOfFiniteBranching {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} [i : M.FiniteBranching] :
                        Finite (M.act s)
                        noncomputable instance MDP.instFintypeElemActOfFiniteBranching {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} [M.FiniteBranching] :
                        Fintype (M.act s)
                        Equations
                        theorem MDP.actFinite {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} [i : M.FiniteBranching] :
                        (M.act s).Finite
                        noncomputable def MDP.act₀ {State : Type u_1} {Act : Type u_2} (M : MDP State Act) [M.FiniteBranching] (s : State) :
                        Finset Act
                        Equations
                        Instances For
                          @[simp]
                          theorem MDP.act₀_eq_act {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} [i : M.FiniteBranching] :
                          (M.act₀ s) = M.act s
                          @[simp]
                          theorem MDP.act₀_mem_iff_act_mem {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} {a : Act} [M.FiniteBranching] :
                          a M.act₀ s a M.act s
                          theorem MDP.act₀_prop {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} {a : Act} [M.FiniteBranching] (h : a M.act₀ s) :
                          instance MDP.instNonemptySubtypeMemFinsetAct₀ {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} [M.FiniteBranching] :
                          Nonempty { x : Act // x M.act₀ s }
                          noncomputable def MDP.act₀_nonempty {State : Type u_1} {Act : Type u_2} (M : MDP State Act) [M.FiniteBranching] (s : State) :
                          Equations
                          • =
                          Instances For
                            theorem MDP.P_ne_zero_sum_eq_one {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} {a : Act} {s' : State} (h : ¬M.P s a s' = 0) :
                            ∑' (s'' : State), M.P s a s'' = 1
                            noncomputable instance MDP.act.instDefault {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} :
                            Inhabited (M.act s)
                            Equations
                            noncomputable instance MDP.act₀.instDefault {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} [M.FiniteBranching] :
                            Inhabited { x : Act // x M.act₀ s }
                            Equations
                            theorem MDP.P_sum_one_iff {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} {a : Act} :
                            ∑' (s' : State), M.P s a s' = 1 a M.act s
                            noncomputable def MDP.succs₀ {State : Type u_1} {Act : Type u_2} (M : MDP State Act) [i : M.FiniteBranching] (α : Act) (s : State) :
                            Finset State
                            Equations
                            Instances For
                              @[simp]
                              theorem MDP.succs₀_eq_succs {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {α : Act} {s : State} [M.FiniteBranching] :
                              (M.succs₀ α s) = M.succs α s
                              @[simp]
                              theorem MDP.succs₀_mem_eq_succs_mem {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {a : Act} {s s' : State} [M.FiniteBranching] :
                              s' M.succs₀ a s s' M.succs a s
                              instance MDP.instFiniteElemSuccsOfFiniteBranching {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {α : Act} {s : State} [M.FiniteBranching] :
                              Finite (M.succs α s)
                              theorem MDP.succs_finite {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {α : Act} {s : State} [M.FiniteBranching] :
                              (M.succs α s).Finite
                              noncomputable instance MDP.instFintypeElemSuccsOfFiniteBranching {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {α : Act} {s : State} [M.FiniteBranching] :
                              Fintype (M.succs α s)
                              Equations
                              instance MDP.instNonemptySuccs {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} (α : (M.act s)) :
                              Nonempty (M.succs (↑α) s)
                              instance MDP.instNonemptySuccs₀ {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} [M.FiniteBranching] (α : (M.act s)) :
                              Nonempty { x : State // x M.succs₀ (↑α) s }
                              theorem MDP.prev_iff_succs {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {α : Act} {s s' : State} :
                              s' M.prev α s s M.succs α s'
                              @[simp]
                              theorem MDP.prev_univ_iff_succs_univ {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s s' : State} :
                              @[simp]
                              theorem MDP.succs_implies_succs_univ {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {α : Act} {s : State} (s' : (M.succs α s)) :
                              s' M.succs_univ s
                              instance MDP.instNonemptySuccsUniv {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} :
                              noncomputable def MDP.succs_univ₀ {State : Type u_1} {Act : Type u_2} (M : MDP State Act) [DecidableEq State] [M.FiniteBranching] (s : State) :
                              Finset State
                              Equations
                              Instances For
                                theorem MDP.succs_univ₀_spec {State : Type u_1} {Act : Type u_2} (M : MDP State Act) [DecidableEq State] [M.FiniteBranching] (s s' : State) :
                                s' M.succs_univ₀ s∃ (α : Act), 0 < M.P s α s'
                                @[simp]
                                theorem MDP.succs_univ₀_eq_succs_univ {State : Type u_1} {Act : Type u_2} (M : MDP State Act) [DecidableEq State] [M.FiniteBranching] (s : State) :
                                @[simp]
                                theorem MDP.succs_univ₀_mem_eq_succs_univ_mem {State : Type u_1} {Act : Type u_2} (M : MDP State Act) [DecidableEq State] [M.FiniteBranching] (s s' : State) :
                                theorem MDP.succs_Finite {State : Type u_1} {Act : Type u_2} (M : MDP State Act) [M.FiniteBranching] {s : Act} {a : State} :
                                (M.succs s a).Finite
                                theorem MDP.succs_univ_Finite {State : Type u_1} {Act : Type u_2} (M : MDP State Act) [DecidableEq State] [M.FiniteBranching] {s : State} :
                                instance MDP.instNonemptySuccsUniv₀ {State : Type u_1} {Act : Type u_2} (M : MDP State Act) [DecidableEq State] [M.FiniteBranching] {s : State} :
                                Nonempty { x : State // x M.succs_univ₀ s }
                                instance MDP.instFiniteElemSuccs_univOfFiniteBranching {State : Type u_1} {Act : Type u_2} (M : MDP State Act) [DecidableEq State] {s : State} [M.FiniteBranching] :
                                noncomputable instance MDP.instFintypeElemSuccs_univOfFiniteBranching {State : Type u_1} {Act : Type u_2} (M : MDP State Act) [DecidableEq State] {s : State} [M.FiniteBranching] :
                                Equations
                                @[simp]
                                theorem MDP.tsum_succs_univ_P_eq_tsum_succs_P {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} {α : Act} :
                                ∑' (s' : (M.succs_univ s)), M.P s α s' = ∑' (s' : (M.succs α s)), M.P s α s'
                                @[simp]
                                theorem MDP.tsum_succs_P_eq_tsum_P {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {α : Act} {s : State} :
                                ∑' (s' : (M.succs α s)), M.P s α s' = ∑' (s' : State), M.P s α s'
                                @[simp]
                                theorem MDP.tsum_succs_P_eq_one {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} {α : Act} :
                                α M.act s∑' (s' : State), M.P s α s' = 1
                                theorem MDP.succs_tsum_add_left {State : Type u_1} {Act : Type u_2} (M : MDP State Act) {s : State} {α : Act} {a : ENNReal} (h : α M.act s) (f : (M.succs_univ s)ENNReal) :
                                ∑' (s' : (M.succs_univ s)), (M.P s α s' * a + f s') = a + ∑' (s' : (M.succs_univ s)), f s'