Markov Decision Process (MDP) #
MDP's are probabilistic transition systems consisting of sets of (possibly infinite) states and actions, equipped with a probability function.
Main definitions #
MDP State Act
: Markov Decision Process.MDP.P
: probability function relating states and actions to states.MDP.act
: enabled actions of a state.MDP.succs
,MDP.succs_univ
: successors of states.MDP.prev
,MDP.prev_univ
: predecessors of states.MDP.FiniteBranching
: class of MDP's where bothMDP.succs
andMDP.act
are finite.
Successors of s
are those s'
with s' ∈ M.succs α
for some α
.
Equations
- M.succs_univ s = ⋃ (α : Act), M.succs α s
Instances For
noncomputable def
MDP.ofP
{State : Type u_1}
{Act : Type u_2}
(P : State → Act → State → ENNReal)
(h₁ : ∀ (s : State) (α : Act), ∑' (s' : State), P s α s' = 0 ∨ ∑' (s' : State), P s α s' = 1)
(h₂ : ∀ (s : State), ∃ (α : Act) (s' : State), 0 < P s α s')
:
MDP State Act
Equations
- One or more equations did not get rendered due to their size.
Instances For
noncomputable instance
MDP.instFintypeAct
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{s : State}
[Fintype Act]
:
Equations
- M.instFintypeAct = Fintype.ofFinite ↑(M.act s)
noncomputable def
MDP.default_act
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
(s : State)
:
Act
Equations
- M.default_act s = ⋯.choose
Instances For
@[simp]
instance
MDP.instFiniteElemActOfFiniteBranching
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{s : State}
[i : M.FiniteBranching]
:
noncomputable instance
MDP.instFintypeElemActOfFiniteBranching
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{s : State}
[M.FiniteBranching]
:
Equations
theorem
MDP.actFinite
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{s : State}
[i : M.FiniteBranching]
:
@[simp]
theorem
MDP.act₀_eq_act
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{s : State}
[i : M.FiniteBranching]
:
theorem
MDP.act₀_prop
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{s : State}
{a : Act}
[M.FiniteBranching]
(h : a ∈ M.act₀ s)
:
(Function.support (M.P s a)).Nonempty
noncomputable def
MDP.act₀_nonempty
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
[M.FiniteBranching]
(s : State)
:
Equations
- ⋯ = ⋯
Instances For
noncomputable instance
MDP.act.instDefault
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{s : State}
:
Equations
noncomputable instance
MDP.act₀.instDefault
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{s : State}
[M.FiniteBranching]
:
noncomputable def
MDP.succs₀
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
[i : M.FiniteBranching]
(α : Act)
(s : State)
:
Finset State
Instances For
@[simp]
theorem
MDP.succs₀_eq_succs
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{α : Act}
{s : State}
[M.FiniteBranching]
:
instance
MDP.instFiniteElemSuccsOfFiniteBranching
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{α : Act}
{s : State}
[M.FiniteBranching]
:
theorem
MDP.succs_finite
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{α : Act}
{s : State}
[M.FiniteBranching]
:
noncomputable instance
MDP.instFintypeElemSuccsOfFiniteBranching
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{α : Act}
{s : State}
[M.FiniteBranching]
:
Equations
@[simp]
theorem
MDP.prev_univ_iff_succs_univ
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{s s' : State}
:
@[simp]
theorem
MDP.succs_implies_succs_univ
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{α : Act}
{s : State}
(s' : ↑(M.succs α s))
:
instance
MDP.instNonemptySuccsUniv
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
{s : State}
:
Nonempty ↑(M.succs_univ s)
noncomputable def
MDP.succs_univ₀
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
[DecidableEq State]
[M.FiniteBranching]
(s : State)
:
Finset State
Equations
- M.succs_univ₀ s = (M.act₀ s).biUnion fun (x : Act) => M.succs₀ x s
Instances For
theorem
MDP.succs_univ₀_spec
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
[DecidableEq State]
[M.FiniteBranching]
(s s' : State)
:
s' ∈ M.succs_univ₀ s → ∃ (α : Act), 0 < M.P s α s'
@[simp]
theorem
MDP.succs_univ₀_eq_succs_univ
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
[DecidableEq State]
[M.FiniteBranching]
(s : State)
:
@[simp]
theorem
MDP.succs_univ₀_mem_eq_succs_univ_mem
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
[DecidableEq State]
[M.FiniteBranching]
(s s' : State)
:
theorem
MDP.succs_Finite
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
[M.FiniteBranching]
{s : Act}
{a : State}
:
theorem
MDP.succs_univ_Finite
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
[DecidableEq State]
[M.FiniteBranching]
{s : State}
:
(M.succs_univ s).Finite
instance
MDP.instNonemptySuccsUniv₀
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
[DecidableEq State]
[M.FiniteBranching]
{s : State}
:
instance
MDP.instFiniteElemSuccs_univOfFiniteBranching
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
[DecidableEq State]
{s : State}
[M.FiniteBranching]
:
Finite ↑(M.succs_univ s)
noncomputable instance
MDP.instFintypeElemSuccs_univOfFiniteBranching
{State : Type u_1}
{Act : Type u_2}
(M : MDP State Act)
[DecidableEq State]
{s : State}
[M.FiniteBranching]
:
Fintype ↑(M.succs_univ s)