MDP.Scheduler

source

structure MDP.Scheduler {State : Type u_1} {Act : Type u_2} (M : MDP State Act) :

Type (max u_1 u_2)

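A (potentially) history-dependent scheduler: a function `toFun` from paths to actions such that, for every path `π`, the chosen action lies in `M.act π.last`.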

toFun : M.Path → Act
property (π : M.Path) : self.toFun π ∈ M.act π.last

Instances For

source

def MDP.«term𝔖[_]» :

Lean.ParserDescr

Notation `𝔖[M]` for a (potentially) history-dependent scheduler of `M`.

Equations

One or more equations did not get rendered due to their size.

Instances For

source

def MDP.Scheduler.mk' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : (π : M.Path) → ↑(M.act π.last)) :

𝔖[M]

Equations

MDP.Scheduler.mk' 𝒮 = { toFun := fun (π : M.Path) => ↑(𝒮 π), property := ⋯ }

Instances For

source

instance MDP.Scheduler.instDFunLikePath {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :

DFunLike 𝔖[M] M.Path fun (x : M.Path) => Act

Equations

MDP.Scheduler.instDFunLikePath = { coe := fun (𝒮 : 𝔖[M]) => 𝒮.toFun, coe_injective' := ⋯ }

source

@[simp]

theorem MDP.Scheduler.toFun_coe {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) (π : M.Path) :

𝒮.toFun π = 𝒮 π

source

@[simp]

theorem MDP.Scheduler.toFun_coe' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {f : M.Path → Act} {h : ∀ (π : M.Path), f π ∈ M.act π.last} (π : M.Path) :

{ toFun := f, property := h } π = f π

source

@[simp]

theorem MDP.Scheduler.mem_act_if {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {π : M.Path} (𝒮 : 𝔖[M]) (h : π.last = s) :

𝒮 π ∈ M.act s

source

@[simp]

theorem MDP.Scheduler.singleton_mem_act {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) (s : State) :

𝒮 {s} ∈ M.act s

source

@[simp]

theorem MDP.Scheduler.mem_act {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) (π : M.Path) :

𝒮 π ∈ M.act π.last

source

theorem MDP.Scheduler.mem_prepend {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) (π : M.Path) (s₀ : ↑(M.prev_univ π[0])) :

𝒮 (π.prepend s₀) ∈ M.act π.last

source

theorem MDP.Scheduler.ext {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {𝒮 𝒮' : 𝔖[M]} (h : ∀ (π : M.Path), 𝒮 π = 𝒮' π) :

𝒮 = 𝒮'

source

theorem MDP.Scheduler.ext_iff {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {𝒮 𝒮' : 𝔖[M]} :

𝒮 = 𝒮' ↔ ∀ (π : M.Path), 𝒮 π = 𝒮' π

source

def MDP.Scheduler.IsMarkovian {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) :

Prop

Equations

𝒮.IsMarkovian = ∀ (π : M.Path), 𝒮 π = 𝒮 {π.last}

Instances For

source

class MDP.Scheduler.Markovian {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) :

Prop

intro : 𝒮.IsMarkovian

Instances

source

theorem MDP.Scheduler.markovian_iff {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) :

𝒮.Markovian ↔ 𝒮.IsMarkovian

source

theorem MDP.Scheduler.MarkovianOn {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) [inst : 𝒮.Markovian] (π : M.Path) :

𝒮 π = 𝒮 {π.last}

source

@[simp]

theorem MDP.Scheduler.Markovian_path_take {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) [𝒮.Markovian] (π : M.Path) (i : Fin ‖π‖) :

𝒮 (π.take ↑i) = 𝒮 {π[i]}

source

theorem MDP.Scheduler.singleton_last {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (s : State) :

{s}.last = s

source

@[simp]

theorem MDP.Scheduler.Markovian_path_take' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) [𝒮.Markovian] (π : M.Path) (i : ℕ) (hi : i < ‖π‖) :

𝒮 (π.take i) = 𝒮 {π[i]}

source

@[simp]

theorem MDP.Scheduler.Markovian_path_take'' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) [𝒮.Markovian] (π : M.Path) (i : Fin ‖π‖) :

𝒮 (π.take ↑i) = 𝒮 {π[i]}

source

@[simp]

theorem MDP.Scheduler.Markovian_path_take''' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) [𝒮.Markovian] (π : M.Path) (i : Fin (‖π‖ - 1)) :

𝒮 (π.take ↑i) = 𝒮 {π[i]}

source

def MDP.MScheduler {State : Type u_1} {Act : Type u_2} (M : MDP State Act) :

Type (max 0 u_1 u_2)

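A Markovian (historyless) scheduler: a scheduler `𝒮` bundled with a proof that it is Markovian, i.e. that `𝒮 π = 𝒮 {π.last}` for every path `π`.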

Equations

𝔏[M] = { 𝒮 : 𝔖[M] // 𝒮.Markovian }

Instances For

source

def MDP.«term𝔏[_]» :

Lean.ParserDescr

Notation `𝔏[M]` for a Markovian (historyless) scheduler of `M`.

Equations

One or more equations did not get rendered due to their size.

Instances For

source

noncomputable instance MDP.MScheduler.instInhabitedScheduler {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :

Inhabited 𝔖[M]

Equations

MDP.MScheduler.instInhabitedScheduler = { default := { toFun := fun (x : M.Path) => M.default_act x.last, property := ⋯ } }

source

noncomputable instance MDP.MScheduler.instInhabited {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :

Inhabited 𝔏[M]

Equations

MDP.MScheduler.instInhabited = { default := ⟨default, ⋯⟩ }

source

def MDP.MScheduler.toScheduler {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :

𝔏[M] → 𝔖[M]

Equations

MDP.MScheduler.toScheduler = Subtype.val

Instances For

source

instance MDP.MScheduler.instCoeScheduler {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :

Coe 𝔏[M] 𝔖[M]

Equations

MDP.MScheduler.instCoeScheduler = { coe := MDP.MScheduler.toScheduler }

source

instance MDP.MScheduler.instMarkovianToScheduler {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (ℒ : 𝔏[M]) :

(↑ℒ).Markovian

source

@[simp]

theorem MDP.MScheduler.coe_mk {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) (h𝒮 : 𝒮.Markovian) :

↑⟨𝒮, h𝒮⟩ = 𝒮

source

@[simp]

theorem MDP.MScheduler.val_eq_toScheduler {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (ℒ : 𝔏[M]) :

↑ℒ = ↑ℒ

source

theorem MDP.MScheduler.toScheduler_injective {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :

Function.Injective toScheduler

source

instance MDP.MScheduler.instFunLike {State : Type u_1} {Act : Type u_2} {M : MDP State Act} :

FunLike 𝔏[M] M.Path Act

Equations

MDP.MScheduler.instFunLike = { coe := fun (ℒ : 𝔏[M]) (π : M.Path) => ↑ℒ π, coe_injective' := ⋯ }

source

def MDP.MScheduler.mk' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (f : State → Act) (hf : ∀ (s : State), f s ∈ M.act s) :

𝔏[M]

Equations

MDP.MScheduler.mk' f hf = ⟨{ toFun := fun (π : M.Path) => f π.last, property := ⋯ }, ⋯⟩

Instances For

source

theorem MDP.MScheduler.markovian {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {ℒ : 𝔏[M]} (π : M.Path) :

ℒ π = ℒ {π.last}

source

@[simp]

theorem MDP.MScheduler.mem_act' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {ℒ : 𝔏[M]} (π : M.Path) :

ℒ π ∈ M.act π.last

source

@[simp]

theorem MDP.MScheduler.prepend {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {ℒ : 𝔏[M]} {π : M.Path} (s : ↑(M.prev_univ π[0])) :

ℒ (π.prepend s) = ℒ π

source

@[simp]

theorem MDP.MScheduler.toScheduler_apply {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {ℒ : 𝔏[M]} {π : M.Path} :

↑ℒ π = ℒ π

source

@[simp]

theorem MDP.Scheduler.mk'_coe {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {𝒮 : (π : M.Path) → ↑(M.act π.last)} (π : M.Path) :

(mk' 𝒮) π = ↑(𝒮 π)

source

noncomputable def MDP.Scheduler.specialize {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] (𝒮 : 𝔖[M]) (s : State) (s' : ↑(M.succs_univ s)) :

𝔖[M]

Specialize a scheduler such that all scheduled paths are considered with a given state as the immediate predecessor.

Equations

𝒮.specialize s s' = MDP.Scheduler.mk' fun (π : M.Path) => if h : π[0] = ↑s' then ⟨𝒮 (π.prepend ⟨s, ⋯⟩), ⋯⟩ else default

Instances For

source

def MDP.«term__[_↦_]» :

Lean.TrailingParserDescr

Equations

One or more equations did not get rendered due to their size.

Instances For

source

def MDP.«term__[_↦_]'_» :

Lean.TrailingParserDescr

Equations

One or more equations did not get rendered due to their size.

Instances For

source

@[simp]

theorem MDP.Scheduler.specialize_apply {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {𝒮 : 𝔖[M]} {s : State} {s' : ↑(M.succs_univ s)} {π : M.Path} :

(𝒮.specialize s s') π = if h : π[0] = ↑s' then 𝒮 (π.prepend ⟨s, ⋯⟩) else M.default_act π.last

source

@[simp]

theorem MDP.Scheduler.specialize_tail_take {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {𝒮 : 𝔖[M]} {i : ℕ} (π : M.Path) (h : 1 < ‖π‖) :

(𝒮.specialize π[0] ⟨π[1], ⋯⟩) (π.tail.take i) = 𝒮 (π.take (i + 1))

source

@[simp]

theorem MDP.Scheduler.specialize_default_on {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {𝒮 : 𝔖[M]} {s : State} {π : M.Path} {s' : ↑(M.succs_univ s)} (h : ¬π[0] = ↑s') :

(𝒮.specialize s s') π = M.default_act π.last

source

theorem MDP.MScheduler.toScheduler_specialize {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {s' : ↑(M.succs_univ s)} (ℒ : 𝔏[M]) :

(↑ℒ).specialize s s' = { toFun := fun (π : M.Path) => if π[0] = ↑s' then ℒ π else M.default_act π.last, property := ⋯ }