MDP.BScheduler

class MDP.Scheduler.IsBounded {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) (s : State) (n : ℕ) :

Prop

isBounded (π : M.Path) : π ∉ Path[M,s,≤n] → 𝒮 π = M.default_act π.last

Instances

theorem MDP.Scheduler.isBounded_iff {State : Type u_1} {Act : Type u_2} {M : MDP State Act} (𝒮 : 𝔖[M]) (s : State) (n : ℕ) :

𝒮.IsBounded s n ↔ ∀ π ∉ Path[M,s,≤n], 𝒮 π = M.default_act π.last

source

def MDP.BScheduler {State : Type u_1} {Act : Type u_2} (M : MDP State Act) (s : State) (n : ℕ) :

Type (max 0 u_1 u_2)

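A (potentially) history-dependent scheduler, bounded to paths in Path[M,s,≤n]: a scheduler 𝒮 : 𝔖[M] bundled with a proof of 𝒮.IsBounded s n, i.e. on every path π outside Path[M,s,≤n] it returns M.default_act π.last.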

Equations

𝔖[M,s,≤n] = { 𝒮 : 𝔖[M] // 𝒮.IsBounded s n }

Instances For

source

def MDP.«term𝔖[_,_,≤_]» :

Lean.ParserDescr

Notation 𝔖[M,s,≤n] for MDP.BScheduler M s n: a (potentially) history-dependent scheduler, bounded to paths in Path[M,s,≤n].

Equations

One or more equations did not get rendered due to their size.

Instances For

source

instance MDP.BScheduler.instDFunLike {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {n : ℕ} :

DFunLike 𝔖[M,s,≤n] M.Path fun (x : M.Path) => Act

Equations

MDP.BScheduler.instDFunLike = { coe := fun (ℬ : 𝔖[M,s,≤n]) (π : M.Path) => ↑ℬ π, coe_injective' := ⋯ }

source

@[simp]

theorem MDP.BScheduler.mk_coe_apply {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {n : ℕ} (𝒮 : 𝔖[M]) (h : 𝒮.IsBounded s n) (π : M.Path) :

⟨𝒮, h⟩ π = 𝒮 π

source

theorem MDP.BScheduler.default_on {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {n : ℕ} (ℬ : 𝔖[M,s,≤n]) {π : M.Path} (h : π ∉ Path[M,s,≤n]) :

ℬ π = M.default_act π.last

source

@[simp]

theorem MDP.BScheduler.coe_apply {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {n : ℕ} {π : M.Path} (ℬ : 𝔖[M,s,≤n]) :

↑ℬ π = ℬ π

source

@[simp]

theorem MDP.BScheduler.mem_act_if {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {n : ℕ} {π : M.Path} (ℬ : 𝔖[M,s,≤n]) :

ℬ π ∈ M.act π.last

source

@[simp]

theorem MDP.BScheduler.tail_mem_act_if {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {n : ℕ} (ℬ : 𝔖[M,s,≤n]) {π : M.Path} :

ℬ π.tail ∈ M.act π.last

source

theorem MDP.BScheduler.ext {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {n : ℕ} {ℬ ℬ' : 𝔖[M,s,≤n]} (h : ∀ π ∈ Path[M,s,≤n], ℬ π = ℬ' π) :

ℬ = ℬ'

source

theorem MDP.BScheduler.ext_iff {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {n : ℕ} {ℬ ℬ' : 𝔖[M,s,≤n]} :

ℬ = ℬ' ↔ ∀ π ∈ Path[M,s,≤n], ℬ π = ℬ' π

source

def MDP.BScheduler.mk' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] (s : State) (n : ℕ) (f : ↑Path[M,s,≤n] → Act) (h : ∀ (π : ↑Path[M,s,≤n]), f π ∈ M.act (↑π).last) :

𝔖[M,s,≤n]

Equations

MDP.BScheduler.mk' s n f h = ⟨{ toFun := fun (π : M.Path) => if h : π ∈ Path[M,s,≤n] then f ⟨π, h⟩ else M.default_act π.last, property := ⋯ }, ⋯⟩

Instances For

source

def MDP.BScheduler.specialize {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {n : ℕ} (ℬ : 𝔖[M,s,≤n + 1]) :

State → (s' : ↑(M.succs_univ s)) → 𝔖[M,↑s',≤n]

Equations

ℬ.specialize x✝ s' = ⟨(↑ℬ).specialize s s', ⋯⟩

Instances For

source

@[simp]

theorem MDP.BScheduler.specialize_apply {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {n : ℕ} (ℬ : 𝔖[M,s,≤n + 1]) (s' : ↑(M.succs_univ s)) (π : ↑Path[M,↑s',≤n]) :

(ℬ.specialize s s') ↑π = ℬ ((↑π).prepend ⟨s, ⋯⟩)

source

@[simp]

theorem MDP.BScheduler.specialize_apply' {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {n : ℕ} {s' : ↑(M.succs_univ s)} {π : M.Path} (ℬ : 𝔖[M,s,≤n + 1]) :

(ℬ.specialize s s') π = if h : π ∈ Path[M,↑s',≤n] then ℬ (π.prepend ⟨s, ⋯⟩) else M.default_act π.last

source

noncomputable def MDP.Scheduler.bound {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] (𝒮 : 𝔖[M]) {s : State} {n : ℕ} :

𝔖[M,s,≤n]

Equations

𝒮.bound = ⟨{ toFun := fun (π : M.Path) => if π ∈ Path[M,s,≤n] then 𝒮 π else M.default_act π.last, property := ⋯ }, ⋯⟩

Instances For

source

@[simp]

theorem MDP.Scheduler.bound_coe_apply {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] (𝒮 : 𝔖[M]) (s : State) (n : ℕ) (π : M.Path) :

𝒮.bound π = if π ∈ Path[M,s,≤n] then 𝒮 π else M.default_act π.last

source

def MDP.BScheduler.cast_arb {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {n : ℕ} {s' : State} {m : ℕ} (ℬ : 𝔖[M,s,≤n]) :

𝔖[M,s',≤m]

Equations

ℬ.cast_arb = (↑ℬ).bound

Instances For

source

def MDP.BScheduler.cast_arb_tail {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {n : ℕ} {s' : State} (ℬ : 𝔖[M,s,≤n]) :

𝔖[M,s',≤n + 1]

Equations

ℬ.cast_arb_tail = (MDP.Scheduler.mk' fun (π : M.Path) => ⟨ℬ π.tail, ⋯⟩).bound

Instances For

source

@[simp]

theorem MDP.BScheduler.cast_arb_tail_specialize {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {n : ℕ} (s' : ↑(M.succs_univ s)) (ℬ : 𝔖[M,↑s',≤n]) :

ℬ.cast_arb_tail.specialize s s' = ℬ

source

instance MDP.BScheduler.instCoeScheduler {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {n : ℕ} :

Coe 𝔖[M] 𝔖[M,s,≤n]

Equations

MDP.BScheduler.instCoeScheduler = { coe := fun (x : 𝔖[M]) => x.bound }

source

instance MDP.BScheduler.instInhabited {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {n : ℕ} :

Inhabited 𝔖[M,s,≤n]

Equations

MDP.BScheduler.instInhabited = { default := ⟨default, ⋯⟩ }

source

def MDP.BScheduler.FiniteMScheduler {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [M.FiniteBranching] (s : State) (n : ℕ) :

Type (max u_1 u_2)

Equations

MDP.BScheduler.FiniteMScheduler s n = ((π : ↑Path[M,s,≤n]) → { x : Act // x ∈ M.act₀ (↑π).last })

Instances For

source

instance MDP.BScheduler.instFintypeFiniteMSchedulerOfDecidableEq {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {n : ℕ} [DecidableEq State] [M.FiniteBranching] :

Fintype (FiniteMScheduler s n)

Equations

MDP.BScheduler.instFintypeFiniteMSchedulerOfDecidableEq = id Pi.instFintype

source

instance MDP.BScheduler.instFiniteOfFiniteBranching {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {n : ℕ} [M.FiniteBranching] :

Finite 𝔖[M,s,≤n]

source

instance MDP.BScheduler.instFintypeOfFiniteBranching {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {n : ℕ} [M.FiniteBranching] :

Fintype 𝔖[M,s,≤n]

Equations

MDP.BScheduler.instFintypeOfFiniteBranching = Fintype.ofFinite 𝔖[M,s,≤n]

source

instance MDP.BScheduler.instNonemptyOfFiniteBranching {State : Type u_1} {Act : Type u_2} {M : MDP State Act} {s : State} {n : ℕ} [M.FiniteBranching] :

Nonempty 𝔖[M,s,≤n]

source

def MDP.BScheduler.elems {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {n : ℕ} [M.FiniteBranching] :

Finset 𝔖[M,s,≤n]

Equations

MDP.BScheduler.elems = Fintype.elems

Instances For

source

@[simp]

theorem MDP.BScheduler.elems_mem {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {n : ℕ} {ℬ : 𝔖[M,s,≤n]} [M.FiniteBranching] :

ℬ ∈ elems

source

@[simp]

theorem MDP.BScheduler.elems_nonempty {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {s : State} {n : ℕ} [M.FiniteBranching] :

elems.Nonempty

source

@[simp]

theorem MDP.BScheduler.mk'_specialize {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] {n : ℕ} {s : State} {s' : ↑(M.succs_univ s)} (f : ↑Path[M,s,≤n + 1] → Act) (h : ∀ (π : ↑Path[M,s,≤n + 1]), f π ∈ M.act (↑π).last) :

(mk' s (n + 1) f h).specialize s s' = mk' (↑s') n (fun (x : ↑Path[M,↑s',≤n]) => f ⟨(↑x).prepend ⟨s, ⋯⟩, ⋯⟩) ⋯

source

theorem MDP.BScheduler.mk'_argmin {State : Type u_1} {Act : Type u_2} {M : MDP State Act} [DecidableEq State] [M.FiniteBranching] {n : ℕ} (s : State) (s' : ↑(M.succs_univ s)) (f : 𝔖[M,↑s',≤n] → ENNReal) :

mk' (↑s') n (fun (π : ↑Path[M,↑s',≤n]) => (elems.argmin ⋯ f) ↑π) ⋯ = elems.argmin ⋯ f