Counterexample exhibiting ⨅ 𝒮, ⨆ n, EC c 𝒮 n < ⨅ ℒ, ⨆ n, EC c ℒ n
        $0          $1          $0
     ┌─►s₁─────────►s₂─────────►s₃─┐
𝓅(α) │   │  1-𝓅(α)       1     ▲  │ 1
     └───┘                     └──┘
Setup (instance):
- The MDP consists of three states s₁, s₂, and s₃, and actions ℕ.
- State s₁ has all actions enabled (i.e. all of ℕ), while s₂ and s₃ only have 0 enabled.
- The MDP is parameterized by a probability function 𝓅 : ℕ → ℝ≥0∞ with 0 < 𝓅 < 1, which dictates the probabilities in s₁ such that P(s₁, i, s₁) = 𝓅(i) and P(s₁, i, s₂) = 1 - 𝓅(i) for all i ∈ ℕ.
- The remaining probabilities are P(s₂, 0, s₃) = 1 and P(s₃, 0, s₃) = 1, with all others being 0.
- States s₁ and s₃ have cost 0 and s₂ has cost 1.
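As an informal illustration of the setup above, here is a hypothetical Python sketch (not part of the Lean development; all names are made up) of the transition-probability and cost functions:

```python
# Hypothetical sketch of the instance above; the Lean development defines it
# formally, the names here are illustrative only.

def P(p, s, a, t):
    """Transition probability P(s, a, t), parameterized by 𝓅 = p : ℕ → (0, 1)."""
    if s == "s1":                      # every action a ∈ ℕ is enabled in s₁
        if t == "s1":
            return p(a)                # stay in s₁ with probability 𝓅(a)
        if t == "s2":
            return 1 - p(a)            # leave to s₂ with probability 1 - 𝓅(a)
        return 0
    if s == "s2":                      # only action 0 enabled
        return 1 if (a, t) == (0, "s3") else 0
    if s == "s3":                      # only action 0 enabled; absorbing
        return 1 if (a, t) == (0, "s3") else 0
    return 0

def cost(s):
    """Only the intermediate state s₂ carries cost."""
    return 1 if s == "s2" else 0
```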
Note that if we were to ever leave s₁ we would incur a cost of 1, and thus in order to achieve the
minimal cost (0) one would have to stay in s₁ forever.
Now, there is no way to pick 0 < 𝓅 < 1 such that the outgoing probability 1 - 𝓅(α) is zero, so we
must instead try to minimize it.
For a fixed α the probability of staying n times is 𝓅(α)ⁿ, which in the limit is 0, and thus
the probability of eventually leaving is 1.
As a Markovian scheduler always picks the same action in the same state, we find ourselves in the
above scenario, and will thus have an expected cost of 1 for any Markovian scheduler, regardless
of the choice of 𝓅.
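A hypothetical numeric check of this (the fixed action α = 5 and the sampled values of n below are arbitrary choices, not from the formal development): under a constant action α the probability of still being in s₁ after n steps is 𝓅(α)ⁿ, which vanishes, so the probability of ever visiting s₂, and hence the expected cost, tends to 1.

```python
# Hypothetical numeric check: a Markovian scheduler picks the same action α in
# s₁ forever, so P(still in s₁ after n steps) = 𝓅(α)ⁿ → 0 and the expected cost
# (the probability of ever visiting s₂) tends to 1, for any fixed 𝓅(α) < 1.

def p(n):
    # the concrete parameter used later: 𝓅(n) = (2 ^ (2 ^ n)⁻¹)⁻¹ = 2^(-2^(-n))
    return 2.0 ** -(2.0 ** -n)

alpha = 5                              # any fixed action behaves the same way
for n in [1, 10, 100, 1000]:
    stay_n = p(alpha) ** n             # probability of staying n steps
    print(n, stay_n, 1 - stay_n)       # probability of having left → 1
```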
The task now is to pick 𝓅 so as to exploit the history of a scheduled path and beat the Markovian
scheduler.
By carefully picking 𝓅(n) = (2 ^ (2 ^ n)⁻¹)⁻¹ and using the scheduler that picks an action based
on the length of the scheduled path, such that 𝒮(π) = ‖π‖, we find that in the limit the
probability of staying (and of leaving) is 1/2, and thus the expected cost is 1/2.
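A hypothetical numeric check of this claim (assuming the initial single-state path has length 1, so the relevant product starts at n = 1): the probability of staying in s₁ forever is ∏_{n≥1} 𝓅(n) = 2^(-(2⁻¹ + 2⁻² + …)) = 2⁻¹ = 1/2, and therefore the expected cost is 1 - 1/2 = 1/2.

```python
# Hypothetical numeric check: with 𝓅(n) = (2 ^ (2 ^ n)⁻¹)⁻¹ = 2^(-2^(-n)) and the
# scheduler 𝒮(π) = ‖π‖, the probability of staying in s₁ forever is the product
# of 𝓅(n) over the successive path lengths n = 1, 2, 3, …, which equals
# 2^(-(1/2 + 1/4 + …)) = 1/2, so the expected cost is 1 - 1/2 = 1/2.

def p(n):
    return 2.0 ** -(2.0 ** -n)

stay_forever = 1.0
for n in range(1, 60):                 # the tail beyond n ≈ 60 is numerically negligible
    stay_forever *= p(n)
print(stay_forever)                    # ≈ 0.5: probability of never leaving s₁
print(1.0 - stay_forever)              # ≈ 0.5: expected cost, beating every Markovian scheduler
```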
This leads us to conclude iInf_iSup_EC_lt_iInf_iSup_ECℒ.
Equations
- MDP.Counterexample.C.p = { toFun := fun (n : ℕ) => (2 ^ (2 ^ n)⁻¹)⁻¹, property := MDP.Counterexample.C.p._proof_27 }