Introduction

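$$\frac{1}{\sigma}\,\mathbb{E}_{\epsilon \sim \mathcal{N}(0, I)}\left[\frac{f(\theta + \sigma\epsilon) - f(\theta - \sigma\epsilon)}{2}\,\epsilon\right]$$

def expectation(l):
    N = 10000
    return mean(l() for _ in range(N))

1/sigma * expectation(lambda: eps = sample(N(0, I));
                      (f(theta + sigma * eps) - f(theta - sigma * eps)) / 2 * eps)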

Gradient Estimation

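$$f_\sigma(\theta) = \mathbb{E}_{\epsilon \sim \mathcal{N}(0, I)}\left[f(\theta + \sigma\epsilon)\right]$$

def expectation(l):
    N = 10000
    return mean(l() for _ in range(N))

f_sigma(theta) == expectation(lambda: eps = sample(N(0, I));
                              f(theta + sigma * eps))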

Gradient Estimation

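$$\nabla f_\sigma(\theta) = \frac{1}{\sigma}\,\mathbb{E}_{\epsilon \sim \mathcal{N}(0, I)}\left[f(\theta + \sigma\epsilon)\,\epsilon\right]$$

def expectation(l):
    N = 10000
    return mean(l() for _ in range(N))

grad(f_sigma)(theta) == 1/sigma * expectation(lambda: eps = sample(N(0, I));
                                              f(theta + sigma * eps) * eps)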

Introduction

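$$\frac{1}{\sigma}\,\mathbb{E}_{\epsilon \sim \mathcal{N}(0, I)}\left[\frac{f(\theta + \sigma\epsilon) - f(\theta - \sigma\epsilon)}{2}\,\epsilon\right]$$

def expectation(l):
    N = 10000
    return mean(l() for _ in range(N))

1/sigma * expectation(lambda: eps = sample(N(0, I));
                      (f(theta + sigma * eps) - f(theta - sigma * eps)) / 2 * eps)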

Gradient Estimation

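$$f_\sigma(\theta) = \mathbb{E}_{\epsilon \sim \mathcal{N}(0, I)}\left[f(\theta + \sigma\epsilon)\right]$$

def expectation(l):
    N = 10000
    return mean(l() for _ in range(N))

f_sigma(theta) == expectation(lambda: eps = sample(N(0, I));
                              f(theta + sigma * eps))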

Gradient Estimation

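$$\nabla f_\sigma(\theta) = \frac{1}{\sigma}\,\mathbb{E}_{\epsilon \sim \mathcal{N}(0, I)}\left[f(\theta + \sigma\epsilon)\,\epsilon\right]$$

def expectation(l):
    N = 10000
    return mean(l() for _ in range(N))

grad(f_sigma)(theta) == 1/sigma * expectation(lambda: eps = sample(N(0, I));
                                              f(theta + sigma * eps) * eps)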

Introduction

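$$\frac{1}{\sigma}\,\mathbb{E}_{\epsilon \sim \mathcal{N}(0, I)}\left[\frac{f(\theta + \sigma\epsilon) - f(\theta - \sigma\epsilon)}{2}\,\epsilon\right]$$

def expectation(l):
    N = 10000
    return mean(l() for _ in range(N))

1/sigma * expectation(lambda: eps = sample(N(0, I));
                      (f(theta + sigma * eps) - f(theta - sigma * eps)) / 2 * eps)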

Gradient Estimation

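$$f_\sigma(\theta) = \mathbb{E}_{\epsilon \sim \mathcal{N}(0, I)}\left[f(\theta + \sigma\epsilon)\right]$$

def expectation(l):
    N = 10000
    return mean(l() for _ in range(N))

f_sigma(theta) == expectation(lambda: eps = sample(N(0, I));
                              f(theta + sigma * eps))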

Gradient Estimation

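$$\nabla f_\sigma(\theta) = \frac{1}{\sigma}\,\mathbb{E}_{\epsilon \sim \mathcal{N}(0, I)}\left[f(\theta + \sigma\epsilon)\,\epsilon\right]$$

def expectation(l):
    N = 10000
    return mean(l() for _ in range(N))

grad(f_sigma)(theta) == 1/sigma * expectation(lambda: eps = sample(N(0, I));
                                              f(theta + sigma * eps) * eps)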

Introduction

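$$\frac{1}{\sigma}\,\mathbb{E}_{\epsilon \sim \mathcal{N}(0, I)}\left[\frac{f(\theta + \sigma\epsilon) - f(\theta - \sigma\epsilon)}{2}\,\epsilon\right]$$

def expectation(l):
    N = 10000
    return mean(l() for _ in range(N))

1/sigma * expectation(lambda: eps = sample(N(0, I));
                      (f(theta + sigma * eps) - f(theta - sigma * eps)) / 2 * eps)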

Gradient Estimation

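$$f_\sigma(\theta) = \mathbb{E}_{\epsilon \sim \mathcal{N}(0, I)}\left[f(\theta + \sigma\epsilon)\right]$$

def expectation(l):
    N = 10000
    return mean(l() for _ in range(N))

f_sigma(theta) == expectation(lambda: eps = sample(N(0, I));
                              f(theta + sigma * eps))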

Gradient Estimation

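$$\nabla f_\sigma(\theta) = \frac{1}{\sigma}\,\mathbb{E}_{\epsilon \sim \mathcal{N}(0, I)}\left[f(\theta + \sigma\epsilon)\,\epsilon\right]$$

def expectation(l):
    N = 10000
    return mean(l() for _ in range(N))

grad(f_sigma)(theta) == 1/sigma * expectation(lambda: eps = sample(N(0, I));
                                              f(theta + sigma * eps) * eps)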