Discrete Gaussian Samplers over the Integers

This class realizes oracles which returns integers proportionally to \(\exp(-(x-c)^2/(2σ^2))\). All oracles are implemented using rejection sampling. See DiscreteGaussianDistributionIntegerSampler.__init__() for which algorithms are available.

AUTHORS:

  • Martin Albrecht (2014-06-28): initial version

EXAMPLES:

We construct a sampler for the distribution \(D_{3,c}\) with width \(σ=3\) and center \(c=0\):

sage: from sage.stats.distributions.discrete_gaussian_integer import DiscreteGaussianDistributionIntegerSampler
sage: sigma = 3.0
sage: D = DiscreteGaussianDistributionIntegerSampler(sigma=sigma)
>>> from sage.all import *
>>> from sage.stats.distributions.discrete_gaussian_integer import DiscreteGaussianDistributionIntegerSampler
>>> sigma = RealNumber('3.0')
>>> D = DiscreteGaussianDistributionIntegerSampler(sigma=sigma)

We ask for 100000 samples:

sage: from collections import defaultdict
sage: counter = defaultdict(Integer)
sage: n = 0
sage: def add_samples(i):
....:     global counter, n
....:     for _ in range(i):
....:         counter[D()] += 1
....:         n += 1

sage: add_samples(100000)
>>> from sage.all import *
>>> from collections import defaultdict
>>> counter = defaultdict(Integer)
>>> n = Integer(0)
>>> def add_samples(i):
...     global counter, n
...     for _ in range(i):
...         counter[D()] += Integer(1)
...         n += Integer(1)

>>> add_samples(Integer(100000))

These are sampled with a probability proportional to \(\exp(-x^2/18)\). More precisely we have to normalise by dividing by the overall probability over all integers. We use the fact that hitting anything more than 6 standard deviations away is very unlikely and compute:

sage: bound = (6*sigma).floor()
sage: norm_factor = sum([exp(-x^2/(2*sigma^2)) for x in range(-bound,bound+1)])
sage: norm_factor
7.519...
>>> from sage.all import *
>>> bound = (Integer(6)*sigma).floor()
>>> norm_factor = sum([exp(-x**Integer(2)/(Integer(2)*sigma**Integer(2))) for x in range(-bound,bound+Integer(1))])
>>> norm_factor
7.519...

With this normalisation factor, we can now test if our samples follow the expected distribution:

sage: expected = lambda x : ZZ(round(n*exp(-x^2/(2*sigma^2))/norm_factor))
sage: observed = lambda x : counter[x]

sage: add_samples(10000)
sage: while abs(observed(0)*1.0/expected(0)   - 1.0) > 5e-2: add_samples(10000)
sage: while abs(observed(4)*1.0/expected(4)   - 1.0) > 5e-2: add_samples(10000)
sage: while abs(observed(-10)*1.0/expected(-10) - 1.0) > 5e-2: add_samples(10000)  # long time
>>> from sage.all import *
>>> expected = lambda x : ZZ(round(n*exp(-x**Integer(2)/(Integer(2)*sigma**Integer(2)))/norm_factor))
>>> observed = lambda x : counter[x]

>>> add_samples(Integer(10000))
>>> while abs(observed(Integer(0))*RealNumber('1.0')/expected(Integer(0))   - RealNumber('1.0')) > RealNumber('5e-2'): add_samples(Integer(10000))
>>> while abs(observed(Integer(4))*RealNumber('1.0')/expected(Integer(4))   - RealNumber('1.0')) > RealNumber('5e-2'): add_samples(Integer(10000))
>>> while abs(observed(-Integer(10))*RealNumber('1.0')/expected(-Integer(10)) - RealNumber('1.0')) > RealNumber('5e-2'): add_samples(Integer(10000))  # long time

We construct an instance with a larger width:

sage: from sage.stats.distributions.discrete_gaussian_integer import DiscreteGaussianDistributionIntegerSampler
sage: sigma = 127
sage: D = DiscreteGaussianDistributionIntegerSampler(sigma=sigma, algorithm='uniform+online')
>>> from sage.all import *
>>> from sage.stats.distributions.discrete_gaussian_integer import DiscreteGaussianDistributionIntegerSampler
>>> sigma = Integer(127)
>>> D = DiscreteGaussianDistributionIntegerSampler(sigma=sigma, algorithm='uniform+online')

ask for 100000 samples:

sage: from collections import defaultdict
sage: counter = defaultdict(Integer)
sage: n = 0
sage: def add_samples(i):
....:     global counter, n
....:     for _ in range(i):
....:         counter[D()] += 1
....:         n += 1

sage: add_samples(100000)
>>> from sage.all import *
>>> from collections import defaultdict
>>> counter = defaultdict(Integer)
>>> n = Integer(0)
>>> def add_samples(i):
...     global counter, n
...     for _ in range(i):
...         counter[D()] += Integer(1)
...         n += Integer(1)

>>> add_samples(Integer(100000))

and check if the proportions fit:

sage: expected = lambda x, y: (
....:     exp(-x^2/(2*sigma^2))/exp(-y^2/(2*sigma^2)).n())
sage: observed = lambda x, y: float(counter[x])/counter[y]

sage: while not all(v in counter for v in (0, 1, -100)): add_samples(10000)

sage: while abs(expected(0, 1) - observed(0, 1)) > 2e-1: add_samples(10000)
sage: while abs(expected(0, -100) - observed(0, -100)) > 2e-1: add_samples(10000)
>>> from sage.all import *
>>> expected = lambda x, y: (
...     exp(-x**Integer(2)/(Integer(2)*sigma**Integer(2)))/exp(-y**Integer(2)/(Integer(2)*sigma**Integer(2))).n())
>>> observed = lambda x, y: float(counter[x])/counter[y]

>>> while not all(v in counter for v in (Integer(0), Integer(1), -Integer(100))): add_samples(Integer(10000))

>>> while abs(expected(Integer(0), Integer(1)) - observed(Integer(0), Integer(1))) > RealNumber('2e-1'): add_samples(Integer(10000))
>>> while abs(expected(Integer(0), -Integer(100)) - observed(Integer(0), -Integer(100))) > RealNumber('2e-1'): add_samples(Integer(10000))

We construct a sampler with \(c\%1 != 0\):

sage: from sage.stats.distributions.discrete_gaussian_integer import DiscreteGaussianDistributionIntegerSampler
sage: sigma = 3
sage: D = DiscreteGaussianDistributionIntegerSampler(sigma=sigma, c=1/2)
sage: s = 0
sage: n = 0
sage: def add_samples(i):
....:     global s, n
....:     for _ in range(i):
....:         s += D()
....:         n += 1
....:
sage: add_samples(100000)
sage: while abs(float(s)/n - 0.5) > 5e-2: add_samples(10000)
>>> from sage.all import *
>>> from sage.stats.distributions.discrete_gaussian_integer import DiscreteGaussianDistributionIntegerSampler
>>> sigma = Integer(3)
>>> D = DiscreteGaussianDistributionIntegerSampler(sigma=sigma, c=Integer(1)/Integer(2))
>>> s = Integer(0)
>>> n = Integer(0)
>>> def add_samples(i):
...     global s, n
...     for _ in range(i):
...         s += D()
...         n += Integer(1)
....:
>>> add_samples(Integer(100000))
>>> while abs(float(s)/n - RealNumber('0.5')) > RealNumber('5e-2'): add_samples(Integer(10000))

REFERENCES:

class sage.stats.distributions.discrete_gaussian_integer.DiscreteGaussianDistributionIntegerSampler[source]

Bases: SageObject

A Discrete Gaussian Sampler using rejection sampling.

__init__(sigma, c=0, tau=6, algorithm=None, precision='mp')[source]

Construct a new sampler for a discrete Gaussian distribution.

INPUT:

  • sigma – samples \(x\) are accepted with probability proportional to \(\exp(-(x-c)²/(2σ²))\)

  • c – the mean of the distribution. The value of c does not have to be an integer. However, some algorithms only support integer-valued c (default: 0)

  • tau – samples outside the range \((⌊c⌉-⌈στ⌉,...,⌊c⌉+⌈στ⌉)\) are considered to have probability zero. This bound applies to algorithms which sample from the uniform distribution (default: 6)

  • algorithm – see list below (default: 'uniform+table' for

    \(σt\) bounded by DiscreteGaussianDistributionIntegerSampler.table_cutoff and 'uniform+online' for bigger \(στ\))

  • precision – either 'mp' for multi-precision where the actual precision used is taken from sigma or 'dp' for double precision. In the latter case results are not reproducible. (default: 'mp')

ALGORITHMS:

  • 'uniform+table' – classical rejection sampling, sampling from the uniform distribution and accepted with probability proportional to \(\exp(-(x-c)²/(2σ²))\) where \(\exp(-(x-c)²/(2σ²))\) is precomputed and stored in a table. Any real-valued \(c\) is supported.

  • 'uniform+logtable' – samples are drawn from a uniform distribution and accepted with probability proportional to \(\exp(-(x-c)²/(2σ²))\) where \(\exp(-(x-c)²/(2σ²))\) is computed using logarithmically many calls to Bernoulli distributions. See [DDLL2013] for details. Only integer-valued \(c\) are supported.

  • 'uniform+online' – samples are drawn from a uniform distribution and accepted with probability proportional to \(\exp(-(x-c)²/(2σ²))\) where \(\exp(-(x-c)²/(2σ²))\) is computed in each invocation. Typically this is very slow. See [DDLL2013] for details. Any real-valued \(c\) is accepted.

  • 'sigma2+logtable' – samples are drawn from an easily samplable distribution with \(σ = k·σ_2\) with \(σ_2 = \sqrt{1/(2\log 2)}\) and accepted with probability proportional to \(\exp(-(x-c)²/(2σ²))\) where \(\exp(-(x-c)²/(2σ²))\) is computed using logarithmically many calls to Bernoulli distributions (but no calls to \(\exp\)). See [DDLL2013] for details. Note that this sampler adjusts \(σ\) to match \(k·σ_2\) for some integer \(k\). Only integer-valued \(c\) are supported.

EXAMPLES:

sage: from sage.stats.distributions.discrete_gaussian_integer import DiscreteGaussianDistributionIntegerSampler
sage: DiscreteGaussianDistributionIntegerSampler(3.0, algorithm='uniform+online')
Discrete Gaussian sampler over the Integers with sigma = 3.000000 and c = 0.000000
sage: DiscreteGaussianDistributionIntegerSampler(3.0, algorithm='uniform+table')
Discrete Gaussian sampler over the Integers with sigma = 3.000000 and c = 0.000000
sage: DiscreteGaussianDistributionIntegerSampler(3.0, algorithm='uniform+logtable')
Discrete Gaussian sampler over the Integers with sigma = 3.000000 and c = 0.000000
>>> from sage.all import *
>>> from sage.stats.distributions.discrete_gaussian_integer import DiscreteGaussianDistributionIntegerSampler
>>> DiscreteGaussianDistributionIntegerSampler(RealNumber('3.0'), algorithm='uniform+online')
Discrete Gaussian sampler over the Integers with sigma = 3.000000 and c = 0.000000
>>> DiscreteGaussianDistributionIntegerSampler(RealNumber('3.0'), algorithm='uniform+table')
Discrete Gaussian sampler over the Integers with sigma = 3.000000 and c = 0.000000
>>> DiscreteGaussianDistributionIntegerSampler(RealNumber('3.0'), algorithm='uniform+logtable')
Discrete Gaussian sampler over the Integers with sigma = 3.000000 and c = 0.000000

Note that 'sigma2+logtable' adjusts \(σ\):

sage: DiscreteGaussianDistributionIntegerSampler(3.0, algorithm='sigma2+logtable')
Discrete Gaussian sampler over the Integers with sigma = 3.397287 and c = 0.000000
>>> from sage.all import *
>>> DiscreteGaussianDistributionIntegerSampler(RealNumber('3.0'), algorithm='sigma2+logtable')
Discrete Gaussian sampler over the Integers with sigma = 3.397287 and c = 0.000000
__call__()[source]

Return a new sample.

EXAMPLES:

sage: from sage.stats.distributions.discrete_gaussian_integer import DiscreteGaussianDistributionIntegerSampler
sage: DiscreteGaussianDistributionIntegerSampler(3.0, algorithm='uniform+online')()  # random
-3
sage: DiscreteGaussianDistributionIntegerSampler(3.0, algorithm='uniform+table')()  # random
3
>>> from sage.all import *
>>> from sage.stats.distributions.discrete_gaussian_integer import DiscreteGaussianDistributionIntegerSampler
>>> DiscreteGaussianDistributionIntegerSampler(RealNumber('3.0'), algorithm='uniform+online')()  # random
-3
>>> DiscreteGaussianDistributionIntegerSampler(RealNumber('3.0'), algorithm='uniform+table')()  # random
3
algorithm[source]
c[source]
sigma[source]
table_cutoff = 1000000
tau[source]