Source code for Tars.distributions.multiple

import theano
import theano.tensor as T

from ..utils import tolist


class Concatenate(object):
    """
    This distribution is used to concatenate different distributions
    along their feature axis, so that multiple distributions can be
    handled as a single distribution when sampling from them or
    estimating their log-likelihood. Every distribution must have the
    same given variables.

    Parameters
    ----------
    distributions : list
        Contains multiple distributions.

    Examples
    --------
    >>> from Tars.distributions import Concatenate, Gaussian, Bernoulli
    >>> gauss = Gaussian(mean, var, given=[x])
    >>> bernoulli = Bernoulli(mean, given=[x])
    >>> concat = Concatenate([gauss, bernoulli])
    """

    def __init__(self, distributions):
        self.distributions = distributions
        self.outputs_dim = 0
        for d in self.distributions:
            if self.distributions[0].given != d.given:
                raise ValueError("Every distribution must have the same "
                                 "given variables.")
            self.outputs_dim += d.get_output_shape()[-1]
        self.inputs = self.distributions[0].inputs

    def get_params(self):
        params = []
        for d in self.distributions:
            params += d.get_params()
        return params

    def fprop(self, x, *args, **kwargs):
        """
        Parameters
        ----------
        x : list
            Contains Theano variables. They must correspond to the
            'given' variables shared by all 'distributions'.
        """
        samples = []
        for d in self.distributions:
            samples.append(d.fprop(x, *args, **kwargs))
        return T.concatenate(samples, axis=-1)

    def sample_given_x(self, x, srng, **kwargs):
        samples = []
        for d in self.distributions:
            samples.append(
                d.sample_given_x(x, srng, **kwargs)[-1])
        return [x, T.concatenate(samples, axis=-1)]

    def sample_mean_given_x(self, x, *args, **kwargs):
        samples = []
        for d in self.distributions:
            samples.append(d.sample_mean_given_x(
                x, *args, **kwargs)[-1])
        return [x, T.concatenate(samples, axis=-1)]

    def log_likelihood_given_x(self, samples, **kwargs):
        """
        Parameters
        ----------
        samples : list
            Contains 'x' (a list of Theano variables) and a test
            sample. The feature dimension of the test sample must be
            the same as 'outputs_dim'.

        Returns
        -------
        Theano variable, shape (n_samples,)
            A log-likelihood, log p(sample|x).
        """
        x, sample = samples
        # Note: 'sample' is symbolic, so its feature dimension cannot
        # be validated at graph-construction time; it is assumed to
        # equal 'outputs_dim'.
        loglikes = 0
        start = 0
        for d in self.distributions:
            output_dim = d.get_output_shape()[-1]
            loglikes += d.log_likelihood_given_x(
                [x, sample[:, start:start + output_dim]], **kwargs)
            start += output_dim
        return loglikes

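# --- Illustrative sketch (not part of the original module) ---
# The slicing that log_likelihood_given_x performs can be checked with
# plain Theano, independent of any Tars distribution. The dims [3, 2]
# below are hypothetical per-distribution output dims; the snippet is
# commented out so that importing this module stays side-effect free:
#
# import numpy as np
# import theano
# import theano.tensor as T
#
# sample = T.matrix('sample')                  # shape (n_samples, 5)
# blocks, start = [], 0
# for output_dim in [3, 2]:                    # get_output_shape()[-1] per distribution
#     blocks.append(sample[:, start:start + output_dim])
#     start += output_dim
# split = theano.function([sample], blocks)
# split(np.arange(10, dtype=theano.config.floatX).reshape(2, 5))
# # -> a (2, 3) block and a (2, 2) block, one per distribution
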
class MultiDistributions(object):
    """
    This distribution is used to stack multiple distributions, i.e.
    p(x|z) = p(x|z1)p(z1|z2)...p(zn-1|zn). If the distributions are
    approximate distributions, the corresponding stacked distribution
    becomes q(z|x) = q(zn|zn-1)...q(z2|z1)q(z1|x). If the stacked
    distribution is conditioned on y, the corresponding mean-field
    approximation becomes p(x|z,y) = p(x|z1)p(z1|z2)...p(zn-1|zn,y),
    or q(z|x,y) = q(zn|zn-1)...q(z2|z1)q(z1|x,y). So far, each
    distribution except the first layer cannot have more than one
    conditioned variable.

    Parameters
    ----------
    distributions : list
        Contains multiple distributions.

    Examples
    --------
    >>> from Tars.distributions import MultiDistributions
    >>> from Tars.distributions import Gaussian, Bernoulli
    >>> gauss = Gaussian(mean, var, given=[x])
    >>> bernoulli = Bernoulli(mean, given=[z])
    >>> multi = MultiDistributions([gauss, bernoulli])
    """

    def __init__(self, distributions, approximate=True):
        self.distributions = distributions
        self.given = self.distributions[0].given
        self.inputs = self.distributions[0].inputs
        self.output = self.distributions[-1].output
        self.get_input_shape = self.distributions[0].get_input_shape
        self.get_output_shape = self.distributions[-1].get_output_shape
        self.approximate = approximate
        self._set_theano_func()

        for i, d in enumerate(distributions[1:]):
            if len(d.given) != 1:
                raise ValueError("So far, each distribution except the "
                                 "first layer cannot have more than one "
                                 "conditioned variable.")
            if distributions[i].get_output_shape() != d.given[0].shape:
                raise ValueError("The output shape of a distribution must "
                                 "be the same as the input shape of the "
                                 "next layer's distribution.")

    def get_params(self):
        params = []
        for d in self.distributions:
            params += d.get_params()
        return params

    def _sample(self, x, layer_id, repeat=1, **kwargs):
        """
        Parameters
        ----------
        x : list
            Contains Theano variables.

        Returns
        -------
        list
            Contains 'x' and samples, i.e. [x, z1, ..., zn-1].
        """
        samples = [[T.extra_ops.repeat(_x, repeat, axis=0) for _x in x]]
        for i, d in enumerate(self.distributions[:layer_id]):
            sample = d.sample_given_x(
                tolist(samples[i]), **kwargs)
            samples.append(sample[-1])
        return samples

    def _sample_mean(self, x, layer_id, **kwargs):
        """
        Parameters
        ----------
        x : list
            Contains Theano variables.

        Returns
        -------
        list
            Contains 'x' and samples, i.e. [x, z1, ..., zn-1].
        """
        samples = [x]
        for i, d in enumerate(self.distributions[:layer_id]):
            sample = d.sample_mean_given_x(
                tolist(samples[i]), **kwargs)
            samples.append(sample[-1])
        return samples

    def _approx_sample(self, x, layer_id, repeat=1, **kwargs):
        """
        Parameters
        ----------
        x : list
            Contains Theano variables.

        Returns
        -------
        list
            Contains 'x' and samples, i.e. [x, z1, ..., zn-1].
        """
        mean = x
        samples = [[T.extra_ops.repeat(_x, repeat, axis=0) for _x in x]]
        for d in self.distributions[:layer_id]:
            sample = d.sample_given_x(
                tolist(mean), repeat=repeat, **kwargs)
            samples.append(sample[-1])
            mean = d.sample_mean_given_x(
                tolist(mean), **kwargs)[-1]
        return samples, mean

    def fprop(self, x, layer_id=-1, *args, **kwargs):
        """
        Parameters
        ----------
        x : list
            Contains Theano variables.

        Returns
        -------
        mean : Theano variable
            The output of this distribution.
        """
        if self.approximate:
            output = self._sample_mean(x, layer_id, **kwargs)[-1]
        else:
            output = self._sample(x, layer_id, **kwargs)[-1]
        mean = self.distributions[layer_id].fprop(
            tolist(output), *args, **kwargs)
        return mean

    def sample_given_x(self, x, layer_id=-1, repeat=1, **kwargs):
        """
        Parameters
        ----------
        x : list
            Contains Theano variables, which must correspond to the
            'given' variables of the first layer's distribution.
        repeat : int or Theano variable

        Returns
        -------
        list
            Contains 'x' and samples, i.e. [x, z1, ..., zn].
        """
        if self.approximate:
            samples, mean = self._approx_sample(x, layer_id,
                                                repeat=repeat, **kwargs)
            samples += self.distributions[layer_id].sample_given_x(
                tolist(mean), repeat=repeat, **kwargs)[-1:]
        else:
            samples = self._sample(x, layer_id, repeat=repeat, **kwargs)
            samples += self.distributions[layer_id].sample_given_x(
                tolist(samples[-1]), repeat=repeat, **kwargs)[-1:]
        return samples

    def sample_mean_given_x(self, x, layer_id=-1, *args, **kwargs):
        """
        Parameters
        ----------
        x : list
            Contains Theano variables, which must correspond to
            'given'.

        Returns
        -------
        list
            Contains 'x', samples, and the mean of the last sample,
            i.e. [x, z1, ..., zn_mean].
        """
        if self.approximate:
            mean = self._sample_mean(x, layer_id, **kwargs)
        else:
            mean = self._sample(x, layer_id, **kwargs)
        mean += self.distributions[layer_id].sample_mean_given_x(
            tolist(mean[-1]), *args, **kwargs)[-1:]
        return mean

    def log_likelihood_given_x(self, samples, **kwargs):
        """
        Parameters
        ----------
        samples : list
            Contains 'x' (a list of Theano variables) and test
            samples, such as z1, z2, ..., zn.

        Returns
        -------
        Theano variable, shape (n_samples,)
            log-likelihood (q) : log q(z1|[x,y,...]) + ... + log q(zn|zn-1)
            log-likelihood (p) : log p(zn-1|[zn,y,...]) + ... + log p(x|z1)
        """
        all_log_likelihood = 0
        for x, sample, d in zip(samples, samples[1:], self.distributions):
            log_likelihood = d.log_likelihood_given_x([tolist(x), sample],
                                                      **kwargs)
            all_log_likelihood += log_likelihood
        return all_log_likelihood

    def _set_theano_func(self):
        x = self.inputs
        samples = self.fprop(x, layer_id=-1, deterministic=True)
        self.np_fprop = theano.function(inputs=x,
                                        outputs=samples,
                                        on_unused_input='ignore')

        samples = self.sample_mean_given_x(x, layer_id=-1,
                                           deterministic=True)
        self.np_sample_mean_given_x = theano.function(
            inputs=x, outputs=samples[-1], on_unused_input='ignore')

        samples = self.sample_given_x(x, layer_id=-1, deterministic=True)
        self.np_sample_given_x = theano.function(
            inputs=x, outputs=samples[-1], on_unused_input='ignore')

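# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal, hypothetical two-layer stack q(z2|z1)q(z1|x). The names
# 'x', 'z1', 'mean_net*' and 'var_net*' are placeholder networks and
# input layers, not definitions from this module:
#
# q1 = Gaussian(mean_net1, var_net1, given=[x])    # q(z1|x)
# q2 = Gaussian(mean_net2, var_net2, given=[z1])   # q(z2|z1)
# q = MultiDistributions([q1, q2])                 # q(z2|x)
#
# samples = q.sample_given_x(q.inputs)             # -> [x, z1, z2]
# loglike = q.log_likelihood_given_x(samples)      # log q(z1|x) + log q(z2|z1)
#
# With approximate=True (the default), each intermediate layer is fed
# the *mean* of the previous layer (see _approx_sample); with
# approximate=False, raw samples are propagated instead (see _sample).
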
class MultiPriorDistributions(MultiDistributions):
    """
    p(z) = p(zn,z'n)p(zn-1|zn,z'n)...p(z1|z2).

    Parameters
    ----------
    distributions : list
        Contains multiple distributions.

    Examples
    --------
    >>> from Tars.distributions import MultiPriorDistributions
    >>> from Tars.distributions import Gaussian, Bernoulli
    >>> gauss = Gaussian(mean, var, given=[z2])
    >>> bernoulli = Bernoulli(mean, given=[z1])
    >>> multi = MultiPriorDistributions([gauss, bernoulli])
    """

    def __init__(self, distributions, prior=None):
        self.prior = tolist(prior)
        super(MultiPriorDistributions, self).__init__(distributions,
                                                      approximate=False)

    def log_likelihood_given_x(self, samples, add_prior=True, **kwargs):
        """
        Parameters
        ----------
        samples : list
            Contains 'x' (a list of Theano variables) and test
            samples, such as z1, z2, ..., zn.

        Returns
        -------
        Theano variable, shape (n_samples,)
            log-likelihood :
                add_prior=True :
                    log p(zn,z'n) + log p(zn-1|zn,z'n) + ... + log p(z2|z1)
                add_prior=False :
                    log p(zn-1|zn,z'n) + ... + log p(z2|z1)
        """
        all_log_likelihood = 0
        for x, sample, d in zip(samples, samples[1:], self.distributions):
            log_likelihood = d.log_likelihood_given_x([tolist(x), sample],
                                                      **kwargs)
            all_log_likelihood += log_likelihood

        if add_prior:
            for i, prior in enumerate(self.prior):
                prior_samples = samples[0][i]
                all_log_likelihood += prior.log_likelihood(prior_samples)
        return all_log_likelihood
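
# --- Usage sketch (illustrative, not part of the original module) ---
# A hypothetical chain p(z1|z2)p(z2) whose top latent gets an explicit
# prior. 'UnitGaussian' is assumed to expose log_likelihood(), matching
# the prior.log_likelihood() call above; the network and variable names
# are placeholders:
#
# p1 = Gaussian(mean_net, var_net, given=[z2])     # p(z1|z2)
# prior = UnitGaussian()                           # p(z2)
# p = MultiPriorDistributions([p1], prior=prior)
#
# samples = [[z2_sample], z1_sample]               # [x, z1] with x = [z2]
# ll = p.log_likelihood_given_x(samples)           # log p(z1|z2) + log p(z2)
#
# With add_prior=False the log p(z2) term is dropped, which is useful
# when the prior term is handled separately in a model's lower bound.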