Poplar and PopLibs
BatchNorm.hpp File Reference

Batch normalization operations. More...

#include "poplar/DebugContext.hpp"
#include "poplar/Program.hpp"
#include "poplar/Tensor.hpp"
#include "poplin/Norms.hpp"
#include <utility>


Namespaces

namespace  popnn
 Functions used in neural networks.
 

Functions

std::pair< poplar::Tensor, poplar::Tensor > popnn::bn::batchNormStatistics (poplar::Graph &graph, const poplar::Tensor acts, float eps, poplar::program::Sequence &prog, bool unbiasedVarEstimate, bool stableAlgo=false, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
 Estimate mean and inverse of standard deviation of batched activations. More...
 
std::pair< poplar::Tensor, poplar::Tensor > popnn::bn::distributedBatchNormStatistics (poplar::Graph &replicatedGraph, const poplar::Tensor acts, float eps, poplar::program::Sequence &prog, bool unbiasedVarEstimate, poplin::DistributedNormReduceCallback reduceCallback, unsigned normBatchSize, bool stableAlgo=false, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
 Compute the batch normalisation statistics for a part of the activations tensor. More...
 
poplar::Tensor popnn::bn::batchNormWhiten (poplar::Graph &graph, const poplar::Tensor &acts, const poplar::Tensor &mean, const poplar::Tensor &invStdDev, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
 Whiten activations given the mean and standard deviation. More...
 
std::pair< poplar::Tensor, poplar::Tensor > popnn::bn::batchNormalise (poplar::Graph &graph, const poplar::Tensor &acts, const poplar::Tensor &gamma, const poplar::Tensor &beta, const poplar::Tensor &mean, const poplar::Tensor &invStdDev, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
 Batch normalise the activations using the given mean, standard deviation and batch norm parameters. More...
 
poplar::Tensor popnn::bn::batchNormalise (poplar::Graph &graph, const poplar::Tensor &acts, const poplar::Tensor &combinedMultiplicand, const poplar::Tensor &addend, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
 Computes the batch normalisation from a combined multiplicand and addend. More...
 
std::pair< poplar::Tensor, poplar::Tensor > popnn::bn::batchNormParamGradients (poplar::Graph &graph, const poplar::Tensor &acts, const poplar::Tensor &gradsIn, const poplar::Tensor &mean, const poplar::Tensor &iStdDev, poplar::program::Sequence &prog, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
 Compute gradients with respect to parameters required for parameter update. More...
 
std::pair< poplar::Tensor, poplar::Tensor > popnn::bn::batchNormParamGradients (poplar::Graph &graph, const poplar::Tensor &actsWhitened, const poplar::Tensor &gradsIn, poplar::program::Sequence &prog, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
 Compute gradients with respect to parameters required for parameter update. More...
 
poplar::Tensor popnn::bn::batchNormGradients (poplar::Graph &graph, const poplar::Tensor &acts, const poplar::Tensor &gradsIn, const poplar::Tensor &mean, const poplar::Tensor &invStdDev, const poplar::Tensor &gamma, poplar::program::Sequence &prog, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
 Compute gradients with respect to input activations for the batch norm layer. More...
 
poplar::Tensor popnn::bn::batchNormGradients (poplar::Graph &graph, const poplar::Tensor &actsWhitened, const poplar::Tensor &gradsIn, const poplar::Tensor &invStdDev, const poplar::Tensor &gamma, poplar::program::Sequence &prog, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
 Compute gradients with respect to input activations for the batch norm layer. More...
 
poplar::Tensor popnn::bn::distributedBatchNormGradients (poplar::Graph &replicatedGraph, const poplar::Tensor &actsWhitened, const poplar::Tensor &gradsIn, const poplar::Tensor &invStdDev, const poplar::Tensor &gamma, poplar::program::Sequence &prog, poplin::DistributedNormReduceCallback reduceCallback, unsigned normBatchSize, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
 Propagate the gradients through the batch norm layer where equal-sized batch elements are distributed over replicas to effectively compute the batch norm over normBatchSize elements. More...
 
poplar::Tensor popnn::bn::distributedBatchNormGradients (poplar::Graph &replicatedGraph, const poplar::Tensor &acts, const poplar::Tensor &gradsIn, const poplar::Tensor &mean, const poplar::Tensor &invStdDev, const poplar::Tensor &gamma, poplar::program::Sequence &prog, poplin::DistributedNormReduceCallback reduceCallback, unsigned normBatchSize, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
 Propagate the gradients through the batch norm layer where equal-sized batch elements are distributed over replicas to effectively compute the batch norm over normBatchSize elements. More...
 
void popnn::bn::batchNormParamUpdate (poplar::Graph &graph, const poplar::Tensor &gammaDelta, const poplar::Tensor &betaDelta, float scale, poplar::Tensor &gamma, poplar::Tensor &beta, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
 Update the parameters for the batch norm layer. More...
 
void popnn::bn::batchNormParamUpdate (poplar::Graph &graph, const poplar::Tensor &gammaDelta, const poplar::Tensor &betaDelta, const poplar::Tensor &scale, poplar::Tensor &gamma, poplar::Tensor &beta, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={})
 Update parameters for the batch norm layer. More...
 

Detailed Description

Batch normalization operations.

Function Documentation

◆ batchNormalise() [1/2]

poplar::Tensor popnn::bn::batchNormalise ( poplar::Graph &graph,
const poplar::Tensor &acts,
const poplar::Tensor &combinedMultiplicand,
const poplar::Tensor &addend,
poplar::program::Sequence &prog,
const poplar::DebugContext &debugContext = {},
const poplar::OptionFlags &options = {}
)

Computes the batch normalisation from a combined multiplicand and addend.

Parameters
graph - The graph that the normalisation operation is added to.
acts - The input activations that will be normalised using the combined multiplicand and addend.
combinedMultiplicand - Equal to gamma * invStdDev.
addend - Equal to beta - (gamma * mean * invStdDev).
prog - The program sequence to add the operation to.
debugContext - Optional debug information.
options - Batch normalisation options. Presently, there are no options that affect the operation of batch norm.
Returns
A new tensor with the normalised activations.

◆ batchNormalise() [2/2]

std::pair< poplar::Tensor, poplar::Tensor > popnn::bn::batchNormalise ( poplar::Graph &graph,
const poplar::Tensor &acts,
const poplar::Tensor &gamma,
const poplar::Tensor &beta,
const poplar::Tensor &mean,
const poplar::Tensor &invStdDev,
poplar::program::Sequence &prog,
const poplar::DebugContext &debugContext = {},
const poplar::OptionFlags &options = {}
)

Batch normalise the activations using the given mean, standard deviation and batch norm parameters.

Parameters
graph - The graph that the normalisation operation is added to.
acts - The input activations to whiten and normalise, with shape [B][C][..F..] where:
  • B is the batch size
  • C is the number of channels
  • ..F.. are the dimensions of an N-dimensional field.
gamma - The gamma weights to multiply by when normalising the whitened activations.
beta - The beta weights to add when normalising the whitened activations.
mean - The mean to subtract when whitening the activations.
invStdDev - The inverse standard deviation to multiply by when whitening the activations.
prog - The program sequence to add the operation to.
debugContext - Optional debug information.
options - Batch normalisation options. Presently, there are no options that affect the operation of batch norm.
Returns
Two tensors containing:
  • normalised activations
  • whitened activations

◆ batchNormGradients() [1/2]

poplar::Tensor popnn::bn::batchNormGradients ( poplar::Graph &graph,
const poplar::Tensor &acts,
const poplar::Tensor &gradsIn,
const poplar::Tensor &mean,
const poplar::Tensor &invStdDev,
const poplar::Tensor &gamma,
poplar::program::Sequence &prog,
const poplar::Type &partialsType = poplar::FLOAT,
const poplar::DebugContext &debugContext = {},
const poplar::OptionFlags &options = {}
)

Compute gradients with respect to input activations for the batch norm layer.

Gradients are propagated through the complete layer including statistics computation.

Parameters
graph - The graph that the normalisation operation is added to.
acts - The forward-pass activation inputs to this layer.
gradsIn - The gradient with respect to the output of this layer.
mean - The mean of the acts tensor, typically calculated using batchNormStatistics().
invStdDev - The inverse standard deviation of the acts tensor, typically calculated using batchNormStatistics().
gamma - The gamma weights to multiply by when normalising the whitened activations.
prog - The program sequence to add the operation to.
partialsType - Poplar type used for partials. If the type specified is smaller than the input/output type then partialsType is ignored and the input/output type is used instead.
debugContext - Optional debug information.
options - Batch normalisation options. See batchNormalise().
Returns
A tensor containing the gradients with respect to the input activations for this layer.

◆ batchNormGradients() [2/2]

poplar::Tensor popnn::bn::batchNormGradients ( poplar::Graph &graph,
const poplar::Tensor &actsWhitened,
const poplar::Tensor &gradsIn,
const poplar::Tensor &invStdDev,
const poplar::Tensor &gamma,
poplar::program::Sequence &prog,
const poplar::Type &partialsType = poplar::FLOAT,
const poplar::DebugContext &debugContext = {},
const poplar::OptionFlags &options = {}
)

Compute gradients with respect to input activations for the batch norm layer.

Gradients are propagated through the complete layer including statistics computation.

Parameters
graph - The graph that the normalisation operation is added to.
actsWhitened - The forward-pass whitened activation inputs to this layer.
gradsIn - The gradient with respect to the output of this layer.
invStdDev - The inverse standard deviation to multiply by when whitening the activations.
gamma - The gamma weights to multiply by when normalising the whitened activations.
prog - The program sequence to add the operation to.
partialsType - Poplar type used for partials. If the type specified is smaller than the input/output type then partialsType is ignored and the input/output type is used instead.
debugContext - Optional debug information.
options - Batch normalisation options. See batchNormalise().
Returns
A tensor containing the gradients with respect to the input activations for this layer.

◆ batchNormParamGradients() [1/2]

std::pair< poplar::Tensor, poplar::Tensor > popnn::bn::batchNormParamGradients ( poplar::Graph &graph,
const poplar::Tensor &acts,
const poplar::Tensor &gradsIn,
const poplar::Tensor &mean,
const poplar::Tensor &iStdDev,
poplar::program::Sequence &prog,
const poplar::Type &partialsType = poplar::FLOAT,
const poplar::DebugContext &debugContext = {},
const poplar::OptionFlags &options = {}
)

Compute gradients with respect to parameters required for parameter update.

Parameters
graph - The graph that the normalisation operation is added to.
acts - The forward-pass activation inputs to this layer.
gradsIn - The gradient with respect to the output of this layer.
mean - The mean of the acts tensor, typically calculated using batchNormStatistics().
iStdDev - The inverse standard deviation of the acts tensor, typically calculated using batchNormStatistics().
prog - The program sequence to add the operation to.
partialsType - Poplar type used for partials. If the type specified is smaller than the input/output type then partialsType is ignored and the input/output type is used instead.
debugContext - Optional debug information.
options - Batch normalisation options. See batchNormalise().
Returns
A pair of tensors, gammaDelta and betaDelta, which are the gradients with respect to gamma and beta.

◆ batchNormParamGradients() [2/2]

std::pair< poplar::Tensor, poplar::Tensor > popnn::bn::batchNormParamGradients ( poplar::Graph &graph,
const poplar::Tensor &actsWhitened,
const poplar::Tensor &gradsIn,
poplar::program::Sequence &prog,
const poplar::Type &partialsType = poplar::FLOAT,
const poplar::DebugContext &debugContext = {},
const poplar::OptionFlags &options = {}
)

Compute gradients with respect to parameters required for parameter update.

Parameters
graph - The graph that the normalisation operation is added to.
actsWhitened - The forward-pass whitened activation inputs to this layer.
gradsIn - The gradient with respect to the output of this layer.
prog - The program sequence to add the operation to.
partialsType - Poplar type used for partials. If the type specified is smaller than the input/output type then partialsType is ignored and the input/output type is used instead.
debugContext - Optional debug information.
options - Batch normalisation options. See batchNormalise().
Returns
A pair of tensors, gammaDelta and betaDelta, which are the gradients with respect to gamma and beta.

◆ batchNormParamUpdate() [1/2]

void popnn::bn::batchNormParamUpdate ( poplar::Graph &graph,
const poplar::Tensor &gammaDelta,
const poplar::Tensor &betaDelta,
const poplar::Tensor &scale,
poplar::Tensor &gamma,
poplar::Tensor &beta,
poplar::program::Sequence &prog,
const poplar::DebugContext &debugContext = {},
const poplar::OptionFlags &options = {}
)

Update parameters for the batch norm layer.

Gradients are propagated through the complete layer including statistics computation.

The gamma and beta parameters are updated as follows:

  • gamma += gammaDelta * scale
  • beta += betaDelta * scale

scale is a tensor and therefore variable.

Parameters
graph - The graph that the normalisation operation is added to.
gammaDelta - Value used to update gamma.
betaDelta - Value used to update beta.
scale - Scale factor for gammaDelta and betaDelta.
gamma - The gamma weights to multiply by when normalising the activations.
beta - The beta weights to add when normalising the activations.
prog - The program sequence to add the operation to.
debugContext - Optional debug information.
options - Batch normalisation options. See batchNormalise().

◆ batchNormParamUpdate() [2/2]

void popnn::bn::batchNormParamUpdate ( poplar::Graph &graph,
const poplar::Tensor &gammaDelta,
const poplar::Tensor &betaDelta,
float scale,
poplar::Tensor &gamma,
poplar::Tensor &beta,
poplar::program::Sequence &prog,
const poplar::DebugContext &debugContext = {},
const poplar::OptionFlags &options = {}
)

Update the parameters for the batch norm layer.

Gradients are propagated through the complete layer including statistics computation.

The gamma and beta parameters are updated as follows:

  • gamma += gammaDelta * scale
  • beta += betaDelta * scale

scale is a float and therefore constant.

Parameters
graph - The graph that the normalisation operation is added to.
gammaDelta - Value used to update gamma.
betaDelta - Value used to update beta.
scale - Scale factor for gammaDelta and betaDelta.
gamma - The gamma weights to multiply by when normalising the activations.
beta - The beta weights to add when normalising the activations.
prog - The program sequence to add the operation to.
debugContext - Optional debug information.
options - Batch normalisation options. See batchNormalise().

◆ batchNormStatistics()

std::pair< poplar::Tensor, poplar::Tensor > popnn::bn::batchNormStatistics ( poplar::Graph &graph,
const poplar::Tensor acts,
float eps,
poplar::program::Sequence &prog,
bool unbiasedVarEstimate,
bool stableAlgo = false,
const poplar::Type &partialsType = poplar::FLOAT,
const poplar::DebugContext &debugContext = {},
const poplar::OptionFlags &options = {}
)

Estimate mean and inverse of standard deviation of batched activations.

Parameters
graph - The graph that the normalisation operation is added to.
acts - The activations for which the mean and variance are estimated.
eps - The epsilon value added to the variance to avoid division by zero.
prog - The program sequence to add the operation to.
unbiasedVarEstimate - If true, an unbiased variance estimate will be computed.
stableAlgo - If true, computes the mean first and subtracts it from the activations before computing the variance. The implementation with this flag set to true is slower than when set to false.
partialsType - Poplar type used for partials. If the type specified is smaller than the input/output type then partialsType is ignored and the input/output type is used instead.
debugContext - Optional debug information.
options - Batch normalisation options. See batchNormalise().
Returns
A vector pair with the mean and inverse standard deviation.

◆ batchNormWhiten()

poplar::Tensor popnn::bn::batchNormWhiten ( poplar::Graph &graph,
const poplar::Tensor &acts,
const poplar::Tensor &mean,
const poplar::Tensor &invStdDev,
poplar::program::Sequence &prog,
const poplar::DebugContext &debugContext = {},
const poplar::OptionFlags &options = {}
)

Whiten activations given the mean and standard deviation.

Parameters
graph - The graph that the normalisation operation is added to.
acts - The input activations that will be whitened.
mean - The previously calculated mean to subtract from the activations. Typically calculated using batchNormStatistics().
invStdDev - The previously calculated inverse standard deviation to multiply the activations by. Typically calculated using batchNormStatistics().
prog - The program sequence to add the operation to.
debugContext - Optional debug information.
options - Batch normalisation options. See batchNormalise().
Returns
A new tensor with the whitened activations.

◆ distributedBatchNormGradients() [1/2]

poplar::Tensor popnn::bn::distributedBatchNormGradients ( poplar::Graph &replicatedGraph,
const poplar::Tensor &acts,
const poplar::Tensor &gradsIn,
const poplar::Tensor &mean,
const poplar::Tensor &invStdDev,
const poplar::Tensor &gamma,
poplar::program::Sequence &prog,
poplin::DistributedNormReduceCallback reduceCallback,
unsigned normBatchSize,
const poplar::Type &partialsType = poplar::FLOAT,
const poplar::DebugContext &debugContext = {},
const poplar::OptionFlags &options = {}
)

Propagate the gradients through the batch norm layer where equal-sized batch elements are distributed over replicas to effectively compute the batch norm over normBatchSize elements.

Each replica gets the same number of batch elements (N), with normBatchSize = N * number-of-devices.

A callback does the required reduction over the replicas the norm is spread over.

The input to the layer is the output gradients from the normalisation layer. The activations and the input gradients must have undergone a prior rearrangement such that the channel dimension has the same elements as invStdDev. The activations are whitened within the function by applying the mean and invStdDev.

Parameters
replicatedGraph - The replicated graph to which the normalisation operation is added.
acts - The forward-pass activation inputs to this layer.
gradsIn - The gradient with respect to the output of this layer.
mean - The mean of the acts tensor, typically calculated using batchNormStatistics().
invStdDev - The inverse standard deviation of the acts tensor, typically calculated using batchNormStatistics().
gamma - The gamma weights to multiply by when normalising the whitened activations.
prog - A program sequence that the code to perform the normalisation will be appended to.
reduceCallback - A callback to perform all-reduce of the statistics gradients.
normBatchSize - The batch size over which the norm is done.
partialsType - Poplar type used for partials. If the type specified is smaller than the input/output type then partialsType is ignored and the input/output type is used instead.
debugContext - Optional debug information.
Returns
A tensor containing the gradients with respect to the input activations for this layer.

◆ distributedBatchNormGradients() [2/2]

poplar::Tensor popnn::bn::distributedBatchNormGradients ( poplar::Graph &replicatedGraph,
const poplar::Tensor &actsWhitened,
const poplar::Tensor &gradsIn,
const poplar::Tensor &invStdDev,
const poplar::Tensor &gamma,
poplar::program::Sequence &prog,
poplin::DistributedNormReduceCallback reduceCallback,
unsigned normBatchSize,
const poplar::Type &partialsType = poplar::FLOAT,
const poplar::DebugContext &debugContext = {},
const poplar::OptionFlags &options = {}
)

Propagate the gradients through the batch norm layer where equal-sized batch elements are distributed over replicas to effectively compute the batch norm over normBatchSize elements.

Each replica gets the same number of batch elements (N), with normBatchSize = N * number-of-devices.

A callback does the required reduction over the replicas the norm is spread over.

The input to the layer is the output gradients from the normalisation layer. The whitened activations and the input gradients must have undergone a prior rearrangement such that the channel dimension has the same elements as invStdDev.

Parameters
replicatedGraph - The replicated graph to which the normalisation operation is added.
actsWhitened - The forward-pass whitened activation inputs to this layer.
gradsIn - The gradient with respect to the output of this layer.
invStdDev - The inverse standard deviation of the acts tensor, typically calculated using batchNormStatistics().
gamma - The gamma weights to multiply by when normalising the whitened activations.
prog - A program sequence that the code to perform the normalisation will be appended to.
reduceCallback - A callback to perform all-reduce of the statistics gradients.
normBatchSize - The batch size over which the norm is done.
partialsType - Poplar type used for partials. If the type specified is smaller than the input/output type then partialsType is ignored and the input/output type is used instead.
debugContext - Optional debug information.
Returns
A tensor containing the gradients with respect to the input activations for this layer.

◆ distributedBatchNormStatistics()

std::pair< poplar::Tensor, poplar::Tensor > popnn::bn::distributedBatchNormStatistics ( poplar::Graph &replicatedGraph,
const poplar::Tensor acts,
float eps,
poplar::program::Sequence &prog,
bool unbiasedVarEstimate,
poplin::DistributedNormReduceCallback reduceCallback,
unsigned normBatchSize,
bool stableAlgo = false,
const poplar::Type &partialsType = poplar::FLOAT,
const poplar::DebugContext &debugContext = {},
const poplar::OptionFlags &options = {}
)

Compute the batch normalisation statistics for a part of the activations tensor.

normBatchSize batch elements are distributed over multiple replicas. Each replica gets equal-sized batches (B). A callback does the required reduction over multiple replicas. The activations tensor is of shape [B][C][..F..]. The mean and inverse standard deviation are computed over dimensions {[B] [..F..]} and vectors of length C are returned as estimates.

Parameters
replicatedGraph - The replicated graph in which the computation is performed.
acts - The activation tensor with shape [B][C][..F..] where:
  • B is the batch size
  • C is the number of channels
  • ..F.. are the dimensions of an N-dimensional field.
eps - The epsilon value added to the variance to avoid division by zero.
prog - A program sequence that the code to perform the normalisation will be appended to.
unbiasedVarEstimate - If true, an unbiased variance estimate will be computed.
stableAlgo - If true, computes the mean first and subtracts it from the activations before computing the variance. The implementation with this flag set to true is slower than when set to false.
partialsType - Poplar type used for partials. If the type specified is smaller than the input/output type then partialsType is ignored and the input/output type is used instead.
reduceCallback - Callback to perform all-reduce over normBatchSize batch elements.
normBatchSize - Number of batch elements over which statistics are estimated.
debugContext - Optional debug information.
Returns
A vector pair with the mean and inverse standard deviation.