Support for Long short-term memory cells. More...

#include <poplar/Tensor.hpp>
#include <poplin/MatMul.hpp>
#include <popnn/LstmDef.hpp>
#include <popnn/NonLinearityDef.hpp>
#include <popnn/Rnn.hpp>

Classes
struct	popnn::lstm::LstmParams
	Structure representing the parameters of the LSTM. More...

struct	popnn::lstm::LstmState
	Structure holding the state of an LSTM cell, or the gradients for the state (depending on the context). More...

struct	popnn::lstm::LstmWeights
	Structure holding all the parameters of an LSTM cell, or the deltas for those parameters (depending on the context). More...

Namespaces
namespace	popnn
	Functions used in neural networks.

Functions
const std::vector< BasicLstmCellUnit >	popnn::lstm::getDefaultBasicLstmCellOrder ()
	Get the default order of the gates in a basic LSTM cell. More...

std::vector< std::pair< poplin::MatMulParams, poplar::OptionFlags > >	popnn::lstm::getMatMulPrePlanParameters (LstmParams params, poplar::OptionFlags opts)
	Predict what matrix multiplications will be needed for the given parameters and return a list of corresponding matmul parameters and options.

poplar::Tensor	popnn::lstm::createInput (poplar::Graph &graph, const LstmParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
	Create an input tensor of shape [`numSteps`, `batchSize`, `inputSize`] that is optimally mapped to multiply the whole input sequence in a single matrix multiply operation. More...

poplar::Tensor	popnn::lstm::createInitialOutput (poplar::Graph &graph, const LstmParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
	Create the initial output that can be combined with the initial cell state using an LstmState. More...

poplar::Tensor	popnn::lstm::createInitialCellState (poplar::Graph &graph, const LstmParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
	Create the initial cell state that can be combined with the initial output using an LstmState. More...

LstmState	popnn::lstm::createInitialState (poplar::Graph &graph, const LstmParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
	Creates the initial state (both output and cell state) that is fed into the LSTM cell at the first timestep. More...

void	popnn::lstm::zeroInitialState (poplar::Graph &graph, const LstmState &initialState, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={})
	Initialise the forward state of an LSTM with zeros. More...

std::pair< poplar::Tensor, poplar::Tensor >	popnn::lstm::createWeightsKernel (poplar::Graph &graph, const LstmParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
	Create the weights kernel used to weight the input of an LSTM. More...

poplar::Tensor	popnn::lstm::createWeightsBiases (poplar::Graph &graph, const LstmParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
	Create the weights biases.

LstmWeights	popnn::lstm::createWeights (poplar::Graph &graph, const LstmParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
	Create the weights (both kernel and biases) used to weight the input of an LSTM.

std::pair< poplar::Tensor, poplar::Tensor >	popnn::lstm::lstmFwd (poplar::Graph &graph, const LstmParams &params, const LstmState &stateInit, const poplar::Tensor &in, const LstmWeights &weights, poplar::Tensor *intermediates, poplar::program::Sequence &fwdProg, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
	Calculate the result of applying an LSTM across a sequence. More...

LstmState	popnn::lstm::lstmBwd (poplar::Graph &graph, const LstmParams &params, poplar::program::Sequence &prog, const LstmState &fwdStateInit, const poplar::Tensor &fwdIntermediates, const LstmWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::Tensor &outputGrad, const LstmState *lastStepStateGrad, poplar::Tensor *inputGrad, poplar::Tensor *bwdIntermediates, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
	Run LSTM backward pass. More...

LstmState	popnn::lstm::lstmBwd (poplar::Graph &graph, const LstmParams &params, poplar::program::Sequence &prog, const LstmState &fwdStateInit, const poplar::Tensor &fwdIntermediates, const LstmWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::Tensor &outputGrad, const poplar::Tensor *lastCellStateGrad, poplar::Tensor *inputGrad, poplar::Tensor *bwdIntermediates, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)

LstmWeights	popnn::lstm::lstmWU (poplar::Graph &graph, const LstmParams &params, poplar::program::Sequence &prog, const LstmState &fwdStateInit, const poplar::Tensor &fwdIntermediates, const poplar::Tensor &bwdIntermediates, const LstmWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
	Run a standalone weight update pass. More...

LstmState	popnn::lstm::lstmBwdWithWU (poplar::Graph &graph, const LstmParams &params, poplar::program::Sequence &prog, const LstmState &fwdStateInit, const poplar::Tensor &fwdIntermediates, const LstmWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::Tensor &outputGrad, const LstmState *lastStepStateGrad, poplar::Tensor *inputGrad, LstmWeights &weightsGrad, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
	Run a combined LSTM backward and weight update pass. More...

LstmState	popnn::lstm::lstmBwdWithWU (poplar::Graph &graph, const LstmParams &params, poplar::program::Sequence &prog, const LstmState &fwdStateInit, const poplar::Tensor &fwdIntermediates, const LstmWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::Tensor &outputGrad, const poplar::Tensor *lastCellStateGrad, poplar::Tensor *inputGrad, LstmWeights &weightsGrad, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)

Detailed Description

Support for Long short-term memory cells.

Function Documentation

◆ createInitialCellState()

poplar::Tensor popnn::lstm::createInitialCellState	(	poplar::Graph &	graph,
		const LstmParams &	params,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		poplin::PlanningCache *	planningCache = `nullptr`
	)

Create the initial cell state that can be combined with the initial output using an LstmState.

This then can be fed into the LSTM cell at the first timestep.

Parameters

graph	Graph to which the LSTM cell belongs.
params	The LSTM parameters.
debugContext	Debug information.
options	Any implementation/debug options for the LSTM. See createInput().
planningCache	A poplin matrix multiply planning cache.

Returns: A tensor which is the cell state for the forward operation of the LSTM cell.

◆ createInitialOutput()

poplar::Tensor popnn::lstm::createInitialOutput	(	poplar::Graph &	graph,
		const LstmParams &	params,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		poplin::PlanningCache *	planningCache = `nullptr`
	)

Create the initial output that can be combined with the initial cell state using an LstmState.

This then can be fed into the LSTM cell at the first timestep.

Parameters

graph	Graph to which the LSTM cell belongs.
params	The LSTM parameters.
debugContext	Debug information.
options	Any implementation/debug options for the LSTM. See createInput().
planningCache	A poplin matrix multiply planning cache.

Returns: A tensor which is the initial output for the forward operation of the LSTM cell.

◆ createInitialState()

LstmState popnn::lstm::createInitialState	(	poplar::Graph &	graph,
		const LstmParams &	params,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		poplin::PlanningCache *	planningCache = `nullptr`
	)

Creates the initial state (both output and cell state) that is fed into the LSTM cell at the first timestep.

It can be initialised by writing the appropriate member or using zeroInitialState().

Parameters

graph	Graph to which the LSTM cell belongs.
params	The LSTM parameters.
debugContext	Debug information.
options	Any implementation/debug options for the LSTM. See createInput().
planningCache	A poplin matrix multiply planning cache.

Returns: An LstmState holding the initial output and cell state tensors for the forward operation of the LSTM cell.

◆ createInput()

poplar::Tensor popnn::lstm::createInput	(	poplar::Graph &	graph,
		const LstmParams &	params,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		poplin::PlanningCache *	planningCache = `nullptr`
	)

Create an input tensor of shape [numSteps, batchSize, inputSize] that is optimally mapped to multiply the whole input sequence in a single matrix multiply operation.

LSTM options

availableMemoryProportion Decimal between 0 and 1 (inclusive)

See poplin::createWeights().
inferenceOnly (true, false) [=false]

Sets convolution pass to INFERENCE_FWD if true; TRAINING_FWD otherwise. See the pass option in poplin::createWeights().
partialsType (half, float) [=float]

See poplin::createWeights().
weightAccumulatorsType (half, float) [=data type of lstm]

Data type of the weight accumulators for the LSTM's weight matrices and biases.
preCalcWeights (true, false) [=false]

If true, use one big matrix multiply before the recurrent calculation to perform the part of the calculation that only depends on the input sequence.
recomputationMode (none, cellAndTanh, full) [=none]
- none: No recomputation is done in the backwards pass.
- cellAndTanh: A small amount of recomputation is done in the backwards pass, yielding some reduction in memory footprint for the layer.
- full: Recompute everything from the forward pass. Saves the most memory at the cost of an extra forward pass of cycles.

Parameters

graph	Graph to which the LSTM cell belongs.
params	The LSTM parameters.
debugContext	Debug information.
options	Any implementation/debug options for the LSTM.
planningCache	A poplin matrix multiply planning cache.

Returns: A tensor created in the graph, of shape [timeSteps, batchSize, inputSize].

◆ createWeightsKernel()

std::pair< poplar::Tensor, poplar::Tensor > popnn::lstm::createWeightsKernel	(	poplar::Graph &	graph,
		const LstmParams &	params,
		const poplar::DebugContext &	debugContext,
		const poplar::OptionFlags &	options = `{}`,
		poplin::PlanningCache *	planningCache = `nullptr`
	)

Create the weights kernel used to weight the input of an LSTM.

Returns the inputWeights and outputWeights.

◆ getDefaultBasicLstmCellOrder()

const std::vector< BasicLstmCellUnit > popnn::lstm::getDefaultBasicLstmCellOrder ( )

Get the default order of the gates in a basic LSTM cell.

The default order is: [Forget gate, Input gate, Candidate, Output Gate].

◆ lstmBwd() [1/2]

LstmState popnn::lstm::lstmBwd	(	poplar::Graph &	graph,
		const LstmParams &	params,
		poplar::program::Sequence &	prog,
		const LstmState &	fwdStateInit,
		const poplar::Tensor &	fwdIntermediates,
		const LstmWeights &	weights,
		const poplar::Tensor &	input,
		const poplar::Tensor &	output,
		const poplar::Tensor &	outputGrad,
		const LstmState *	lastStepStateGrad,
		poplar::Tensor *	inputGrad,
		poplar::Tensor *	bwdIntermediates,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		poplin::PlanningCache *	planningCache = `nullptr`
	)

Run LSTM backward pass.

The backward pass executes in reverse order as compared to the forward pass. If the forward steps for an LSTM layer are sf = {0, 1, 2, ..., S - 1} then the backward steps run for sb = {S - 1, S - 2, ..., 1, 0}.

Note 1: If the time step limit is variable, the entries above the given time step limit must be explicitly set to zero in fwdIntermediates, in order for the weights to be correctly updated. Note 2: if the time limit is variable, the initialising state gradients at the last time step are internally zero initialised and the lastStepStateGrad parameter is ignored.

Parameters

	graph	Graph to which the LSTM cell belongs.
	params	The parameters of the LSTM.
	prog	Program sequence.
	fwdStateInit	Forward state tensor for initial step.
	fwdIntermediates	Intermediate results from the forward pass.
	weights	The LSTM weights structure.
	input	The input tensor to the LSTM of shape: [timesteps, batch, inputSize].
	output	The output tensor from the forward pass. Depending on the outputFullSequence parameter this is either the output for the last timestep or it is a sequence of outputs for each timestep.
	outputGrad	The gradients of the output. Depending on the `outputFullSequence` parameter, this is either the gradient of the output for the last timestep or it is a sequence of output gradients for each timestep.
	lastStepStateGrad	The gradient of the state at the last step. May be null if the state gradient is to be zero initialised.
[out]	inputGrad	The gradients of the inputs. May be null if this information is not required.
[out]	bwdIntermediates	Intermediate gradients that are retained in the backward pass of training for use in the weight update. This argument should be set to null if you do not need to calculate weight deltas.
	debugContext	Optional debug information.
	options	LSTM implementation options. See createInput().
	planningCache	The matmul planning cache.

Returns: The gradient of the initial state.

◆ lstmBwd() [2/2]

LstmState popnn::lstm::lstmBwd	(	poplar::Graph &	graph,
		const LstmParams &	params,
		poplar::program::Sequence &	prog,
		const LstmState &	fwdStateInit,
		const poplar::Tensor &	fwdIntermediates,
		const LstmWeights &	weights,
		const poplar::Tensor &	input,
		const poplar::Tensor &	output,
		const poplar::Tensor &	outputGrad,
		const poplar::Tensor *	lastCellStateGrad,
		poplar::Tensor *	inputGrad,
		poplar::Tensor *	bwdIntermediates,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		poplin::PlanningCache *	planningCache = `nullptr`
	)

Deprecated: Use the previously defined popnn::lstm::lstmBwd() instead.

Run LSTM backward pass. The backward pass executes in reverse order as compared to the forward pass. If the forward steps for an LSTM layer are sf = {0, 1, 2, ..., S - 1} then the backward steps run for sb = {S - 1, S - 2, ..., 1, 0}.

Note 1: If the time step limit is variable, the entries above the given time step limit must be explicitly set to zero in fwdIntermediates, in order for the weights to be correctly updated. Note 2: if the time limit is variable, the initialising cell state gradient at the last time step is internally zero initialised and the lastCellStateGrad parameter is ignored.

Parameters

	graph	Graph to which the LSTM cell belongs.
	params	The parameters of the LSTM.
	prog	Program sequence.
	fwdStateInit	Forward state tensor for initial step.
	fwdIntermediates	Intermediate results from the forward pass.
	weights	The LSTM weights structure.
	input	The input tensor to the LSTM, of shape [`timeSteps`, `batchSize`, `inputSize`].
	output	The output tensor from the forward pass. Depending on the `outputFullSequence` parameter, this is either the output for the last timestep or it is a sequence of outputs for each timestep.
	outputGrad	The gradients of the output. Depending on the `outputFullSequence` parameter, this is either the gradient of the output for the last timestep or it is a sequence of output gradients for each timestep.
	lastCellStateGrad	The gradient of the last cell state - may be null if there is no incoming gradient.
[out]	inputGrad	The gradients of the inputs. May be null if this information is not required.
[out]	bwdIntermediates	Intermediate gradients that are retained in the backward pass of training for use in the weight update. This argument should be set to null if you do not need to calculate weight deltas.
	debugContext	Optional debug information.
	options	LSTM implementation options. See createInput().
	planningCache	The matmul planning cache.

Returns: The gradient of the initial state.

◆ lstmBwdWithWU() [1/2]

LstmState popnn::lstm::lstmBwdWithWU	(	poplar::Graph &	graph,
		const LstmParams &	params,
		poplar::program::Sequence &	prog,
		const LstmState &	fwdStateInit,
		const poplar::Tensor &	fwdIntermediates,
		const LstmWeights &	weights,
		const poplar::Tensor &	input,
		const poplar::Tensor &	output,
		const poplar::Tensor &	outputGrad,
		const LstmState *	lastStepStateGrad,
		poplar::Tensor *	inputGrad,
		LstmWeights &	weightsGrad,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		poplin::PlanningCache *	planningCache = `nullptr`
	)

Run a combined LSTM backward and weight update pass.

Use this combined backward and weight update pass in preference to lstmBwd() and lstmWU() separately, in order to allow the most efficient implementation to be chosen if you do not need to split the operation.

Note 1: If the time step limit is variable, the entries above the given time step limit must be explicitly set to zero in fwdIntermediates, in order for the weights to be correctly updated. Note 2: if the time limit is variable, the initialising state gradients at the last time step are internally zero initialised and the lastStepStateGrad parameter is ignored.

Parameters

	graph	Graph to which the LSTM cell belongs.
	params	The parameters of the LSTM.
	prog	Program sequence.
	fwdStateInit	Forward state tensor for initial step.
	fwdIntermediates	Intermediate results from the forward pass.
	weights	The LSTM weights structure.
	input	The input tensor to the LSTM of shape: [timesteps, batch, inputSize].
	output	The output tensor from the forward pass. Depending on the outputFullSequence parameter this is either the output for the last timestep or it is a sequence of outputs for each timestep.
	outputGrad	The gradients of the output. Depending on the `outputFullSequence` parameter, this is either the gradient of the output for the last timestep or it is a sequence of output gradients for each timestep.
	lastStepStateGrad	The gradient of the state at the last step. May be null if the state gradient is to be zero initialised.
[out]	inputGrad	The gradients of the inputs. May be null if this information is not required.
	weightsGrad	A set of weight deltas to sum with weights.
	debugContext	Optional debug information.
	options	LSTM implementation options. See createInput().
	planningCache	The matmul planning cache.

Returns: The gradient of the initial state.

◆ lstmBwdWithWU() [2/2]

LstmState popnn::lstm::lstmBwdWithWU	(	poplar::Graph &	graph,
		const LstmParams &	params,
		poplar::program::Sequence &	prog,
		const LstmState &	fwdStateInit,
		const poplar::Tensor &	fwdIntermediates,
		const LstmWeights &	weights,
		const poplar::Tensor &	input,
		const poplar::Tensor &	output,
		const poplar::Tensor &	outputGrad,
		const poplar::Tensor *	lastCellStateGrad,
		poplar::Tensor *	inputGrad,
		LstmWeights &	weightsGrad,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		poplin::PlanningCache *	planningCache = `nullptr`
	)

Deprecated: Use the previously defined popnn::lstm::lstmBwdWithWU() instead.

Run a combined LSTM backward and weight update pass. Use this combined backward and weight update pass in preference to lstmBwd and lstmWU separately in order to allow the most efficient implementation to be chosen if you do not need to split the operation.

Note 1: If the time step limit is variable, the entries above the given time step limit must be explicitly set to zero in fwdIntermediates, in order for the weights to be correctly updated. Note 2: if the time limit is variable, the initialising cell state gradient at the last time step is internally zero initialised and the lastCellStateGrad parameter is ignored.

Parameters

	graph	Graph to which the LSTM cell belongs.
	params	The parameters of the LSTM.
	prog	Program sequence.
	fwdStateInit	Forward state tensor for initial step.
	fwdIntermediates	Intermediate results from the forward pass.
	weights	The LSTM weights structure.
	input	The input tensor to the LSTM, of shape [`timeSteps`, `batchSize`, `inputSize`].
	output	The output tensor from the forward pass. Depending on the `outputFullSequence` parameter, this is either the output for the last timestep or it is a sequence of outputs for each timestep.
	outputGrad	The gradients of the output. Depending on the `outputFullSequence` parameter, this is either the gradient of the output for the last timestep or it is a sequence of output gradients for each timestep.
	lastCellStateGrad	The gradient of the last cell state - may be null if there is no incoming gradient.
[out]	inputGrad	The gradients of the inputs. May be null if this information is not required.
	weightsGrad	A set of weight deltas to sum with weights.
	debugContext	Optional debug information.
	options	LSTM implementation options. See createInput().
	planningCache	The matmul planning cache.

Returns: The gradient of the initial state.

◆ lstmFwd()

std::pair< poplar::Tensor, poplar::Tensor > popnn::lstm::lstmFwd	(	poplar::Graph &	graph,
		const LstmParams &	params,
		const LstmState &	stateInit,
		const poplar::Tensor &	in,
		const LstmWeights &	weights,
		poplar::Tensor *	intermediates,
		poplar::program::Sequence &	fwdProg,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		poplin::PlanningCache *	planningCache = `nullptr`
	)

Calculate the result of applying an LSTM across a sequence.

The LSTM is run for rnn::RnnParams.maxTimeSteps time steps, each with a batch of size batchSize, an input size of inputSize and output size of outputSize. The total number of units within each LSTM cell is lstmUnits = BASIC_LSTM_CELL_NUM_UNITS.

Parameters

	graph	Graph to which the LSTM cell belongs.
	params	The parameters of the LSTM.
	stateInit	Initial state for the LSTM.
	in	The input tensor to the LSTM, of shape [`timeSteps`, `batchSize`, `inputSize`].
	weights	The LSTM weights structure.
[out]	intermediates	Intermediate results that are retained in the forward pass of training for use in the backward pass. This argument should be set to null if we are only doing inference.
	fwdProg	Program sequence.
	debugContext	Optional debug information.
	options	LSTM implementation options. See createInput().
	planningCache	The matmul planning cache.

Returns: The output of the LSTM and the final cell state. Depending on the outputFullSequence parameter, the output tensor is either the output of the last timestep in the shape [batchSize, outputSize] or it is the sequence of outputs for every timestep in the shape [timeSteps, batchSize, outputSize].

◆ lstmWU()

LstmWeights popnn::lstm::lstmWU	(	poplar::Graph &	graph,
		const LstmParams &	params,
		poplar::program::Sequence &	prog,
		const LstmState &	fwdStateInit,
		const poplar::Tensor &	fwdIntermediates,
		const poplar::Tensor &	bwdIntermediates,
		const LstmWeights &	weights,
		const poplar::Tensor &	input,
		const poplar::Tensor &	output,
		const poplar::DebugContext &	debugContext = `{}`,
		const poplar::OptionFlags &	options = `{}`,
		poplin::PlanningCache *	planningCache = `nullptr`
	)

Run a standalone weight update pass.

Takes intermediates and gradients from the backward pass and calculates and returns weight deltas.

Note: If the timestep limit is variable, the entries above the given time step limit must be explicitly set to zero in fwdIntermediates, in order for the weights to be correctly updated.

Parameters

graph	Graph to which the LSTM cell belongs.
params	The parameters of the LSTM.
prog	Program sequence to add operations to.
fwdStateInit	Forward state tensor for initial step.
fwdIntermediates	Intermediate results from the forward pass.
bwdIntermediates	Intermediate results from the backward pass.
weights	The LSTM weights structure.
input	The input tensor to the LSTM, of shape [`timeSteps`, `batchSize`, `inputSize`].
output	The output tensor from the forward pass. Depending on the `outputFullSequence` parameter, this is either the output for the last timestep or it is a sequence of outputs for each timestep.
debugContext	Optional debug information.
options	LSTM implementation options. See createInput().
planningCache	The matmul planning cache.

Returns: A set of weight gradients to sum with weights.

◆ zeroInitialState()

void popnn::lstm::zeroInitialState	(	poplar::Graph &	graph,
		const LstmState &	initialState,
		poplar::program::Sequence &	prog,
		const poplar::DebugContext &	debugContext = `{}`
	)

Initialise the forward state of an LSTM with zeros.

Parameters

graph	Graph to which the LSTM cell belongs.
initialState	The initial state to zero.
prog	The program to extend with the initialization code.
debugContext	Optional debug information.

Classes

Namespaces

Functions

Detailed Description

Function Documentation

◆ createInitialCellState()

◆ createInitialOutput()

◆ createInitialState()

◆ createInput()

◆ createWeightsKernel()

◆ getDefaultBasicLstmCellOrder()

◆ lstmBwd() [1/2]

◆ lstmBwd() [2/2]

◆ lstmBwdWithWU() [1/2]

◆ lstmBwdWithWU() [2/2]

◆ lstmFwd()

◆ lstmWU()

◆ zeroInitialState()