latest/doxygen/Lstm_8hpp_source.html

// Copyright (c) 2017 Graphcore Ltd. All rights reserved.

#ifndef popnn_Lstm_hpp

#define popnn_Lstm_hpp


#include <poplar/Tensor.hpp>

#include <poplin/MatMul.hpp>

#include <popnn/LstmDef.hpp>

#include <popnn/NonLinearityDef.hpp>

#include <popnn/Rnn.hpp>


namespace popnn {

namespace lstm {


/// Get the default order of the gates in a basic LSTM cell.
///
/// \return The gate units, in their default order, as a vector returned by
///         value.
const std::vector<BasicLstmCellUnit>
getDefaultBasicLstmCellOrder();


/// Structure representing the parameters of the LSTM.
struct LstmParams {
  /// Generic RNN parameters (see rnn::RnnParams).
  // NOTE(review): how this member relates to the individual fields below
  // (dataType, batchSize, timeSteps, layerSizes) is decided by the
  // constructors, which are defined outside this header - confirm there.
  rnn::RnnParams rnn;

  /// The datatype of the LSTM.
  poplar::Type dataType;
  /// The batch size.
  std::size_t batchSize;
  /// The number of time steps in the sequence of the LSTM.
  std::size_t timeSteps;
  /// The number of neurons before and after each layer of the LSTM.
  std::vector<std::size_t> layerSizes;
  /// If true the LSTM function returns the entire sequence of outputs,
  /// otherwise it returns just the final output. Defaults to true.
  bool outputFullSequence = true;
  /// If set to false the LSTM will skip the calculation of weighted inputs.
  /// Defaults to true.
  bool doInputWeightCalc = true;
  /// If set to false the LSTM will skip the calculation of the gradients of
  /// the inputs. Defaults to true.
  bool calcInputGradients = true;
  /// If set to true the LSTM will preserve the internal state at the last
  /// valid time step. Defaults to false.
  bool preserveFinalState = false;
  /// Order of the gate units; defaults to the result of
  /// getDefaultBasicLstmCellOrder().
  // NOTE(review): the reference text says the weights and biases for all of
  // the layers being processed are concatenated in the outermost dimension;
  // presumably in the order given here - confirm.
  std::vector<BasicLstmCellUnit> cellOrder = getDefaultBasicLstmCellOrder();
  /// Activation function. Defaults to NonLinearityType::TANH.
  NonLinearityType activation = NonLinearityType::TANH;
  /// Recurrent activation function. Defaults to NonLinearityType::SIGMOID.
  NonLinearityType recurrentActivation = NonLinearityType::SIGMOID;

  /// Construct LSTM parameters with the number of time steps given as a
  /// plain count.
  ///
  /// \param dataType            The datatype of the LSTM.
  /// \param batchSize           The batch size.
  /// \param timeSteps           The number of time steps in the sequence.
  /// \param layerSizes          The number of neurons before and after each
  ///                            layer of the LSTM.
  /// \param activation          Activation function (default TANH).
  /// \param recurrentActivation Recurrent activation function (default
  ///                            SIGMOID).
  LstmParams(poplar::Type dataType,
             std::size_t batchSize,
             std::size_t timeSteps,
             std::vector<std::size_t> layerSizes,
             NonLinearityType activation = NonLinearityType::TANH,
             NonLinearityType recurrentActivation = NonLinearityType::SIGMOID);

  /// Construct LSTM parameters with the time steps supplied as a tensor and
  /// a separate maximum step count.
  ///
  /// Note that here \p timeSteps is a tensor, unlike the `timeSteps` member
  /// and the other constructor's parameter, which are plain counts.
  // NOTE(review): presumably the tensor carries run-time (e.g. per-batch)
  // sequence lengths bounded by maxTimeSteps - confirm against the
  // implementation.
  ///
  /// \param dataType            The datatype of the LSTM.
  /// \param batchSize           The batch size.
  /// \param maxTimeSteps        Upper bound on the number of time steps.
  /// \param timeSteps           Tensor describing the time steps.
  /// \param layerSizes          The number of neurons before and after each
  ///                            layer of the LSTM.
  /// \param activation          Activation function (default TANH).
  /// \param recurrentActivation Recurrent activation function (default
  ///                            SIGMOID).
  LstmParams(poplar::Type dataType,
             std::size_t batchSize,
             std::size_t maxTimeSteps,
             const poplar::Tensor &timeSteps,
             std::vector<std::size_t> layerSizes,
             NonLinearityType activation = NonLinearityType::TANH,
             NonLinearityType recurrentActivation = NonLinearityType::SIGMOID);
};


/// Structure holding the state of an LSTM cell, or the gradients for the
/// state (depending on the context).
struct LstmState {
  /// The output component of the state.
  poplar::Tensor output;
  /// The cell-state component of the state.
  poplar::Tensor cellState;

  /// Return the state as a single tensor.
  // NOTE(review): how `output` and `cellState` are laid out in the returned
  // tensor is not visible in this header - confirm in the implementation.
  poplar::Tensor getAsTensor() const;
};


/// Obtain the (matmul parameters, option flags) pairs associated with an
/// LSTM configuration.
// NOTE(review): the name suggests the result is intended for pre-planning
// the matrix multiplications used by the LSTM - confirm against the
// implementation. Both arguments are taken by value.
///
/// \param params The LSTM parameters.
/// \param opts   Option flags for the LSTM.
/// \return A vector of pairs, each holding poplin::MatMulParams and the
///         poplar::OptionFlags that go with them.
std::vector<std::pair<poplin::MatMulParams, poplar::OptionFlags>>
getMatMulPrePlanParameters(LstmParams params,
                           poplar::OptionFlags opts);


// NOTE(review): the three functions below have no description in the
// reference text; judging by their names they report floating-point
// operation counts for the forward, backward and weight-update passes of a
// basic LSTM cell respectively - confirm against the implementation.

/// Flop count for the forward pass (see note above).
///
/// \param params The LSTM parameters.
uint64_t
getBasicLstmCellFwdFlops(const LstmParams &params);

/// Flop count for the backward pass (see note above).
///
/// \param params The LSTM parameters.
uint64_t
getBasicLstmCellBwdFlops(const LstmParams &params);

/// Flop count for the weight-update pass (see note above).
///
/// \param params The LSTM parameters.
uint64_t
getBasicLstmCellWuFlops(const LstmParams &params);


/// Create an input tensor for the LSTM described by \p params.
// NOTE(review): the shape and layout of the returned tensor are chosen by
// the implementation and cannot be determined from this header.
///
/// \param graph         The graph object.
/// \param params        The LSTM parameters.
/// \param debugContext  Debug information (required; no default).
/// \param options       Option flags; defaults to an empty set.
/// \param planningCache Optional poplin planning cache; null by default.
/// \return The created tensor.
poplar::Tensor
createInput(poplar::Graph &graph,
            const LstmParams &params,
            const poplar::DebugContext &debugContext,
            const poplar::OptionFlags &options = {},
            poplin::PlanningCache *planningCache = nullptr);


/// Create the initial output that can be combined with the initial cell
/// state using an LstmState.
///
/// \param graph         The graph object.
/// \param params        The LSTM parameters.
/// \param debugContext  Debug information (required; no default).
/// \param options       Option flags; defaults to an empty set.
/// \param planningCache Optional poplin planning cache; null by default.
/// \return The initial output tensor.
poplar::Tensor createInitialOutput(
    poplar::Graph &graph,
    const LstmParams &params,
    const poplar::DebugContext &debugContext,
    const poplar::OptionFlags &options = {},
    poplin::PlanningCache *planningCache = nullptr);


/// Create the initial cell state that can be combined with the initial
/// output using an LstmState.
///
/// \param graph         The graph object.
/// \param params        The LSTM parameters.
/// \param debugContext  Debug information (required; no default).
/// \param options       Option flags; defaults to an empty set.
/// \param planningCache Optional poplin planning cache; null by default.
/// \return The initial cell-state tensor.
poplar::Tensor createInitialCellState(
    poplar::Graph &graph,
    const LstmParams &params,
    const poplar::DebugContext &debugContext,
    const poplar::OptionFlags &options = {},
    poplin::PlanningCache *planningCache = nullptr);


/// Create an initial LstmState (output plus cell state) for the LSTM
/// described by \p params.
// NOTE(review): presumably equivalent to pairing createInitialOutput() with
// createInitialCellState(); the contents of the tensors are not specified
// by this header (see zeroInitialState() for zero-initialisation).
///
/// \param graph         The graph object.
/// \param params        The LSTM parameters.
/// \param debugContext  Debug information (required; no default).
/// \param options       Option flags; defaults to an empty set.
/// \param planningCache Optional poplin planning cache; null by default.
/// \return The initial state.
LstmState
createInitialState(poplar::Graph &graph,
                   const LstmParams &params,
                   const poplar::DebugContext &debugContext,
                   const poplar::OptionFlags &options = {},
                   poplin::PlanningCache *planningCache = nullptr);


/// Initialise the forward state of an LSTM with zeros.
///
/// \param graph        The graph object.
/// \param initialState The initial state to zero.
/// \param prog         The program sequence passed in by the caller.
// NOTE(review): presumably the zeroing programs are appended to `prog` -
// confirm against the implementation.
/// \param debugContext Optional debug information.
void
zeroInitialState(poplar::Graph &graph,
                 const LstmState &initialState,
                 poplar::program::Sequence &prog,
                 const poplar::DebugContext &debugContext = {});


/// Structure holding all the parameters of an LSTM cell, or the deltas for
/// those parameters (depending on the context).
struct LstmWeights {
  /// Input weights (or their deltas).
  poplar::Tensor inputWeights;
  /// Output weights (or their deltas).
  poplar::Tensor outputWeights;
  /// Biases (or their deltas).
  poplar::Tensor biases;
};


/// Create the weight kernel tensors for the LSTM described by \p params.
// NOTE(review): the returned pair presumably corresponds to
// LstmWeights::inputWeights and LstmWeights::outputWeights, in that order -
// confirm against the implementation.
///
/// \param graph         The graph object.
/// \param params        The LSTM parameters.
/// \param debugContext  Debug information (required; no default).
/// \param options       Option flags; defaults to an empty set.
/// \param planningCache Optional poplin planning cache; null by default.
/// \return A pair of weight tensors.
std::pair<poplar::Tensor, poplar::Tensor> createWeightsKernel(
    poplar::Graph &graph,
    const LstmParams &params,
    const poplar::DebugContext &debugContext,
    const poplar::OptionFlags &options = {},
    poplin::PlanningCache *planningCache = nullptr);


/// Create the bias tensor for the LSTM described by \p params.
// NOTE(review): initial contents and layout of the biases are decided by
// the implementation and are not visible in this header.
///
/// \param graph         The graph object.
/// \param params        The LSTM parameters.
/// \param debugContext  Debug information (required; no default).
/// \param options       Option flags; defaults to an empty set.
/// \param planningCache Optional poplin planning cache; null by default.
/// \return The bias tensor.
poplar::Tensor createWeightsBiases(
    poplar::Graph &graph,
    const LstmParams &params,
    const poplar::DebugContext &debugContext,
    const poplar::OptionFlags &options = {},
    poplin::PlanningCache *planningCache = nullptr);


/// Create all weights (input weights, output weights and biases) for the
/// LSTM described by \p params, bundled in an LstmWeights.
// NOTE(review): presumably combines createWeightsKernel() and
// createWeightsBiases() - confirm against the implementation.
///
/// \param graph         The graph object.
/// \param params        The LSTM parameters.
/// \param debugContext  Debug information (required; no default).
/// \param options       Option flags; defaults to an empty set.
/// \param planningCache Optional poplin planning cache; null by default.
/// \return The created weights.
LstmWeights
createWeights(poplar::Graph &graph,
              const LstmParams &params,
              const poplar::DebugContext &debugContext,
              const poplar::OptionFlags &options = {},
              poplin::PlanningCache *planningCache = nullptr);


/// Calculate the result of applying an LSTM across a sequence.
///
/// \param graph         The graph object.
/// \param params        The LSTM parameters.
/// \param stateInit     The initial state of the LSTM.
/// \param in            The input tensor to the LSTM.
/// \param weights       The LSTM weights.
/// \param intermediates Pointer to a tensor for intermediate results.
// NOTE(review): presumably an output that the backward pass consumes (see
// the `fwdIntermediates` parameter of lstmBwd()) and which may be null when
// no backward pass is needed - confirm against the implementation.
/// \param fwdProg       The program sequence passed in by the caller.
/// \param debugContext  Optional debug information.
/// \param options       Option flags; defaults to an empty set.
/// \param planningCache Optional poplin planning cache; null by default.
/// \return A pair of tensors.
// NOTE(review): presumably (output, cell state); whether the output covers
// the whole sequence is governed by LstmParams::outputFullSequence -
// confirm the exact meaning of both elements.
std::pair<poplar::Tensor, poplar::Tensor> lstmFwd(
    poplar::Graph &graph,
    const LstmParams &params,
    const LstmState &stateInit,
    const poplar::Tensor &in,
    const LstmWeights &weights,
    poplar::Tensor *intermediates,
    poplar::program::Sequence &fwdProg,
    const poplar::DebugContext &debugContext = {},
    const poplar::OptionFlags &options = {},
    poplin::PlanningCache *planningCache = nullptr);


/// Run LSTM backward pass.
///
/// This overload takes the last-step gradient as an LstmState pointer. A
/// second overload differs only in taking
/// `const poplar::Tensor *lastCellStateGrad` in the same position, so a
/// bare `nullptr` for that argument is ambiguous: pass a typed null
/// pointer (e.g. `static_cast<const LstmState *>(nullptr)`) instead.
///
/// \param graph             The graph object.
/// \param params            The LSTM parameters.
/// \param prog              The program sequence passed in by the caller.
/// \param fwdStateInit      The initial state used by the forward pass.
/// \param fwdIntermediates  Intermediate results from the forward pass.
/// \param weights           The LSTM weights.
/// \param input             The input tensor of the forward pass.
/// \param output            The output tensor of the forward pass.
/// \param outputGrad        The gradient of the output.
/// \param lastStepStateGrad Pointer to the state gradient at the last step.
/// \param inputGrad         Pointer to a tensor for the input gradient.
/// \param bwdIntermediates  Pointer to a tensor for backward-pass
///                          intermediates (see lstmWU()).
// NOTE(review): which of the pointer arguments may legitimately be null is
// not visible in this header - confirm against the implementation.
/// \param debugContext      Optional debug information.
/// \param options           Option flags; defaults to an empty set.
/// \param planningCache     Optional poplin planning cache; null by default.
/// \return An LstmState.
// NOTE(review): presumably the gradient with respect to the initial state.
LstmState lstmBwd(
    poplar::Graph &graph,
    const LstmParams &params,
    poplar::program::Sequence &prog,
    const LstmState &fwdStateInit,
    const poplar::Tensor &fwdIntermediates,
    const LstmWeights &weights,
    const poplar::Tensor &input,
    const poplar::Tensor &output,
    const poplar::Tensor &outputGrad,
    const LstmState *lastStepStateGrad,
    poplar::Tensor *inputGrad,
    poplar::Tensor *bwdIntermediates,
    const poplar::DebugContext &debugContext = {},
    const poplar::OptionFlags &options = {},
    poplin::PlanningCache *planningCache = nullptr);


/// Run LSTM backward pass (overload taking only the last cell-state
/// gradient).
///
/// Identical to the other lstmBwd() overload except that the last-step
/// gradient is supplied as `const poplar::Tensor *lastCellStateGrad`
/// rather than `const LstmState *lastStepStateGrad`. Because the two
/// overloads differ only in that pointer type, a bare `nullptr` for this
/// argument is ambiguous: pass a typed null pointer instead.
///
/// \param graph             The graph object.
/// \param params            The LSTM parameters.
/// \param prog              The program sequence passed in by the caller.
/// \param fwdStateInit      The initial state used by the forward pass.
/// \param fwdIntermediates  Intermediate results from the forward pass.
/// \param weights           The LSTM weights.
/// \param input             The input tensor of the forward pass.
/// \param output            The output tensor of the forward pass.
/// \param outputGrad        The gradient of the output.
/// \param lastCellStateGrad Pointer to the cell-state gradient at the last
///                          step.
/// \param inputGrad         Pointer to a tensor for the input gradient.
/// \param bwdIntermediates  Pointer to a tensor for backward-pass
///                          intermediates (see lstmWU()).
// NOTE(review): which of the pointer arguments may legitimately be null is
// not visible in this header - confirm against the implementation.
/// \param debugContext      Optional debug information.
/// \param options           Option flags; defaults to an empty set.
/// \param planningCache     Optional poplin planning cache; null by default.
/// \return An LstmState.
// NOTE(review): presumably the gradient with respect to the initial state.
LstmState
lstmBwd(poplar::Graph &graph,
        const LstmParams &params,
        poplar::program::Sequence &prog,
        const LstmState &fwdStateInit,
        const poplar::Tensor &fwdIntermediates,
        const LstmWeights &weights,
        const poplar::Tensor &input,
        const poplar::Tensor &output,
        const poplar::Tensor &outputGrad,
        const poplar::Tensor *lastCellStateGrad,
        poplar::Tensor *inputGrad,
        poplar::Tensor *bwdIntermediates,
        const poplar::DebugContext &debugContext = {},
        const poplar::OptionFlags &options = {},
        poplin::PlanningCache *planningCache = nullptr);


/// Run a standalone weight update pass.
///
/// \param graph            The graph object.
/// \param params           The LSTM parameters.
/// \param prog             The program sequence passed in by the caller.
/// \param fwdStateInit     The initial state used by the forward pass.
/// \param fwdIntermediates Intermediate results from the forward pass.
/// \param bwdIntermediates Intermediate results from the backward pass.
/// \param weights          The LSTM weights.
/// \param input            The input tensor of the forward pass.
/// \param output           The output tensor of the forward pass.
/// \param debugContext     Optional debug information.
/// \param options          Option flags; defaults to an empty set.
/// \param planningCache    Optional poplin planning cache; null by default.
/// \return An LstmWeights.
// NOTE(review): presumably the deltas for the weights (LstmWeights can hold
// either parameters or their deltas) - confirm against the implementation.
LstmWeights
lstmWU(poplar::Graph &graph,
       const LstmParams &params,
       poplar::program::Sequence &prog,
       const LstmState &fwdStateInit,
       const poplar::Tensor &fwdIntermediates,
       const poplar::Tensor &bwdIntermediates,
       const LstmWeights &weights,
       const poplar::Tensor &input,
       const poplar::Tensor &output,
       const poplar::DebugContext &debugContext = {},
       const poplar::OptionFlags &options = {},
       poplin::PlanningCache *planningCache = nullptr);


/// Run a combined LSTM backward and weight update pass.
///
/// This overload takes the last-step gradient as an LstmState pointer. A
/// second overload differs only in taking
/// `const poplar::Tensor *lastCellStateGrad` in the same position, so a
/// bare `nullptr` for that argument is ambiguous: pass a typed null
/// pointer (e.g. `static_cast<const LstmState *>(nullptr)`) instead.
///
/// \param graph             The graph object.
/// \param params            The LSTM parameters.
/// \param prog              The program sequence passed in by the caller.
/// \param fwdStateInit      The initial state used by the forward pass.
/// \param fwdIntermediates  Intermediate results from the forward pass.
/// \param weights           The LSTM weights.
/// \param input             The input tensor of the forward pass.
/// \param output            The output tensor of the forward pass.
/// \param outputGrad        The gradient of the output.
/// \param lastStepStateGrad Pointer to the state gradient at the last step.
/// \param inputGrad         Pointer to a tensor for the input gradient.
/// \param weightsGrad       Reference through which the weight gradients
///                          are passed (non-const, so it can be written).
// NOTE(review): which of the pointer arguments may legitimately be null is
// not visible in this header - confirm against the implementation.
/// \param debugContext      Optional debug information.
/// \param options           Option flags; defaults to an empty set.
/// \param planningCache     Optional poplin planning cache; null by default.
/// \return An LstmState.
// NOTE(review): presumably the gradient with respect to the initial state.
LstmState
lstmBwdWithWU(poplar::Graph &graph,
              const LstmParams &params,
              poplar::program::Sequence &prog,
              const LstmState &fwdStateInit,
              const poplar::Tensor &fwdIntermediates,
              const LstmWeights &weights,
              const poplar::Tensor &input,
              const poplar::Tensor &output,
              const poplar::Tensor &outputGrad,
              const LstmState *lastStepStateGrad,
              poplar::Tensor *inputGrad,
              LstmWeights &weightsGrad,
              const poplar::DebugContext &debugContext = {},
              const poplar::OptionFlags &options = {},
              poplin::PlanningCache *planningCache = nullptr);


/// Run a combined LSTM backward and weight update pass (overload taking
/// only the last cell-state gradient).
///
/// Identical to the other lstmBwdWithWU() overload except that the
/// last-step gradient is supplied as
/// `const poplar::Tensor *lastCellStateGrad` rather than
/// `const LstmState *lastStepStateGrad`. Because the two overloads differ
/// only in that pointer type, a bare `nullptr` for this argument is
/// ambiguous: pass a typed null pointer instead.
///
/// \param graph             The graph object.
/// \param params            The LSTM parameters.
/// \param prog              The program sequence passed in by the caller.
/// \param fwdStateInit      The initial state used by the forward pass.
/// \param fwdIntermediates  Intermediate results from the forward pass.
/// \param weights           The LSTM weights.
/// \param input             The input tensor of the forward pass.
/// \param output            The output tensor of the forward pass.
/// \param outputGrad        The gradient of the output.
/// \param lastCellStateGrad Pointer to the cell-state gradient at the last
///                          step.
/// \param inputGrad         Pointer to a tensor for the input gradient.
/// \param weightsGrad       Reference through which the weight gradients
///                          are passed (non-const, so it can be written).
// NOTE(review): which of the pointer arguments may legitimately be null is
// not visible in this header - confirm against the implementation.
/// \param debugContext      Optional debug information.
/// \param options           Option flags; defaults to an empty set.
/// \param planningCache     Optional poplin planning cache; null by default.
/// \return An LstmState.
// NOTE(review): presumably the gradient with respect to the initial state.
LstmState
lstmBwdWithWU(poplar::Graph &graph,
              const LstmParams &params,
              poplar::program::Sequence &prog,
              const LstmState &fwdStateInit,
              const poplar::Tensor &fwdIntermediates,
              const LstmWeights &weights,
              const poplar::Tensor &input,
              const poplar::Tensor &output,
              const poplar::Tensor &outputGrad,
              const poplar::Tensor *lastCellStateGrad,
              poplar::Tensor *inputGrad,
              LstmWeights &weightsGrad,
              const poplar::DebugContext &debugContext = {},
              const poplar::OptionFlags &options = {},
              poplin::PlanningCache *planningCache = nullptr);


} // namespace lstm

} // namespace popnn


#endif // popnn_Lstm_hpp

LstmDef.hpp
Definitions for LSTM cell operations.

popnn::lstm::lstmBwdWithWU
LstmState lstmBwdWithWU(poplar::Graph &graph, const LstmParams &params, poplar::program::Sequence &prog, const LstmState &fwdStateInit, const poplar::Tensor &fwdIntermediates, const LstmWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::Tensor &outputGrad, const LstmState *lastStepStateGrad, poplar::Tensor *inputGrad, LstmWeights &weightsGrad, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Run a combined LSTM backward and weight update pass.

popnn::lstm::createInitialOutput
poplar::Tensor createInitialOutput(poplar::Graph &graph, const LstmParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Create the initial output that can be combined with the initial cell state using an LstmState.

popnn::lstm::zeroInitialState
void zeroInitialState(poplar::Graph &graph, const LstmState &initialState, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={})
Initialise the forward state of an LSTM with zeros.

popnn::lstm::lstmBwd
LstmState lstmBwd(poplar::Graph &graph, const LstmParams &params, poplar::program::Sequence &prog, const LstmState &fwdStateInit, const poplar::Tensor &fwdIntermediates, const LstmWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::Tensor &outputGrad, const LstmState *lastStepStateGrad, poplar::Tensor *inputGrad, poplar::Tensor *bwdIntermediates, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Run LSTM backward pass.

popnn::lstm::createInitialCellState
poplar::Tensor createInitialCellState(poplar::Graph &graph, const LstmParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Create the initial cell state that can be combined with the initial output using an LstmState.

popnn::lstm::lstmFwd
std::pair< poplar::Tensor, poplar::Tensor > lstmFwd(poplar::Graph &graph, const LstmParams &params, const LstmState &stateInit, const poplar::Tensor &in, const LstmWeights &weights, poplar::Tensor *intermediates, poplar::program::Sequence &fwdProg, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Calculate the result of applying an LSTM across a sequence.

popnn::lstm::lstmWU
LstmWeights lstmWU(poplar::Graph &graph, const LstmParams &params, poplar::program::Sequence &prog, const LstmState &fwdStateInit, const poplar::Tensor &fwdIntermediates, const poplar::Tensor &bwdIntermediates, const LstmWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Run a standalone weight update pass.

popnn::lstm::getDefaultBasicLstmCellOrder
const std::vector< BasicLstmCellUnit > getDefaultBasicLstmCellOrder()
Get the default order of the gates in a basic LSTM cell.

NonLinearityDef.hpp
Definitions for non-linearity operations.

Rnn.hpp
Functions for recurrent neural networks (RNN).

poplar::DebugContext
DebugContext gathers the common external parameters of the context of an operation.
Definition: DebugContext.hpp:221

poplar::Graph
This class represents a graph program to be executed on the IPU.
Definition: Graph.hpp:52

poplar::OptionFlags
A set of option/value string flags to be used in various APIs.
Definition: OptionFlags.hpp:24

poplar::Tensor
A reference to a subset of tensor elements.
Definition: Tensor.hpp:38

poplar::Type
Class representing device data types.
Definition: Type.hpp:42

poplar::program::Sequence
Program that executes a sequence of programs.
Definition: Program.hpp:77

popnn
Functions used in neural networks.
Definition: BatchNorm.hpp:14

popnn::NonLinearityType
NonLinearityType
Definition: NonLinearityDef.hpp:11

popnn::NonLinearityType::TANH
@ TANH
Hyperbolic tangent: $\tanh(x)$

popnn::NonLinearityType::SIGMOID
@ SIGMOID
Sigmoid: $\sigma(x) = \frac{1}{1 + e^{-x}}$

MatMul.hpp
Functions and data types for performing matrix multiplies on the IPU.

popnn::lstm::LstmParams
Structure representing the parameters of the LSTM.
Definition: Lstm.hpp:29

popnn::lstm::LstmParams::cellOrder
std::vector< BasicLstmCellUnit > cellOrder
The weights and biases for all of the layers being processed are concatenated in the outermost dimens...
Definition: Lstm.hpp:68

popnn::lstm::LstmParams::preserveFinalState
bool preserveFinalState
If this parameter is set to true then the LSTM will preserve the internal state at the last valid tim...
Definition: Lstm.hpp:62

popnn::lstm::LstmParams::dataType
poplar::Type dataType
The datatype of the LSTM.
Definition: Lstm.hpp:34

popnn::lstm::LstmParams::outputFullSequence
bool outputFullSequence
If true the Lstm function returns the entire sequence of outputs, otherwise it returns just the final...
Definition: Lstm.hpp:49

popnn::lstm::LstmParams::recurrentActivation
NonLinearityType recurrentActivation
Recurrent activation function.
Definition: Lstm.hpp:72

popnn::lstm::LstmParams::layerSizes
std::vector< std::size_t > layerSizes
The number of neurons before and after each layer of the LSTM.
Definition: Lstm.hpp:46

popnn::lstm::LstmParams::activation
NonLinearityType activation
Activation function.
Definition: Lstm.hpp:70

popnn::lstm::LstmParams::timeSteps
std::size_t timeSteps
The number of time steps in the sequence of the LSTM.
Definition: Lstm.hpp:40

popnn::lstm::LstmParams::calcInputGradients
bool calcInputGradients
If this parameter is set to false then the LSTM will skip the calculation of the gradients of the inp...
Definition: Lstm.hpp:55

popnn::lstm::LstmParams::batchSize
std::size_t batchSize
The batch size.
Definition: Lstm.hpp:37

popnn::lstm::LstmParams::doInputWeightCalc
bool doInputWeightCalc
If this parameter is set to false then the LSTM will skip the calculation of weighted inputs (only us...
Definition: Lstm.hpp:52

popnn::lstm::LstmState
Structure holding the state of an LSTM cell, or the gradients for the state (depending on the context)...
Definition: Lstm.hpp:90

popnn::lstm::LstmWeights
Structure holding all the parameters of an LSTM cell, or the deltas for those parameters (depending o...
Definition: Lstm.hpp:240

popnn::rnn::RnnParams
Structure of Recurrent Neural Network (RNN) parameters which allows for any customized implementation...
Definition: Rnn.hpp:22