Poplar and PopLibs
Gru.hpp
Go to the documentation of this file.
1// Copyright (c) 2019 Graphcore Ltd. All rights reserved.
8#ifndef popnn_Gru_hpp
9#define popnn_Gru_hpp
10
11#include <poplar/Tensor.hpp>
12#include <poplin/MatMul.hpp>
13#include <popnn/GruDef.hpp>
15#include <popnn/Rnn.hpp>
16
17namespace popnn {
18namespace gru {
19
/// Get the default order of the gates in a basic GRU cell
/// (the order in which BasicGruCellUnit entries appear in cellOrder).
25const std::vector<BasicGruCellUnit> getDefaultBasicGruCellOrder();
26
// Structure representing the parameters of the GRU.
// NOTE(review): this Doxygen listing omits several lines of the original
// header (e.g. the `dataType`, `activation` and `recurrentActivation`
// members and the tails of the constructor signatures) -- confirm against
// the full Gru.hpp before relying on the exact shape of this struct.
29struct GruParams {
31
32 // The datatype of the GRU.
 // The batch size.
37 std::size_t batchSize;
 // The number of time steps in the sequence of the GRU.
40 std::size_t timeSteps;
 // The number of neurons for the input and output layer.
43 std::vector<std::size_t> layerSizes;
 // If true, the GRU function returns the entire sequence of outputs;
 // otherwise it returns just the final output.
46 bool outputFullSequence = true;
 // If false, the GRU skips the calculation of the gradients of the inputs.
49 bool calcInputGradients = true;
 // Order in which the per-gate weights and biases are concatenated in the
 // outermost dimension; defaults to getDefaultBasicGruCellOrder().
55 std::vector<BasicGruCellUnit> cellOrder = getDefaultBasicGruCellOrder();
 // Controls whether the reset gate is applied before or after the
 // candidate weights and biases.
58 bool resetAfter = false;
63
 // Constructor for a fixed number of time steps.
 // NOTE(review): the closing part of this signature is omitted from this
 // listing -- confirm defaulted trailing parameters in the full header.
64 GruParams(poplar::Type dataType, std::size_t batchSize, std::size_t timeSteps,
65 std::vector<std::size_t> layerSizes,
68
 // Constructor taking a tensor of run-time time steps bounded by
 // `maxTimeSteps` (opening line of this signature omitted from listing).
70 std::size_t maxTimeSteps, const poplar::Tensor &timeSteps,
71 std::vector<std::size_t> layerSizes,
74
 // Copy constructor.
75 GruParams(const GruParams &other);
76};
77
// Return the floating-point operation count for the forward pass of a
// basic GRU cell with the given parameters.
// NOTE(review): semantics inferred from the name -- the Doxygen comment is
// omitted from this listing; confirm against the full header.
78uint64_t getBasicGruCellFwdFlops(const GruParams &params);
79
// FLOP count for the backward (gradient) pass of a basic GRU cell
// (presumably; see note above the Fwd variant -- confirm in full header).
80uint64_t getBasicGruCellBwdFlops(const GruParams &params);
81
// FLOP count for the weight-update pass of a basic GRU cell
// (presumably; see note above the Fwd variant -- confirm in full header).
82uint64_t getBasicGruCellWuFlops(const GruParams &params);
83
// Create and map a tensor to be used as the input to the GRU described by
// `params`.
// NOTE(review): detailed layout/mapping behavior is documented in the full
// header's Doxygen comment, which is omitted from this listing -- confirm.
// \param graph         Graph the tensor is added to.
// \param params        GRU parameters (batch size, time steps, layer sizes).
// \param debugContext  Debug information attached to the operation.
// \param options       Implementation option flags (defaulted to empty).
// \param planningCache Optional matmul planning cache (may be nullptr).
112poplar::Tensor createInput(poplar::Graph &graph, const GruParams &params,
113 const poplar::DebugContext &debugContext,
114 const poplar::OptionFlags &options = {},
115 poplin::PlanningCache *planningCache = nullptr);
116
// Create the initial state tensor for the GRU described by `params`.
// NOTE(review): unlike createInput, `options` and `cache` carry no default
// arguments here -- confirm this asymmetry is intentional in the full
// header rather than an artifact of this listing.
117poplar::Tensor createInitialState(poplar::Graph &graph, const GruParams &params,
118 const poplar::DebugContext &debugContext,
119 const poplar::OptionFlags &options,
120 poplin::PlanningCache *cache);
 // Structure holding all the parameters of a GRU cell, or the deltas for
 // those parameters. NOTE(review): the `struct GruWeights {` opening line
 // is omitted from this Doxygen listing; member roles below are inferred
 // from their names -- confirm against the full header.
 // Weights applied to the cell input.
126 poplar::Tensor inputWeights;
 // Recurrent weights applied to the previous output/state.
127 poplar::Tensor outputWeights;
 // Bias terms for the gates.
128 poplar::Tensor biases;
129};
130
134std::pair<poplar::Tensor, poplar::Tensor>
136 const poplar::DebugContext &debugContext,
137 const poplar::OptionFlags &options = {},
138 poplin::PlanningCache *planningCache = nullptr);
139
144 const poplar::DebugContext &debugContext,
145 const poplar::OptionFlags &options = {},
146 poplin::PlanningCache *planningCache = nullptr);
147
// Create the full set of weights for the GRU (kernel weights and biases)
// returned as a GruWeights structure.
// \param graph         Graph the weight tensors are added to.
// \param params        GRU parameters describing the layer.
// \param debugContext  Debug information attached to the operation.
// \param options       Implementation option flags (defaulted to empty).
// \param planningCache Optional matmul planning cache (may be nullptr).
151GruWeights createWeights(poplar::Graph &graph, const GruParams &params,
152 const poplar::DebugContext &debugContext,
153 const poplar::OptionFlags &options = {},
154 poplin::PlanningCache *planningCache = nullptr);
155
159 const poplar::DebugContext &debugContext,
160 const poplar::OptionFlags &options = {});
161
208 const poplar::Tensor &stateInit, const poplar::Tensor &in,
209 const GruWeights &weights, poplar::Tensor *intermediates,
211 const poplar::DebugContext &debugContext = {},
212 const poplar::OptionFlags &options = {},
213 poplin::PlanningCache *planningCache = nullptr);
214
265 const poplar::Tensor &stateInit, const poplar::Tensor &in,
266 const poplar::Tensor &realTimeSteps,
267 const GruWeights &weights, poplar::Tensor *intermediates,
269 const poplar::DebugContext &debugContext = {},
270 const poplar::OptionFlags &options = {},
271 poplin::PlanningCache *planningCache = nullptr);
272
320 const poplar::Tensor &stateInit,
321 const poplar::Tensor &in, const GruWeights &weights,
322 poplar::Tensor *intermediates,
323 const poplar::Tensor &attScores,
325 const poplar::DebugContext &debugContext = {},
326 const poplar::OptionFlags &options = {},
327 poplin::PlanningCache *planningCache = nullptr);
328
380auGruFwd(poplar::Graph &graph, const GruParams &params,
381 const poplar::Tensor &stateInit, const poplar::Tensor &in,
382 const poplar::Tensor &realTimeSteps, const GruWeights &weights,
383 poplar::Tensor *intermediates, const poplar::Tensor &attScores,
385 const poplar::DebugContext &debugContext = {},
386 const poplar::OptionFlags &options = {},
387 poplin::PlanningCache *planningCache = nullptr);
388
428 poplar::Graph &graph, const GruParams &params,
429 poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit,
430 const poplar::Tensor &fwdIntermediatesSeq, const GruWeights &weights,
431 const poplar::Tensor &fwdInputSeq, const poplar::Tensor &fwdOutput,
432 const poplar::Tensor &gradLayerNext, poplar::Tensor *inputGrad,
433 poplar::Tensor *bwdIntermediates, const poplar::DebugContext &debugContext,
434 const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache);
435
480gruBwd(poplar::Graph &graph, const GruParams &params,
481 poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit,
482 const poplar::Tensor &fwdIntermediatesSeq, const GruWeights &weights,
483 const poplar::Tensor &fwdInputSeq, const poplar::Tensor &realTimeSteps,
484 const poplar::Tensor &fwdOutput, const poplar::Tensor &gradLayerNext,
485 poplar::Tensor *inputGrad, poplar::Tensor *bwdIntermediates,
486 const poplar::DebugContext &debugContext,
487 const poplar::OptionFlags &options_,
488 poplin::PlanningCache *planningCache);
489
531 poplar::Graph &graph, const GruParams &params,
532 poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit,
533 const poplar::Tensor &fwdIntermediatesSeq, const GruWeights &weights,
534 const poplar::Tensor &fwdInputSeq, const poplar::Tensor &fwdOutput,
535 const poplar::Tensor &gradLayerNext, poplar::Tensor *inputGrad,
536 poplar::Tensor *bwdIntermediates, const poplar::Tensor &attentions,
537 poplar::Tensor *attentionsGrad, const poplar::DebugContext &debugContext,
538 const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache);
539
586auGruBwd(poplar::Graph &graph, const GruParams &params,
587 poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit,
588 const poplar::Tensor &fwdIntermediatesSeq, const GruWeights &weights,
589 const poplar::Tensor &fwdInputSeq, const poplar::Tensor &realTimeSteps,
590 const poplar::Tensor &fwdOutput, const poplar::Tensor &gradLayerNext,
591 poplar::Tensor *inputGrad, poplar::Tensor *bwdIntermediates,
592 const poplar::Tensor &attentions, poplar::Tensor *attentionsGrad,
593 const poplar::DebugContext &debugContext,
594 const poplar::OptionFlags &options_,
595 poplin::PlanningCache *planningCache);
596
627 const poplar::Tensor &fwdOutputInit,
628 const poplar::Tensor &fwdIntermediates,
629 const poplar::Tensor &bwdIntermediates,
630 const GruWeights &weights, const poplar::Tensor &input,
631 const poplar::Tensor &output,
632 const poplar::DebugContext &debugContext,
633 const poplar::OptionFlags &options_,
634 poplin::PlanningCache *planningCache);
635
666 const poplar::Tensor &fwdOutputInit,
667 const poplar::Tensor &fwdIntermediates,
668 const poplar::Tensor &bwdIntermediates,
669 const GruWeights &weights, const poplar::Tensor &input,
670 const poplar::Tensor &output,
671 const poplar::DebugContext &debugContext,
672 const poplar::OptionFlags &options_,
673 poplin::PlanningCache *planningCache);
674
712 poplar::Graph &graph, const GruParams &params,
713 poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit,
714 const poplar::Tensor &fwdIntermediates, const GruWeights &weights,
715 const poplar::Tensor &input, const poplar::Tensor &output,
716 const poplar::Tensor &outputGrad, poplar::Tensor *inputGrad,
717 GruWeights &weightsGrad, const poplar::DebugContext &debugContext,
718 const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache);
719
761 poplar::Graph &graph, const GruParams &params,
762 poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit,
763 const poplar::Tensor &fwdIntermediates, const GruWeights &weights,
764 const poplar::Tensor &input, const poplar::Tensor &realTimeSteps,
765 const poplar::Tensor &output, const poplar::Tensor &outputGrad,
766 poplar::Tensor *inputGrad, GruWeights &weightsGrad,
767 const poplar::DebugContext &debugContext,
768 const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache);
769
809 poplar::Graph &graph, const GruParams &params,
810 poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit,
811 const poplar::Tensor &fwdIntermediates, const GruWeights &weights,
812 const poplar::Tensor &input, const poplar::Tensor &output,
813 const poplar::Tensor &outputGrad, poplar::Tensor *inputGrad,
814 GruWeights &weightsGrad, const poplar::Tensor &attentions,
815 poplar::Tensor *attentionsGrad, const poplar::DebugContext &debugContext,
816 const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache);
817
861 poplar::Graph &graph, const GruParams &params,
862 poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit,
863 const poplar::Tensor &fwdIntermediates, const GruWeights &weights,
864 const poplar::Tensor &input, const poplar::Tensor &realTimeSteps,
865 const poplar::Tensor &output, const poplar::Tensor &outputGrad,
866 poplar::Tensor *inputGrad, GruWeights &weightsGrad,
867 const poplar::Tensor &attentions, poplar::Tensor *attentionsGrad,
868 const poplar::DebugContext &debugContext,
869 const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache);
870
871} // namespace gru
872} // namespace popnn
873
874#endif // popnn_Gru_hpp
Definitions for GRU cell operations.
poplar::Tensor auGruBwd(poplar::Graph &graph, const GruParams &params, poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit, const poplar::Tensor &fwdIntermediatesSeq, const GruWeights &weights, const poplar::Tensor &fwdInputSeq, const poplar::Tensor &fwdOutput, const poplar::Tensor &gradLayerNext, poplar::Tensor *inputGrad, poplar::Tensor *bwdIntermediates, const poplar::Tensor &attentions, poplar::Tensor *attentionsGrad, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache)
Run AUGRU backward pass.
std::pair< poplar::Tensor, poplar::Tensor > createWeightsKernel(poplar::Graph &graph, const GruParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Create the weights kernel used to weight the input and output of a GRU.
poplar::Tensor gruBwdWithWU(poplar::Graph &graph, const GruParams &params, poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit, const poplar::Tensor &fwdIntermediates, const GruWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::Tensor &outputGrad, poplar::Tensor *inputGrad, GruWeights &weightsGrad, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache)
Run a combined GRU backward and weight update pass.
const std::vector< BasicGruCellUnit > getDefaultBasicGruCellOrder()
Get the default order of the gates in a basic GRU cell.
GruWeights gruWU(poplar::Graph &graph, const GruParams &params, poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit, const poplar::Tensor &fwdIntermediates, const poplar::Tensor &bwdIntermediates, const GruWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache)
Run a standalone weight update pass.
poplar::Tensor gruFwd(poplar::Graph &graph, const GruParams &params, const poplar::Tensor &stateInit, const poplar::Tensor &in, const GruWeights &weights, poplar::Tensor *intermediates, poplar::program::Sequence &fwdProg, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Calculate the result of applying a GRU across a sequence.
poplar::Tensor gruBwd(poplar::Graph &graph, const GruParams &params, poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit, const poplar::Tensor &fwdIntermediatesSeq, const GruWeights &weights, const poplar::Tensor &fwdInputSeq, const poplar::Tensor &fwdOutput, const poplar::Tensor &gradLayerNext, poplar::Tensor *inputGrad, poplar::Tensor *bwdIntermediates, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache)
Run GRU backward pass.
GruWeights auGruWU(poplar::Graph &graph, const GruParams &params, poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit, const poplar::Tensor &fwdIntermediates, const poplar::Tensor &bwdIntermediates, const GruWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache)
Run a standalone weight update pass.
poplar::Tensor auGruFwd(poplar::Graph &graph, const GruParams &params, const poplar::Tensor &stateInit, const poplar::Tensor &in, const GruWeights &weights, poplar::Tensor *intermediates, const poplar::Tensor &attScores, poplar::program::Sequence &fwdProg, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Calculate the result of applying an AUGRU across a sequence.
poplar::Tensor createWeightsBiases(poplar::Graph &graph, const GruParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Create the weights biases.
poplar::Tensor auGruBwdWithWU(poplar::Graph &graph, const GruParams &params, poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit, const poplar::Tensor &fwdIntermediates, const GruWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::Tensor &outputGrad, poplar::Tensor *inputGrad, GruWeights &weightsGrad, const poplar::Tensor &attentions, poplar::Tensor *attentionsGrad, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache)
Run a combined AUGRU backward and weight update pass.
poplar::Tensor createAttention(poplar::Graph &graph, const GruParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={})
Create an attention tensor for AUGRU.
Definitions for non-linearity operations.
Functions for recurrent neural networks (RNN).
DebugContext gathers the common external parameters of the context of an operation.
Definition: DebugContext.hpp:221
This class represents a graph program to be executed on the IPU.
Definition: Graph.hpp:52
A set of option/value string flags to be used in various APIs.
Definition: OptionFlags.hpp:24
A reference to a subset of tensor elements.
Definition: Tensor.hpp:38
Class representing device data types.
Definition: Type.hpp:42
Program that executes a sequence of programs.
Definition: Program.hpp:77
Functions used in neural networks.
Definition: BatchNorm.hpp:14
NonLinearityType
Definition: NonLinearityDef.hpp:11
@ TANH
Hyperbolic tangent:
Functions and data types for performing matrix multiplies on the IPU.
Structure representing the parameters of the GRU.
Definition: Gru.hpp:29
std::vector< std::size_t > layerSizes
The number of neurons for the input and output layer.
Definition: Gru.hpp:43
std::vector< BasicGruCellUnit > cellOrder
The weights and biases for all of the layers being processed are concatenated in the outermost dimension.
Definition: Gru.hpp:55
std::size_t batchSize
The batch size.
Definition: Gru.hpp:37
bool outputFullSequence
If true the GRU function returns the entire sequence of outputs, otherwise it returns just the final output.
Definition: Gru.hpp:46
NonLinearityType activation
Activation function.
Definition: Gru.hpp:60
NonLinearityType recurrentActivation
Recurrent activation function.
Definition: Gru.hpp:62
poplar::Type dataType
Definition: Gru.hpp:34
std::size_t timeSteps
The number of time steps in the sequence of the GRU.
Definition: Gru.hpp:40
bool resetAfter
Controls whether the reset gate is applied before or after the candidate weights and biases.
Definition: Gru.hpp:58
bool calcInputGradients
If this parameter is set to false then the GRU will skip the calculation of the gradients of the inputs.
Definition: Gru.hpp:49
Structure holding all the parameters of a GRU cell, or the deltas for those parameters (depending on the use).
Definition: Gru.hpp:125
Structure of Recurrent Neural Network (RNN) parameters which allows for any customized implementation...
Definition: Rnn.hpp:22