#include <poplar/Tensor.hpp>

uint64_t getBasicGruCellFwdFlops(const GruParams &params);
uint64_t getBasicGruCellBwdFlops(const GruParams &params);
uint64_t getBasicGruCellWuFlops(const GruParams &params);
Definitions for GRU cell operations.
poplar::Tensor auGruBwd(poplar::Graph &graph, const GruParams &params, poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit, const poplar::Tensor &fwdIntermediatesSeq, const GruWeights &weights, const poplar::Tensor &fwdInputSeq, const poplar::Tensor &fwdOutput, const poplar::Tensor &gradLayerNext, poplar::Tensor *inputGrad, poplar::Tensor *bwdIntermediates, const poplar::Tensor &attentions, poplar::Tensor *attentionsGrad, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache)
Run AUGRU backward pass.
std::pair< poplar::Tensor, poplar::Tensor > createWeightsKernel(poplar::Graph &graph, const GruParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Create the weights kernel used to weight the input and output of a GRU.
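A minimal sketch of assembling a GruWeights structure from the two creation functions. The inputWeights/outputWeights/biases member names and the (input, output) ordering of the returned pair are assumptions, not taken from this extract:

#include <popnn/Gru.hpp>   // assumed include path
#include <poplar/Graph.hpp>
#include <tuple>

using namespace popnn::gru;   // assumed namespace for the GRU API

GruWeights makeWeights(poplar::Graph &graph, const GruParams &params) {
  GruWeights weights;
  // Assumed: the returned pair is ordered {inputWeights, outputWeights}.
  std::tie(weights.inputWeights, weights.outputWeights) =
      createWeightsKernel(graph, params, {"gruWeightsKernel"});
  weights.biases = createWeightsBiases(graph, params, {"gruBiases"});
  return weights;
}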
poplar::Tensor gruBwdWithWU(poplar::Graph &graph, const GruParams &params, poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit, const poplar::Tensor &fwdIntermediates, const GruWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::Tensor &outputGrad, poplar::Tensor *inputGrad, GruWeights &weightsGrad, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache)
Run a combined GRU backward and weight update pass.
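A sketch of one training step built from gruFwd followed by this fused backward/weight-update call. The wrapper function and tensor names are illustrative only; tensors such as stateInit, input and outputGrad are assumed to be created elsewhere:

#include <popnn/Gru.hpp>   // assumed include path
#include <poplar/Graph.hpp>
#include <poplar/Program.hpp>

using namespace popnn::gru;   // assumed namespace for the GRU API

void trainingStep(poplar::Graph &graph, const GruParams &params,
                  poplar::program::Sequence &prog,
                  const poplar::Tensor &stateInit,
                  const poplar::Tensor &input,
                  const GruWeights &weights,
                  const poplar::Tensor &outputGrad,  // gradient from the layer above
                  poplar::Tensor &inputGrad,
                  GruWeights &weightsGrad) {
  // Forward pass, keeping the intermediates required by the backward pass.
  poplar::Tensor intermediates;
  poplar::Tensor output = gruFwd(graph, params, stateInit, input, weights,
                                 &intermediates, prog, {"gruFwd"});
  // Fused backward pass and weight update; the return value is ignored here.
  gruBwdWithWU(graph, params, prog, stateInit, intermediates, weights, input,
               output, outputGrad, &inputGrad, weightsGrad,
               {"gruBwdWithWU"}, {}, nullptr);
}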
const std::vector< BasicGruCellUnit > getDefaultBasicGruCellOrder()
Get the default order of the gates in a basic GRU cell.
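A short sketch of recording the default gate order explicitly in the parameters (see the cellOrder member documented below):

#include <popnn/Gru.hpp>   // assumed include path

void useDefaultGateOrder(popnn::gru::GruParams &params) {
  // Store the canonical ordering explicitly so the weight layout is visible
  // in the model code rather than implied.
  params.cellOrder = popnn::gru::getDefaultBasicGruCellOrder();
}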
GruWeights gruWU(poplar::Graph &graph, const GruParams &params, poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit, const poplar::Tensor &fwdIntermediates, const poplar::Tensor &bwdIntermediates, const GruWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache)
Run a standalone weight update pass.
poplar::Tensor gruFwd(poplar::Graph &graph, const GruParams &params, const poplar::Tensor &stateInit, const poplar::Tensor &in, const GruWeights &weights, poplar::Tensor *intermediates, poplar::program::Sequence &fwdProg, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Calculate the result of applying a GRU across a sequence.
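A minimal inference-time sketch: passing nullptr for intermediates skips storing the per-step state that only the backward pass needs. Names are illustrative; stateInit and input are assumed to be created elsewhere (with creation helpers not shown in this extract):

#include <popnn/Gru.hpp>   // assumed include path
#include <poplar/Graph.hpp>
#include <poplar/Program.hpp>

poplar::Tensor inferGru(poplar::Graph &graph,
                        const popnn::gru::GruParams &params,
                        const poplar::Tensor &stateInit,
                        const poplar::Tensor &input,
                        const popnn::gru::GruWeights &weights,
                        poplar::program::Sequence &prog) {
  // With params.outputFullSequence set, the whole output sequence is
  // returned; otherwise only the final output is produced.
  return popnn::gru::gruFwd(graph, params, stateInit, input, weights,
                            /*intermediates=*/nullptr, prog, {"gruInference"});
}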
poplar::Tensor gruBwd(poplar::Graph &graph, const GruParams &params, poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit, const poplar::Tensor &fwdIntermediatesSeq, const GruWeights &weights, const poplar::Tensor &fwdInputSeq, const poplar::Tensor &fwdOutput, const poplar::Tensor &gradLayerNext, poplar::Tensor *inputGrad, poplar::Tensor *bwdIntermediates, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache)
Run GRU backward pass.
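A sketch of the split backward path: gruBwd produces the input gradient and the backward intermediates, which gruWU then consumes to form the weight deltas (gruBwdWithWU fuses both steps). The wrapper and tensor names are illustrative assumptions:

#include <popnn/Gru.hpp>   // assumed include path
#include <poplar/Graph.hpp>
#include <poplar/Program.hpp>

using namespace popnn::gru;   // assumed namespace for the GRU API

GruWeights backwardAndUpdate(poplar::Graph &graph, const GruParams &params,
                             poplar::program::Sequence &prog,
                             const poplar::Tensor &fwdOutputInit,
                             const poplar::Tensor &fwdIntermediatesSeq,
                             const GruWeights &weights,
                             const poplar::Tensor &fwdInputSeq,
                             const poplar::Tensor &fwdOutput,
                             const poplar::Tensor &outputGrad,
                             poplar::Tensor &inputGrad) {
  // gruBwd writes the gradient w.r.t. the input sequence through inputGrad
  // and hands back the backward intermediates; its return value is ignored
  // in this sketch.
  poplar::Tensor bwdIntermediates;
  gruBwd(graph, params, prog, fwdOutputInit, fwdIntermediatesSeq, weights,
         fwdInputSeq, fwdOutput, outputGrad, &inputGrad, &bwdIntermediates,
         {"gruBwd"}, {}, nullptr);
  // gruWU reuses both sets of intermediates to produce the weight deltas.
  return gruWU(graph, params, prog, fwdOutputInit, fwdIntermediatesSeq,
               bwdIntermediates, weights, fwdInputSeq, fwdOutput,
               {"gruWU"}, {}, nullptr);
}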
GruWeights auGruWU(poplar::Graph &graph, const GruParams &params, poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit, const poplar::Tensor &fwdIntermediates, const poplar::Tensor &bwdIntermediates, const GruWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache)
Run a standalone weight update pass.
poplar::Tensor auGruFwd(poplar::Graph &graph, const GruParams &params, const poplar::Tensor &stateInit, const poplar::Tensor &in, const GruWeights &weights, poplar::Tensor *intermediates, const poplar::Tensor &attScores, poplar::program::Sequence &fwdProg, const poplar::DebugContext &debugContext={}, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Calculate the result of applying an AUGRU across a sequence.
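A sketch of the AUGRU forward pass, using createAttention (documented further down) to allocate the per-step attention scores; how those scores are computed and written is outside this extract:

#include <popnn/Gru.hpp>   // assumed include path
#include <poplar/Graph.hpp>
#include <poplar/Program.hpp>

using namespace popnn::gru;   // assumed namespace for the GRU API

poplar::Tensor auGruForward(poplar::Graph &graph, const GruParams &params,
                            const poplar::Tensor &stateInit,
                            const poplar::Tensor &input,
                            const GruWeights &weights,
                            poplar::Tensor &intermediates,
                            poplar::program::Sequence &prog) {
  // Attention scores tensor for the AUGRU; assumed to be filled in by a
  // separate part of the program before prog runs.
  poplar::Tensor attScores = createAttention(graph, params, {"attention"});
  return auGruFwd(graph, params, stateInit, input, weights, &intermediates,
                  attScores, prog, {"auGruFwd"});
}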
poplar::Tensor createWeightsBiases(poplar::Graph &graph, const GruParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={}, poplin::PlanningCache *planningCache=nullptr)
Create the weights biases.
poplar::Tensor auGruBwdWithWU(poplar::Graph &graph, const GruParams &params, poplar::program::Sequence &prog, const poplar::Tensor &fwdOutputInit, const poplar::Tensor &fwdIntermediates, const GruWeights &weights, const poplar::Tensor &input, const poplar::Tensor &output, const poplar::Tensor &outputGrad, poplar::Tensor *inputGrad, GruWeights &weightsGrad, const poplar::Tensor &attentions, poplar::Tensor *attentionsGrad, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options_, poplin::PlanningCache *planningCache)
Run a combined AUGRU backward and weight update pass.
poplar::Tensor createAttention(poplar::Graph &graph, const GruParams &params, const poplar::DebugContext &debugContext, const poplar::OptionFlags &options={})
Create an attention tensor for AUGRU.
Definitions for non-linearity operations.
Functions for recurrent neural networks (RNN).
DebugContext gathers the common external parameters of the context of an operation.
Definition: DebugContext.hpp:221
This class represents a graph program to be executed on the IPU.
Definition: Graph.hpp:52
A set of option/value string flags to be used in various APIs.
Definition: OptionFlags.hpp:24
A reference to a subset of tensor elements.
Definition: Tensor.hpp:38
Class representing device data types.
Definition: Type.hpp:42
Program that executes a sequence of programs.
Definition: Program.hpp:77
Functions used in neural networks.
Definition: BatchNorm.hpp:14
NonLinearityType
Definition: NonLinearityDef.hpp:11
@ TANH
Hyperbolic tangent: y = tanh(x).
Functions and data types for performing matrix multiplies on the IPU.
Structure representing the parameters of the GRU.
Definition: Gru.hpp:29
std::vector< std::size_t > layerSizes
The number of neurons for the input and output layer.
Definition: Gru.hpp:43
std::vector< BasicGruCellUnit > cellOrder
The weights and biases for all of the layers being processed are concatenated in the outermost dimension of the weights and biases tensors; this member specifies the order of the gates in that dimension.
Definition: Gru.hpp:55
std::size_t batchSize
The batch size.
Definition: Gru.hpp:37
bool outputFullSequence
If true the GRU function returns the entire sequence of outputs, otherwise it returns just the final output.
Definition: Gru.hpp:46
NonLinearityType activation
Activation function.
Definition: Gru.hpp:60
NonLinearityType recurrentActivation
Recurrent activation function.
Definition: Gru.hpp:62
poplar::Type dataType
The data type of the GRU.
Definition: Gru.hpp:34
std::size_t timeSteps
The number of time steps in the sequence of the GRU.
Definition: Gru.hpp:40
bool resetAfter
Controls whether the reset gate is applied before or after the candidate weights and biases.
Definition: Gru.hpp:58
bool calcInputGradients
If this parameter is set to false then the GRU will skip the calculation of the gradients of the inputs.
Definition: Gru.hpp:49
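A short sketch tying the members above together, assuming GruParams is default-constructible so the documented fields can be filled in directly (comments paraphrase the member descriptions):

#include <popnn/Gru.hpp>   // assumed include path
#include <poplar/Type.hpp>

popnn::gru::GruParams makeGruParams() {
  popnn::gru::GruParams p;     // assumed default-constructible
  p.dataType = poplar::HALF;   // device data type
  p.batchSize = 16;
  p.timeSteps = 50;
  p.layerSizes = {128, 256};   // {input size, output size}
  p.outputFullSequence = true;  // return the whole output sequence
  p.calcInputGradients = true;  // compute gradients of the inputs in the backward pass
  p.resetAfter = false;         // whether the reset gate is applied after the candidate weights/biases
  p.activation = popnn::NonLinearityType::TANH;
  p.recurrentActivation = popnn::NonLinearityType::SIGMOID;
  return p;
}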
Structure holding all the parameters of a GRU cell, or the deltas for those parameters (depending on the context).
Definition: Gru.hpp:125
Structure of Recurrent Neural Network (RNN) parameters which allows for any customized implementation of the cellular part of the RNN.
Definition: Rnn.hpp:22