Poplar and PopLibs
Recurrent.hpp
Go to the documentation of this file.
1// Copyright (c) 2017 Graphcore Ltd. All rights reserved.
6#ifndef popnn_Recurrent_hpp
7#define popnn_Recurrent_hpp
8
9/*
10 * Vanilla RNN layer implementation:
11 *
12 * ------ ---- ----------------
13 * x -->| Wff |----------->| + |------->| Non linearity |----------> y
14 * ------ ---- ---------------- |
15 * /\ |
16 * | ------ |
17 * --------| Wfb |<------------------
18 * -----
19 *
20 *
21 * In general, the RNN can be run over a set of sequence steps. The
22 * multiplication with Wff can be done in parallel for any subset or even the
23 * full set of sequence steps. The recurrent part must be done a step at a
24 * time.
25 *
26 * In the code below:
27 * Wff is named weightsInput
28 * Wfb is named weightsFeedback
29 */
30
31#include <poplar/Graph.hpp>
32#include <poplar/Program.hpp>
34
35namespace poplin {
36
37class PlanningCache;
38
39} // namespace poplin
40
41namespace popnn {
43namespace rnn {
44
49std::vector<std::pair<poplin::MatMulParams, poplar::OptionFlags>>
50getMatMulPrePlanParameters(std::size_t numSteps, std::size_t batchSize,
51 std::size_t inputSize, std::size_t outputSize,
52 const poplar::Type &dType,
53 const poplar::Type &partialsType = poplar::FLOAT,
54 bool inferenceOnly = false,
55 bool hasFeedforwardWeights = true);
56
60uint64_t getFwdFlops(unsigned sequenceSize, unsigned batchSize,
61 unsigned inputSize, unsigned outputSize,
62 bool weightInput = true);
66uint64_t getBwdFlops(unsigned sequenceSize, unsigned batchSize,
67 unsigned inputSize, unsigned outputSize,
68 bool calcInputGrad = true);
72uint64_t getWuFlops(unsigned sequenceSize, unsigned batchSize,
73 unsigned inputSize, unsigned outputSize);
74
96poplar::Tensor createInput(poplar::Graph &graph, unsigned numSteps,
97 unsigned batchSize, unsigned inputSize,
98 unsigned outputSize, const poplar::Type &dType,
99 const poplar::Type &partialsType = poplar::FLOAT,
100 bool inferenceOnly = false,
101 const poplar::DebugContext &debugContext = {},
102 poplin::PlanningCache *planningCache = nullptr);
103
127 unsigned batchSize, unsigned outputSize,
128 poplar::program::Sequence &prog, bool initState,
129 bool inferenceOnly,
130 const poplar::DebugContext &debugContext = {},
131 poplin::PlanningCache *planningCache = nullptr);
132
137
159 poplar::Graph &graph, unsigned sequenceSize, unsigned batchSize,
160 unsigned inputSize, unsigned outputSize, const poplar::Type &dType,
161 const poplar::Type &partialsType = poplar::FLOAT,
162 bool inferenceOnly = false, const poplar::DebugContext &debugContext = {},
163 poplin::PlanningCache *planningCache = nullptr);
164
179 poplar::Graph &graph, unsigned batchSize, unsigned outputSize,
180 const poplar::Type &dType, const poplar::Type &partialsType = poplar::FLOAT,
181 bool inferenceOnly = false, const poplar::DebugContext &debugContext = {},
182 poplin::PlanningCache *planningCache = nullptr);
183
215 poplar::Graph &graph, const poplar::Tensor &actIn,
216 const poplar::Tensor &weights, poplar::program::Sequence &prog,
217 const poplar::Type &partialsType = poplar::FLOAT,
218 bool inferenceOnly = false, const poplar::DebugContext &debugContext = {},
219 poplin::PlanningCache *planningCache = nullptr);
220
254 poplar::Graph &graph, const poplar::Tensor &feedFwdIn,
255 const poplar::Tensor &initState, const poplar::Tensor &feedbackWeights,
256 const poplar::Tensor &biases, poplar::program::Sequence &prog,
257 popnn::NonLinearityType nonLinearityType,
258 const poplar::Type &partialsType = poplar::FLOAT,
259 bool inferenceOnly = false, const poplar::DebugContext &debugContext = {},
260 poplin::PlanningCache *planningCache = nullptr);
261
275 unsigned batchSize, unsigned outputSize,
277 const poplar::DebugContext &debugContext = {},
278 poplin::PlanningCache *planningCache = nullptr);
279
285/* ------ ---- -----
286 * <---| Wfb |<------| + |<---------| NL |<------- (bwd:gradientOut
287 * ------ ---- ----- for final step)
288 * | (bwd:gradientOut)
289 * \|/
290 * -----
291 * | Wff |
292 * ------
293 * |
294 * Wfb are the feedback weights
295 * Wff are the input weights
296 */
314std::pair<poplar::Tensor, poplar::Tensor> backwardGradientStep(
315 poplar::Graph &graph, const poplar::Tensor &nextLayerGrad,
316 const poplar::Tensor &bwdState, const poplar::Tensor &actOut,
317 const poplar::Tensor &weightsInput, const poplar::Tensor &weightsFeedback,
319 const poplar::Type &partialsType = poplar::FLOAT,
320 const poplar::DebugContext &debugContext = {},
321 poplin::PlanningCache *planningCache = nullptr);
322
323// clang-format off
324// To handle long lines in Doxygen for auto-linking of overloaded functions.
327// clang-format on
329 poplar::Graph &graph, const poplar::Tensor &nextLayerGrad,
330 const poplar::Tensor &bwdState, const poplar::Tensor &actOut,
331 const poplar::Tensor &weightsFeedback, poplar::program::Sequence &prog,
332 popnn::NonLinearityType nonLinearityType,
333 const poplar::Type &partialsType = poplar::FLOAT,
334 const poplar::DebugContext &debugContext = {},
335 poplin::PlanningCache *planningCache = nullptr);
336
364void paramDeltaUpdate(poplar::Graph &graph, const poplar::Tensor &bwdState,
365 const poplar::Tensor &actIn,
366 const poplar::Tensor &prevOut,
367 poplar::Tensor &weightsInputDeltasAcc,
368 poplar::Tensor &weightsFeedbackDeltasAcc,
369 poplar::Tensor &biasDeltasAcc,
371 const poplar::Type &partialsType = poplar::FLOAT,
372 const poplar::DebugContext &debugContext = {},
373 poplin::PlanningCache *planningCache = nullptr);
374
410 const poplar::Tensor &fwdStateInit, const poplar::Tensor *weightedIn,
411 const poplar::Tensor &biases, const poplar::Tensor &feedFwdWeights,
412 const poplar::Tensor &feedbackWeights, const poplar::Tensor &prevLayerActs,
413 const popnn::NonLinearityType &nonLinearityType,
414 const poplar::Type &partialsType, bool inferenceOnly,
415 const poplar::DebugContext &debugContext = {},
416 poplin::PlanningCache *planningCache = nullptr);
417
459std::tuple<poplar::Tensor, poplar::Tensor, poplar::Tensor, poplar::Tensor>
460rnnBwdSequence(poplar::Graph &graph, bool doWU, bool ignoreInputGradientCalc,
462 const poplar::Tensor &fwdStateInit,
463 const poplar::Tensor &fwdState, const poplar::Tensor &biases,
464 const poplar::Tensor &feedFwdWeights,
465 const poplar::Tensor &feedbackWeights,
466 const poplar::Tensor &outGradient, const poplar::Tensor &actIn,
467 const popnn::NonLinearityType &nonLinearityType,
468 const poplar::Type &partialsType,
469 const poplar::DebugContext &debugContext = {},
470 poplin::PlanningCache *planningCache = nullptr);
471
472} // namespace rnn
473} // namespace popnn
474
475#endif // popnn_Recurrent_hpp
Non-linearity operations.
DebugContext gathers the common external parameters of the context of an operation.
Definition: DebugContext.hpp:221
This class represents a graph program to be executed on the IPU.
Definition: Graph.hpp:52
A reference to a subset of tensor elements.
Definition: Tensor.hpp:38
Class representing device data types.
Definition: Type.hpp:42
Program that executes a sequence of programs.
Definition: Program.hpp:77
Type FLOAT
Device type: float
Linear algebra functions.
Definition: Cholesky.hpp:14
poplar::Tensor forwardWeightInput(poplar::Graph &graph, const poplar::Tensor &actIn, const poplar::Tensor &weights, poplar::program::Sequence &prog, const poplar::Type &partialsType=poplar::FLOAT, bool inferenceOnly=false, const poplar::DebugContext &debugContext={}, poplin::PlanningCache *planningCache=nullptr)
Perform the feedforward part of a RNN layer.
poplar::Tensor getOutputFromFwdState(const poplar::Tensor &fwdState)
Extract previous output tensor from the hidden state.
poplar::Tensor createWeightsInput(poplar::Graph &graph, unsigned sequenceSize, unsigned batchSize, unsigned inputSize, unsigned outputSize, const poplar::Type &dType, const poplar::Type &partialsType=poplar::FLOAT, bool inferenceOnly=false, const poplar::DebugContext &debugContext={}, poplin::PlanningCache *planningCache=nullptr)
Create the weights used to weight the input of a vanilla RNN layer.
std::vector< std::pair< poplin::MatMulParams, poplar::OptionFlags > > getMatMulPrePlanParameters(std::size_t numSteps, std::size_t batchSize, std::size_t inputSize, std::size_t outputSize, const poplar::Type &dType, const poplar::Type &partialsType=poplar::FLOAT, bool inferenceOnly=false, bool hasFeedforwardWeights=true)
Predict what matrix multiplications will be needed for the given parameters and return a list of corresponding matmul parameters and option flags.
poplar::Tensor createWeightsFeedback(poplar::Graph &graph, unsigned batchSize, unsigned outputSize, const poplar::Type &dType, const poplar::Type &partialsType=poplar::FLOAT, bool inferenceOnly=false, const poplar::DebugContext &debugContext={}, poplin::PlanningCache *planningCache=nullptr)
Create the weights used in the recurrent part of a vanilla RNN layer.
poplar::Tensor rnnFwdSequence(poplar::Graph &graph, poplar::program::Sequence &prog, const poplar::Tensor &fwdStateInit, const poplar::Tensor *weightedIn, const poplar::Tensor &biases, const poplar::Tensor &feedFwdWeights, const poplar::Tensor &feedbackWeights, const poplar::Tensor &prevLayerActs, const popnn::NonLinearityType &nonLinearityType, const poplar::Type &partialsType, bool inferenceOnly, const poplar::DebugContext &debugContext={}, poplin::PlanningCache *planningCache=nullptr)
Perform the forward part of the RNN layer.
void paramDeltaUpdate(poplar::Graph &graph, const poplar::Tensor &bwdState, const poplar::Tensor &actIn, const poplar::Tensor &prevOut, poplar::Tensor &weightsInputDeltasAcc, poplar::Tensor &weightsFeedbackDeltasAcc, poplar::Tensor &biasDeltasAcc, poplar::program::Sequence &prog, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={}, poplin::PlanningCache *planningCache=nullptr)
Update parameter deltas for a vanilla RNN step.
poplar::Tensor createBwdState(poplar::Graph &graph, const poplar::Type &dType, unsigned batchSize, unsigned outputSize, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={}, poplin::PlanningCache *planningCache=nullptr)
Create initial state for backward pass of a vanilla RNN.
uint64_t getBwdFlops(unsigned sequenceSize, unsigned batchSize, unsigned inputSize, unsigned outputSize, bool calcInputGrad=true)
Compute the total floating point operations for the backward pass of RNN.
poplar::Tensor createFwdState(poplar::Graph &graph, const poplar::Type &dType, unsigned batchSize, unsigned outputSize, poplar::program::Sequence &prog, bool initState, bool inferenceOnly, const poplar::DebugContext &debugContext={}, poplin::PlanningCache *planningCache=nullptr)
Create initial state for a vanilla RNN.
uint64_t getFwdFlops(unsigned sequenceSize, unsigned batchSize, unsigned inputSize, unsigned outputSize, bool weightInput=true)
Compute the total floating point operations for the forward pass of RNN.
std::pair< poplar::Tensor, poplar::Tensor > backwardGradientStep(poplar::Graph &graph, const poplar::Tensor &nextLayerGrad, const poplar::Tensor &bwdState, const poplar::Tensor &actOut, const poplar::Tensor &weightsInput, const poplar::Tensor &weightsFeedback, poplar::program::Sequence &prog, popnn::NonLinearityType nonLinearityType, const poplar::Type &partialsType=poplar::FLOAT, const poplar::DebugContext &debugContext={}, poplin::PlanningCache *planningCache=nullptr)
Compute a single step of the backward pass of a vanilla RNN layer.
uint64_t getWuFlops(unsigned sequenceSize, unsigned batchSize, unsigned inputSize, unsigned outputSize)
Compute the total floating point operations for the weight update pass of RNN.
std::tuple< poplar::Tensor, poplar::Tensor, poplar::Tensor, poplar::Tensor > rnnBwdSequence(poplar::Graph &graph, bool doWU, bool ignoreInputGradientCalc, poplar::program::Sequence &prog, const poplar::Tensor &fwdStateInit, const poplar::Tensor &fwdState, const poplar::Tensor &biases, const poplar::Tensor &feedFwdWeights, const poplar::Tensor &feedbackWeights, const poplar::Tensor &outGradient, const poplar::Tensor &actIn, const popnn::NonLinearityType &nonLinearityType, const poplar::Type &partialsType, const poplar::DebugContext &debugContext={}, poplin::PlanningCache *planningCache=nullptr)
Perform the backward pass of the RNN layer, optionally including the weight update.
poplar::Tensor createInput(poplar::Graph &graph, unsigned numSteps, unsigned batchSize, unsigned inputSize, unsigned outputSize, const poplar::Type &dType, const poplar::Type &partialsType=poplar::FLOAT, bool inferenceOnly=false, const poplar::DebugContext &debugContext={}, poplin::PlanningCache *planningCache=nullptr)
Create a tensor which is input to a vanilla RNN.
poplar::Tensor forwardIterate(poplar::Graph &graph, const poplar::Tensor &feedFwdIn, const poplar::Tensor &initState, const poplar::Tensor &feedbackWeights, const poplar::Tensor &biases, poplar::program::Sequence &prog, popnn::NonLinearityType nonLinearityType, const poplar::Type &partialsType=poplar::FLOAT, bool inferenceOnly=false, const poplar::DebugContext &debugContext={}, poplin::PlanningCache *planningCache=nullptr)
Perform the feedback part of the RNN layer.
Functions used in neural networks.
Definition: BatchNorm.hpp:14
NonLinearityType
Definition: NonLinearityDef.hpp:11