9#ifndef popops_Rearrange_hpp 
   10#define popops_Rearrange_hpp 
   12#include <poplar/Graph.hpp> 
   13#include <poplar/Type.hpp> 
   17#include <gccs/CompilerFeatures.hpp> 
   38                         unsigned numRows, 
unsigned numColumns,
 
   39                         unsigned numTranspositions);
 
   66    const poplar::Graph::TileToTensorMapping &mapping,
 
   67    std::function<std::pair<const poplar::Tensor, const poplar::Tensor>(
size_t)>
 
  165                             std::vector<poplar::program::Copy> &copies,
 
  236                    std::size_t preferredGrouping,
 
poplar::Tensor partialTranspose(poplar::Graph &graph, const poplar::Tensor &in, const poplar::ComputeSet &cs, const poplar::DebugContext &debugContext={})
Transpose the innermost pair of dimensions of the specified tensor, writing the results to a new tensor.
 
unsigned getMinimumRegroupGrainSize(const poplar::Type &type)
Get the smallest grouping we can transpose between for the given type using fast transposition codelets.
 
poplar::Tensor regroupIfPossible(poplar::Graph &graph, const poplar::Tensor &t, poplar::program::Sequence &prog, const poputil::GroupingInfo &to, const poplar::DebugContext &debugContext={})
Insert copies or other operations into the given program to transform the grouping found on the given tensor to the grouping specified by `to`.
 
poplar::Tensor regroupIfBeneficial(poplar::Graph &graph, const poplar::Tensor &in, const poplar::Tensor &ref, poplar::program::Sequence &prog, const poplar::DebugContext &debugContext={})
If possible and runtime efficient, add an operation to rearrange the given tensor in memory such that the grouping of the resulting tensor matches that of the reference tensor `ref`.
 
poplar::Tensor regroupTensor(poplar::Graph &graph, const poplar::Tensor &t, poplar::program::Sequence &copies, const poplar::ComputeSet &transposeCS, const poputil::GroupingInfo &from, const poputil::GroupingInfo &to, const poplar::DebugContext &debugContext={})
Insert copies or other operations into the given programs/compute sets to transform the grouping found on the given tensor from the grouping `from` to the grouping `to`.
 
bool canUseFastTranspose(const poplar::Target &target, const poplar::Type &type, unsigned numRows, unsigned numColumns, unsigned numTranspositions)
Determine if a fast transposition codelet may be used based on the given target, data type, number of rows, number of columns, and number of transpositions.
 
void addTransposeVertices(poplar::Graph &graph, const poplar::ComputeSet &cs, const poplar::Type &dType, unsigned rows, unsigned cols, const poplar::Graph::TileToTensorMapping &mapping, std::function< std::pair< const poplar::Tensor, const poplar::Tensor >(size_t)> getInOut, const poplar::DebugContext &debugContext={})
Transposes a set of matrices stored on multiple tiles.
 
Manage partitioning and grouping in tensors.
 
A reference to a compute set within a graph.
Definition: GraphElements.hpp:131
 
DebugContext gathers the common external parameters of the context of an operation.
Definition: DebugContext.hpp:221
 
This class represents a graph program to be executed on the IPU.
Definition: Graph.hpp:52
 
A target representation.
Definition: Target.hpp:69
 
A reference to a subset of tensor elements.
Definition: Tensor.hpp:38
 
Class representing device data types.
Definition: Type.hpp:42
 
Program that executes a sequence of programs.
Definition: Program.hpp:77
 
Common functions, such as elementwise and reductions.
Definition: AllTrue.hpp:15
 
std::pair< unsigned, unsigned > GroupingInfo
Grouped dimension info.
Definition: VarStructure.hpp:35