2#ifndef INCLUDE_GCL_COLLECTIVEBALANCEDREORDER_HPP
3#define INCLUDE_GCL_COLLECTIVEBALANCEDREORDER_HPP
5#include <poplar/DebugContext.hpp>
6#include <poplar/Graph.hpp>
7#include <poplar/Interval.hpp>
8#include <poplar/Tensor.hpp>
9#include <poplar/TensorRearranger.hpp>
10#include <poplar/Type.hpp>
17#if __cplusplus >= 201603L
21#define GCL_NO_DISCARD [[nodiscard]]
46 template <
typename ElementType>
47 void rearrangeImpl(
const ElementType *in, std::size_t inLen, ElementType *out,
48 std::size_t outLen,
bool refToGathered)
const;
51#pragma GCC diagnostic push
52#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
73#pragma GCC diagnostic pop
81 GC_DEPRECATED_MSG(
"Use rearrangeForCollective(const void*, std::size_t, void*, std::size_t, std::size_t) instead ")
84 int64_t elemByteSize) const;
95 std::
size_t elemByteSize) const;
102 template <typename T>
104 std::
size_t elemByteSize = sizeof(T))
const {
106 out.size() *
sizeof(T), elemByteSize);
116 GC_DEPRECATED_MSG(
"Use undoRearrangeForCollective(const void*, std::size_t, void*, std::size_t, std::size_t) instead ")
119 int64_t elemByteSize) const;
127 template <typename T>
129 std::
size_t elemByteSize = sizeof(T))
const {
131 out.size() *
sizeof(T), elemByteSize);
144 std::size_t elemByteSize)
const;
159 GC_DEPRECATED_MSG(
"Use rearrangeForCollective or undoRearrangeForCollective instead")
161 void
rearrange(const
void *in,
void *out, int64_t elemByteSize,
162 bool refToGathered) const;
224 void rearrange(const
void *in, std::
size_t inSize,
void *out,
225 std::
size_t outSize, std::
size_t elemByteSize,
226 bool refToGathered) const;
229 bool allowElementMap,
size_t numGatheredToRefSlicesVecEntries,
230 const std::vector<std::vector<
poplar::Interval>> &gatheredToRefSlicesVec);
264 unsigned replicationFactor_,
266 bool allowElementMap =
false,
267 unsigned grainSize = 1);
286 const std::string &debugPrefix);
300 return mReferenceTensor.shape();
308 return mHostRearrangement;
319 void rearrange(
const void *in,
void *out, std::size_t elemByteSize,
320 bool refToGathered)
const;
326 unsigned mReplicationFactor;
328 std::vector<std::size_t> mNumReplicaElementsPerTile;
329 std::vector<std::size_t> mElementsPerTile;
330 std::vector<poplar::Interval> mGatheredToSimplifiedRefSlices;
#define GCL_NO_DISCARD
Produce a compile-time warning for unused return values.
Definition: CollectiveBalancedReorder.hpp:26
This class contains functions and data necessary to rearrange tensors on the host side at runtime.
Definition: CollectiveBalancedReorder.hpp:36
void rearrange(const void *in, void *out, int64_t elemByteSize, bool refToGathered) const
Host tensor rearrangement routine.
const std::vector< uint32_t > & getElementMap() const
Simple indices map for mapping individual elements one by one.
void rearrangeForCollective(const void *in, void *out, int64_t elemByteSize) const
Apply the balanced reorder to the tensor in a collective-friendly manner (host-side).
void setTotalElementsPerReplica(std::size_t totalElementsPerReplica)
The total number of elements in one replica's fragment.
void setGatheredToRefSlices(std::vector< poplar::Interval > slices)
Set the mapping from the gathered tensor back to the reference tensor.
size_t getNumRearrangedTensorElems() const
Number of elements in the collective balanced (reordered) tensor.
CollectiveBalancedHostRearrangement(CollectiveBalancedHostRearrangement &&) noexcept=default
Defaulted to avoid warnings during the deprecation period.
std::vector< poplar::Interval > gatheredToRefSlices
The mapping from the gathered tensor back to the reference tensor.
Definition: CollectiveBalancedReorder.hpp:214
std::vector< uint32_t > elementMap
Simple indices map for mapping individual elements one by one.
Definition: CollectiveBalancedReorder.hpp:221
void undoRearrangeForCollective(const void *in, void *out, int64_t elemByteSize) const
Reorder tensor back into the expected IR tensor shape and order (host-side).
void setReplicationFactor(unsigned replicationFactor)
The graph's replication factor.
unsigned getReplicationFactor() const
The graph's replication factor.
void undoRearrangeForCollective(const void *in, std::size_t inSize, void *out, std::size_t outSize, std::size_t elemByteSize) const
Reorder tensor back into the expected IR tensor shape and order (host-side).
const std::vector< poplar::Interval > & getGatheredToRefSlices() const
The mapping from the gathered tensor back to the reference tensor.
std::size_t getTotalElementsPerReplica() const
The total number of elements in one replica's fragment.
CollectiveBalancedHostRearrangement(const CollectiveBalancedHostRearrangement &)=default
Defaulted to avoid warnings during the deprecation period.
std::size_t totalElementsPerReplica
The total number of elements in one replica's fragment.
Definition: CollectiveBalancedReorder.hpp:209
unsigned replicationFactor
The graph's replication factor.
Definition: CollectiveBalancedReorder.hpp:204
Helper class to reorder a tensor in a per-tile-balanced fashion such that each replica obtains (for i...
Definition: CollectiveBalancedReorder.hpp:249
std::vector< std::size_t > getReferenceShape() const
Get the shape of the reference tensor.
Definition: CollectiveBalancedReorder.hpp:299
poplar::Tensor createCollectivesTensor(const poplar::Type &type, const std::string &debugPrefix)
Create a tensor mapped efficiently over the same tiles as the reference tensor.
CollectiveBalancedReorder(poplar::Graph &graph_, poplar::Tensor tensor_, unsigned replicationFactor_, const poplar::DebugNameAndId &dnai_, bool allowElementMap=false, unsigned grainSize=1)
Constructor.
void zeroPaddingInCollectiveTensor(poplar::Tensor &collectiveTensor, poplar::program::Sequence &prog) const
Zero the padding of a collective friendly tensor.
poplar::Tensor undoRearrangeForCollective(const poplar::Tensor &tensor) const
Reorder tensor back into the expected IR tensor shape and order.
const CollectiveBalancedHostRearrangement & getHostRearrangement() const
Get a helper class that allows the rearrangement to be applied on the host.
Definition: CollectiveBalancedReorder.hpp:307
poplar::Tensor createReplicaSlice(const poplar::Type &type)
Create a tensor mapped efficiently over the same tiles as the reference tensor.
DebugNameAndId bundles a name and a DebugId to facilitate their propagation through function calls.
Definition: DebugContext.hpp:142
This class represents a graph program to be executed on the IPU.
Definition: Graph.hpp:52
TensorRearranger can be used to re-order the view on a tensor and to undo that re-ordering.
Definition: TensorRearranger.hpp:19
A reference to a subset of tensor elements.
Definition: Tensor.hpp:38
Class representing device data types.
Definition: Type.hpp:42
Program that executes a sequence of programs.
Definition: Program.hpp:77
Graphcore Communications Library.
Poplar classes and functions.
Definition: ArrayRef.hpp:14