Poplar and PopLibs
CollectiveBalancedReorder.hpp
Go to the documentation of this file.
1// Copyright (c) 2021 Graphcore Ltd. All rights reserved.
2#ifndef INCLUDE_GCL_COLLECTIVEBALANCEDREORDER_HPP
3#define INCLUDE_GCL_COLLECTIVEBALANCEDREORDER_HPP
4
5#include <poplar/DebugContext.hpp>
6#include <poplar/Graph.hpp>
7#include <poplar/Interval.hpp>
8#include <poplar/Tensor.hpp>
9#include <poplar/TensorRearranger.hpp>
10#include <poplar/Type.hpp>
11#include <cstdint>
12#include <string>
13#include <vector>
14
// GCL_NO_DISCARD expands to [[nodiscard]] when the language level check below
// passes, and to nothing otherwise, so annotated declarations still compile
// on older language standards.
// NOTE(review): 201603L is the feature-test value of the nodiscard attribute
// (__has_cpp_attribute(nodiscard)), not a published __cplusplus value; C++17
// reports 201703L. The comparison still selects C++17 and later - confirm
// whether __has_cpp_attribute was the intended check.
17#if __cplusplus >= 201603L
21#define GCL_NO_DISCARD [[nodiscard]]
22#else
26#define GCL_NO_DISCARD
27#endif
28
30namespace gcl {
31
37private:
46 template <typename ElementType>
47 void rearrangeImpl(const ElementType *in, std::size_t inLen, ElementType *out,
48 std::size_t outLen, bool refToGathered) const;
49
50public:
51#pragma GCC diagnostic push
52#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
53 // In the deprecation period we need to ensure none of the
54 // default implementations warn about using the deprecated vars.
59 const CollectiveBalancedHostRearrangement &) = default;
62 CollectiveBalancedHostRearrangement &&) noexcept = default;
67 operator=(const CollectiveBalancedHostRearrangement &) = default;
72 operator=(CollectiveBalancedHostRearrangement &&) noexcept = default;
73#pragma GCC diagnostic pop
74
80 // clang-format off
81 GC_DEPRECATED_MSG("Use rearrangeForCollective(const void*, std::size_t, void*, std::size_t, std::size_t) instead ")
82 // clang-format on
83 void rearrangeForCollective(const void *in, void *out,
84 int64_t elemByteSize) const;
85
// Host-side reorder of a tensor into the balanced, collective-friendly layout,
// taking explicit input and output buffer sizes. This is the overload that the
// deprecation message on the (in, out, elemByteSize) variant points callers to.
// NOTE(review): inSize/outSize look like byte counts - the std::vector
// overload passes size() * sizeof(T) for both; confirm against the definition.
93 void rearrangeForCollective(const void *in, std::size_t inSize, void *out,
94 std::size_t outSize,
95 std::size_t elemByteSize) const;
96
// Convenience overload for std::vector buffers. Forwards to the sized
// (const void *, std::size_t, void *, std::size_t, std::size_t) overload,
// passing each vector's data pointer and its length multiplied by sizeof(T).
// elemByteSize defaults to sizeof(T).
// `out` is not resized here: its current size() determines the output size
// handed to the sized overload.
102 template <typename T>
103 void rearrangeForCollective(const std::vector<T> &in, std::vector<T> &out,
104 std::size_t elemByteSize = sizeof(T)) const {
105 rearrangeForCollective(in.data(), in.size() * sizeof(T), out.data(),
106 out.size() * sizeof(T), elemByteSize);
107 }
108
115 // clang-format off
116 GC_DEPRECATED_MSG("Use undoRearrangeForCollective(const void*, std::size_t, void*, std::size_t, std::size_t) instead ")
117 // clang-format on
118 void undoRearrangeForCollective(const void *in, void *out,
119 int64_t elemByteSize) const;
120
// Convenience overload for std::vector buffers. Forwards to the sized
// (const void *, std::size_t, void *, std::size_t, std::size_t) overload,
// passing each vector's data pointer and its length multiplied by sizeof(T).
// elemByteSize defaults to sizeof(T).
// `out` is not resized here: its current size() determines the output size
// handed to the sized overload.
127 template <typename T>
128 void undoRearrangeForCollective(const std::vector<T> &in, std::vector<T> &out,
129 std::size_t elemByteSize = sizeof(T)) const {
130 undoRearrangeForCollective(in.data(), in.size() * sizeof(T), out.data(),
131 out.size() * sizeof(T), elemByteSize);
132 }
133
// Host-side reorder of a tensor back into the expected IR tensor shape and
// order, taking explicit input and output buffer sizes.
// NOTE(review): inSize/outSize look like byte counts - the std::vector
// overload passes size() * sizeof(T) for both; confirm against the definition.
142 void undoRearrangeForCollective(const void *in, std::size_t inSize, void *out,
143 std::size_t outSize,
144 std::size_t elemByteSize) const;
145
148 // NOLINTNEXTLINE
150
158 // clang-format off
159 GC_DEPRECATED_MSG("Use rearrangeForCollective or undoRearrangeForCollective instead")
160 // clang-format on
161 void rearrange(const void *in, void *out, int64_t elemByteSize,
162 bool refToGathered) const;
163
168
173
178
183
187 GCL_NO_DISCARD const std::vector<poplar::Interval> &
189
193 void setGatheredToRefSlices(std::vector<poplar::Interval> slices);
194
199 GCL_NO_DISCARD const std::vector<uint32_t> &getElementMap() const;
200
202 GC_DEPRECATED_MSG("Use get/setReplicationFactor() instead")
203 // NOLINTNEXTLINE(readability-identifier-naming)
204 unsigned replicationFactor = 0;
205
207 GC_DEPRECATED_MSG("Use get/setTotalElementsPerReplica() instead")
208 // NOLINTNEXTLINE(readability-identifier-naming)
209 std::size_t totalElementsPerReplica = 0;
210
212 GC_DEPRECATED_MSG("Use set/getGatheredToRefSlices() instead")
213 // NOLINTNEXTLINE(readability-identifier-naming)
214 std::vector<poplar::Interval> gatheredToRefSlices;
215
219 GC_DEPRECATED_MSG("Use getElementMap() instead")
220 // NOLINTNEXTLINE(readability-identifier-naming)
221 std::vector<uint32_t> elementMap;
222
223private:
224 void rearrange(const void *in, std::size_t inSize, void *out,
225 std::size_t outSize, std::size_t elemByteSize,
226 bool refToGathered) const;
227
228 void update(
229 bool allowElementMap, size_t numGatheredToRefSlicesVecEntries,
230 const std::vector<std::vector<poplar::Interval>> &gatheredToRefSlicesVec);
231
234};
235
250public:
264 unsigned replicationFactor_,
265 const poplar::DebugNameAndId &dnai_,
266 bool allowElementMap = false,
267 unsigned grainSize = 1);
268
276
286 const std::string &debugPrefix);
287
292 // NOLINTNEXTLINE(modernize-use-nodiscard)
295
// Get the shape of the reference tensor: returns mReferenceTensor.shape() by
// value. The result is marked GCL_NO_DISCARD.
298 // NOLINTNEXTLINE(modernize-use-nodiscard)
299 GCL_NO_DISCARD std::vector<std::size_t> getReferenceShape() const {
300 return mReferenceTensor.shape();
301 }
302
306 // NOLINTNEXTLINE(modernize-use-nodiscard)
308 return mHostRearrangement;
309 }
310
315 poplar::program::Sequence &prog) const;
316
317private:
319 void rearrange(const void *in, void *out, std::size_t elemByteSize,
320 bool refToGathered) const;
321
324 poplar::Graph &mGraph;
325
326 unsigned mReplicationFactor;
327
328 std::vector<std::size_t> mNumReplicaElementsPerTile;
329 std::vector<std::size_t> mElementsPerTile;
330 std::vector<poplar::Interval> mGatheredToSimplifiedRefSlices;
331 poplar::Tensor mReferenceTensor;
332 poplar::TensorRearranger mSimplifier;
333
334 CollectiveBalancedHostRearrangement mHostRearrangement;
335 const poplar::DebugNameAndId mDnai;
336};
337
338} // namespace gcl
339#endif // INCLUDE_GCL_COLLECTIVEBALANCEDREORDER_HPP
#define GCL_NO_DISCARD
Produce a compile-time warning for unused return values.
Definition: CollectiveBalancedReorder.hpp:26
This class contains functions and data necessary to rearrange tensors on the host side at runtime.
Definition: CollectiveBalancedReorder.hpp:36
void rearrange(const void *in, void *out, int64_t elemByteSize, bool refToGathered) const
Host tensor rearrangement routine.
const std::vector< uint32_t > & getElementMap() const
Simple indices map for mapping individual elements one by one.
void rearrangeForCollective(const void *in, void *out, int64_t elemByteSize) const
Reorder the tensor in a balanced, collective-friendly manner (host-side).
void setTotalElementsPerReplica(std::size_t totalElementsPerReplica)
The total number of elements in one replica's fragment.
void setGatheredToRefSlices(std::vector< poplar::Interval > slices)
Set the mapping from the gathered tensor back to the reference tensor.
size_t getNumRearrangedTensorElems() const
Number of elements in the collective balanced (reordered) tensor.
CollectiveBalancedHostRearrangement(CollectiveBalancedHostRearrangement &&) noexcept=default
Defaulted to avoid warnings in deprecation period.
std::vector< poplar::Interval > gatheredToRefSlices
The mapping from the gathered tensor back to the reference tensor.
Definition: CollectiveBalancedReorder.hpp:214
std::vector< uint32_t > elementMap
Simple indices map for mapping individual elements one by one.
Definition: CollectiveBalancedReorder.hpp:221
void undoRearrangeForCollective(const void *in, void *out, int64_t elemByteSize) const
Reorder tensor back into the expected IR tensor shape and order (host-side).
void setReplicationFactor(unsigned replicationFactor)
The graph's replication factor.
unsigned getReplicationFactor() const
The graph's replication factor.
void undoRearrangeForCollective(const void *in, std::size_t inSize, void *out, std::size_t outSize, std::size_t elemByteSize) const
Reorder tensor back into the expected IR tensor shape and order (host-side).
const std::vector< poplar::Interval > & getGatheredToRefSlices() const
The mapping from the gathered tensor back to the reference tensor.
std::size_t getTotalElementsPerReplica() const
The total number of elements in one replica's fragment.
CollectiveBalancedHostRearrangement(const CollectiveBalancedHostRearrangement &)=default
Defaulted to avoid warnings in deprecation period.
std::size_t totalElementsPerReplica
The total number of elements in one replica's fragment.
Definition: CollectiveBalancedReorder.hpp:209
unsigned replicationFactor
The graph's replication factor.
Definition: CollectiveBalancedReorder.hpp:204
Helper class to reorder a tensor in a per-tile-balanced fashion such that each replica obtains (for i...
Definition: CollectiveBalancedReorder.hpp:249
std::vector< std::size_t > getReferenceShape() const
Get the shape of the reference tensor.
Definition: CollectiveBalancedReorder.hpp:299
poplar::Tensor createCollectivesTensor(const poplar::Type &type, const std::string &debugPrefix)
Create a tensor mapped efficiently over the same tiles as the reference tensor.
CollectiveBalancedReorder(poplar::Graph &graph_, poplar::Tensor tensor_, unsigned replicationFactor_, const poplar::DebugNameAndId &dnai_, bool allowElementMap=false, unsigned grainSize=1)
Constructor.
void zeroPaddingInCollectiveTensor(poplar::Tensor &collectiveTensor, poplar::program::Sequence &prog) const
Zero the padding of a collective friendly tensor.
poplar::Tensor undoRearrangeForCollective(const poplar::Tensor &tensor) const
Reorder tensor back into the expected IR tensor shape and order.
const CollectiveBalancedHostRearrangement & getHostRearrangement() const
Get a helper class that allows the rearrangement to be applied on the host.
Definition: CollectiveBalancedReorder.hpp:307
poplar::Tensor createReplicaSlice(const poplar::Type &type)
Create a tensor mapped efficiently over the same tiles as the reference tensor.
DebugNameAndId bundles a name and a DebugId to facilitate their propagation through function calls.
Definition: DebugContext.hpp:142
This class represents a graph program to be executed on the IPU.
Definition: Graph.hpp:52
TensorRearranger can be used to re-order the view on a tensor and to undo that re-ordering.
Definition: TensorRearranger.hpp:19
A reference to a subset of tensor elements.
Definition: Tensor.hpp:38
Class representing device data types.
Definition: Type.hpp:42
Program that executes a sequence of programs.
Definition: Program.hpp:77
Graphcore Communications Library.
Poplar classes and functions.
Definition: ArrayRef.hpp:14