Search help

Note: Searching from the top-level index page will search all documents. Searching from a specific document will search only that document.

Find an exact phrase: Wrap your search phrase in "" (double quotes) to only get results where the phrase is exactly matched. For example "PyTorch for the IPU" or "replicated tensor sharding"
Prefix query: Add an * (asterisk) at the end of any word to indicate a prefix query. This will return results containing all words with the specific prefix. For example tensor*

Fuzzy search: Use ~N (tilde followed by a number) at the end of any word for a fuzzy search. This will return results that are similar to the search word. N specifies the “edit distance” (fuzziness) of the match. For example Polibs~1
Words close to each other: ~N (tilde followed by a number) after a phrase (in quotes) returns results where the words are close to each other. N is the maximum number of positions allowed between matching words. For example "ipu version"~2
Logical operators. You can use the following logical operators in a search:
- + signifies AND operation
- | signifies OR operation
- - negates a single word or phrase (returns results without that word or phrase)
- () controls operator precedence

9. Examples

You can find PyTorch examples and tutorials in the Graphcore GitHub examples repository. This contains:

Examples of popular machine learning models for training and inference
Tutorials
Examples of PopTorch and IPU features
Examples of simple models
Source code from videos, blogs and other documents

9.1. MNIST example

The example in Listing 9.1 shows how an MNIST model can be run on the IPU. The highlighted lines show the PopTorch-specific code required to run the example on multiple IPUs.

You can download the full source code from GitHub: mnist.py.

To run this example you will need to install the Poplar SDK (see the Getting Started Guide for your IPU system) and the appropriate version of torchvision:

$ python3 -m pip install torchvision==0.11.1

Listing 9.1 MNIST example

import torch
import torch.nn as nn
import torchvision
import poptorch

# Normal pytorch batch size
training_batch_size = 20
validation_batch_size = 100

opts = poptorch.Options()
# Device "step"
opts.deviceIterations(20)

# How many IPUs to replicate over.
opts.replicationFactor(4)

opts.randomSeed(42)

# Load MNIST normally.
training_data = poptorch.DataLoader(
    opts,
    torchvision.datasets.MNIST('mnist_data/',
                               train=True,
                               download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307, ), (0.3081, ))
                               ])),
    batch_size=training_batch_size,
    shuffle=True)

# Load MNIST normally.
val_options = poptorch.Options()
validation_data = poptorch.DataLoader(
    val_options,
    torchvision.datasets.MNIST('mnist_data/',
                               train=True,
                               download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.1307, ), (0.3081, ))
                               ])),
    batch_size=validation_batch_size,
    shuffle=True,
    drop_last=True)

# A helper block to build convolution-pool-relu blocks.
class Block(nn.Module):
    def __init__(self, in_channels, num_filters, kernel_size, pool_size):
        super(Block, self).__init__()
        self.conv = nn.Conv2d(in_channels,
                              num_filters,
                              kernel_size=kernel_size)
        self.pool = nn.MaxPool2d(kernel_size=pool_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.pool(x)
        x = self.relu(x)
        return x

# Define the network using the above blocks.
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = Block(1, 10, 5, 2)
        self.layer2 = Block(10, 20, 5, 2)
        self.layer3 = nn.Linear(320, 256)
        self.layer3_act = nn.ReLU()
        self.layer4 = nn.Linear(256, 10)

        self.softmax = nn.LogSoftmax(1)
        self.loss = nn.NLLLoss(reduction="mean")

    def forward(self, x, target=None):
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.view(-1, 320)

        x = self.layer3_act(self.layer3(x))
        x = self.layer4(x)
        x = self.softmax(x)

        if target is not None:
            loss = self.loss(x, target)
            return x, loss
        return x

# Create our model.
model = Network()

# Create model for training which will run on IPU.
training_model = poptorch.trainingModel(model, training_data.options)

# Same model as above, they will share weights (in 'model') which once training is finished can be copied back.
inference_model = poptorch.inferenceModel(model, validation_data.options)

def train():
    for batch_number, (data, labels) in enumerate(training_data):
        output, losses = training_model(data, labels)

        if batch_number % 10 == 0:
            print(f"PoptorchIPU loss at batch: {batch_number} is {losses}")

            # Pick the highest probability.
            _, ind = torch.max(output, 1)
            assert training_model.options.output_mode in (
                poptorch.OutputMode.All, poptorch.OutputMode.Final
            ), "Only 'Final' and 'All' OutputMode supported"
            # If we're using Final: only keep the last labels, no-op if using All
            num_labels = ind.shape[0]
            labels = labels[-num_labels:]
            eq = torch.eq(ind, labels)
            elms, counts = torch.unique(eq,
                                        sorted=False,
                                        return_counts=True)

            acc = 0.0
            if len(elms) == 2:
                if elms[0]:
                    acc = (counts[0].item() / num_labels) * 100.0
                else:
                    acc = (counts[1].item() / num_labels) * 100.0

            print(
                f"Training accuracy: {acc}% from batch of size {num_labels}"
            )
    print("Done training")

def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for (data, labels) in validation_data:
            output = inference_model(data)

            # Argmax the probabilities to get the highest.
            _, ind = torch.max(output, 1)

            # Compare it against the ground truth for this batch.
            eq = torch.eq(ind, labels)

            # Count the number which are True and the number which are False.
            elms, counts = torch.unique(eq,
                                        sorted=False,
                                        return_counts=True)

            if len(elms) == 2 or elms[0]:
                if elms[0]:
                    correct += counts[0].item()
                else:
                    correct += counts[1].item()

            total += validation_batch_size
    print("Validation: of " + str(total) + " samples we got: " +
          str((correct / total) * 100.0) + "% correct")

# Train on IPU.
train()

test()