-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain.cpp
More file actions
60 lines (52 loc) · 2 KB
/
train.cpp
File metadata and controls
60 lines (52 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>

#include <torch/torch.h>

#include "net.h"
#define BATCH_SIZE 64
#define N_EPOCHS 10
#define LEARNING_RATE 0.01
/// Trains a Net on (a split of) the MNIST training set and saves the result.
///
/// Usage:
///   train <in_model> <out_model>   resume training from a saved model
///   train <out_model>              train a fresh model from scratch
///
/// Required environment variables:
///   N_SPLITS  total number of data splits (must be a positive integer)
///   SPLIT     index of the split this process trains on
///   DATA_DIR  directory containing the raw MNIST files
///
/// Returns 0 on success, 1 on bad arguments or missing/invalid configuration.
int main(int argc, char **argv) {
  // Init model
  std::string out_path;
  std::shared_ptr<Net> net = std::make_shared<Net>();
  if (argc == 3) { // if 3 args
    torch::load(net, argv[1]); // load from arg 1
    out_path = argv[2]; // save to arg 2
  } else if (argc == 2) {
    out_path = argv[1]; // save to arg 1, no initial model
  } else {
    std::cerr << "Wrong number of arguments" << std::endl;
    return 1;
  }
  // Get other params from environment vars. std::getenv returns nullptr for
  // an unset variable, which would be undefined behavior when handed to
  // std::stoi or std::string — so validate before use.
  const char *n_splits_env = std::getenv("N_SPLITS");
  const char *split_env = std::getenv("SPLIT");
  const char *data_dir_env = std::getenv("DATA_DIR");
  if (n_splits_env == nullptr || split_env == nullptr ||
      data_dir_env == nullptr) {
    std::cerr << "N_SPLITS, SPLIT and DATA_DIR must all be set" << std::endl;
    return 1;
  }
  size_t n_splits = 0;
  size_t split = 0;
  try {
    n_splits = std::stoi(n_splits_env);
    split = std::stoi(split_env);
  } catch (const std::exception &) {
    std::cerr << "N_SPLITS and SPLIT must be integers" << std::endl;
    return 1;
  }
  // n_splits is the modulus below: zero would be a division by zero (UB),
  // and split >= n_splits would select no batches at all.
  if (n_splits == 0 || split >= n_splits) {
    std::cerr << "Require N_SPLITS > 0 and 0 <= SPLIT < N_SPLITS" << std::endl;
    return 1;
  }
  std::string data_dir = data_dir_env;
  // Multi-threaded data loader for the MNIST dataset.
  auto data_loader =
      torch::data::make_data_loader(torch::data::datasets::MNIST(data_dir).map(
                                        torch::data::transforms::Stack<>()),
                                    BATCH_SIZE);
  // Init optimizer
  torch::optim::SGD optimizer(net->parameters(), LEARNING_RATE);
  // Train loop
  for (size_t epoch = 1; epoch <= N_EPOCHS; ++epoch) { // epoch loop
    size_t batch_index = 0;
    for (auto &batch : *data_loader) { // batch loop
      // Each process trains only the batches belonging to its split.
      if (batch_index % n_splits == split) {
        optimizer.zero_grad(); // reset gradients
        torch::Tensor prediction = net->forward(batch.data); // forward pass
        torch::Tensor loss =
            torch::nll_loss(prediction, batch.target); // compute loss
        loss.backward(); // backprop
        optimizer.step(); // update params
        // Print logs. NOTE(review): the period is 100 + split, which staggers
        // log lines across splits — confirm this is intentional (a plain 100
        // may have been meant).
        if (batch_index % (100 + split) == 0) {
          std::cerr << "Epoch: " << epoch << " | Batch: " << batch_index
                    << " | Loss: " << loss.item<float>() << std::endl;
        }
      }
      batch_index++;
    }
  }
  // Save the trained model
  torch::save(net, out_path);
  return 0;
}