/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysml.scripts.nn.examples;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.apache.sysml.api.mlcontext.MLResults;
import org.apache.sysml.api.mlcontext.Matrix;
import org.apache.sysml.api.mlcontext.Script;
import org.apache.sysml.scripts.nn.examples.mnist_lenet_distrib_sgd.Eval_output;
import org.apache.sysml.scripts.nn.examples.mnist_lenet_distrib_sgd.Generate_dummy_data_output;
import org.apache.sysml.scripts.nn.examples.mnist_lenet_distrib_sgd.Train_output;

public class Mnist_lenet_distrib_sgd
extends Script {
    public Mnist_lenet_distrib_sgd() {
        String string = "scripts/nn/examples/mnist_lenet_distrib_sgd.dml";
        InputStream inputStream = Script.class.getResourceAsStream(new StringBuffer().append("/").append(string).toString());
        InputStreamReader inputStreamReader = new InputStreamReader(inputStream);
        char[] cArray = new char[1024];
        StringBuilder stringBuilder = new StringBuilder();
        try {
            int n;
            while ((n = inputStreamReader.read(cArray)) > 0) {
                stringBuilder.append(cArray, 0, n);
            }
        }
        catch (IOException iOException) {
            iOException.printStackTrace();
        }
        this.setScriptString(stringBuilder.toString());
    }

    public Generate_dummy_data_output generate_dummy_data(Object object, Object object2, Object object3, Object object4, Object object5) {
        String string = "source('scripts/nn/examples/mnist_lenet_distrib_sgd.dml') as mlcontextns;[X, Y] = mlcontextns::generate_dummy_data(N, C, Hin, Win, K);";
        Script script = new Script(string);
        script.in("N", object).in("C", object2).in("Hin", object3).in("Win", object4).in("K", object5).out("X").out("Y");
        MLResults mLResults = script.execute();
        Matrix matrix = mLResults.getMatrix("X");
        Matrix matrix2 = mLResults.getMatrix("Y");
        Generate_dummy_data_output generate_dummy_data_output = new Generate_dummy_data_output(matrix, matrix2);
        return generate_dummy_data_output;
    }

    public String generate_dummy_data__docs() {
        String string = "generate_dummy_data = function(int N, int C, int Hin, int Win, int K)\n    return (matrix[double] X, matrix[double] Y) {\n  /*\n   * Generate a dummy dataset.\n   *\n   * Outputs:\n   *  - X: Input data matrix, of shape (N, D).\n   *  - Y: Target matrix, of shape (N, K).\n   *  - C: Number of input channels (dimensionality of input depth).\n   *  - Hin: Input height.\n   *  - Win: Input width.\n   */\n";
        return string;
    }

    public String generate_dummy_data__source() {
        String string = "generate_dummy_data = function(int N, int C, int Hin, int Win, int K)\n    return (matrix[double] X, matrix[double] Y) {\n  /*\n   * Generate a dummy dataset.\n   *\n   * Outputs:\n   *  - X: Input data matrix, of shape (N, D).\n   *  - Y: Target matrix, of shape (N, K).\n   *  - C: Number of input channels (dimensionality of input depth).\n   *  - Hin: Input height.\n   *  - Win: Input width.\n   */\n  # Generate dummy input data\n  #N = 1024  # num examples\n  #C = 1  # num input channels\n  #Hin = 28  # input height\n  #Win = 28  # input width\n  #K = 10  # num target classes\n  X = rand(rows=N, cols=C*Hin*Win, pdf=\"normal\")\n  classes = round(rand(rows=N, cols=1, min=1, max=K, pdf=\"uniform\"))\n  Y = table(seq(1, N), classes, N, K)  # one-hot encoding\n}\n";
        return string;
    }

    public Eval_output eval(Object object, Object object2) {
        String string = "source('scripts/nn/examples/mnist_lenet_distrib_sgd.dml') as mlcontextns;[loss, accuracy] = mlcontextns::eval(probs, Y);";
        Script script = new Script(string);
        script.in("probs", object).in("Y", object2).out("loss").out("accuracy");
        MLResults mLResults = script.execute();
        double d = mLResults.getDouble("loss");
        double d2 = mLResults.getDouble("accuracy");
        Eval_output eval_output = new Eval_output(d, d2);
        return eval_output;
    }

    public String eval__docs() {
        String string = "eval = function(matrix[double] probs, matrix[double] Y)\n    return (double loss, double accuracy) {\n  /*\n   * Evaluates a convolutional net using the \"LeNet\" architecture.\n   *\n   * The probs matrix contains the class probability predictions\n   * of K classes over N examples.  The targets, Y, have K classes,\n   * and are one-hot encoded.\n   *\n   * Inputs:\n   *  - probs: Class probabilities, of shape (N, K).\n   *  - Y: Target matrix, of shape (N, K).\n   *\n   * Outputs:\n   *  - loss: Scalar loss, of shape (1).\n   *  - accuracy: Scalar accuracy, of shape (1).\n   */\n";
        return string;
    }

    public String eval__source() {
        String string = "eval = function(matrix[double] probs, matrix[double] Y)\n    return (double loss, double accuracy) {\n  /*\n   * Evaluates a convolutional net using the \"LeNet\" architecture.\n   *\n   * The probs matrix contains the class probability predictions\n   * of K classes over N examples.  The targets, Y, have K classes,\n   * and are one-hot encoded.\n   *\n   * Inputs:\n   *  - probs: Class probabilities, of shape (N, K).\n   *  - Y: Target matrix, of shape (N, K).\n   *\n   * Outputs:\n   *  - loss: Scalar loss, of shape (1).\n   *  - accuracy: Scalar accuracy, of shape (1).\n   */\n  # Compute loss & accuracy\n  loss = cross_entropy_loss::forward(probs, Y)\n  correct_pred = rowIndexMax(probs) == rowIndexMax(Y)\n  accuracy = mean(correct_pred)\n}\n";
        return string;
    }

    public Matrix predict(Object object, Object object2, Object object3, Object object4, Object object5, Object object6, Object object7, Object object8, Object object9, Object object10, Object object11, Object object12) {
        String string = "source('scripts/nn/examples/mnist_lenet_distrib_sgd.dml') as mlcontextns;probs = mlcontextns::predict(X, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4);";
        Script script = new Script(string);
        script.in("X", object).in("C", object2).in("Hin", object3).in("Win", object4).in("W1", object5).in("b1", object6).in("W2", object7).in("b2", object8).in("W3", object9).in("b3", object10).in("W4", object11).in("b4", object12).out("probs");
        MLResults mLResults = script.execute();
        Matrix matrix = mLResults.getMatrix("probs");
        return matrix;
    }

    public String predict__docs() {
        String string = "predict = function(matrix[double] X, int C, int Hin, int Win,\n                   matrix[double] W1, matrix[double] b1,\n                   matrix[double] W2, matrix[double] b2,\n                   matrix[double] W3, matrix[double] b3,\n                   matrix[double] W4, matrix[double] b4)\n    return (matrix[double] probs) {\n  /*\n   * Computes the class probability predictions of a convolutional\n   * net using the \"LeNet\" architecture.\n   *\n   * The input matrix, X, has N examples, each represented as a 3D\n   * volume unrolled into a single vector.\n   *\n   * Inputs:\n   *  - X: Input data matrix, of shape (N, C*Hin*Win).\n   *  - C: Number of input channels (dimensionality of input depth).\n   *  - Hin: Input height.\n   *  - Win: Input width.\n   *  - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf).\n   *  - b1: 1st layer biases vector, of shape (F1, 1).\n   *  - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf).\n   *  - b2: 2nd layer biases vector, of shape (F2, 1).\n   *  - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3).\n   *  - b3: 3rd layer biases vector, of shape (1, N3).\n   *  - W4: 4th layer weights (parameters) matrix, of shape (N3, K).\n   *  - b4: 4th layer biases vector, of shape (1, K).\n   *\n   * Outputs:\n   *  - probs: Class probabilities, of shape (N, K).\n   */\n";
        return string;
    }

    public String predict__source() {
        String string = "predict = function(matrix[double] X, int C, int Hin, int Win,\n                   matrix[double] W1, matrix[double] b1,\n                   matrix[double] W2, matrix[double] b2,\n                   matrix[double] W3, matrix[double] b3,\n                   matrix[double] W4, matrix[double] b4)\n    return (matrix[double] probs) {\n  /*\n   * Computes the class probability predictions of a convolutional\n   * net using the \"LeNet\" architecture.\n   *\n   * The input matrix, X, has N examples, each represented as a 3D\n   * volume unrolled into a single vector.\n   *\n   * Inputs:\n   *  - X: Input data matrix, of shape (N, C*Hin*Win).\n   *  - C: Number of input channels (dimensionality of input depth).\n   *  - Hin: Input height.\n   *  - Win: Input width.\n   *  - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf).\n   *  - b1: 1st layer biases vector, of shape (F1, 1).\n   *  - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf).\n   *  - b2: 2nd layer biases vector, of shape (F2, 1).\n   *  - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3).\n   *  - b3: 3rd layer biases vector, of shape (1, N3).\n   *  - W4: 4th layer weights (parameters) matrix, of shape (N3, K).\n   *  - b4: 4th layer biases vector, of shape (1, K).\n   *\n   * Outputs:\n   *  - probs: Class probabilities, of shape (N, K).\n   */\n  N = nrow(X)\n\n  # Network:\n  # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax\n  Hf = 5  # filter height\n  Wf = 5  # filter width\n  stride = 1\n  pad = 2  # For same dimensions, (Hf - stride) / 2\n\n  F1 = nrow(W1)  # num conv filters in conv1\n  F2 = nrow(W2)  # num conv filters in conv2\n  N3 = ncol(W3)  # num nodes in affine3\n  K = ncol(W4)  # num nodes in affine4, equal to number of target dimensions (num classes)\n\n  # Compute predictions over mini-batches\n  probs = matrix(0, rows=N, cols=K)\n  batch_size = 64\n  iters = ceil(N / batch_size)\n  parfor(i in 1:iters, check=0) {  # complains about `probs` as an inter-loop dependency\n    # Get next batch\n    beg = ((i-1) * batch_size) %% N + 1\n    end = min(N, beg + batch_size - 1)\n    X_batch = X[beg:end,]\n\n    # Compute forward pass\n    ## layer 1: conv1 -> relu1 -> pool1\n    [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,\n                                              pad, pad)\n    outr1 = relu::forward(outc1)\n    [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,\n                                                  strideh=2, stridew=2, pad=0, pad=0)\n    ## layer 2: conv2 -> relu2 -> pool2\n    [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,\n                                              stride, stride, pad, pad)\n    outr2 = relu::forward(outc2)\n    [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,\n                                                  strideh=2, stridew=2, pad=0, pad=0)\n    ## layer 3:  affine3 -> relu3\n    outa3 = affine::forward(outp2, W3, b3)\n    outr3 = relu::forward(outa3)\n    ## layer 4:  affine4 -> softmax\n    outa4 = affine::forward(outr3, W4, b4)\n    probs_batch = softmax::forward(outa4)\n\n    # Store predictions\n    probs[beg:end,] = probs_batch\n  }\n}\n";
        return string;
    }

    public Train_output train(Object object, Object object2, Object object3, Object object4, Object object5, Object object6, Object object7, Object object8, Object object9, Object object10) {
        String string = "source('scripts/nn/examples/mnist_lenet_distrib_sgd.dml') as mlcontextns;[W1, b1, W2, b2, W3, b3, W4, b4] = mlcontextns::train(X, Y, X_val, Y_val, C, Hin, Win, batch_size, parallel_batches, epochs);";
        Script script = new Script(string);
        script.in("X", object).in("Y", object2).in("X_val", object3).in("Y_val", object4).in("C", object5).in("Hin", object6).in("Win", object7).in("batch_size", object8).in("parallel_batches", object9).in("epochs", object10).out("W1").out("b1").out("W2").out("b2").out("W3").out("b3").out("W4").out("b4");
        MLResults mLResults = script.execute();
        Matrix matrix = mLResults.getMatrix("W1");
        Matrix matrix2 = mLResults.getMatrix("b1");
        Matrix matrix3 = mLResults.getMatrix("W2");
        Matrix matrix4 = mLResults.getMatrix("b2");
        Matrix matrix5 = mLResults.getMatrix("W3");
        Matrix matrix6 = mLResults.getMatrix("b3");
        Matrix matrix7 = mLResults.getMatrix("W4");
        Matrix matrix8 = mLResults.getMatrix("b4");
        Train_output train_output = new Train_output(matrix, matrix2, matrix3, matrix4, matrix5, matrix6, matrix7, matrix8);
        return train_output;
    }

    public String train__docs() {
        String string = "train = function(matrix[double] X, matrix[double] Y,\n                 matrix[double] X_val, matrix[double] Y_val,\n                 int C, int Hin, int Win, int batch_size,\n                 int parallel_batches, int epochs)\n    return (matrix[double] W1, matrix[double] b1,\n            matrix[double] W2, matrix[double] b2,\n            matrix[double] W3, matrix[double] b3,\n            matrix[double] W4, matrix[double] b4) {\n  /*\n   * Trains a convolutional net using the \"LeNet\" architecture using\n   * distributed synchronous SGD.\n   *\n   * The input matrix, X, has N examples, each represented as a 3D\n   * volume unrolled into a single vector.  The targets, Y, have K\n   * classes, and are one-hot encoded.\n   *\n   * Inputs:\n   *  - X: Input data matrix, of shape (N, C*Hin*Win).\n   *  - Y: Target matrix, of shape (N, K).\n   *  - X_val: Input validation data matrix, of shape (N, C*Hin*Win).\n   *  - Y_val: Target validation matrix, of shape (N, K).\n   *  - C: Number of input channels (dimensionality of input depth).\n   *  - Hin: Input height.\n   *  - Win: Input width.\n   *  - batch_size: Number of examples in each batch.\n   *  - parallel_batches: Number of batches to run in parallel.\n   *  - epochs: Total number of full training loops over the full data set.\n   *\n   * Outputs:\n   *  - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf).\n   *  - b1: 1st layer biases vector, of shape (F1, 1).\n   *  - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf).\n   *  - b2: 2nd layer biases vector, of shape (F2, 1).\n   *  - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3).\n   *  - b3: 3rd layer biases vector, of shape (1, N3).\n   *  - W4: 4th layer weights (parameters) matrix, of shape (N3, K).\n   *  - b4: 4th layer biases vector, of shape (1, K).\n   */\n";
        return string;
    }

    public String train__source() {
        String string = "train = function(matrix[double] X, matrix[double] Y,\n                 matrix[double] X_val, matrix[double] Y_val,\n                 int C, int Hin, int Win, int batch_size,\n                 int parallel_batches, int epochs)\n    return (matrix[double] W1, matrix[double] b1,\n            matrix[double] W2, matrix[double] b2,\n            matrix[double] W3, matrix[double] b3,\n            matrix[double] W4, matrix[double] b4) {\n  /*\n   * Trains a convolutional net using the \"LeNet\" architecture using\n   * distributed synchronous SGD.\n   *\n   * The input matrix, X, has N examples, each represented as a 3D\n   * volume unrolled into a single vector.  The targets, Y, have K\n   * classes, and are one-hot encoded.\n   *\n   * Inputs:\n   *  - X: Input data matrix, of shape (N, C*Hin*Win).\n   *  - Y: Target matrix, of shape (N, K).\n   *  - X_val: Input validation data matrix, of shape (N, C*Hin*Win).\n   *  - Y_val: Target validation matrix, of shape (N, K).\n   *  - C: Number of input channels (dimensionality of input depth).\n   *  - Hin: Input height.\n   *  - Win: Input width.\n   *  - batch_size: Number of examples in each batch.\n   *  - parallel_batches: Number of batches to run in parallel.\n   *  - epochs: Total number of full training loops over the full data set.\n   *\n   * Outputs:\n   *  - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf).\n   *  - b1: 1st layer biases vector, of shape (F1, 1).\n   *  - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf).\n   *  - b2: 2nd layer biases vector, of shape (F2, 1).\n   *  - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3).\n   *  - b3: 3rd layer biases vector, of shape (1, N3).\n   *  - W4: 4th layer weights (parameters) matrix, of shape (N3, K).\n   *  - b4: 4th layer biases vector, of shape (1, K).\n   */\n  N = nrow(X)\n  K = ncol(Y)\n\n  # Create network:\n  # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax\n  Hf = 5  # filter height\n  Wf = 5  # filter width\n  stride = 1\n  pad = 2  # For same dimensions, (Hf - stride) / 2\n\n  F1 = 32  # num conv filters in conv1\n  F2 = 64  # num conv filters in conv2\n  N3 = 512  # num nodes in affine3\n  # Note: affine4 has K nodes, which is equal to the number of target dimensions (num classes)\n\n  [W1, b1] = conv2d::init(F1, C, Hf, Wf)  # inputs: (N, C*Hin*Win)\n  [W2, b2] = conv2d::init(F2, F1, Hf, Wf)  # inputs: (N, F1*(Hin/2)*(Win/2))\n  [W3, b3] = affine::init(F2*(Hin/2/2)*(Win/2/2), N3)  # inputs: (N, F2*(Hin/2/2)*(Win/2/2))\n  [W4, b4] = affine::init(N3, K)  # inputs: (N, N3)\n  W4 = W4 / sqrt(2)  # different initialization, since being fed into softmax, instead of relu\n\n  # Initialize SGD w/ Nesterov momentum optimizer\n  lr = 0.01  # learning rate\n  mu = 0.9  #0.5  # momentum\n  decay = 0.95  # learning rate decay constant\n  vW1 = sgd_nesterov::init(W1); vb1 = sgd_nesterov::init(b1)\n  vW2 = sgd_nesterov::init(W2); vb2 = sgd_nesterov::init(b2)\n  vW3 = sgd_nesterov::init(W3); vb3 = sgd_nesterov::init(b3)\n  vW4 = sgd_nesterov::init(W4); vb4 = sgd_nesterov::init(b4)\n\n  # Regularization\n  lambda = 5e-04\n\n  # Optimize\n  print(\"Starting optimization\")\n  group_batch_size = parallel_batches*batch_size\n  groups = as.integer(ceil(N/group_batch_size))\n  print(\"Total Epochs: \"+epochs+\", Batch size: \"+batch_size+\n        \", Degree of parallelism: \"+parallel_batches+\", Group batch size: \"+group_batch_size+\n        \", Num groups: \"+groups)\n  # Loop over the dataset multiple times\n  for (e in 1:epochs) {\n    # Grab groups of mini-batches\n    for (g in 1:groups) {\n      # Get next group of mini-batches\n      # NOTE: At the end of the dataset, the last mini-batch in this group could be smaller than\n      # the other groups.\n      group_beg = ((g-1) * group_batch_size) %% N + 1\n      group_end = min(N, group_beg + group_batch_size - 1)\n      X_group_batch = X[group_beg:group_end,]\n      y_group_batch = Y[group_beg:group_end,]\n\n      # Data structure to store gradients computed in parallel\n      dW1_agg = matrix(0, rows=parallel_batches, cols=nrow(W1)*ncol(W1))\n      dW2_agg = matrix(0, rows=parallel_batches, cols=nrow(W2)*ncol(W2))\n      dW3_agg = matrix(0, rows=parallel_batches, cols=nrow(W3)*ncol(W3))\n      dW4_agg = matrix(0, rows=parallel_batches, cols=nrow(W4)*ncol(W4))\n      db1_agg = matrix(0, rows=parallel_batches, cols=nrow(b1)*ncol(b1))\n      db2_agg = matrix(0, rows=parallel_batches, cols=nrow(b2)*ncol(b2))\n      db3_agg = matrix(0, rows=parallel_batches, cols=nrow(b3)*ncol(b3))\n      db4_agg = matrix(0, rows=parallel_batches, cols=nrow(b4)*ncol(b4))\n\n      # Run graph on each mini-batch in this group in parallel (ideally on multiple GPUs)\n      parfor (j in 1:parallel_batches) {\n        # Get a mini-batch in this group\n        beg = ((j-1) * batch_size) %% nrow(X_group_batch) + 1\n        end = min(nrow(X_group_batch), beg + batch_size - 1)\n        X_batch = X_group_batch[beg:end,]\n        y_batch = y_group_batch[beg:end,]\n\n        # Enable for debugging:\n        #print(\"Epoch: \"+e+\"\\t Group: \"+g+\"\\t j: \"+j+\"\\t nrow(X_batch): \"+nrow(X_batch))\n\n        # Compute forward pass\n        ## layer 1: conv1 -> relu1 -> pool1\n        [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf,\n                                                  stride, stride, pad, pad)\n        outr1 = relu::forward(outc1)\n        [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,\n                                                      strideh=2, stridew=2, pad=0, pad=0)\n        ## layer 2: conv2 -> relu2 -> pool2\n        [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,\n                                                  stride, stride, pad, pad)\n        outr2 = relu::forward(outc2)\n        [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,\n                                                      strideh=2, stridew=2, pad=0, pad=0)\n        ## layer 3:  affine3 -> relu3 -> dropout\n        outa3 = affine::forward(outp2, W3, b3)\n        outr3 = relu::forward(outa3)\n        [outd3, maskd3] = dropout::forward(outr3, 0.5, -1)\n        ## layer 4:  affine4 -> softmax\n        outa4 = affine::forward(outd3, W4, b4)\n        probs = softmax::forward(outa4)\n\n        # Compute data backward pass\n        ## loss:\n        dprobs = cross_entropy_loss::backward(probs, y_batch)\n        ## layer 4:  affine4 -> softmax\n        douta4 = softmax::backward(dprobs, outa4)\n        [doutd3, dW4, db4] = affine::backward(douta4, outr3, W4, b4)\n        ## layer 3:  affine3 -> relu3 -> dropout\n        doutr3 = dropout::backward(doutd3, outr3, 0.5, maskd3)\n        douta3 = relu::backward(doutr3, outa3)\n        [doutp2, dW3, db3] = affine::backward(douta3, outp2, W3, b3)\n        ## layer 2: conv2 -> relu2 -> pool2\n        doutr2 = max_pool2d::backward(doutp2, Houtp2, Woutp2, outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,\n                                      strideh=2, stridew=2, pad=0, pad=0)\n        doutc2 = relu::backward(doutr2, outc2)\n        [doutp1, dW2, db2] = conv2d::backward(doutc2, Houtc2, Woutc2, outp1, W2, b2, F1,\n                                              Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad)\n        ## layer 1: conv1 -> relu1 -> pool1\n        doutr1 = max_pool2d::backward(doutp1, Houtp1, Woutp1, outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,\n                                      strideh=2, stridew=2, pad=0, pad=0)\n        doutc1 = relu::backward(doutr1, outc1)\n        [dX_batch, dW1, db1] = conv2d::backward(doutc1, Houtc1, Woutc1, X_batch, W1, b1, C,\n                                                Hin, Win, Hf, Wf, stride, stride, pad, pad)\n\n        # Compute regularization backward pass\n        dW1_reg = l2_reg::backward(W1, lambda)\n        dW2_reg = l2_reg::backward(W2, lambda)\n        dW3_reg = l2_reg::backward(W3, lambda)\n        dW4_reg = l2_reg::backward(W4, lambda)\n        dW1 = dW1 + dW1_reg\n        dW2 = dW2 + dW2_reg\n        dW3 = dW3 + dW3_reg\n        dW4 = dW4 + dW4_reg\n\n        # Flatten and store gradients for this parallel execution\n        # Note: We multiply by a weighting to allow for proper gradient averaging during the\n        # aggregation even with uneven batch sizes.\n        weighting = nrow(X_batch) / nrow(X_group_batch)\n        dW1_agg[j,] = matrix(dW1, rows=1, cols=nrow(W1)*ncol(W1)) * weighting\n        dW2_agg[j,] = matrix(dW2, rows=1, cols=nrow(W2)*ncol(W2)) * weighting\n        dW3_agg[j,] = matrix(dW3, rows=1, cols=nrow(W3)*ncol(W3)) * weighting\n        dW4_agg[j,] = matrix(dW4, rows=1, cols=nrow(W4)*ncol(W4)) * weighting\n        db1_agg[j,] = matrix(db1, rows=1, cols=nrow(b1)*ncol(b1)) * weighting\n        db2_agg[j,] = matrix(db2, rows=1, cols=nrow(b2)*ncol(b2)) * weighting\n        db3_agg[j,] = matrix(db3, rows=1, cols=nrow(b3)*ncol(b3)) * weighting\n        db4_agg[j,] = matrix(db4, rows=1, cols=nrow(b4)*ncol(b4)) * weighting\n      }\n\n      # Aggregate gradients\n      # Note: The gradients are already pre-multiplied by a weight so that addition here\n      # results in gradient averaging even with different possible mini-batch sizes. I.e.,\n      # the final mini-batch at the end of the dataset could be smaller than the other mini-batches.\n      dW1 = matrix(colSums(dW1_agg), rows=nrow(W1), cols=ncol(W1))\n      dW2 = matrix(colSums(dW2_agg), rows=nrow(W2), cols=ncol(W2))\n      dW3 = matrix(colSums(dW3_agg), rows=nrow(W3), cols=ncol(W3))\n      dW4 = matrix(colSums(dW4_agg), rows=nrow(W4), cols=ncol(W4))\n      db1 = matrix(colSums(db1_agg), rows=nrow(b1), cols=ncol(b1))\n      db2 = matrix(colSums(db2_agg), rows=nrow(b2), cols=ncol(b2))\n      db3 = matrix(colSums(db3_agg), rows=nrow(b3), cols=ncol(b3))\n      db4 = matrix(colSums(db4_agg), rows=nrow(b4), cols=ncol(b4))\n\n      # Optimize with SGD w/ Nesterov momentum\n      [W1, vW1] = sgd_nesterov::update(W1, dW1, lr, mu, vW1)\n      [W2, vW2] = sgd_nesterov::update(W2, dW2, lr, mu, vW2)\n      [W3, vW3] = sgd_nesterov::update(W3, dW3, lr, mu, vW3)\n      [W4, vW4] = sgd_nesterov::update(W4, dW4, lr, mu, vW4)\n      [b1, vb1] = sgd_nesterov::update(b1, db1, lr, mu, vb1)\n      [b2, vb2] = sgd_nesterov::update(b2, db2, lr, mu, vb2)\n      [b3, vb3] = sgd_nesterov::update(b3, db3, lr, mu, vb3)\n      [b4, vb4] = sgd_nesterov::update(b4, db4, lr, mu, vb4)\n\n      # Compute loss & accuracy for training & validation data every 100 iterations.\n      if (g %% 10 == 0) {\n        # Get a mini-batch in this group\n        beg = ((j-1) * batch_size) %% nrow(X_group_batch) + 1\n        end = min(nrow(X_group_batch), beg + batch_size - 1)\n        X_batch = X_group_batch[beg:end,]\n        y_batch = y_group_batch[beg:end,]\n\n        # Compute training loss & accuracy using final\n        probs = predict(X_batch, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)\n        loss_data = cross_entropy_loss::forward(probs, y_batch)\n        loss_reg_W1 = l2_reg::forward(W1, lambda)\n        loss_reg_W2 = l2_reg::forward(W2, lambda)\n        loss_reg_W3 = l2_reg::forward(W3, lambda)\n        loss_reg_W4 = l2_reg::forward(W4, lambda)\n        loss = loss_data + loss_reg_W1 + loss_reg_W2 + loss_reg_W3 + loss_reg_W4\n        accuracy = mean(rowIndexMax(probs) == rowIndexMax(y_batch))\n\n        # Compute validation loss & accuracy\n        probs_val = predict(X_val, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)\n        loss_val = cross_entropy_loss::forward(probs_val, Y_val)\n        accuracy_val = mean(rowIndexMax(probs_val) == rowIndexMax(Y_val))\n\n        # Output results\n        print(\"Epoch: \" + e + \", Group: \" + g + \", Train Loss: \" + loss + \", Train Accuracy: \"\n              + accuracy + \", Val Loss: \" + loss_val + \", Val Accuracy: \" + accuracy_val)\n      }\n    }\n    # Anneal momentum towards 0.999\n    #mu = mu + (0.999 - mu)/(1+epochs-e)\n    # Decay learning rate\n    lr = lr * decay\n  }\n}\n";
        return string;
    }
}

