package goras
import T "gorgonia.org/tensor"
// TrainingDataGenerator is used by a model to generate data on-the-fly during training.
type TrainingDataGenerator interface {
// NextBatch returns the next batch of data and labels. If there is no more data, it should return nil, nil, nil.
NextBatch() (map[string]T.Tensor, map[string]T.Tensor, error)
Reset(batchSize int) error // Resets the generator for the next epoch
NumBatches() int // Returns the number of batches in this epoch
}
var _ TrainingDataGenerator = &TensorTrainingDataGenerator{}
// TensorTrainingDataGenerator is a TrainingDataGenerator that uses tensors as inputs and outputs.
// It should only be used with small datasets, as it requires the entire dataset to be loaded into memory at once.
type TensorTrainingDataGenerator struct {
inputs map[string]T.Tensor
outputs map[string]T.Tensor
currentBatchedInputs []map[string]T.Tensor
currentBatchedOutputs []map[string]T.Tensor
currentBatch int
}
// NewTTDG creates a new TensorTrainingDataGenerator.
// This is used by the fit method of the model to generate batches of data.
// The inputs and outputs are the training data and labels respectively.
// They are maps keyed by input/output name to support multiple inputs and outputs. If you only have one input and one output, pass a map with a single entry for each.
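// A minimal usage sketch (assuming a built model `m`, a solver, and tensors whose
// names match the model's Build options; "x" and "yt" are placeholder names):
//
//	gen := NewTTDG(
//		map[string]T.Tensor{"x": xTensor},
//		map[string]T.Tensor{"yt": yTensor},
//	)
//	err := m.FitGenerator(gen, solver, WithEpochs(10))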
func NewTTDG(xs, ys map[string]T.Tensor) *TensorTrainingDataGenerator {
return &TensorTrainingDataGenerator{
inputs: xs,
outputs: ys,
}
}
func (t *TensorTrainingDataGenerator) NextBatch() (map[string]T.Tensor, map[string]T.Tensor, error) {
if t.currentBatch >= len(t.currentBatchedInputs) {
return nil, nil, nil
}
t.currentBatch++
return t.currentBatchedInputs[t.currentBatch-1], t.currentBatchedOutputs[t.currentBatch-1], nil
}
func (t *TensorTrainingDataGenerator) Reset(batchSize int) error {
t.currentBatch = 0
var err error
t.currentBatchedInputs, _, err = batchMultipleTensors(t.inputs, batchSize, false)
if err != nil {
return err
}
t.currentBatchedOutputs, _, err = batchMultipleTensors(t.outputs, batchSize, false)
if err != nil {
return err
}
return nil
}
func (t *TensorTrainingDataGenerator) NumBatches() int {
return len(t.currentBatchedInputs)
}
package goras
import (
"fmt"
"os"
)
type EpochCallback func(epoch int, avgLoss float64) error
// SaveModelParametersCallback saves the model parameters to the given path.
// It overwrites the file at the given path each epoch, so you only get the most recent model.
func SaveModelParametersCallback(model *Model, path string) EpochCallback {
return func(epoch int, avgLoss float64) error {
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
return model.WriteParams(f)
}
}
// RepeatedSaveModelParametersCallback saves the model parameters to the given path.
// It saves the model every `every` epochs, so you get multiple models.
// The path should contain a %v format specifier, which will be replaced with the epoch number.
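// A minimal usage sketch of both callbacks (assuming a built model `m`, training
// data `xs`/`ys`, and a solver; the paths are placeholders):
//
//	err := m.Fit(xs, ys, solver,
//		WithEpochs(100),
//		WithEpochCallback(SaveModelParametersCallback(m, "model.params")),
//		WithEpochCallback(RepeatedSaveModelParametersCallback(m, "model_%v.params", 10)),
//	)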
func RepeatedSaveModelParametersCallback(model *Model, pathWithFormat string, every int) EpochCallback {
return func(epoch int, avgLoss float64) error {
if epoch%every == 0 {
f, err := os.Create(fmt.Sprintf(pathWithFormat, epoch))
if err != nil {
return err
}
defer f.Close()
return model.WriteParams(f)
}
return nil
}
}
package goras
import (
G "gorgonia.org/gorgonia"
)
// Layer is an interface that all layers must implement to be able to be added to a model.
type Layer interface {
Parameters() map[string]*G.Node // This returns a map of the parameters. E.g. {"weights":[...], "biases":[...]}
Name() string // This returns a name unique to this layer in the model
Trainable() bool // This specifies whether the layer is updated during Fit()
Type() string // This is used for Summary()
Node() *G.Node // This returns the node used as the main output for this layer
INodes() []*G.Node // This returns all nodes used as inputs to this layer
}
// LayerBase is a struct that all layers should embed.
// It provides some useful shared fields and methods.
type LayerBase struct {
Graph *G.ExprGraph
LayerName string
LayerType string
IsTrainable bool
OutputNode *G.Node
InputNodes []*G.Node
}
// Name returns the name of the layer (e.g. "model_1").
func (l *LayerBase) Name() string {
return l.LayerName
}
// Type returns the type of the layer (e.g. "dense").
func (l *LayerBase) Type() string {
return l.LayerType
}
// Trainable returns whether the layer is trainable at the moment.
func (l *LayerBase) Trainable() bool {
return l.IsTrainable
}
// Node returns the final node in this layer (the output node)
func (l *LayerBase) Node() *G.Node {
return l.OutputNode
}
// INodes returns the input nodes of this layer.
func (l *LayerBase) INodes() []*G.Node {
return l.InputNodes
}
// Stuff for reducing repetitive code
type attacher interface {
Attach(*G.Node) (*G.Node, error)
}
func mustAttach(l attacher, x *G.Node) *G.Node {
n, err := l.Attach(x)
if err != nil {
panic(err)
}
return n
}
package goras
import (
"fmt"
G "gorgonia.org/gorgonia"
T "gorgonia.org/tensor"
)
// ActivationLayer is a layer that applies an activation function to its input.
// - Input/Output Shape: any shape
type ActivationLayer struct {
LayerBase
Activation string
LeakyReluGrad float64
}
// Activation creates a new ActivationLayer on the Model with the given activation function.
// The activation function can be one of ["sigmoid", "relu", "tanh", "binary", "softmax", "leakyrelu"].
func Activation(m *Model, name string, activation string) *ActivationLayer {
a := &ActivationLayer{LayerBase{m.Graph, name, "activation(" + activation + ")", false, nil, nil}, activation, 0.01}
m.AddLayer(a)
return a
}
// Sigmoid creates a new ActivationLayer on the Model with the sigmoid activation function.
func Sigmoid(m *Model, name string) *ActivationLayer {
return Activation(m, name, "sigmoid")
}
// Relu creates a new ActivationLayer on the Model with the relu activation function.
func Relu(m *Model, name string) *ActivationLayer {
return Activation(m, name, "relu")
}
// Tanh creates a new ActivationLayer on the Model with the tanh activation function.
func Tanh(m *Model, name string) *ActivationLayer {
return Activation(m, name, "tanh")
}
// Binary creates a new ActivationLayer on the Model with the binary activation function.
func Binary(m *Model, name string) *ActivationLayer {
return Activation(m, name, "binary")
}
// Softmax creates a new ActivationLayer on the Model with the softmax activation function.
func Softmax(m *Model, name string) *ActivationLayer {
return Activation(m, name, "softmax")
}
// LeakyRelu creates a new ActivationLayer on the Model with the leaky relu activation function.
// You can optionally specify the negative gradient (LeakyRelu(model, name, grad)).
// If you don't, it will default to 0.01.
func LeakyRelu(m *Model, name string, grad ...float64) *ActivationLayer {
a := Activation(m, name, "leakyrelu")
if len(grad) > 0 {
a.LeakyReluGrad = grad[0]
}
return a
}
// Attach attaches this layer to a previous node.
func (l *ActivationLayer) Attach(n *G.Node) (*G.Node, error) {
var on *G.Node
var err error
switch l.Activation {
case "sigmoid":
on, err = G.Sigmoid(n)
case "relu":
on, err = G.Rectify(n)
case "tanh":
on, err = G.Tanh(n)
case "binary":
on, err = G.Gt(n, G.NewConstant(defaultVal(n.Dtype()), G.WithType(n.Dtype()), G.WithName(fmt.Sprintf("%s.binarythresh", l.Name()))), true)
case "softmax":
on, err = customSoftMax(n) //G.SoftMax(n, 1) // TODO: my custom softmax seems to be working but gorgonia's doesn't. Investigate more and maybe create an issue.
case "leakyrelu":
//return nil, fmt.Errorf("leakyrelu is currently broken, please just use relu for now.")
//on, err = G.LeakyRelu(n, l.LeakyReluGrad)
on, err = customLeakyRelu(n, l.LeakyReluGrad, l.Name()) // TODO: my custom leakyrelu seems to be working but gorgonia's doesn't. Investigate more and maybe create an issue.
default:
return nil, fmt.Errorf("invalid activation '%s'", l.Activation)
}
l.OutputNode = on
if on != nil {
G.WithName(l.Name() + ".activation")(on)
}
l.InputNodes = []*G.Node{n}
return on, err
}
func defaultVal(dtype T.Dtype) interface{} {
switch dtype {
case T.Float64:
return float64(0.0)
case T.Float32:
return float32(0.0)
case T.Int:
return int(0)
case T.Bool:
return false
default:
panic("type is not implemented to be default vallable. please open an issue so i will fix")
}
}
// MustAttach attaches this layer to a previous node. It panics on error.
func (l *ActivationLayer) MustAttach(n *G.Node) *G.Node { return mustAttach(l, n) }
// Parameters returns a map of the parameters of the layer.
func (l *ActivationLayer) Parameters() map[string]*G.Node { return make(map[string]*G.Node) }
// This function is designed to be a drop-in replacement for G.SoftMax.
// This is to try and find the dreaded softmax panic.
// It only operates on axis 1.
// Also, this is probably slower than the built-in softmax function as it uses multiple nodes.
// TODO: I think that gorgonia might have fixed the softmax issue. Investigate more.
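// Note: this exponentiates x directly (no max subtraction), so very large activations can overflow.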
func customSoftMax(x *G.Node) (*G.Node, error) {
var err error
exponentiatedClasses, err := G.Exp(x)
if err != nil {
return nil, err
}
summedExponentiatedClasses, err := G.Sum(exponentiatedClasses, 1)
if err != nil {
return nil, err
}
return G.BroadcastHadamardDiv(exponentiatedClasses, summedExponentiatedClasses, []byte{}, []byte{1})
}
// IMPORTANT: CURRENTLY BROKEN
// Again, I think the gorgonia LeakyRelu is broken. This is a drop-in replacement.
// TODO: investigate more.
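// It relies on the identity leakyrelu(x) = relu(x) - alpha*relu(-x): for x > 0 this gives x, and for x < 0 it gives alpha*x.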
func customLeakyRelu(x *G.Node, alpha float64, name string) (*G.Node, error) {
var err error
var alphaVal interface{}
switch x.Dtype() {
case T.Float64:
alphaVal = -float64(alpha)
case T.Float32:
alphaVal = -float32(alpha)
default:
return nil, fmt.Errorf("leakyrelu can only be used on float64 and float32")
}
rect, err := G.Rectify(x)
if err != nil {
return nil, err
}
alphaNode := G.NewConstant(alphaVal, G.WithType(x.Dtype()), G.WithName(fmt.Sprintf("%s.alpha", name)))
multAlphaNode, err := G.HadamardProd(x, alphaNode)
if err != nil {
return nil, err
}
multAlphaNode, err = G.Rectify(multAlphaNode)
if err != nil {
return nil, err
}
total, err := G.Sub(rect, multAlphaNode)
if err != nil {
return nil, err
}
return total, nil
}
package goras
import (
G "gorgonia.org/gorgonia"
)
// Conv2DLayer is a 2D convolutional layer.
// - Input Shape: (batch_size, previous_kernels/previous_channels, img_width, img_height)
// - Output Shape: (batch_size, num_kernels, img_width, img_height) [the spatial dims only match the input for "same" padding with a stride of 1]
type Conv2DLayer struct {
LayerBase
Kernels *G.Node
KernelSize []int
NumKernels int
Stride []int
Padding string
}
// SimpleConv2D is a constructor to create a 2D convolutional layer.
// It has a kernel shape of [kernelSize, kernelSize], a stride of [1, 1], and padding of "same".
// This means that the output will have the same spatial dimensions as the input (only the channel dimension changes to numKernels).
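// A minimal usage sketch (assuming a model `m` and a previous node `prev` with
// shape (batch, channels, height, width)):
//
//	conv := SimpleConv2D(m, "conv_1", 3, 16)
//	out := conv.MustAttach(prev) // out has 16 channels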
func SimpleConv2D(m *Model, name string, kernelSize int, numKernels int) *Conv2DLayer {
l := &Conv2DLayer{
LayerBase{m.Graph, name, "conv2d", true, nil, nil},
nil,
[]int{kernelSize, kernelSize},
numKernels,
[]int{1, 1},
"same",
}
m.AddLayer(l)
return l
}
// Conv2D is a constructor to create a 2D convolutional layer.
// Options for padding are "same" or "valid".
func Conv2D(m *Model, name string, kernelShape, stride []int, padding string, numKernels int) *Conv2DLayer {
l := &Conv2DLayer{
LayerBase{m.Graph, name, "conv2d", true, nil, nil},
nil,
kernelShape,
numKernels,
stride,
padding,
}
m.AddLayer(l)
return l
}
// Attach attaches this layer to a previous node.
func (l *Conv2DLayer) Attach(x *G.Node) (*G.Node, error) {
if err := validateShape(x.Shape(), valNDims(4)); err != nil {
return nil, err
}
pad := []int{0, 0} // padding=valid
if l.Padding == "same" {
pad = []int{l.KernelSize[0] / 2, l.KernelSize[1] / 2}
}
previousKernels := x.Shape()[1]
l.Kernels = G.NewTensor(l.Graph, x.Dtype(), 4, G.WithShape(l.NumKernels, previousKernels, l.KernelSize[0], l.KernelSize[1]), G.WithInit(G.GlorotN(1.0)), G.WithName(l.Name()+".kernels"))
on, err := G.Conv2d(x, l.Kernels, l.KernelSize, pad, l.Stride, []int{1, 1})
l.OutputNode = on
if on != nil {
G.WithName(l.Name() + ".conv")(on)
}
l.InputNodes = []*G.Node{x}
return on, err
}
// MustAttach attaches this layer to a previous node. It panics on error.
func (l *Conv2DLayer) MustAttach(n *G.Node) *G.Node { return mustAttach(l, n) }
// Parameters returns a map of the parameters of the layer.
func (l *Conv2DLayer) Parameters() map[string]*G.Node {
return map[string]*G.Node{"kernels": l.Kernels}
}
package goras
import (
G "gorgonia.org/gorgonia"
T "gorgonia.org/tensor"
)
// DenseLayer is a layer that performs a dense (fully connected) operation.
// It does not perform any activation or dropout.
// - Input Shape: (batch_size, num_inputs)
// - Output Shape: (batch_size, num_nodes)
type DenseLayer struct {
LayerBase
Weights *G.Node
Nodes int
}
// Dense creates a new dense layer on the specified model.
func Dense(m *Model, name string, nodes int) *DenseLayer {
d := &DenseLayer{LayerBase{m.Graph, name, "dense", true, nil, nil}, nil, nodes}
m.AddLayer(d)
return d
}
// Attach attaches the layer to a previous node.
func (l *DenseLayer) Attach(n *G.Node) (*G.Node, error) {
if err := validateShape(n.Shape(), valNDims(2)); err != nil {
return nil, err
}
numInputs := n.Shape()[1]
batchSize := n.Shape()[0]
l.Weights = G.NewMatrix(l.Graph, n.Dtype(), G.WithShape(numInputs+1, l.Nodes), G.WithInit(G.GlorotN(1.0)), G.WithName(l.Name()+".weights"))
bias := G.NewConstant(T.Ones(n.Dtype(), batchSize, 1), G.WithName(l.Name()+".bias"))
// Build the graph
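// A column of ones is concatenated onto the input, so the extra row of Weights acts as the bias term (i.e. output = [x|1] * W).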
withBias, err := G.Concat(1, n, bias)
if err != nil {
return nil, err
}
multiplied, err := G.Mul(withBias, l.Weights)
if err != nil {
return nil, err
}
l.OutputNode = multiplied
if l.OutputNode != nil {
G.WithName(l.Name() + ".matmul")(l.OutputNode)
}
l.InputNodes = []*G.Node{n}
return multiplied, nil
}
// MustAttach attaches the layer to a previous node, panicking on error.
func (l *DenseLayer) MustAttach(n *G.Node) *G.Node { return mustAttach(l, n) }
// Parameters returns a map of the parameters of the layer.
func (l *DenseLayer) Parameters() map[string]*G.Node {
return map[string]*G.Node{"weights": l.Weights}
}
package goras
import (
G "gorgonia.org/gorgonia"
)
// DropoutLayer is a dropout layer.
// - Input/Output Shape: any shape
type DropoutLayer struct {
LayerBase
DropoutProbability float64
}
// Dropout creates a new DropoutLayer on the Model with the given dropout probability.
func Dropout(m *Model, name string, dropoutProbability float64) *DropoutLayer {
d := &DropoutLayer{LayerBase{m.Graph, name, "dropout", false, nil, nil}, dropoutProbability}
m.AddLayer(d)
return d
}
// Attach attaches the DropoutLayer to the given node.
func (l *DropoutLayer) Attach(n *G.Node) (*G.Node, error) {
on, err := G.Dropout(n, l.DropoutProbability)
l.OutputNode = on
if on != nil {
G.WithName(l.Name() + ".dropout")(on)
}
l.InputNodes = []*G.Node{n}
return on, err
}
// MustAttach attaches the DropoutLayer to the given node. It panics on error.
func (l *DropoutLayer) MustAttach(n *G.Node) *G.Node { return mustAttach(l, n) }
// Parameters returns a map of the parameters of the layer.
func (d *DropoutLayer) Parameters() map[string]*G.Node { return make(map[string]*G.Node) }
package goras
import (
G "gorgonia.org/gorgonia"
T "gorgonia.org/tensor"
)
// InputLayer is a layer that takes an input of a specific shape.
// - Input/Output Shape: (batch_size, ...other_dims) [the specified shape]
type InputLayer struct {
LayerBase
}
// Input creates a new input layer on the specified model.
// To access the resulting *Node, use the .Node() function.
func Input(m *Model, name string, dtype T.Dtype, shape ...int) *InputLayer {
if err := validateShape(shape, valAtLeastNDims(1)); err != nil {
panic(err)
}
t := G.NewTensor(m.Graph, dtype, len(shape), G.WithShape(shape...), G.WithName(name+".input"))
i := &InputLayer{LayerBase{m.Graph, name, "input", false, t, nil}}
m.AddLayer(i)
i.InputNodes = []*G.Node{}
return i
}
// Parameters returns a map of the parameters of the layer.
func (l *InputLayer) Parameters() map[string]*G.Node { return make(map[string]*G.Node) }
package goras
import (
"fmt"
G "gorgonia.org/gorgonia"
T "gorgonia.org/tensor"
)
// A OneHotLayer is a layer that performs a one-hot encoding of the input.
// The input should be a 1D tensor of integers (batchsize,).
// The output will be a 2D tensor of the specified dtype (batchsize, numClasses).
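// For example, with numClasses = 3, the input vector [2, 0, 1] becomes [[0, 0, 1], [1, 0, 0], [0, 1, 0]] in the chosen dtype.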
type OneHotLayer struct {
LayerBase
NumClasses int
DType T.Dtype
}
// Parameters implements Layer.
func (*OneHotLayer) Parameters() map[string]*G.Node {
return map[string]*G.Node{}
}
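// OneHot creates a new OneHotLayer on the specified model, with the given output dtype and number of classes.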
func OneHot(m *Model, name string, dtype T.Dtype, numClasses int) *OneHotLayer {
if numClasses < 1 {
panic("numClasses must be greater than 0")
}
o := &OneHotLayer{LayerBase{m.Graph, name, "onehot", false, nil, nil}, numClasses, dtype}
m.AddLayer(o)
return o
}
// Attach attaches the layer to a previous node.
func (l *OneHotLayer) Attach(n *G.Node) (*G.Node, error) {
if err := validateShape(n.Shape(), valNDims(1)); err != nil {
return nil, err
}
if n.Dtype() != G.Int {
return nil, fmt.Errorf("OneHotLayer only supports integer inputs")
}
output, err := G.ApplyOp(&oneHotOp{numClasses: l.NumClasses, dType: l.DType}, n)
if err != nil {
return nil, err
}
l.InputNodes = []*G.Node{n}
l.OutputNode = output
return output, nil
}
func (l *OneHotLayer) MustAttach(n *G.Node) *G.Node { return mustAttach(l, n) }
package goras
import (
"math"
G "gorgonia.org/gorgonia"
T "gorgonia.org/tensor"
)
// MaxPooling2DLayer is a max pooling layer.
// - Input Shape: (batch_size, num_channels, img_height, img_width)
// - Output Shape: (batch_size, num_channels, img_height, img_width) [img_height and img_width will be smaller than the input]
type MaxPooling2DLayer struct {
LayerBase
PoolSize []int
Stride []int
Padding string
}
// SimpleMaxPooling2D creates a new max pooling layer on the specified model.
// It uses "same" padding and a stride equal to poolSize, with the same pool size in both dimensions.
func SimpleMaxPooling2D(m *Model, name string, poolSize int) *MaxPooling2DLayer {
l := &MaxPooling2DLayer{
LayerBase{m.Graph, name, "maxpool2d", false, nil, nil},
[]int{poolSize, poolSize},
[]int{poolSize, poolSize},
"same",
}
m.AddLayer(l)
return l
}
// MaxPooling2D creates a new max pooling layer on the specified model.
// Padding can be either "same" or "valid".
func MaxPooling2D(m *Model, name string, poolSize, stride []int, padding string) *MaxPooling2DLayer {
l := &MaxPooling2DLayer{
LayerBase{m.Graph, name, "maxpool2d", false, nil, nil},
poolSize,
stride,
padding,
}
m.AddLayer(l)
return l
}
// Attach attaches the MaxPooling2DLayer to the given node.
func (l *MaxPooling2DLayer) Attach(x *G.Node) (*G.Node, error) {
if err := validateShape(x.Shape(), valNDims(4)); err != nil {
return nil, err
}
pad := []int{0, 0} // padding=valid
if l.Padding == "same" {
padH := calculateSamePadding(x.Shape()[2], l.PoolSize[0], l.Stride[0])
padW := calculateSamePadding(x.Shape()[3], l.PoolSize[1], l.Stride[1])
pad = append(padH, padW...)
}
on, err := G.MaxPool2D(x, T.Shape(l.PoolSize), pad, l.Stride)
l.OutputNode = on
if on != nil {
G.WithName(l.Name() + ".maxpool")(on)
}
l.InputNodes = []*G.Node{x}
return on, err
}
// MustAttach attaches the MaxPooling2DLayer to the given node.
func (l *MaxPooling2DLayer) MustAttach(n *G.Node) *G.Node { return mustAttach(l, n) }
// Parameters returns a map of the parameters of the layer.
func (l *MaxPooling2DLayer) Parameters() map[string]*G.Node { return map[string]*G.Node{} }
// This function calculates the padding for "same".
// I borrowed the calculations from here: https://www.pico.net/kb/what-is-the-difference-between-same-and-valid-padding-in-tf-nn-max-pool-of-tensorflow/
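// For example, width=5, filterSize=3, stride=2 gives outWidth=3 and padAlongWidth=2, so the returned padding is [1, 1].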
func calculateSamePadding(width, filterSize, stride int) []int {
outWidth := int(math.Ceil(float64(width) / float64(stride)))
padAlongWidth := int(math.Max(float64((outWidth-1)*stride+filterSize-width), 0))
padLeft := padAlongWidth / 2
padRight := padAlongWidth - padLeft
return []int{padLeft, padRight}
}
package goras
import (
G "gorgonia.org/gorgonia"
T "gorgonia.org/tensor"
)
// ReshapeLayer is a reshape layer.
// - Input Shape: any shape
// - Output Shape: the specified shape [as long as both shapes have the same volume]
type ReshapeLayer struct {
LayerBase
ToShape T.Shape
}
// Reshape creates a new ReshapeLayer on the Model with the given target shape.
func Reshape(model *Model, name string, newShape T.Shape) *ReshapeLayer {
l := &ReshapeLayer{
LayerBase: LayerBase{model.Graph, name, "reshape", false, nil, nil},
ToShape: newShape,
}
model.AddLayer(l)
return l
}
// Attach attaches the ReshapeLayer to the given node.
func (l *ReshapeLayer) Attach(n *G.Node) (*G.Node, error) {
if err := validateShape(n.Shape(), valMatchingVolume(l.ToShape)); err != nil {
return nil, err
}
on, err := G.Reshape(n, l.ToShape)
l.OutputNode = on
if on != nil {
G.WithName(l.Name() + ".reshape")(on)
}
l.InputNodes = []*G.Node{n}
return on, err
}
// MustAttach attaches the ReshapeLayer to the given node. It panics on error.
func (l *ReshapeLayer) MustAttach(n *G.Node) *G.Node { return mustAttach(l, n) }
// Parameters returns a map of the parameters of the layer.
func (l *ReshapeLayer) Parameters() map[string]*G.Node {
return make(map[string]*G.Node)
}
package goras
import G "gorgonia.org/gorgonia"
// BCELoss creates the nodes to calculate binary crossentropy loss between a predicted and target node.
// It should be used when using Model.Build().
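// The computed value is loss = -mean(target*log(output) + (1-target)*log(1-output)).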
func BCELoss(targetName string, output *G.Node) LossFunc {
return func() (*G.Node, map[string]*G.Node, error) {
target := G.NewMatrix(output.Graph(), output.Dtype(), G.WithShape(output.Shape()...), G.WithName(targetName))
x1, err := G.Log(output)
if err != nil {
return nil, nil, err
}
x2, err := G.Sub(G.NewConstant(1.0, G.WithName(targetName+".const1a")), output)
if err != nil {
return nil, nil, err
}
x2, err = G.Log(x2)
if err != nil {
return nil, nil, err
}
x1, err = G.HadamardProd(target, x1)
if err != nil {
return nil, nil, err
}
x3, err := G.Sub(G.NewConstant(1.0, G.WithName(targetName+".const1b")), target)
if err != nil {
return nil, nil, err
}
x2, err = G.HadamardProd(x3, x2)
if err != nil {
return nil, nil, err
}
x, err := G.Add(x1, x2)
if err != nil {
return nil, nil, err
}
x, err = G.Mean(x)
if err != nil {
return nil, nil, err
}
x, err = G.Neg(x)
if err != nil {
return nil, nil, err
}
return x, map[string]*G.Node{targetName: target}, nil
}
}
package goras
import (
"fmt"
G "gorgonia.org/gorgonia"
)
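// CCELoss creates the nodes to calculate categorical crossentropy loss between a predicted and target node.
// It should be used when using Model.Build().
// The computed value is loss = -mean(sum(target*log(output), axis 1)), so the output is expected to already be a probability distribution (e.g. after a softmax activation).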
func CCELoss(targetName string, output *G.Node) LossFunc {
return func() (*G.Node, map[string]*G.Node, error) {
target := G.NewMatrix(output.Graph(), output.Dtype(), G.WithShape(output.Shape()...), G.WithName(targetName))
x, err := G.Log(output)
if err != nil {
return nil, nil, fmt.Errorf("CCE error while performing Log op: %v", err)
}
x, err = G.HadamardProd(target, x)
if err != nil {
return nil, nil, fmt.Errorf("CCE error while performing HardmanProd op: %v", err)
}
x, err = G.Sum(x, 1)
if err != nil {
return nil, nil, fmt.Errorf("CCE error while performing Sum op: %v", err)
}
x, err = G.Mean(x)
if err != nil {
return nil, nil, fmt.Errorf("CCE error while performing Mean op: %v", err)
}
x, err = G.Neg(x)
if err != nil {
return nil, nil, fmt.Errorf("CCE error while performing Neg op: %v", err)
}
return x, map[string]*G.Node{targetName: target}, nil
}
}
package goras
import (
"fmt"
G "gorgonia.org/gorgonia"
)
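// L2Loss creates the nodes to calculate an L2 penalty (sum of squares) over the parameters of the given layers.
// It returns no loss-required nodes, so it is typically combined with another loss, for example via WeightedAdditiveLoss.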
func L2Loss(layers ...Layer) LossFunc {
return func() (*G.Node, map[string]*G.Node, error) {
if len(layers) == 0 {
return nil, nil, fmt.Errorf("no layers provided to L2Loss")
}
// get a list of all parameters of the given layers
var params []*G.Node
for _, layer := range layers {
for _, param := range layer.Parameters() {
params = append(params, param)
}
}
var sumNodes []*G.Node
for _, param := range params {
x, err := G.Square(param)
if err != nil {
return nil, nil, err
}
x, err = G.Sum(x, allAxes(param.Shape())...)
if err != nil {
return nil, nil, err
}
sumNodes = append(sumNodes, x)
}
total := sumNodes[0]
for _, node := range sumNodes[1:] {
var err error
total, err = G.Add(total, node)
if err != nil {
return nil, nil, err
}
}
return total, map[string]*G.Node{}, nil
}
}
package goras
import G "gorgonia.org/gorgonia"
// MSELoss creates the nodes to calculate mean squared error loss between a predicted and target node.
// It should be used when using Model.Build().
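// The computed value is loss = mean((output - target)^2).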
func MSELoss(targetName string, output *G.Node) LossFunc {
return func() (*G.Node, map[string]*G.Node, error) {
target := G.NewMatrix(output.Graph(), output.Dtype(), G.WithShape(output.Shape()...), G.WithName(targetName))
x, err := G.Sub(output, target)
if err != nil {
return nil, nil, err
}
x, err = G.Square(x)
if err != nil {
return nil, nil, err
}
x, err = G.Mean(x)
if err != nil {
return nil, nil, err
}
return x, map[string]*G.Node{targetName: target}, nil
}
}
package goras
import (
"fmt"
G "gorgonia.org/gorgonia"
)
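// WeightedAdditiveLoss combines multiple loss functions into a single loss by scaling each one by its corresponding weight and summing the results.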
// KNOWN BUG: I'm pretty certain this will not work if the graph is using float32s, because all the weights are float64
func WeightedAdditiveLoss(losses []LossFunc, weights []float64) LossFunc {
return func() (*G.Node, map[string]*G.Node, error) {
if len(losses) != len(weights) {
return nil, nil, fmt.Errorf("number of losses and weights must match")
}
lossNodes := []*G.Node{}
allLossInps := map[string]*G.Node{}
for _, loss := range losses {
lossNode, lossInp, err := loss()
if err != nil {
return nil, nil, err
}
lossNodes = append(lossNodes, lossNode)
for k, v := range lossInp {
if _, ok := allLossInps[k]; ok {
return nil, nil, fmt.Errorf("loss with name %s already exists", k)
}
allLossInps[k] = v
}
}
var total *G.Node
for i, lossNode := range lossNodes {
// BUG: this name is not unique if WeightedAdditiveLoss is used more than once in the same graph
scaleNode := G.NewConstant(weights[i], G.WithName(fmt.Sprintf("weightedadditiveloss.weight%d", i)))
x, err := G.Mul(lossNode, scaleNode)
if err != nil {
return nil, nil, err
}
if total == nil {
total = x
} else {
total, err = G.Add(total, x)
if err != nil {
return nil, nil, err
}
}
}
return total, allLossInps, nil
}
}
package goras
import (
"encoding/gob"
"fmt"
"io"
"strings"
G "gorgonia.org/gorgonia"
T "gorgonia.org/tensor"
)
// Model is the core primitive of goras.
// It is effectively a wrapper around a Gorgonia graph, with extra functionality.
type Model struct {
Graph *G.ExprGraph
Layers []Layer
Machine G.VM
InputNodes map[string]*G.Node
OutputNodes map[string]*G.Node
OutputValues map[string]*G.Value // This deliberately stores pointers, so the values that G.Read writes into can be dereferenced later
LossValue G.Value
LossRequiredNodes map[string]*G.Node
}
// NewModel creates a new model with no layers
func NewModel() *Model {
return &Model{Graph: G.NewGraph(), Layers: []Layer{}}
}
// AddLayer adds a layer to the model. You usually don't need to call this directly, as the layer constructors do it for you.
func (m *Model) AddLayer(l Layer) {
m.Layers = append(m.Layers, l)
}
type buildParams struct {
inputNodes map[string]*G.Node
outputNodes map[string]*G.Node
loss LossFunc
}
// BuildOpts are options for the Build method.
type BuildOpts func(*buildParams)
// WithInput adds an input node to the model.
// - inputName: The name we will use to pass tensors to this node. This must be unique, and will be used later in fit and predict methods.
// - inputNode: The node to use as the input. This is usually from a goras.Input layer.
func WithInput(inputName string, inputNode *G.Node) BuildOpts {
return func(b *buildParams) { b.inputNodes[inputName] = inputNode }
}
// WithOutput adds an output node to the model.
// - outputName: The name we will use to get tensors from this node. This must be unique, and will be used later in fit and predict methods.
// - outputNode: The node to use as the output.
func WithOutput(name string, outputNode *G.Node) BuildOpts {
return func(b *buildParams) { b.outputNodes[name] = outputNode }
}
// WithLoss specifies the loss function for the model.
func WithLoss(loss LossFunc) BuildOpts {
return func(b *buildParams) { b.loss = loss }
}
// Build builds the model, using a specified input and output node.
// It adds the loss function to the graph, and creates the machine.
// This should only be called once per model.
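// A minimal usage sketch (assuming the layers shown exist on this model; the names
// "x", "yp" and "yt" are arbitrary and are reused by Fit/Predict and the loss):
//
//	inp := Input(m, "input_1", T.Float64, 16, 4).Node()
//	out := Sigmoid(m, "act_1").MustAttach(Dense(m, "dense_1", 1).MustAttach(inp))
//	err := m.Build(
//		WithInput("x", inp),
//		WithOutput("yp", out),
//		WithLoss(MSELoss("yt", out)),
//	)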
func (m *Model) Build(opts ...BuildOpts) error {
buildParams := &buildParams{
inputNodes: make(map[string]*G.Node),
outputNodes: make(map[string]*G.Node),
}
for _, opt := range opts {
opt(buildParams)
}
if len(buildParams.inputNodes) == 0 || len(buildParams.outputNodes) == 0 {
return fmt.Errorf("must at least have one input and output node")
}
if buildParams.loss == nil {
return fmt.Errorf("loss must be specified")
}
// Store input and output nodes
m.InputNodes = buildParams.inputNodes
m.OutputNodes = buildParams.outputNodes
// Read the outputs to values
m.OutputValues = make(map[string]*G.Value, len(m.OutputNodes))
for name := range m.OutputNodes {
var val G.Value
G.Read(m.OutputNodes[name], &val)
m.OutputValues[name] = &val
}
// Define loss function
lossNode, lossRequiredNodes, err := buildParams.loss()
if err != nil {
return fmt.Errorf("error while adding loss: %v", err)
}
G.Read(lossNode, &m.LossValue)
m.LossRequiredNodes = lossRequiredNodes
trainables := m.Trainables()
if len(trainables) != 0 {
_, err = G.Grad(lossNode, trainables...)
if err != nil {
return fmt.Errorf("error while computing grad: %v", err)
}
}
// Check for duplicate node names
nodeNames := make(map[string]bool)
for _, n := range m.Graph.AllNodes() {
if _, ok := nodeNames[n.Name()]; ok {
return fmt.Errorf("duplicate node name %s, either there are two layers with the same name, or this is a bug (please report)", n.Name())
}
nodeNames[n.Name()] = true
}
// Check for duplicate layer names
layerNames := make(map[string]bool)
for _, l := range m.Layers {
if _, ok := layerNames[l.Name()]; ok {
return fmt.Errorf("duplicate layer name %s, either there are two layers with the same name, or this is a bug (please report)", l.Name())
}
layerNames[l.Name()] = true
}
// Create machine
m.Machine = G.NewTapeMachine(m.Graph, G.BindDualValues(m.Trainables()...))
return nil
}
// MustBuild calls Build, but panics if there is an error.
func (m *Model) MustBuild(opts ...BuildOpts) {
err := m.Build(opts...)
if err != nil {
panic(err)
}
}
// Trainables returns a list of all the trainable nodes in the model.
func (m *Model) Trainables() G.Nodes {
var ret G.Nodes
for _, l := range m.Layers {
if l.Trainable() {
for _, t := range l.Parameters() {
ret = append(ret, t)
}
}
}
return ret
}
// valueToTensor converts a G.Value to a tensor.
// The tensor shares the same underlying data as the value, so changing the returned tensor will change the value.
func valueToTensor(v G.Value) *T.Dense {
return T.New(T.WithShape(v.Shape()...), T.WithBacking(v.Data()))
}
// GetParams returns a map of all the parameters in the model.
// The keys are the layer name and parameter name, separated by a colon (e.g. "model_1:weights")
func (m *Model) GetParams() map[string]*T.Dense {
ret := make(map[string]*T.Dense)
for _, l := range m.Layers {
for k, v := range l.Parameters() {
ret[l.Name()+":"+k] = valueToTensor(v.Value())
}
}
return ret
}
// SetParams sets the parameters in the model, which can be retrieved with Model.GetParams.
// It will only load parameters with matching names, and will ignore any others.
// This means you can load parameters from a model with a different architecture, as long as the names match on equivalent layers.
func (m *Model) SetParams(params map[string]*T.Dense) error {
for _, l := range m.Layers {
for k, v := range l.Parameters() {
if p, ok := params[l.Name()+":"+k]; ok {
if err := G.Let(v, p); err != nil {
return fmt.Errorf("error setting parameter %s: %s", l.Name()+":"+k, err)
}
}
}
}
return nil
}
// MustSetParams calls SetParams, but panics if there is an error.
func (m *Model) MustSetParams(params map[string]*T.Dense) {
err := m.SetParams(params)
if err != nil {
panic(err)
}
}
// WriteParams writes the parameters in gob format to an io.Writer.
// The params are retrieved with Model.GetParams.
func (m *Model) WriteParams(w io.Writer) error {
params := m.GetParams()
enc := gob.NewEncoder(w)
return enc.Encode(params)
}
// MustWriteParams calls WriteParams, but panics if there is an error.
func (m *Model) MustWriteParams(w io.Writer) {
err := m.WriteParams(w)
if err != nil {
panic(err)
}
}
// ReadParams reads the parameters in gob format from an io.Reader.
// The params are retrieved with Model.GetParams.
func (m *Model) ReadParams(r io.Reader) error {
var params map[string]*T.Dense
dec := gob.NewDecoder(r)
if err := dec.Decode(&params); err != nil {
return err
}
return m.SetParams(params)
}
// MustReadParams calls ReadParams, but panics if there is an error.
func (m *Model) MustReadParams(r io.Reader) {
err := m.ReadParams(r)
if err != nil {
panic(err)
}
}
// BindParamsFrom binds the parameters in the model m1 to the parameters in this model m, meaning layers with the same name will share the same tensors.
// This is a bit of a hack to allow two models to train the same weights.
// This can be called multiple times, where later binds may override earlier ones.
// For example, if you are making an autoencoder, you would have one main model for training, and an encoder model and decoder model which are bound to that.
// That then allows you to run partial bits of the network.
func (m *Model) BindParamsFrom(m1 *Model) error {
paramsSrc := m1.GetParams()
for _, l := range m.Layers {
for k, v := range l.Parameters() {
if p, ok := paramsSrc[l.Name()+":"+k]; ok {
if err := G.Let(v, p); err != nil {
return fmt.Errorf("error binding parameter %s: %s", l.Name()+":"+k, err)
}
}
}
}
return nil
}
// MustBindParamsFrom calls BindParamsFrom, but panics if there is an error.
func (m *Model) MustBindParamsFrom(m1 *Model) {
err := m.BindParamsFrom(m1)
if err != nil {
panic(err)
}
}
// CopyParamsFrom copies the parameters in the model m1 to the parameters in this model m, meaning layers with the same name will share the same values in their tensors.
// The tensors will be copies of each other, so changing one will not change the other.
// If you want to share the tensors, use BindParamsFrom instead.
func (m *Model) CopyParamsFrom(m1 *Model) error {
paramsSrc := m1.GetParams()
for _, l := range m.Layers {
for k, v := range l.Parameters() {
if p, ok := paramsSrc[l.Name()+":"+k]; ok {
pCopy := p.Clone().(*T.Dense)
if err := G.Let(v, pCopy); err != nil {
return fmt.Errorf("error copying parameter %s: %s", l.Name()+":"+k, err)
}
}
}
}
return nil
}
// MustCopyParamsFrom calls CopyParamsFrom, but panics if there is an error.
func (m *Model) MustCopyParamsFrom(m1 *Model) {
err := m.CopyParamsFrom(m1)
if err != nil {
panic(err)
}
}
// PredictBatch runs the model on a batch of input data. The batch size must match the input node shape.
func (m *Model) PredictBatch(inputs map[string]T.Tensor) (map[string]T.Tensor, error) {
if err := checkBatchedInputShapes(m, inputs); err != nil {
return nil, err
}
m.Machine.Reset()
for name := range inputs {
if err := G.Let(m.InputNodes[name], inputs[name]); err != nil {
return nil, err
}
}
// Set every loss required node to a tensor of the correct shape
for _, n := range m.LossRequiredNodes {
if err := G.Let(n, T.New(T.WithShape(n.Shape()...), T.Of(n.Dtype()))); err != nil {
return nil, err
}
}
// Run the machine
if err := m.Machine.RunAll(); err != nil {
return nil, err
}
// We need to clone here otherwise the next time the machine is run, the tensor will be changed
outputTensors := make(map[string]T.Tensor, len(m.OutputNodes))
for name := range m.OutputValues {
outputTensors[name] = T.New(
T.WithShape((*m.OutputValues[name]).Shape()...),
T.WithBacking((*m.OutputValues[name]).Data()),
).Clone().(*T.Dense)
}
return outputTensors, nil
}
// MustPredictBatch calls PredictBatch, but panics if there is an error.
func (m *Model) MustPredictBatch(inputs map[string]T.Tensor) map[string]T.Tensor {
ys, err := m.PredictBatch(inputs)
if err != nil {
panic(err)
}
return ys
}
// FitBatch runs the model on a batch of input data, and then trains the model on the target data.
// The solver used is passed in as an argument.
// IMPORTANT NOTE: currently, when the data is batched by Fit, the final partial batch is discarded if the dataset size does not evenly divide the batch size.
func (m *Model) FitBatch(inputs, lossRequirements map[string]T.Tensor, solver G.Solver) (float64, error) {
if err := checkBatchedInputShapes(m, inputs); err != nil {
return 0, err
}
if err := checkBatchedLossRequirementShapes(m, lossRequirements); err != nil {
return 0, err
}
m.Machine.Reset()
for name := range inputs {
if err := G.Let(m.InputNodes[name], inputs[name]); err != nil {
return 0, err
}
}
for name := range lossRequirements {
if err := G.Let(m.LossRequiredNodes[name], lossRequirements[name]); err != nil {
return 0, err
}
}
if err := m.Machine.RunAll(); err != nil {
return 0, err
}
if err := solver.Step(G.NodesToValueGrads(m.Trainables())); err != nil {
return 0, err
}
loss := 0.0
switch m.LossValue.Dtype() {
case T.Float64:
loss = m.LossValue.Data().(float64)
case T.Float32:
loss = float64(m.LossValue.Data().(float32))
default:
return 0, fmt.Errorf("unsupported loss dtype %v, please use either float64 or float32", m.LossValue.Dtype())
}
return loss, nil
}
// MustFitBatch calls FitBatch, but panics if there is an error.
func (m *Model) MustFitBatch(inputs, lossRequirements map[string]T.Tensor, solver G.Solver) float64 {
loss, err := m.FitBatch(inputs, lossRequirements, solver)
if err != nil {
panic(err)
}
return loss
}
// FitOpts are options for the Fit method.
type FitOpt func(*fitParams)
type fitParams struct {
Epochs int
LogEvery int
Verbose bool
ClearLine bool
EpochEndCallbacks []EpochCallback
}
// WithEpochs sets the number of epochs to train for.
func WithEpochs(epochs int) FitOpt { return func(p *fitParams) { p.Epochs = epochs } }
// WithLoggingEvery sets how often to log the loss.
func WithLoggingEvery(epochs int) FitOpt { return func(p *fitParams) { p.LogEvery = epochs } }
// WithVerbose sets whether to log the loss.
func WithVerbose(verbose bool) FitOpt { return func(p *fitParams) { p.Verbose = verbose } }
// WithClearLine sets whether to clear the line when logging the loss.
func WithClearLine(clear bool) FitOpt { return func(p *fitParams) { p.ClearLine = clear } }
// WithEpochCallback adds a callback to be called at the end of each epoch.
func WithEpochCallback(cb EpochCallback) FitOpt {
return func(p *fitParams) { p.EpochEndCallbacks = append(p.EpochEndCallbacks, cb) }
}
// Fit fits the model to the given data.
func (m *Model) Fit(xs, ys map[string]T.Tensor, solver G.Solver, opts ...FitOpt) error {
return m.FitGenerator(NewTTDG(xs, ys), solver, opts...)
}
// MustFit calls Fit, but panics if there is an error.
func (m *Model) MustFit(xs, ys map[string]T.Tensor, solver G.Solver, opts ...FitOpt) {
err := m.Fit(xs, ys, solver, opts...)
if err != nil {
panic(err)
}
}
// FitGenerator fits the model to the given data generator.
func (m *Model) FitGenerator(tdg TrainingDataGenerator, solver G.Solver, opts ...FitOpt) error {
params := &fitParams{
Epochs: 1,
LogEvery: 1,
Verbose: true,
ClearLine: false,
EpochEndCallbacks: []EpochCallback{},
}
for _, o := range opts {
o(params)
}
batchSize := m.getCurrentBatchSize()
for epoch := 1; epoch <= params.Epochs; epoch++ {
if err := tdg.Reset(batchSize); err != nil {
return err
}
numBatches := tdg.NumBatches()
isLoggingEpoch := ((epoch%params.LogEvery == 0) || (epoch == params.Epochs) || (epoch == 1))
logEveryBatch := numBatches / 100
if logEveryBatch == 0 {
logEveryBatch = 1
}
loss := 0.0
currentBatches := 0.0
bi := 0
for {
xBatch, yBatch, err := tdg.NextBatch()
if err != nil {
return err
}
if xBatch == nil || yBatch == nil {
break
}
batchLoss, err := m.FitBatch(xBatch, yBatch, solver)
if err != nil {
return err
}
loss += batchLoss
currentBatches++
if params.Verbose && isLoggingEpoch && bi%logEveryBatch == 0 {
bar := strings.Repeat("=", int(currentBatches/float64(numBatches)*39))
bar += ">"
fmt.Printf("\rEpoch %d/%d - Loss: %f |%-40v|", epoch, params.Epochs, loss/currentBatches, bar)
}
bi++
}
if params.Verbose && isLoggingEpoch {
lineEnd := "\n"
if params.ClearLine {
lineEnd = "\r"
}
fmt.Printf("\rEpoch %d/%d - Loss: %f |Done| %40v%v", epoch, params.Epochs, loss/currentBatches, "", lineEnd)
}
for _, cb := range params.EpochEndCallbacks {
if err := cb(epoch, loss/currentBatches); err != nil {
return err
}
}
}
if params.Verbose {
fmt.Println()
}
return nil
}
// MustFitGenerator calls FitGenerator, but panics if there is an error.
func (m *Model) MustFitGenerator(tdg TrainingDataGenerator, solver G.Solver, opts ...FitOpt) {
err := m.FitGenerator(tdg, solver, opts...)
if err != nil {
panic(err)
}
}
// Predict returns the models outputs for the given inputs. It cuts the inputs into batches so the inputs can be of any length.
func (m *Model) Predict(xs map[string]T.Tensor) (map[string]T.Tensor, error) {
xBatchess, numPads, err := batchMultipleTensors(xs, m.getCurrentBatchSize(), true)
if err != nil {
return nil, err
}
yBatchess := make([]map[string]T.Tensor, len(xBatchess))
for bi := range xBatchess {
yBatches, err := m.PredictBatch(xBatchess[bi])
if err != nil {
return nil, err
}
// Remove padding
if bi == len(xBatchess)-1 {
for name := range yBatches {
yBatches[name], err = sliceBatch(yBatches[name], T.S(0, yBatches[name].Shape()[0]-numPads))
if err != nil {
return nil, err
}
}
}
yBatchess[bi] = yBatches
}
// Concatenate the batches back together
ys := make(map[string]T.Tensor, 0)
for name := range yBatchess[0] {
batchesForOutput := make([]T.Tensor, 0)
for batch := range yBatchess {
batchesForOutput = append(batchesForOutput, yBatchess[batch][name])
}
y, err := T.Concat(0, batchesForOutput[0], batchesForOutput[1:]...)
if err != nil {
return nil, err
}
ys[name] = y
}
return ys, nil
}
// MustPredict calls Predict, but panics if there is an error.
func (m *Model) MustPredict(xs map[string]T.Tensor) map[string]T.Tensor {
ys, err := m.Predict(xs)
if err != nil {
panic(err)
}
return ys
}
func (m *Model) getCurrentBatchSize() int {
for _, n := range m.InputNodes {
return n.Shape()[0]
}
panic("this shouldn't be possible to reach, do you have no input nodes for some reason?")
}
// Creates a list of batches from the data. The data is a map of named tensors, representing multiple inputs.
// If zeroPadding is true, the last batch will be padded with zeros if it is smaller than the batch size.
// If zeroPadding is false, the last batch will be discarded if it is smaller than the batch size.
// Takes input map[name]Tensor and returns [batch]map[name]Tensor, along with the number of zero rows needed to fill the final batch.
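// For example, with 10 rows and batchSize 4: zeroPad=true gives 3 batches (the last one padded with 2 zero rows), while zeroPad=false gives 2 batches and drops the final 2 rows.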
func batchMultipleTensors(inputs map[string]T.Tensor, batchSize int, zeroPad bool) ([]map[string]T.Tensor, int, error) {
numRows := -1
for _, input := range inputs {
if numRows == -1 {
numRows = input.Shape()[0]
} else if numRows != input.Shape()[0] {
return nil, 0, fmt.Errorf("all inputs must have the same number of rows")
}
}
remainder := numRows % batchSize
numNeededBatch := batchSize - remainder
if remainder == 0 {
numNeededBatch = 0
}
// We need to copy so we don't modify the inputs map. This does not copy the tensors, just the map.
paddedInputs := make(map[string]T.Tensor, len(inputs))
copyMap(paddedInputs, inputs)
// If we have a number of inputs that does not perfectly fit, either pad or cut off the remainder
if remainder != 0 {
if zeroPad {
// Pad the inputs so the remainder is part of a batch
for name := range paddedInputs {
paddingShape := append([]int{numNeededBatch}, paddedInputs[name].Shape()[1:]...)
padding := T.New(T.WithShape(paddingShape...), T.Of(paddedInputs[name].Dtype()))
var err error
paddedInputs[name], err = T.Concat(0, paddedInputs[name], padding)
if err != nil {
return nil, 0, err
}
}
} else {
// Cut off the remainder
for inputI := range paddedInputs {
var err error
paddedInputs[inputI], err = sliceBatch(paddedInputs[inputI], T.S(0, numRows-remainder))
if err != nil {
return nil, 0, err
}
}
}
}
var batchedInputs []map[string]T.Tensor
numPaddedRows := -1
for _, input := range paddedInputs {
numPaddedRows = input.Shape()[0]
break
}
numBatches := numPaddedRows / batchSize
for batchI := 0; batchI < numBatches; batchI += 1 {
batch := map[string]T.Tensor{}
for inputName, input := range paddedInputs {
batchStart := batchI * batchSize
slice, err := sliceBatch(input, T.S(batchStart, batchStart+batchSize))
if err != nil {
panic(err) // TODO - handle this error
}
batch[inputName] = slice
}
batchedInputs = append(batchedInputs, batch)
}
return batchedInputs, numNeededBatch, nil
}
// This performs a slice on the first dimension, but guarantees that the output will have the same ndims as the input.
func sliceBatch(t T.Tensor, slice T.Slice) (T.Tensor, error) {
origShape := t.Shape()
st, err := t.Slice(slice)
if err != nil {
return nil, err
}
if len(st.Shape()) != len(origShape) {
newShape := origShape.Clone()
newShape[0] = 1
err = st.Reshape(newShape...)
if err != nil {
return nil, err
}
}
return st, nil
}
// Summary returns a string summarising the model.
func (m *Model) Summary() string {
s := ""
s += "================== Inputs ===================\n"
for name, node := range m.InputNodes {
s += fmt.Sprintf("Input %-20v Shape: %-20v\n", name, fmt.Sprint(node.Shape()))
}
s += "================== Outputs ==================\n"
for name, node := range m.OutputNodes {
s += fmt.Sprintf("Output %-20v Shape: %-20v\n", name, fmt.Sprint(node.Shape()))
}
s += "================= Loss Reqs =================\n"
for name, node := range m.LossRequiredNodes {
s += fmt.Sprintf("Loss Req %-20v Shape: %-20v\n", name, fmt.Sprint(node.Shape()))
}
totalParams := 0
s += "============= Registered Layers =============\n"
for li := range m.Layers {
reqs := make([]string, 0)
for _, r := range m.Layers[li].INodes() {
reqs = append(reqs, r.Name())
}
numParams := 0
for _, p := range m.Layers[li].Parameters() {
numParams += p.DataSize()
}
totalParams += numParams
s += fmt.Sprintf("Layer %-3v %9v::%-21vShape: %-20v From: %-20v Num Params %v\n",
li, m.Layers[li].Name(), m.Layers[li].Type(),
fmt.Sprint(m.Layers[li].Node().Shape()),
reqs, numParams)
}
s += "=================== Stats ===================\n"
s += fmt.Sprintf("Total number of parameters: %v\n", totalParams)
return s
}
package goras
import "fmt"
// NewNamer creates a new Namer with the given base name.
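// A minimal usage sketch:
//
//	nextName := NewNamer("dense")
//	nextName() // "dense_1"
//	nextName() // "dense_2"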
func NewNamer(baseName string) func() string {
counter := 0
return func() string {
counter++
return fmt.Sprintf("%s_%d", baseName, counter)
}
}
package goras
// WARNING - I think this should probably be in gorgonia, but for now it will live here.
import (
"fmt"
"hash"
"github.com/chewxy/hm"
"gorgonia.org/gorgonia"
"gorgonia.org/tensor"
)
var _ gorgonia.Op = &oneHotOp{}
var _ gorgonia.SDOp = &oneHotOp{}
type oneHotOp struct {
numClasses int
dType tensor.Dtype
}
// DiffWRT implements gorgonia.SDOp.
func (*oneHotOp) DiffWRT(inputs int) []bool {
// I'm pretty sure you can't, nor would ever want to, take the derivative of this op.
return make([]bool, inputs)
}
// SymDiff implements gorgonia.SDOp.
func (*oneHotOp) SymDiff(inputs gorgonia.Nodes, output *gorgonia.Node, grad *gorgonia.Node) (retVal gorgonia.Nodes, err error) {
panic("unimplemented (tho tbf this should never be called)")
}
// Arity implements gorgonia.Op.
func (*oneHotOp) Arity() int {
return 1 // we expect just a vector of indices
}
// CallsExtern implements gorgonia.Op.
func (*oneHotOp) CallsExtern() bool {
return false
}
// Do implements gorgonia.Op.
func (op *oneHotOp) Do(inp ...gorgonia.Value) (gorgonia.Value, error) {
batchSize := inp[0].Shape()[0]
tens := tensor.New(tensor.WithShape(batchSize, op.numClasses), tensor.Of(op.dType))
for i := 0; i < batchSize; i++ {
index := inp[0].Data().([]int)[i]
var err error
switch op.dType {
case tensor.Int:
err = tens.SetAt(int(1), i, index)
case tensor.Float64:
err = tens.SetAt(float64(1), i, index)
case tensor.Float32:
err = tens.SetAt(float32(1), i, index)
case tensor.Bool:
err = tens.SetAt(true, i, index)
}
if err != nil {
return nil, err
}
}
return tens, nil
}
// InferShape implements gorgonia.Op.
func (op *oneHotOp) InferShape(inputs ...gorgonia.DimSizer) (tensor.Shape, error) {
s := inputs[0].(tensor.Shape).Clone()
s = append(s, op.numClasses)
return s, nil
}
// OverwritesInput implements gorgonia.Op.
func (*oneHotOp) OverwritesInput() int {
return -1
}
// ReturnsPtr implements gorgonia.Op.
func (*oneHotOp) ReturnsPtr() bool {
return false
}
// String implements gorgonia.Op.
func (*oneHotOp) String() string {
return "OneHotOp"
}
// Type implements gorgonia.Op.
func (*oneHotOp) Type() hm.Type {
ohTypeInput := gorgonia.TensorType{
Dims: 1,
Of: tensor.Int,
}
ohTypeOutput := gorgonia.TensorType{
Dims: 2,
Of: tensor.Float64,
}
return hm.NewFnType(ohTypeInput, ohTypeOutput)
}
// I don't actually know what this is for (I just copied this code from another op).
func (op *oneHotOp) WriteHash(h hash.Hash) { fmt.Fprint(h, op.String()) }
// Hashcode implements gorgonia.Op.
func (*oneHotOp) Hashcode() uint32 {
// I dont actually know what this is for
panic("unimplementedb")
}
package goras
import (
"fmt"
T "gorgonia.org/tensor"
)
type shapeValidator func(T.Shape) error
func validateShape(shape T.Shape, vals ...shapeValidator) error {
for _, val := range vals {
if err := val(shape); err != nil {
return err
}
}
return nil
}
func valNDims(n int) shapeValidator {
return func(s T.Shape) error {
if len(s) != n {
return fmt.Errorf("expected shape with ndims %v but got ndims %v with shape %v", len(s), n, s)
}
return nil
}
}
func valNthDim(dim int, val int) shapeValidator {
return func(s T.Shape) error {
if s[dim] != val {
return fmt.Errorf("expected shape[%v] to be %v but got %v", dim, val, s[dim])
}
return nil
}
}
func valMatchingDim(target T.Shape) shapeValidator {
return func(s T.Shape) error {
if !s.Eq(target) {
return fmt.Errorf("expected shape %v but got %v", target, s)
}
return nil
}
}
func valMatchingVolume(target T.Shape) shapeValidator {
return func(s T.Shape) error {
if s.TotalSize() != target.TotalSize() {
return fmt.Errorf("shapes must have the same size: %v and %v", target, s)
}
return nil
}
}
func valAtLeastNDims(n int) shapeValidator {
return func(s T.Shape) error {
if len(s) < n {
return fmt.Errorf("expected shape with at least %v dims but got %v", n, len(s))
}
return nil
}
}
func checkBatchedInputShapes(m *Model, inps map[string]T.Tensor) error {
if len(inps) != len(m.InputNodes) {
return fmt.Errorf("incorrect number of inputs. expected %v but got %v", len(m.InputNodes), len(inps))
}
for name := range inps {
if _, ok := m.InputNodes[name]; !ok {
return fmt.Errorf("input %v not found in model", name)
}
if !exactShapeEq(m.InputNodes[name].Shape(), inps[name].Shape()) {
return fmt.Errorf("input %v had incorrect shape. expected %v but got %v", name, m.InputNodes[name].Shape(), inps[name].Shape())
}
}
return nil
}
func checkBatchedLossRequirementShapes(m *Model, outs map[string]T.Tensor) error {
if len(outs) != len(m.LossRequiredNodes) {
return fmt.Errorf("incorrect number of loss requirements. expected %v but got %v", len(m.LossRequiredNodes), len(outs))
}
for name := range outs {
if _, ok := m.LossRequiredNodes[name]; !ok {
return fmt.Errorf("loss requirement %v not found in model", name)
}
if !exactShapeEq(m.LossRequiredNodes[name].Shape(), outs[name].Shape()) {
return fmt.Errorf("input %v had incorrect shape. expected %v but got %v", name, m.LossRequiredNodes[name].Shape(), outs[name].Shape())
}
}
return nil
}
func exactShapeEq(a, b T.Shape) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}
package goras
import (
"reflect"
"gorgonia.org/tensor"
T "gorgonia.org/tensor"
)
type nilHelperType int
var nilType = T.Dtype{Type: reflect.TypeOf(nilHelperType(0))}
func copyMap[T comparable, U any](dst, src map[T]U) {
for k, v := range src {
dst[k] = v
}
}
// NamedTs is a map of string to T.Tensor.
// It is just a convenience type to make code nicer to read.
type NamedTs map[string]T.Tensor
// Return a list of all axes of a tensor
func allAxes(shape tensor.Shape) []int {
axes := make([]int, shape.Dims())
for i := range axes {
axes[i] = i
}
return axes
}
package goras
import (
"image"
"image/color"
"golang.org/x/image/draw"
T "gorgonia.org/tensor"
)
// ImageUtils is a struct that contains functions that are not core to goras, but are useful for image manipulation.
var ImageUtils imageUtils = imageUtils{}
type imageUtils struct{}
// ImagesToTensor converts a list of image.Image to a tensor, with all values between 0 and 1.
// Every image should have the same dimensions.
// If toGreyscale is true, only the r channel of the image will be used, and the tensor will have the shape (n, 1, x, y).
// If toGreyscale is false, the tensor will have the shape (n, 3, x, y).
func (imageUtils) ImagesToTensor(imgs []image.Image, toGreyscale bool) T.Tensor {
xDim, yDim := imgs[0].Bounds().Size().X, imgs[0].Bounds().Size().Y
data := []float64{}
numChannels := 3
if toGreyscale {
numChannels = 1
}
for _, img := range imgs {
for channel := 0; channel < numChannels; channel++ {
for x := 0; x < xDim; x++ {
for y := 0; y < yDim; y++ {
r, g, b, _ := img.At(x, y).RGBA()
r, g, b = r>>8, g>>8, b>>8
v := 0.0
switch channel {
case 0:
v = float64(r) / 255.0
case 1:
v = float64(g) / 255.0
case 2:
v = float64(b) / 255.0
}
data = append(data, (v))
}
}
}
}
return T.New(T.WithShape(len(imgs), numChannels, xDim, yDim), T.WithBacking(data))
}
// TensorToImages converts a tensor with values from 0-1 to a list of image.Image.
// The tensor should have the shape (n, 3, x, y) if fromGreyscale is false, or (n, 1, x, y) if fromGreyscale is true.
func (imageUtils) TensorToImages(tens T.Tensor, fromGreyscale bool) []image.Image {
xDim, yDim := tens.Shape()[2], tens.Shape()[3]
imgs := make([]image.Image, tens.Shape()[0])
numChannels := 3
if fromGreyscale {
numChannels = 1
}
for i := 0; i < tens.Shape()[0]; i++ {
img := image.NewRGBA(image.Rect(0, 0, xDim, yDim))
for channel := 0; channel < numChannels; channel++ {
for x := 0; x < xDim; x++ {
for y := 0; y < yDim; y++ {
v, err := tens.At(i, channel, x, y)
if err != nil {
panic(err)
}
vi := v.(float64)
if numChannels == 3 {
r, g, b, _ := img.At(x, y).RGBA()
r, g, b = r>>8, g>>8, b>>8
r8, g8, b8 := uint8(r), uint8(g), uint8(b)
switch channel {
case 0:
img.Set(x, y, color.RGBA{uint8(vi * 255), g8, b8, 255})
case 1:
img.Set(x, y, color.RGBA{r8, uint8(vi * 255), b8, 255})
case 2:
img.Set(x, y, color.RGBA{r8, g8, uint8(vi * 255), 255})
}
} else {
vInt := uint8(vi * 255)
img.Set(x, y, color.RGBA{vInt, vInt, vInt, 255})
}
}
}
}
imgs[i] = img
}
return imgs
}
// ResizeImage stretches or squeezes an image to a certain size. It uses the specified interpolation, which is one of "nearest_neighbor", "bilinear" or "approx_bilinear".
func (imageUtils) ResizeImage(img image.Image, width, height int, interpolation string) image.Image {
dst := image.NewRGBA(image.Rect(0, 0, width, height))
var interpolator draw.Interpolator
switch interpolation {
case "nearest_neighbor":
interpolator = draw.NearestNeighbor
case "bilinear":
interpolator = draw.BiLinear
case "approx_bilinear":
interpolator = draw.ApproxBiLinear
default:
panic("unrecognized interpolation method")
}
interpolator.Scale(dst, dst.Bounds(), img, img.Bounds(), draw.Over, nil)
return dst
}
/*
// TransformImage rotates and/or scales an image. It uses the specified interpolation, which is one of "nearest_neighbor", "bilinear" or "approx_bilinear
func (imageUtils) TransformImage(img image.Image, rotationDegrees, scale float64, interpolation string) image.Image {
panic("not implemented yet")
}*/
// The helpers in this file are just some stuff to make working with tensors easier.
// There is a pretty good chance that this already exists in gorgonia, but I couldn't find it after all 2 seconds of looking I did.
package goras
import (
"fmt"
"gorgonia.org/tensor"
)
// Make2DSliceTensor converts a 2D slice to a tensor. The slice is indexed[row][column].
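// A minimal usage sketch:
//
//	t, err := Make2DSliceTensor([][]float64{{1, 2, 3}, {4, 5, 6}})
//	// t has shape (2, 3) and dtype float64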
func Make2DSliceTensor[T any](data [][]T) (tensor.Tensor, error) {
if len(data) == 0 || len(data[0]) == 0 {
return nil, fmt.Errorf("data slice must have at least one row and one column")
}
var eg T
typ, err := GetTensorDataType(eg)
if err != nil {
return nil, err
}
t := tensor.New(tensor.WithShape(len(data), len(data[0])), tensor.Of(typ))
for i, row := range data {
if len(row) != len(data[0]) {
return nil, fmt.Errorf("data slice must have the same number of columns in each row")
}
for j, v := range row {
if err := t.SetAt(v, i, j); err != nil {
return nil, err
}
}
}
return t, nil
}
// MustMake2DSliceTensor calls Make2DSliceTensor and panics if there is an error.
func MustMake2DSliceTensor[T any](data [][]T) tensor.Tensor {
t, err := Make2DSliceTensor(data)
if err != nil {
panic(err)
}
return t
}
// Make1DSliceTensor converts a 1D slice to a tensor.
func Make1DSliceTensor[T any](data []T) (tensor.Tensor, error) {
if len(data) == 0 {
return nil, fmt.Errorf("data slice must have at least one element")
}
var eg T
typ, err := GetTensorDataType(eg)
if err != nil {
return nil, err
}
t := tensor.New(tensor.WithShape(len(data)), tensor.Of(typ))
for i, v := range data {
if err := t.SetAt(v, i); err != nil {
return nil, err
}
}
return t, nil
}
// MustMake1DSliceTensor calls Make1DSliceTensor and panics if there is an error.
func MustMake1DSliceTensor[T any](data []T) tensor.Tensor {
t, err := Make1DSliceTensor(data)
if err != nil {
panic(err)
}
return t
}
func GetTensorDataType(t interface{}) (tensor.Dtype, error) {
switch t.(type) {
case int:
return tensor.Int, nil
case float64:
return tensor.Float64, nil
case float32:
return tensor.Float32, nil
case bool:
return tensor.Bool, nil
default:
return tensor.Dtype{}, fmt.Errorf("unsupported type %T", t)
}
}
// MustGetTensorDataType calls GetTensorDataType and panics if there is an error.
func MustGetTensorDataType(t interface{}) tensor.Dtype {
typ, err := GetTensorDataType(t)
if err != nil {
panic(err)
}
return typ
}