Hot questions for Using Neural networks in swift

Question:

I'm trying to implement a really simple neural network with backpropagation. I'm trying to train the network on the logical AND operator, but the predictions are not coming out right. :(

    /// Groups the sigmoid activation and the derivative helper used during training.
    public class ActivationFunction {

        /// Logistic function `1 / (1 + e^-x)`.
        ///
        /// - Parameter x: The weighted input sum of a neuron.
        /// - Returns: The activation for `x`.
        class func sigmoid(x: Float) -> Float {
            let denominator = 1.0 + exp(-x)
            return 1.0 / denominator
        }

        /// Computes `x * (1 - x)`.
        ///
        /// This equals the sigmoid's slope only when `x` is already a sigmoid
        /// *output*, which is how the layer code calls it.
        ///
        /// - Parameter x: A value previously produced by `sigmoid(x:)`.
        /// - Returns: `x * (1 - x)`.
        class func dSigmoid(x: Float) -> Float {
            let complement = 1 - x
            return x * complement
        }
    }

    /// Hyper-parameters shared by the training loop.
    public class NeuralNetConstants {

        /// Number of passes made over the whole training set.
        public static let iterations: Int = 100_000

        /// Factor applied to every weight step.
        public static let learningRate = Float(0.3)

        /// Fraction of the previous weight step that is applied again.
        public static let momentum = Float(0.6)

    }

/// One fully connected layer: a flat weight array plus the most recent input
/// and output, which `train` reads back to compute the weight updates.
public class Layer {

    // Activation of each output neuron from the most recent `run` call.
    private var output: [Float]
    // Most recent input; allocated with one extra slot that `run` sets to 1.
    private var input: [Float]
    // Row-major weights: neuron `i` owns the `input.count` entries starting at `i * input.count`.
    private var weights: [Float]
    // Learning-rate-scaled step computed for each weight by the previous `train` call.
    private var dWeights: [Float]

    /// Allocates the buffers for `inputSize` inputs (plus one extra slot) and `outputSize` neurons.
    init(inputSize: Int, outputSize: Int) {
        self.output = [Float](repeating: 0, count: outputSize)
        self.input = [Float](repeating: 0, count: inputSize + 1)
        // NOTE(review): the `repeating:` argument is evaluated once, so every
        // weight starts with the same random value.
        self.weights = [Float](repeating: (-2.0...2.0).random(), count: (1 + inputSize) * outputSize)
        self.dWeights = [Float](repeating: 0, count: weights.count)
    }

    /// Feeds `inputArray` through the layer and returns the sigmoid activations.
    ///
    /// - Parameter inputArray: Values to feed into the layer.
    /// - Returns: One activation per output neuron.
    public func run(inputArray: [Float]) -> [Float] {

        // NOTE(review): this replaces the whole array, so `input.count` becomes
        // `inputArray.count` instead of the `inputSize + 1` allocated in `init`;
        // the following line then overwrites the last supplied value with 1.
        input =  inputArray
        input[input.count-1] = 1
        var offSet = 0

        for i in 0..<output.count {
            for j in 0..<input.count {
                // `output[i]` is not cleared first, so the sum starts from the
                // activation left by the previous call.
                output[i] += weights[offSet+j] * input[j]
            }

            output[i] = ActivationFunction.sigmoid(x: output[i])
            // Advance to the next neuron's run of weights.
            offSet += input.count

        }

        return output
    }

    /// Updates the weights from `error` and returns the error for the layer feeding this one.
    ///
    /// - Parameters:
    ///   - error: One error value per output neuron.
    ///   - learningRate: Factor applied to each weight step.
    ///   - momentum: Fraction of the previous step that is applied again.
    /// - Returns: One accumulated error value per entry of `input`.
    public func train(error: [Float], learningRate: Float, momentum: Float) -> [Float] {

        var offset = 0
        var nextError = [Float](repeating: 0, count: input.count)

        for i in 0..<output.count {

            // `dSigmoid` is given the stored activation, not the pre-activation sum.
            let delta = error[i] * ActivationFunction.dSigmoid(x: output[i])

            for j in 0..<input.count {
                let weightIndex = offset + j
                // Accumulate the error using the weight value from before this update.
                nextError[j] = nextError[j] + weights[weightIndex] * delta
                let dw = input[j] * delta * learningRate
                // Apply the new step plus a momentum share of the previous step.
                weights[weightIndex] += dWeights[weightIndex] * momentum + dw
                dWeights[weightIndex] = dw
            }

            offset += input.count
        }

        return nextError
    }

}

/// A two-layer network (hidden layer followed by output layer) trained by
/// pushing the output error backwards through the layers.
public class BackpropNeuralNetwork {

    private var layers: [Layer] = []

    /// Creates the hidden layer and then the output layer.
    ///
    /// - Parameters:
    ///   - inputSize: Number of values fed into the hidden layer.
    ///   - hiddenSize: Number of neurons in the hidden layer.
    ///   - outputSize: Number of neurons in the output layer.
    public init(inputSize: Int, hiddenSize: Int, outputSize: Int) {
        let hiddenLayer = Layer(inputSize: inputSize, outputSize: hiddenSize)
        let outputLayer = Layer(inputSize: hiddenSize, outputSize: outputSize)
        layers += [hiddenLayer, outputLayer]
    }

    /// Returns the layer stored at `index` (0 is the hidden layer).
    public func getLayer(index: Int) -> Layer {
        let layer = layers[index]
        return layer
    }

    /// Runs `input` through every layer in order and returns the final activations.
    public func run(input: [Float]) -> [Float] {
        return layers.reduce(input) { signal, layer in
            layer.run(inputArray: signal)
        }
    }

    /// Performs one training step for a single sample.
    ///
    /// - Parameters:
    ///   - input: The sample to feed forward.
    ///   - targetOutput: The desired output for `input`.
    ///   - learningRate: Forwarded to each layer's `train`.
    ///   - momentum: Forwarded to each layer's `train`.
    public func train(input: [Float], targetOutput: [Float], learningRate: Float, momentum: Float) {
        let prediction = run(input: input)

        // Error at the output: target minus prediction, element by element.
        var signal = prediction.indices.map { targetOutput[$0] - prediction[$0] }

        // Walk the layers from last to first; each returns the error for its predecessor.
        for layer in layers.reversed() {
            signal = layer.train(error: signal, learningRate: learningRate, momentum: momentum)
        }
    }

}

extension ClosedRange where Bound: FloatingPoint {
    /// Returns a pseudo-random value from this range.
    ///
    /// A draw from `arc4random_uniform(UINT32_MAX)` is divided by `UINT32_MAX`,
    /// scaled by the range's width, and shifted by the lower bound.
    public func random() -> Bound {
        let unit = Bound(arc4random_uniform(UINT32_MAX)) / Bound(UINT32_MAX)
        let width = upperBound - lowerBound
        return unit * width + lowerBound
    }
}

This is my training data. I just want my network to learn the simple logical AND operator.

My input data:

// The four input combinations of a two-input gate and the AND result for each.
let traningData: [[Float]] = [ [0,0], [0,1], [1,0], [1,1] ]
let traningResults: [[Float]] = [ [0], [0], [0], [1] ]

let backProb = BackpropNeuralNetwork(inputSize: 2, hiddenSize: 3, outputSize: 1)

for _ in 0..<NeuralNetConstants.iterations {

    // One epoch: a single training step per sample.
    for (sample, expected) in zip(traningData, traningResults) {
        backProb.train(
            input: sample,
            targetOutput: expected,
            learningRate: NeuralNetConstants.learningRate,
            momentum: NeuralNetConstants.momentum
        )
    }

    // Print the current prediction for every sample after each epoch.
    for t in traningData {
        print("\(t[0]), \(t[1])  -- \(backProb.run(input: t)[0])")
    }

}

This is my whole code for the neural network. The code is not really Swifty, but I think it's more important to understand the theory behind neural networks first; the code can be made more Swifty afterwards.

The problem is that my results are completely wrong. This is what I get

0.0, 0.0  -- 0.246135
0.0, 1.0  -- 0.251307
1.0, 0.0  -- 0.24325
1.0, 1.0  -- 0.240923

This is what I want to get

0,0, 0,0 -- 0,000
0,0, 1,0 -- 0,005
1,0, 0,0 -- 0,005
1,0, 1,0 -- 0,992

Well for comparison the java implementation works fine..

/** Sigmoid activation and the derivative helper used during training. */
public class ActivationFunction {

    /**
     * Logistic function {@code 1 / (1 + e^-x)}.
     *
     * @param x weighted input sum of a neuron
     * @return the activation for {@code x}
     */
    public static float sigmoid(float x) {
        double denominator = 1 + Math.exp(-x);
        return (float) (1 / denominator);
    }

    /**
     * Computes {@code x * (1 - x)}.
     *
     * @param x a value that is already the output of {@link #sigmoid(float)};
     *          because of that the sigmoid does not have to be applied a second time
     * @return {@code x * (1 - x)}
     */
    public static float dSigmoid(float x) {
        float complement = 1 - x;
        return x * complement;
    }
}

/** Hyper-parameters shared by the training driver. */
public class NeuralNetConstants {

    /** Number of passes made over the whole training set. */
    public static final int ITERATIONS = 100000;

    /** Factor applied to every weight step. */
    public static final float LEARNING_RATE = 0.3f;

    /** Fraction of the previous weight step that is applied again. */
    public static final float MOMENTUM = 0.6f;

    // Constants holder: the private constructor prevents instantiation from outside.
    private NeuralNetConstants() {
    }
}

public class Layer {

    private float[] output;
    private float[] input;
    private float[] weights;
    private float[] dWeights;
    private Random random;

    public Layer(int inputSize, int outputSize) {
        output = new float[outputSize];
        input = new float[inputSize + 1];
        weights = new float[(1 + inputSize) * outputSize];
        dWeights = new float[weights.length];
        this.random = new Random();
        initWeights();
    }

    public void initWeights() {
        for (int i = 0; i < weights.length; i++) {
            weights[i] = (random.nextFloat() - 0.5f) * 4f;
        }
    }

    public float[] run(float[] inputArray) {

        System.arraycopy(inputArray, 0, input, 0, inputArray.length);
        input[input.length - 1] = 1; // bias
        int offset = 0;

        for (int i = 0; i < output.length; i++) {
            for (int j = 0; j < input.length; j++) {
                output[i] += weights[offset + j] * input[j];
            }
            output[i] = ActivationFunction.sigmoid(output[i]);
            offset += input.length;
        }

        return Arrays.copyOf(output, output.length);
    }

    public float[] train(float[] error, float learningRate, float momentum) {

        int offset = 0;
        float[] nextError = new float[input.length];

        for (int i = 0; i < output.length; i++) {

            float delta = error[i] * ActivationFunction.dSigmoid(output[i]); 
            for (int j = 0; j < input.length; j++) {
                int previousWeightIndex = offset + j;
                nextError[j] = nextError[j] + weights[previousWeightIndex] * delta;
                float dw = input[j] * delta * learningRate;
                weights[previousWeightIndex] += dWeights[previousWeightIndex] * momentum + dw;
                dWeights[previousWeightIndex] = dw;
            }

            offset += input.length;
        }

        return nextError;
    }
}

/** A two-layer network (hidden layer followed by output layer) trained by backpropagation. */
public class BackpropNeuralNetwork {

    private Layer[] layers;

    /**
     * @param inputSize  number of values fed into the hidden layer
     * @param hiddenSize number of neurons in the hidden layer
     * @param outputSize number of neurons in the output layer
     */
    public BackpropNeuralNetwork(int inputSize, int hiddenSize, int outputSize) {
        Layer hiddenLayer = new Layer(inputSize, hiddenSize);
        Layer outputLayer = new Layer(hiddenSize, outputSize);
        layers = new Layer[] { hiddenLayer, outputLayer };
    }

    /** Returns the layer stored at {@code index} (0 is the hidden layer). */
    public Layer getLayer(int index) {
        return layers[index];
    }

    /**
     * Runs {@code input} through every layer in order.
     *
     * @param input the sample to feed forward
     * @return the activations of the last layer
     */
    public float[] run(float[] input) {
        float[] signal = input;
        for (Layer layer : layers) {
            signal = layer.run(signal);
        }
        return signal;
    }

    /**
     * Performs one training step for a single sample.
     *
     * @param input        the sample to feed forward
     * @param targetOutput the desired output for {@code input}
     * @param learningRate forwarded to each layer's {@code train}
     * @param momentum     forwarded to each layer's {@code train}
     */
    public void train(float[] input, float[] targetOutput, float learningRate, float momentum) {
        float[] predicted = run(input);

        // Error at the output: target minus prediction, element by element.
        float[] signal = new float[predicted.length];
        for (int k = 0; k < signal.length; k++) {
            signal[k] = targetOutput[k] - predicted[k];
        }

        // Walk the layers from last to first; each returns the error for its predecessor.
        int index = layers.length;
        while (index-- > 0) {
            signal = layers[index].train(signal, learningRate, momentum);
        }
    }
}

/** Command-line driver that trains the network on the AND truth table. */
public class NeuralNetwork {

    /**
     * Trains for {@code NeuralNetConstants.ITERATIONS} epochs and prints the
     * prediction for every sample after each epoch.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        // The four input combinations and the AND result for each.
        final float[][] trainingData = { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 } };
        final float[][] trainingResults = { { 0 }, { 0 }, { 0 }, { 1 } };

        BackpropNeuralNetwork network = new BackpropNeuralNetwork(2, 3, 1);

        for (int epoch = 1; epoch <= NeuralNetConstants.ITERATIONS; epoch++) {

            for (int sample = 0; sample < trainingResults.length; sample++) {
                network.train(trainingData[sample], trainingResults[sample],
                        NeuralNetConstants.LEARNING_RATE, NeuralNetConstants.MOMENTUM);
            }

            System.out.println();
            for (int sample = 0; sample < trainingResults.length; sample++) {
                float[] t = trainingData[sample];
                System.out.printf("%d epoch\n", epoch);
                System.out.printf("%.1f, %.1f --> %.3f\n", t[0], t[1], network.run(t)[0]);
            }
        }
    }

}

Answer:

You are initializing your weights differently. You are creating one random value and reusing it for every weight. What you want to do is to create a separate random value for each weight in the array. Replace

self.weights = [Float](repeating: (-2.0...2.0).random(), count: (1 + inputSize) * outputSize)

with

// The closure runs once per slot, so each weight gets its own random value.
let weightCount = (1 + inputSize) * outputSize
self.weights = (0..<weightCount).map { _ in (-2.0...2.0).random() }

Besides that, please consider overwriting only the first elements of your input in the Layer.run method, so that the array keeps the extra bias slot allocated in init. So instead of

input =  inputArray

you should do this:

// Copy the supplied values into the leading slots only, leaving the trailing
// bias slot of `self.input` in place. `enumerated()` is required here:
// iterating the array directly yields elements, not (index, element) pairs.
for (i, e) in inputArray.enumerated() {
  self.input[i] = e
}

Question:

I am trying to implement Daniel Shiffman's XOR Neural Network in swift, I have all the parts, but after training, the results are unexpected.

Part of me thinks it's the actual training system trying to learn multiple things at once.

I have linked my playground in case anyone can spot anything wrong: https://www.dropbox.com/s/9rv8ku3d62h03ip/Neural.playground.zip?dl=0

Daniels code:

https://github.com/shiffman/The-Nature-of-Code-Examples/blob/master/chp10_nn/xor/code/src/Network.java


Answer:

There are a couple of errors in your code. The first (and most important) is a subtlety in the way you're creating your networks.

Right now you're using

inputs = [Neuron](repeating: Neuron(), count:2+1)
hidden = [Neuron](repeating: Neuron(), count:4+1)

But this creates all the inputs with the same Neuron and also all the hidden with the same Neuron, so there are only 4 Neurons: 2 for input (the regular repeated 2 times and a bias neuron) and 2 for hidden (the regular repeated 4 times and 1 for bias).

You can solve it by simply using a for loop:

/// Excerpt of the corrected network: the neuron arrays are filled one
/// `Neuron()` call at a time instead of with `Array(repeating:count:)`.
public class Network
{
    var inputs:[Neuron] = []
    var hidden:[Neuron] = []
    // Assigned in `init` before the setup calls run.
    var output:Neuron!

    public init()
    {
        // Each pass through the loop calls `Neuron()` again, so every slot
        // gets the result of its own initializer call.
        for _ in 1...2 {
            inputs.append(Neuron())
        }

        for _ in 1...4 {
            hidden.append(Neuron())
        }

        //print("inputs length: \(inputs.count)")

        // One bias neuron is appended after the regular neurons of each array.
        inputs.append(Neuron(bias: true))
        hidden.append(Neuron(bias: true))

        output = Neuron()

        // NOTE(review): presumably these wire up the connections between the
        // layers; their bodies are not shown in this excerpt.
        setupInputHidden()
        setupHiddenOutput()
    }

    ...
}

The other (minor) thing is when you calculate the output of a Neuron you're adding the bias instead of replacing it (bias = from.output*c.weight), I don't know if that was on purpose but the result seems to be unaffected.

Question:

Well, long story short, I was bored and decided to try to learn about Neural networks. I have been doing C# for a year and now that I am learning Swift, I preferred to continue with that language, and to follow this tutorial.

The problem is that it is in C (or C++, I am not sure), I don't know this language, and I clearly don't have the courage to learn it now. By deduction, I understand things step by step, but still not all of them. So the purpose of this thread is for me to regularly edit my question with new subquestions (avoiding multiple posts) to convert this C++ project into Swift. Is this against SO rules?

Here are my first ones:

  • In Structures (see here for examples):

    C++ Code:

    struct SNeuron
    {
       int m_NumInputs; //var m_NumInputs:Int (Swift)
    
       vector<double> m_vecWeight; // var m_vecWeight:[Double] = [] (Swift)
    
       SNeuron(int NumInputs); // IS THS THE SAME AS init(numInputs:Int) ?
    
    };
    
  • My Sigmoid function

    /// Logistic function `1 / (1 + e^-x)`.
    ///
    /// - Parameter x: The value to squash.
    /// - Returns: The sigmoid of `x`.
    func Sigmoid(x:Double) -> Double
    {
        let denominator = 1 + exp(-x)
        return 1 / denominator
    }
    

    (which is supposed to follow this function)

        inline double Sigmoid(double activation, double response); //inline ?
    
  • What does CParams:: mean?

          //add in the bias
    
          netinput += m_vecLayers[i].m_vecNeurons[j].m_vecWeight[NumInputs-1] *
    
                      CParams::dBias; //?
    
  • Is Vector.push_back() "equals" to Array.append ?

      outputs.push_back(Sigmoid(netinput, CParams::dActivationResponse));
    

    translated in

      outputs.append(Sigmoid(netinput)) 
    
  • Declaration of a function with a syntax I don't understand:

        int GetNumberOfWeights()const; //const ?
    

Well... I am only at the first code page of the tutorial so things may explain themselves in the future but still, if someone has some time to help me, it would be great !


Answer:

Since you seem to know C#, here is a good tutorial on ANN, that you may be able to convert in Swift after doing it in C#