Hot questions for Using Neural networks in brain.js

Question:

I'm trying to do some text analysis to determine if a given string is... talking about politics. I'm thinking I could create a neural network where the input is either a string or a list of words (ordering might matter?) and the output is whether the string is about politics.

However the brain.js library only takes inputs of a number between 0 and 1 or an array of numbers between 0 and 1. How can I coerce my data in such a way that I can achieve the task?


Answer:

new brain.recurrent.LSTM(); 

This does the trick for you.

Example,

// Train a recurrent LSTM network directly on strings: every sample maps a
// phrase to the category label ("software" or "hardware") it belongs to.
const brain = require('brain.js');

const trainingSamples = [
  { input: "my unit-tests failed.", output: "software" },
  { input: "tried the program, but it was buggy.", output: "software" },
  { input: "i need a new power supply.", output: "hardware" },
  { input: "the drive has a 2TB capacity.", output: "hardware" },
  { input: "unit-tests", output: "software" },
  { input: "program", output: "software" },
  { input: "power supply", output: "hardware" },
  { input: "drive", output: "hardware" },
];

const net = new brain.recurrent.LSTM();
net.train(trainingSamples);

// Ask the trained network to label a phrase and print its answer.
const prediction = net.run("drive");
console.log(`output = ${prediction}`);


output = hardware

Refer to https://github.com/BrainJS/brain.js/issues/65, which has a clear explanation of brain.recurrent.LSTM() and its usage.

Question:

I must clearly have misunderstood something in the brain.js instructions on training.

I played around with this repl.it code

// Load brain.js and create a feed-forward network with default settings.
const brain = require('brain.js');

const network = new brain.NeuralNetwork();

// Every sample follows indicatorA = 0.2 * doseA, for doses 0 through 0.7.
// No options object is passed, so the library's training defaults apply.
network.train([
    { input: { doseA: 0 }, output: { indicatorA: 0 } },
    { input: { doseA: 0.1 }, output: { indicatorA: 0.02 } },
    { input: { doseA: 0.2 }, output: { indicatorA: 0.04 } },
    { input: { doseA: 0.3 }, output: { indicatorA: 0.06 } },
    { input: { doseA: 0.4 }, output: { indicatorA: 0.08 } },
    { input: { doseA: 0.5 }, output: { indicatorA: 0.10 } },
    { input: { doseA: 0.6 }, output: { indicatorA: 0.12 } },
    { input: { doseA: 0.7 }, output: { indicatorA: 0.14 } },
]);

// Query a dose that lies between two of the training samples (0.3 and 0.4).
const result = network.run({ doseA: 0.35 });

console.log(result);

>> { indicatorA: 0.12165333330631256 }
    => undefined

I was expecting the result to be { indicatorA: 0.07 }.

What am I doing wrong?


Answer:

Increasing the number of iterations and decreasing the error threshold worked for me:

const brain = require('brain.js');

// Relationship to learn: indicatorA = 0.2 * doseA.
const samples = [
  { input: { doseA: 0 }, output: { indicatorA: 0 } },
  { input: { doseA: 0.1 }, output: { indicatorA: 0.02 } },
  { input: { doseA: 0.2 }, output: { indicatorA: 0.04 } },
  { input: { doseA: 0.3 }, output: { indicatorA: 0.06 } },
  { input: { doseA: 0.4 }, output: { indicatorA: 0.08 } },
  { input: { doseA: 0.5 }, output: { indicatorA: 0.10 } },
  { input: { doseA: 0.6 }, output: { indicatorA: 0.12 } },
  { input: { doseA: 0.7 }, output: { indicatorA: 0.14 } },
];

// Allow many more iterations and require a much smaller error before
// training stops; also log progress while training runs.
const trainingOptions = {
  errorThresh: 0.00001,
  iterations: 1e6,
  log: true,
};

const network = new brain.NeuralNetwork();
network.train(samples, trainingOptions);

// Query a dose that lies between two of the training samples.
const result = network.run({ doseA: 0.35 });

console.log(result);
// 

Result : { indicatorA: 0.0693388432264328 }

Question:

I am using the Auto MPG training set from http://archive.ics.uci.edu/ml/datasets/Auto+MPG

My code is:

'use strict';
var brain, fs, normalizeData, trainNetwork, _;

_ = require('lodash');

brain = require('brain');

fs = require('fs');

// Reads ./data/autodata.csv, trains a new network on the first half of its
// lines, and reports back through trainNetworkCb(err, net).
trainNetwork = function(trainNetworkCb) {
  var network = new brain.NeuralNetwork();
  var trainingOptions = {
    log: true,
    logPeriod: 100,
    errorThresh: 0.00005
  };

  // Turns one CSV line into an { input, output } training sample: the
  // normalized "continuous" value becomes the output and is removed from
  // the input features.
  function toSample(line) {
    var features = normalizeData(line);
    var target = { continuous: features.continuous };
    delete features.continuous;
    return { input: features, output: target };
  }

  return fs.readFile('./data/autodata.csv', function(err, fileData) {
    if (err) {
      return trainNetworkCb(err);
    }
    var allLines = fileData.toString().split('\n');
    // First half of the file is the training set.
    var firstHalf = allLines.slice(0, allLines.length / 2);
    network.train(_.map(firstHalf, toSample), trainingOptions);
    return trainNetworkCb(null, network);
  });
};

// Once training has finished, re-read the CSV, use the second half of its
// lines as the test set, and print the network's prediction next to each
// expected sample.
trainNetwork(function(err, net) {
  if (err) {
    throw err;
  }
  return fs.readFile('./data/autodata.csv', function(readErr, fileData) {
    var fileString, lines, testData;
    if (readErr) {
      // trainNetworkCb is a parameter of trainNetwork and is not in scope
      // here; calling it would raise a ReferenceError and hide the real read
      // failure. Throw instead, matching how training errors are handled.
      throw readErr;
    }
    fileString = fileData.toString();
    lines = fileString.split('\n');
    // Second half of the file is the test set.
    testData = lines.splice(lines.length / 2);
    // Drop empty lines, such as the one a trailing newline produces.
    testData = _.filter(testData, function(point) {
      return point !== '';
    });
    testData = _.map(testData, function(dataPoint) {
      var normalizedData, obj;
      normalizedData = normalizeData(dataPoint);
      obj = {
        output: {
          continuous: normalizedData.continuous
        },
        input: normalizedData
      };
      // The expected value must not be fed to the network as an input.
      delete obj.input.continuous;
      return obj;
    });
    return _.each(testData, function(dataPoint) {
      var output;
      output = net.run(dataPoint.input);
      // Print the prediction, then the sample holding the expected output.
      console.log(output);
      console.log(dataPoint);
      return console.log('');
    });
  });
});

// Converts one comma-separated data row into an object of network features.
// Columns are read by position: cylinders, displacement, horsepower, weight,
// acceleration, model year, origin, and finally the value stored as
// "continuous". Cylinders, model year and origin become one 0/1 flag per
// known category; the remaining columns are divided by a fixed constant.
normalizeData = function(dataRow) {
  var values = dataRow.split(',').map(function(field) {
    return Number(field);
  });
  var features = {};

  // Adds one flag per category, set to 1 only for the category that matches.
  function addFlags(prefix, categories, actual) {
    categories.forEach(function(category) {
      features[prefix + category] = category === actual ? 1 : 0;
    });
  }

  addFlags("cylinder", [5, 3, 6, 4, 8], values[0]);
  features.displacement = values[1] / 500;
  features.horsepower = values[2] / 500;
  features.weight = values[3] / 10000;
  features.acceleration = values[4] / 100;
  addFlags("model_year", [82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70], values[5]);
  addFlags("origin", [2, 3, 1], values[6]);
  features.continuous = values[7] / 100;
  return features;
};

I believe I am normalizing everything correctly. I am using half the data for training and the other half for testing. The data is not ordered, as far as I can tell, so which half is used for which shouldn't matter.

My errors are pretty large however when testing. Usually by 10MPG or so (30% error). What am I doing incorrectly?

Thanks


Answer:

The dataset you linked is ordered by model-year; perhaps drastic changes in technology made the engines more efficient? Neural networks are dependent on correct outputs during training. I would try training the network with all but the last row, and then test using that. Can you link me the csv file you're using? The normalizeData function doesn't give us what you want with the linked file (http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data)

edit:

It seems that regardless of the errorThresh you specify, brain won't run more than 20,000 iterations on training runs. There are several ways to get around this. You can specify the learningRate of your neural network. Upping the learningRate to 0.6 (default is 0.3) helped me get more accurate results:

// Same options as the question's train() call, plus a learningRate of 0.6
// (double the 0.3 default mentioned above).
net.train(trainingData, {
  log: true,
  logPeriod: 100,
  errorThresh: 0.00005,
  learningRate: 0.6
});

Higher learningRate means more aggressive weight adjustment, which helps when you aren't running as many iterations as you want.

Alternatively, you can specify the total amount of iterations in the options object (if not specified, it defaults to 20,000 - see here).

// Same options as the question's train() call, plus an explicit iteration
// cap of 100000 in place of the 20,000 default mentioned above.
net.train(trainingData, {
  log: true,
  logPeriod: 100,
  errorThresh: 0.00005,
  iterations: 100000
});

Brain stops training when i < iterations && error > errorThresh evaluates to false. So feel free to crank up the iterations count to ensure that the above expression turns false because the error is below your specified errorThresh (source).

Question:

I'm trying to use Brain.js for text generation purposes.

See my WIP example at: https://codepen.io/tomsoderlund/pen/WEPqzE (see also console output).

I basically:

  1. Generate an array of all the words: wordsInOrder
  2. Create a dictionaryWords array with sorted unique words.
  3. I create my training set from wordsInOrder like this: { input: [0.0326], output: [0.9565] }, where input is the current word's dictionary index (normalized), and output is the following word's dictionary index.

Then I generate new words by:

  1. Picking a random word from dictionary.
  2. Then running the brainJsNetwork.run([wordValue]) function to generate the following word.
  3. Repeat from step 1 over again.

However, it seems to get stuck on words in the middle of the dictionary, with wordValue’s around 0.5:

Any clues what the problem is?


Answer:

I suspect this is due to your training set. This is supposed to map a certain input to an output that's correct. Like in brainjs color contrast example:

// Each sample maps an RGB colour (channel values between 0 and 1) to the
// category it belongs to: black or white.
net.train([{input: { r: 0.03, g: 0.7, b: 0.5 }, output: { black: 1 }},
       {input: { r: 0.16, g: 0.09, b: 0.2 }, output: { white: 1 }},
       {input: { r: 0.5, g: 0.5, b: 1.0 }, output: { white: 1 }}]);

For a list of inputs, it gives the correct categorization. Then afterwards if you run the trained network, it gives the likelihood of the categories for the input you give it:

// Run the trained network on a new colour; the result holds one score per category.
var output = net.run({ r: 1, g: 0.4, b: 0 });  // { white: 0.99, black: 0.002 }

You create the training set from wordsInOrder. That means there are some words which occur multiple times in your training set. A word like 'made' is in your training set multiple times, with different outputs:

made -> If (or the respective wordindex values, normalized to be between 0-1)
made -> It's
made -> outside
made -> in

The trained network will try to compensate for the different possible outcomes and will average the likelihood it outputs. If you then take that output to look up the word in the dictionaryWords array, you are more likely to end up with words that are in the middle of the array (like 'not' and 'necessarily').

You need to take into account that the neural network will return a likelihood of the input belonging to a certain category. So if you want to use it for predicting the next word, you have to encode the training data differently. There are 4 valid next words for 'made', so you would have to encode them like this:

{input: { (wordindex of 'made') }, output: { if: 1, its: 1, outside: 1, in: 1 }}

Of course this means your output will have a likelihood score for all the 92 unique words in the dictionaryWords array. I am not sure if this simple neural network can be used with an output with 92 dimensions.

Have you looked at Markov chains for generating text? They make it easier to model which transitions (from one word to the next word) are more likely than others.

Here is an explanation and a javascript implementation.

Question:

I'm having a little trouble with my neural network. I've set it up so it generates an array with 5 values, each 0 or 1, e.g. [1,1,0,1,0]. Using Node.js I console log the random array, and if I reply with y it will add it to the training with the correct output, and vice versa. Once I have responded, genRan() runs, creates a new random array and saves the "guess" to var guess. However, after the first run, it no longer gives me a guess value, but instead: [object Object].

Here is the code:

// Dependencies: brain.js for the network, readline for console prompts.
var brain = require('brain.js');
var net = new brain.NeuralNetwork();
const readline = require('readline');

// Console interface used to ask the user whether each guess was right.
const r1 = readline.createInterface({
  input: process.stdin,
  output: process.stdout
});

// Running tallies: ca is incremented on "Y" answers, wa on "N" answers.
var ca = 0,
    wa = 0;

// Train on a single sample up front, before the first call to net.run.
net.train([
    {input: [0,0,0,0,0], output: [0]}
]);

// Builds an array of five random bits, runs the network on it, and passes
// the array together with the network's result to ask().
function genRan(){
    var a,b,c,d,e;
    var array = [];
    // Math.round(Math.random()) yields either 0 or 1.
    a = Math.round(Math.random());
    b = Math.round(Math.random());
    c = Math.round(Math.random());
    d = Math.round(Math.random());
    e = Math.round(Math.random());

    array.push(a,b,c,d,e);
    var guess = net.run(array);
    ask(array,guess);
}

// Prints the input array and the network's guess, asks the user whether the
// guess is correct, trains the network on the answer, then starts a new round.
//   a - the input array that was given to net.run
//   b - whatever net.run returned for that array
function ask(a,b){
    var array = a,
        guess = b;
    // The guess is concatenated into the prompt as-is (string coercion).
    r1.question((wa+ca) + ") input: " + array + " We think: " + guess + ". Am I correct? (Y/N)", (answer) => {

        if(answer == "Y" || answer == "y"){
            ca++;
            // NOTE(review): output here is a bare number, whereas the initial
            // training sample passes an array ([0]).
            net.train([
                {input : array, output : Math.round(guess)}
            ]);
        }else if(answer == "N" || answer == "n"){
            wa++;
            // The guess was wrong, so train towards the other value.
            var roundGuess = Math.round(guess);
            var opposite;
            switch (roundGuess){
                case 1:
                    opposite = 0;
                    break;
                case 0:
                    opposite = 1;
                    break;
                default:
                    // Reached when the rounded guess is neither 0 nor 1.
                    opposite = null
            }
            net.train([
                {input : array, output : opposite}
            ]);     
        }
        // Any other answer is not counted and triggers no training.
        console.log("Success percent: " + (100 *ca/(ca+wa)) + "% " + (ca+wa) +" attempts\n\r");
        genRan();
    })

}
// Start the first round.
genRan();

The first question works fine, and presents this:

0) input: 0,0,0,0,0 We think: 0.07046. Am I correct? (Y/N)

When I respond, I get:

Success percent: 100% 1 attempts

1) input 1,1,1,0,1 We think: [object Object]. Am I correct? (Y/N)

For some reason, when it goes to "guess" it doesn't give me a value. Any ideas why?


Answer:

The reason it's gone wrong is twofold:

  1. The output of net.run is an array - you probably want the first item from it.
  2. The output value passed to net.train must be an array - you're passing it a single number

With a few changes your code works as (I think) you expect it:

  1. Use guess[0] in your ask method throughout
  2. Wrap the opposite variable in square brackets to make it an array

     // The output value is wrapped in an array, like the input.
     net.train([
            {input : array, output : [opposite]}
        ]);     
    

Working code below for reference (Will not work in stacksnippet though)

// Dependencies: brain.js supplies the network, readline the console prompt.
const brain = require('brain.js');
const readline = require('readline');

const net = new brain.NeuralNetwork();

// Console interface used by ask() to collect the user's Y/N feedback.
const r1 = readline.createInterface({
  input: process.stdin,
  output: process.stdout
});

// Tallies of correct ("Y") and wrong ("N") guesses; ask() increments them.
let ca = 0;
let wa = 0;

// Train on a single sample up front, before the first call to net.run.
const seedSample = {input: [0,0,0,0,0], output: [0]};
net.train([seedSample]);

// Builds an array of five random bits (each 0 or 1), asks the network for
// its guess on that array, and hands both to ask().
function genRan(){
    var bits = [];
    for (var i = 0; i < 5; i++) {
        // Math.round(Math.random()) yields either 0 or 1.
        bits.push(Math.round(Math.random()));
    }
    var prediction = net.run(bits);
    ask(bits, prediction);
}

// Shows the network's guess for an input, asks the user whether it is
// correct, trains the network on the answer, then starts the next round.
//   a - the input array that was given to net.run
//   b - the array net.run returned; its first element is the prediction
// A "Y"/"y" answer reinforces the rounded guess, "N"/"n" trains towards the
// other value; any other answer is not counted and triggers no training.
function ask(a,b){
    var array = a,
        guess = b;
    var prediction = guess[0];
    r1.question((wa+ca) + ") input: " + array + " We think: " + prediction + ". Am I correct? (Y/N)", (answer) => {
        var roundGuess = Math.round(prediction);

        if(answer === "Y" || answer === "y"){
            ca++;
            // The output must be an array, just like in the initial training
            // sample; passing the bare number would bring back the scalar
            // output problem this version is meant to fix.
            net.train([
                {input : array, output : [roundGuess]}
            ]);
        }else if(answer === "N" || answer === "n"){
            wa++;
            var opposite;
            switch (roundGuess){
                case 1:
                    opposite = 0;
                    break;
                case 0:
                    opposite = 1;
                    break;
                default:
                    // Reached when the rounded guess is neither 0 nor 1.
                    opposite = null;
            }
            net.train([
                {input : array, output : [opposite]}
            ]);
        }
        console.log("Success percent: " + (100 *ca/(ca+wa)) + "% " + (ca+wa) +" attempts\n\r");
        genRan();
    });

}
// Start the first round.
genRan();