Python Neural Network values converge to 50%
I'm building a neural network to recognise hand-written symbols. I've never built one before, and when I train my network the predictions all converge to 50%.
When I run my code, the error rate decreases until it converges at around 0.5, at which point the predictions for all 21 symbols are equal to 0.5.
My neural network is quite simple:
- It has 3 layers (input, 1 hidden, output)
- Input layer has 400 inputs, 1 for each pixel (20x20 images)
- Hidden layer has 21 neurons
- Output layer has 21 neurons (1 for each symbol)
import numpy as np
import glob
import csv
np.random.seed(0)
# Pixel values for 'C', represents 20x20 image of a 'C' (one line per image row)
input1 = np.array([[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,
                    0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,
                    0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,
                    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]])
# 21 possible outputs, a 1 in labels[9] represents a 'C'
labels = np.array([[0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0]])
# 400 inputs, 1 for each pixel (20x20 images)
w1 = 2*np.random.random((400,21)) - 1
# 21 nodes in hidden layer
w2 = 2*np.random.random((21,21)) - 1
# 21 output nodes (I have to differentiate between 21 symbols)
w3 = 2*np.random.random((21,21)) - 1
learning_rate = 0.01
l1_bias = np.random.rand(1)
l2_bias = np.random.rand(1)
def sigmoid(x):
    x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x))

def sigmoid_der(x):
    return sigmoid(x) * (1 - sigmoid(x))
# Learning part
for x in range(1000):
    # Feed-forward part
    input = input1
    # Multiply input by weights
    l1 = np.dot(input, w1)
    # Add bias
    l1 += l1_bias
    # Squish with sigmoid
    l1 = sigmoid(l1)
    # Repeat for other layers
    l2 = np.dot(l1, w2)
    l2 += l2_bias
    l2 = sigmoid(l2)
    l3 = np.dot(l2, w3)
    l3 = sigmoid(l3)
    # l3 is output layer

    # Backward propagation
    # Calculate error
    error = 1 / 2 * ((labels - l3.T) ** 2)
    # Calculate adjustment for weights for each layer using error and sigmoid_der
    w3_adjustment = np.dot(l2.T, (error * sigmoid_der(l3.T)).T)
    w2_adjustment = np.dot(l1.T, (error * sigmoid_der(l2.T)).T)
    w1_adjustment = np.dot(input.T, (error * sigmoid_der(l1.T)).T)
    # Adjust weights
    w3 = w3 - w3_adjustment * learning_rate
    w2 = w2 - w2_adjustment * learning_rate
    w1 = w1 - w1_adjustment * learning_rate
Does anyone know if I've made some rookie mistake that could be causing my outputs to converge like this? Thanks!
python neural-network
Don't you need a softmax for the last layer to start with?
– Julien
Jan 2 at 3:32
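For context, a softmax layer turns the 21 raw output scores into a probability distribution that sums to 1, rather than 21 independent sigmoid activations. A minimal, numerically stable NumPy sketch of such a layer (illustrative only; the function name and usage line are hypothetical, not from the post above):

import numpy as np

def softmax(z):
    # Subtract the row-wise max before exponentiating for numerical stability;
    # this does not change the result mathematically.
    z = z - np.max(z, axis=1, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=1, keepdims=True)

# Hypothetical usage in the forward pass: l3 = softmax(np.dot(l2, w3))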
And you seem to be training on a single example, how do you expect it to learn anything?
– Julien
Jan 2 at 3:36
@Julien thanks for your reply, I actually have 8000 training examples but I couldn't fit them all into the question, so I just included one to show the format.
– Francis
Jan 2 at 3:37
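On feeding all 8000 examples through training rather than one: a common pattern is to shuffle the data each epoch and update the weights on small batches. A hypothetical sketch, assuming the images are stacked into an array X of shape (8000, 400) and the one-hot labels into Y of shape (8000, 21) (these names and shapes are assumptions, not from the post):

import numpy as np

def iterate_minibatches(X, Y, rng, batch_size=32):
    # Yield shuffled (inputs, labels) batches; one full pass is one epoch.
    order = rng.permutation(len(X))
    for i in range(0, len(X), batch_size):
        idx = order[i:i + batch_size]
        yield X[idx], Y[idx]

# Hypothetical usage, reusing the forward/backward pass from the question on each batch:
# rng = np.random.default_rng(0)
# for epoch in range(100):
#     for batch_x, batch_y in iterate_minibatches(X, Y, rng):
#         ...  # forward pass on batch_x, backprop against batch_y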
As Julien said, you probably need a softmax. You have also used MSE, which is suited to regression problems; for classification problems, cross-entropy would be better suited.
– Oswald
Jan 2 at 3:49
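For reference, cross-entropy measures how much probability the network assigns to the correct class, and it pairs naturally with a softmax output: the gradient at the output layer reduces to prediction minus label. A rough sketch, assuming probs holds softmax outputs and labels is one-hot as in the question (the names probs, eps and delta3 are hypothetical):

import numpy as np

def cross_entropy(probs, labels, eps=1e-12):
    # Mean negative log-likelihood of the true class; eps avoids log(0).
    return -np.mean(np.sum(labels * np.log(probs + eps), axis=1))

# With softmax + cross-entropy, the output-layer error term simplifies to
# delta3 = probs - labels            # shape (1, 21) for a single example
# and the corresponding weight update is np.dot(l2.T, delta3) * learning_rate.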