I'm stuck. My code gives me an error that I just don't understand, probably due to my somewhat limited Python skills:
training_set\dogs\dog.108.jpg
training_set\dogs\dog.1080.jpg
training_set\dogs\dog.1081.jpg
training_set\dogs\dog.1082.jpg
training_set\dogs\dog.1083.jpg
training_set\dogs\dog.1084.jpg
training_set\dogs\dog.1085.jpg
training_set\dogs\dog.1086.jpg
training_set\dogs\dog.1087.jpg
Traceback (most recent call last):
RuntimeError: Given groups=1, weight of size [20, 3, 5, 5], expected input[198, 64, 64, 3] to have 3 channels, but got 64 channels instead
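For reference, the torch.nn.Conv2d docs say the input should be shaped (N, C, H, W), channels first. Here is a tiny standalone snippet (not from my project, just to illustrate the error) that raises the same exception with the same shapes:

import torch
import torch.nn as nn

# the same layer as conv1 in my code below
conv = nn.Conv2d(in_channels=3, out_channels=20, kernel_size=(5, 5))

# a dummy batch laid out as (N, H, W, C), like my numpy array
x = torch.zeros(198, 64, 64, 3)

# raises: RuntimeError: Given groups=1, weight of size [20, 3, 5, 5],
# expected input[198, 64, 64, 3] to have 3 channels, but got 64 channels instead
conv(x)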
The code is:
import numpy as np
import math
import cv2
import random
import torch
from torch.autograd import Variable
import torch.nn as nn
import os.path
from os import path
img_width = 64
num_channels = 3
#num_input_components = img_width*img_width*num_channels
num_output_components = 1
num_epochs = 100
learning_rate = 0.00001
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        # call the parent constructor
        super(Net, self).__init__()

        # initialize first set of CONV => RELU => POOL layers
        self.conv1 = nn.Conv2d(in_channels=num_channels, out_channels=20, kernel_size=(5, 5))
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))

        # initialize second set of CONV => RELU => POOL layers
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=(5, 5))
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))

        # initialize first (and only) set of FC => RELU layers
        self.fc1 = nn.Linear(in_features=800, out_features=500)
        self.relu3 = nn.ReLU()

        # initialize our softmax classifier
        self.fc2 = nn.Linear(in_features=500, out_features=num_output_components)
        self.logSoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        # pass the input through our first set of CONV => RELU => POOL layers
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)

        # pass the output from the previous layer through the second
        # set of CONV => RELU => POOL layers
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)

        # flatten the output from the previous layer and pass it
        # through our only set of FC => RELU layers
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.relu3(x)

        # pass the output to our softmax classifier to get our output predictions
        x = self.fc2(x)
        output = self.logSoftmax(x)

        # return the output predictions
        return output
class float_image:
    def __init__(self, img):
        self.img = img

class image_type:
    def __init__(self, img_type, float_img):
        self.img_type = img_type
        self.float_img = float_img
net = Net()

if False: #path.exists('weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth'):
    net.load_state_dict(torch.load('weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth'))
    print("loaded file successfully")
else:
    print("training...")
    all_train_files = []

    file_count = 0
    path = 'training_set\\cats\\'
    filenames = next(os.walk(path))[2]

    for f in filenames:
        file_count = file_count + 1

        if file_count >= 100:
            break

        print(path + f)

        img = cv2.imread(path + f).astype(np.float32)
        #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
        flat_file = res / 255.0 #np.asarray(res).flatten() / 255.0
        all_train_files.append(image_type(0, flat_file))

    file_count = 0
    path = 'training_set\\dogs\\'
    filenames = next(os.walk(path))[2]

    for f in filenames:
        file_count = file_count + 1

        if file_count >= 100:
            break

        print(path + f)

        img = cv2.imread(path + f).astype(np.float32)
        #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
        flat_file = res / 255.0 #np.asarray(res).flatten() / 255.0
        all_train_files.append(image_type(1, flat_file))
    optimizer = torch.optim.Adam(net.parameters(), lr = learning_rate)
    loss_func = torch.nn.MSELoss()

    batch = np.zeros((len(all_train_files), img_width, img_width, num_channels), dtype=np.float32)
    ground_truth = np.zeros((len(all_train_files), 1), dtype=np.float32)

    random.shuffle(all_train_files)

    count = 0

    for i in all_train_files:
        batch[count] = i.float_img
        ground_truth[count] = i.img_type
        count = count + 1

    for epoch in range(num_epochs):
        x = Variable(torch.from_numpy(batch))
        y = Variable(torch.from_numpy(ground_truth))

        prediction = net(x)
        loss = loss_func(prediction, y)
        print(epoch, loss)

        optimizer.zero_grad() # clear gradients for next train
        loss.backward()       # backpropagation, compute gradients
        optimizer.step()      # apply gradients

    #torch.save(net.state_dict(), 'weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth')
path = 'test_set\\cats\\'
filenames = next(os.walk(path))[2]

cat_count = 0
total_count = 0

for f in filenames:
    # print(path + f)
    img = cv2.imread(path + f).astype(np.float32)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
    flat_file = res / 255.0 # np.asarray(res).flatten() / 255.0
    batch = torch.from_numpy(flat_file)
    prediction = net(Variable(batch))

    if prediction < 0.5:
        cat_count = cat_count + 1

    total_count = total_count + 1
    # print(batch)
    # print(prediction)

print(cat_count / total_count)
print(total_count)
path = 'test_set\\dogs\\'
filenames = next(os.walk(path))[2]

dog_count = 0
total_count = 0

for f in filenames:
    # print(path + f)
    img = cv2.imread(path + f).astype(np.float32)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
    flat_file = res / 255.0 # np.asarray(res).flatten() / 255.0
    batch = torch.from_numpy(flat_file)
    prediction = net(Variable(batch))

    if prediction > 0.5:
        dog_count = dog_count + 1

    total_count = total_count + 1
    # print(batch)
    # print(prediction)

print(dog_count / total_count)
print(total_count)