Binary vs one hot

Shawn Halayka · 2023-07-20T20:30:00

I’m making a cat vs dog image classification using PyTorch. Should I have only one output variable, or should I have two (aka one hot)?

Artificial Intelligence Programming binary onehot

Started by taby July 01, 2023 04:56 PM

52 comments, last by taby 1 year, 4 months ago

fleabay

1,327

July 07, 2023 02:06 AM

I did some searching and found this.

https://discuss.pytorch.org/t/runtimeerror-given-groups-1-weight-64-3-3-3-so-expected-input-16-64-256-256-to-have-3-channels-but-got-64-channels-instead/12765

🙂🙂🙂🙂🙂<←The tone posse, ready for action.

taby

Author

1,527

July 07, 2023 03:25 PM

Thanks fleabay!

I added in the following statement:

flat_file = np.transpose(flat_file, (2, 0, 1))

Now I'm getting a different error:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (198x10816 and 198x128)

The full code is:




import numpy as np
import math
import cv2
import random
import torch
from torch import flatten
from torch.autograd import Variable
import torch.nn as nn

import os.path
from os import path



img_width = 64
num_channels = 3

num_input_components = img_width*img_width*num_channels
num_output_components = 1

num_epochs = 100
learning_rate = 0.00001



import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

	def __init__(self):
		super(Net, self).__init__()
		self.conv_layer1 = nn.Conv2d(in_channels=num_channels, out_channels=32, kernel_size=3)
		self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
		self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
		
		self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
		self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
		self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
		
		self.fc1 = nn.Linear(1600, 128)
		self.relu1 = nn.ReLU()
		self.fc2 = nn.Linear(128, num_output_components)
    
		# Progresses data across layers    
	def forward(self, x):
		out = self.conv_layer1(x)
		out = self.conv_layer2(out)
		out = self.max_pool1(out)
		
		out = self.conv_layer3(out)
		out = self.conv_layer4(out)
		out = self.max_pool2(out)
				
		out = out.reshape(out.size(0), -1)
		
		out = self.fc1(out)
		out = self.relu1(out)
		out = self.fc2(out)
		
		return out




"""
def __init__(self):
		# call the parent constructor
		super(Net, self).__init__()

		# initialize first set of CONV => RELU => POOL layers
		self.conv1 = nn.Conv2d(in_channels=num_channels, out_channels=20, kernel_size=(5, 5))
		self.relu1 = nn.ReLU()
		self.maxpool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
		# initialize second set of CONV => RELU => POOL layers
		self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=(5, 5))
		self.relu2 = nn.ReLU()
		self.maxpool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
		# initialize first (and only) set of FC => RELU layers
		self.fc1 = nn.Linear(in_features=800, out_features=500)
		self.relu3 = nn.ReLU()
		# initialize our softmax classifier
		self.fc2 = nn.Linear(in_features=500, out_features=num_output_components)
		self.logSoftmax = nn.LogSoftmax(dim=1)

	def forward(self, x):
		# pass the input through our first set of CONV => RELU =>
		# POOL layers
		x = self.conv1(x)
		x = self.relu1(x)
		x = self.maxpool1(x)
		# pass the output from the previous layer through the second
		# set of CONV => RELU => POOL layers
		x = self.conv2(x)
		x = self.relu2(x)
		x = self.maxpool2(x)
		# flatten the output from the previous layer and pass it
		# through our only set of FC => RELU layers
		x = flatten(x, 1)
		x = self.fc1(x)
		x = self.relu3(x)
		# pass the output to our softmax classifier to get our output
		# predictions
		x = self.fc2(x)
		output = self.logSoftmax(x)
		# return the output predictions
		return output


"""


"""
	def __init__(self):
		super(Net, self).__init__()
		self.hidden1 = torch.nn.Linear(num_input_components, 8192)
		self.hidden2 = torch.nn.Linear(8192, 1024) 
		self.hidden3 = torch.nn.Linear(1024, 128)
		self.predict = torch.nn.Linear(128, num_output_components)

	def forward(self, x):
		x = torch.tanh(self.hidden1(x))		
		x = torch.tanh(self.hidden2(x))
		x = torch.tanh(self.hidden3(x))
		x = self.predict(x)    # linear output
		return x
"""



class float_image:

	def __init__(self, img):
		self.img = img

class image_type:

	def __init__(self, img_type, float_img):
		self.img_type = img_type
		self.float_img = float_img





net = Net()


if False: #path.exists('weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth'):
	net.load_state_dict(torch.load('weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth'))
	print("loaded file successfully")
else:
	print("training...")





	all_train_files = []

	file_count = 0

	path = 'training_set\\cats\\'
	filenames = next(os.walk(path))[2]

	for f in filenames:

		file_count = file_count + 1
		if file_count >= 100:
			break;

		print(path + f)
		img = cv2.imread(path + f).astype(np.float32)
		#img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
		res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
		flat_file = res / 255.0 #np.asarray(res).flatten() / 255.0

		flat_file = np.transpose(flat_file, (2, 0, 1))

		all_train_files.append(image_type(0, flat_file))


	file_count = 0

	path = 'training_set\\dogs\\'
	filenames = next(os.walk(path))[2]

	for f in filenames:

		file_count = file_count + 1
		if file_count >= 100:
			break;

		print(path + f)
		img = cv2.imread(path + f).astype(np.float32)
		#img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
		res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
		flat_file = res / 255.0 #np.asarray(res).flatten() / 255.0

		
		flat_file = np.transpose(flat_file, (2, 0, 1))

		all_train_files.append(image_type(1, flat_file))




	optimizer = torch.optim.Adam(net.parameters(), lr = learning_rate)
	loss_func = torch.nn.MSELoss()



	
	batch = np.zeros((len(all_train_files), num_channels, img_width, img_width), dtype=np.float32)
	ground_truth = np.zeros((len(all_train_files), 1), dtype=np.float32)

	random.shuffle(all_train_files)

	count = 0

	for i in all_train_files:

		batch[count] = i.float_img
		ground_truth[count] = i.img_type
		count = count + 1

	for epoch in range(num_epochs):

		x = Variable(torch.from_numpy(batch))
		y = Variable(torch.from_numpy(ground_truth))

		prediction = net(x)	 
		loss = loss_func(prediction, y)

		print(epoch, loss)

		optimizer.zero_grad()	 # clear gradients for next train
		loss.backward()		 # backpropagation, compute gradients
		optimizer.step()		# apply gradients



	#torch.save(net.state_dict(), 'weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth')



path = 'test_set\\cats\\'
filenames = next(os.walk(path))[2]

cat_count = 0
total_count = 0

for f in filenames:

#	print(path + f)
	img = cv2.imread(path + f).astype(np.float32)
	img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
	flat_file = res / 255.0# np.asarray(res).flatten() / 255.0
		

	batch = torch.from_numpy(flat_file)

	prediction = net(Variable(batch))

	if prediction < 0.5:
		cat_count = cat_count + 1

	total_count = total_count + 1
#	print(batch)
#	print(prediction)

print(cat_count / total_count)
print(total_count)



path = 'test_set\\dogs\\'
filenames = next(os.walk(path))[2]

dog_count = 0
total_count = 0

for f in filenames:

#	print(path + f)
	img = cv2.imread(path + f).astype(np.float32)
	img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
	flat_file = res / 255.0 # np.asarray(res).flatten() / 255.0

	batch = torch.from_numpy(flat_file)

	prediction = net(Variable(batch))

	if prediction > 0.5:
		dog_count = dog_count + 1

	total_count = total_count + 1
#	print(batch)
#	print(prediction)

print(dog_count / total_count)
print(total_count)

taby

Author

1,527

July 08, 2023 04:25 PM

So I fine-tuned the size of the tensors, and now I'm getting this error:

Traceback (most recent call last):
  File "img_train.py", line 197, in <module>
    prediction = net(x)

...

RuntimeError: mat1 and mat2 shapes cannot be multiplied (198x8450 and 198x8450)

I'm flabbergasted. Why don't these two shapes multiply, when they are the same size?


import numpy as np
import math
import cv2
import random
import torch
from torch import flatten
from torch.autograd import Variable
import torch.nn as nn

import os.path
from os import path



img_width = 64
num_channels = 3

num_input_components = img_width*img_width*num_channels
num_output_components = 1

num_epochs = 100
learning_rate = 0.00001



import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

	def __init__(self):
		# call the parent constructor
		super(Net, self).__init__()

		# initialize first set of CONV => RELU => POOL layers
		self.conv1 = nn.Conv2d(in_channels=num_channels, out_channels=20, kernel_size=(5, 5))
		self.relu1 = nn.ReLU()
		self.maxpool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
		# initialize second set of CONV => RELU => POOL layers
		self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=(5, 5))
		self.relu2 = nn.ReLU()
		self.maxpool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
		# initialize first (and only) set of FC => RELU layers
		self.fc1 = nn.Linear(in_features=198, out_features=8450)
		self.relu3 = nn.ReLU()
		# initialize our softmax classifier
		self.fc2 = nn.Linear(in_features=500, out_features=num_output_components)
		self.logSoftmax = nn.LogSoftmax(dim=1)

	def forward(self, x):
		# pass the input through our first set of CONV => RELU =>
		# POOL layers
		x = self.conv1(x)
		x = self.relu1(x)
		x = self.maxpool1(x)
		# pass the output from the previous layer through the second
		# set of CONV => RELU => POOL layers
		x = self.conv2(x)
		x = self.relu2(x)
		x = self.maxpool2(x)
		# flatten the output from the previous layer and pass it
		# through our only set of FC => RELU layers
		x = flatten(x, 1)
		x = self.fc1(x)
		x = self.relu3(x)
		# pass the output to our softmax classifier to get our output
		# predictions
		x = self.fc2(x)
		output = self.logSoftmax(x)
		# return the output predictions
		return output





"""
	def __init__(self):
		super(Net, self).__init__()
		self.hidden1 = torch.nn.Linear(num_input_components, 8192)
		self.hidden2 = torch.nn.Linear(8192, 1024) 
		self.hidden3 = torch.nn.Linear(1024, 128)
		self.predict = torch.nn.Linear(128, num_output_components)

	def forward(self, x):
		x = torch.tanh(self.hidden1(x))		
		x = torch.tanh(self.hidden2(x))
		x = torch.tanh(self.hidden3(x))
		x = self.predict(x)    # linear output
		return x
"""



class float_image:

	def __init__(self, img):
		self.img = img

class image_type:

	def __init__(self, img_type, float_img):
		self.img_type = img_type
		self.float_img = float_img





net = Net()


if False: #path.exists('weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth'):
	net.load_state_dict(torch.load('weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth'))
	print("loaded file successfully")
else:
	print("training...")





	all_train_files = []

	file_count = 0

	path = 'training_set\\cats\\'
	filenames = next(os.walk(path))[2]

	for f in filenames:

		file_count = file_count + 1
		if file_count >= 100:
			break;

		print(path + f)
		img = cv2.imread(path + f).astype(np.float32)
		#img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
		res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
		flat_file = res / 255.0 #np.asarray(res).flatten() / 255.0

		flat_file = np.transpose(flat_file, (2, 0, 1))

		all_train_files.append(image_type(0, flat_file))


	file_count = 0

	path = 'training_set\\dogs\\'
	filenames = next(os.walk(path))[2]

	for f in filenames:

		file_count = file_count + 1
		if file_count >= 100:
			break;

		print(path + f)
		img = cv2.imread(path + f).astype(np.float32)
		#img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
		res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
		flat_file = res / 255.0 #np.asarray(res).flatten() / 255.0

		
		flat_file = np.transpose(flat_file, (2, 0, 1))

		all_train_files.append(image_type(1, flat_file))




	optimizer = torch.optim.Adam(net.parameters(), lr = learning_rate)
	loss_func = torch.nn.MSELoss()



	
	batch = np.zeros((len(all_train_files), num_channels, img_width, img_width), dtype=np.float32)
	ground_truth = np.zeros((len(all_train_files), 1), dtype=np.float32)

	random.shuffle(all_train_files)

	count = 0

	for i in all_train_files:

		batch[count] = i.float_img
		ground_truth[count] = i.img_type
		count = count + 1

	for epoch in range(num_epochs):

		x = Variable(torch.from_numpy(batch))
		y = Variable(torch.from_numpy(ground_truth))

		prediction = net(x)	 
		loss = loss_func(prediction, y)

		print(epoch, loss)

		optimizer.zero_grad()	 # clear gradients for next train
		loss.backward()		 # backpropagation, compute gradients
		optimizer.step()		# apply gradients



	#torch.save(net.state_dict(), 'weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth')



path = 'test_set\\cats\\'
filenames = next(os.walk(path))[2]

cat_count = 0
total_count = 0

for f in filenames:

#	print(path + f)
	img = cv2.imread(path + f).astype(np.float32)
	img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
	flat_file = res / 255.0# np.asarray(res).flatten() / 255.0
		

	batch = torch.from_numpy(flat_file)

	prediction = net(Variable(batch))

	if prediction < 0.5:
		cat_count = cat_count + 1

	total_count = total_count + 1
#	print(batch)
#	print(prediction)

print(cat_count / total_count)
print(total_count)



path = 'test_set\\dogs\\'
filenames = next(os.walk(path))[2]

dog_count = 0
total_count = 0

for f in filenames:

#	print(path + f)
	img = cv2.imread(path + f).astype(np.float32)
	img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
	flat_file = res / 255.0 # np.asarray(res).flatten() / 255.0

	batch = torch.from_numpy(flat_file)

	prediction = net(Variable(batch))

	if prediction > 0.5:
		dog_count = dog_count + 1

	total_count = total_count + 1
#	print(batch)
#	print(prediction)

print(dog_count / total_count)
print(total_count)

alvaro

21,607

July 08, 2023 06:47 PM

In matrix multiplication the number of columns of the first matrix should match the number of rows of the second matrix.

taby

Author

1,527

July 08, 2023 10:58 PM

Sorry, I totally forgot.

taby

Author

1,527

July 09, 2023 12:57 AM

I think that I need to read a couple of books on the subject, before I proceed. Thanks all!

taby

Author

1,527

July 10, 2023 12:50 AM

OK, so I've made some progress.

I'm now using one-hot.

I just don't understand why 8*img_width*img_width comes into play? Why does the magic number 8 come into play?

import numpy as np
import math
import cv2
import random
import torch
from torch import flatten
from torch.autograd import Variable
import torch.nn as nn

import os.path
from os import path



img_width = 32
num_channels = 3

num_input_components = img_width*img_width*num_channels
num_output_components = 2

num_epochs = 100
learning_rate = 0.0001


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class Net(nn.Module):

	def __init__(self, num_channels, num_output_components, all_train_files_len):
		# call the parent constructor
		super(Net, self).__init__()
		self.conv1 = nn.Conv2d(num_channels, img_width, kernel_size=(3,3), stride=1, padding=1)
		self.act1 = nn.ReLU()
		self.drop1 = nn.Dropout(0.3)
 
		self.conv2 = nn.Conv2d(img_width, img_width, kernel_size=(3,3), stride=1, padding=1)
		self.act2 = nn.ReLU()
		self.pool2 = nn.MaxPool2d(kernel_size=(2, 2))
 
		self.flat = nn.Flatten()
 
		self.fc3 = nn.Linear(8*img_width*img_width, 512)
		self.act3 = nn.ReLU()
		self.drop3 = nn.Dropout(0.5)
 
		self.fc4 = nn.Linear(512, num_output_components)

	def forward(self, x):
		# input 3x32x32, output 32x32x32
		x = self.act1(self.conv1(x))
		x = self.drop1(x)
		# input 32x32x32, output 32x32x32
		x = self.act2(self.conv2(x))
		# input 32x32x32, output 32x16x16
		x = self.pool2(x)
		# input 32x16x16, output 8192
		x = self.flat(x)
		# input 8192, output 512
		x = self.act3(self.fc3(x))
		x = self.drop3(x)
		# input 512, output 10
		x = self.fc4(x)
		return x


"""
	def __init__(self):
		super(Net, self).__init__()
		self.hidden1 = torch.nn.Linear(num_input_components, 8192)
		self.hidden2 = torch.nn.Linear(8192, 1024) 
		self.hidden3 = torch.nn.Linear(1024, 128)
		self.predict = torch.nn.Linear(128, num_output_components)

	def forward(self, x):
		x = torch.tanh(self.hidden1(x))		
		x = torch.tanh(self.hidden2(x))
		x = torch.tanh(self.hidden3(x))
		x = self.predict(x)    # linear output
		return x
"""



class float_image:

	def __init__(self, img):
		self.img = img

class image_type:

	def __init__(self, img_type, float_img):
		self.img_type = img_type
		self.float_img = float_img




if False: #path.exists('weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth'):
	net.load_state_dict(torch.load('weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth'))
	print("loaded file successfully")
else:
	print("training...")





	all_train_files = []

	file_count = 0

	path = 'training_set\\cats\\'
	filenames = next(os.walk(path))[2]

	for f in filenames:

		file_count = file_count + 1
		if file_count >= 10000:
			break;

		print(path + f)
		img = cv2.imread(path + f).astype(np.float32)
		res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
		flat_file = res / 255.0
		flat_file = np.transpose(flat_file, (2, 0, 1))
		all_train_files.append(image_type(0, flat_file))


	file_count = 0

	path = 'training_set\\dogs\\'
	filenames = next(os.walk(path))[2]

	for f in filenames:

		file_count = file_count + 1
		if file_count >= 10000:
			break;


		print(path + f)
		img = cv2.imread(path + f).astype(np.float32)
		res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
		flat_file = res / 255.0
		flat_file = np.transpose(flat_file, (2, 0, 1))
		all_train_files.append(image_type(1, flat_file))




	


	net = Net(num_channels, num_output_components, len(all_train_files))



	optimizer = torch.optim.Adam(net.parameters(), lr = learning_rate)
	loss_func = torch.nn.MSELoss()



	
	batch = np.zeros((len(all_train_files), num_channels, img_width, img_width), dtype=np.float32)
	ground_truth = np.zeros((len(all_train_files), num_output_components), dtype=np.float32)

	random.shuffle(all_train_files)

	count = 0

	for i in all_train_files:

		batch[count] = i.float_img
		
		if i.img_type == 0:
			ground_truth[count][0] = 1
			ground_truth[count][1] = 0
		elif i.img_type == 1:
			ground_truth[count][0] = 0
			ground_truth[count][1] = 1

		count = count + 1
	
	x = Variable(torch.from_numpy(batch))
	y = Variable(torch.from_numpy(ground_truth))
	
	for epoch in range(num_epochs):

		prediction = net(x)	 
		loss = loss_func(prediction, y)

		print(epoch, loss)

		optimizer.zero_grad()	 # clear gradients for next train
		loss.backward()		 # backpropagation, compute gradients
		optimizer.step()		# apply gradients



	#torch.save(net.state_dict(), 'weights_' + str(num_input_components) + '_' + str(num_epochs) + '.pth')



path = 'test_set\\cats\\'
filenames = next(os.walk(path))[2]

cat_count = 0
total_count = 0

for f in filenames:

#	print(path + f)
	img = cv2.imread(path + f).astype(np.float32)
	res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
	flat_file = res / 255.0
	flat_file = np.transpose(flat_file, (2, 0, 1))

	batch = torch.zeros((1, num_channels, img_width, img_width), dtype=torch.float32)
	batch[0] = torch.from_numpy(flat_file)

	prediction = net(Variable(batch))

	if prediction[0][0] > prediction[0][1]:
		cat_count = cat_count + 1

	total_count = total_count + 1
#	print(batch)
#		print(prediction)

print(cat_count / total_count)
print(total_count)





path = 'test_set\\dogs\\'
filenames = next(os.walk(path))[2]

dog_count = 0
total_count = 0

for f in filenames:

#	print(path + f)
	img = cv2.imread(path + f).astype(np.float32)
	res = cv2.resize(img, dsize=(img_width, img_width), interpolation=cv2.INTER_LINEAR)
	flat_file = res / 255.0
	flat_file = np.transpose(flat_file, (2, 0, 1))

	batch = torch.zeros((1, num_channels, img_width, img_width), dtype=torch.float32)
	batch[0] = torch.from_numpy(flat_file)

	prediction = net(Variable(batch))

	if prediction[0][0] < prediction[0][1]:
		dog_count = dog_count + 1

	total_count = total_count + 1
#	print(batch)
#		print(prediction)

print(dog_count / total_count)
print(total_count)

alvaro

21,607

July 10, 2023 06:39 AM

I just don't understand why 8*img_width*img_width comes into play? Why does the magic number 8 come into play?

You start with a 3x(32x32) image. Then you do a convolution that brings that up to 32x(32x32). At some point you do a MaxPool2d operation that shrinks the image to 16x16, so now you have 32x(16x16). After you flatten, that's just 8192 numbers, forgetting the structure as an image. That happens to be 8*32*32, but it's not a very good way to look at it. As you said, the “8” is kind of meaningless (it's the number of channels you are using divided by 4 because one of your operations shrank the image by a factor of 2 in each dimension).

taby

Author

1,527

July 10, 2023 03:17 PM

Do you have a better example code? There are several on the Internet, but I haven't found one that works sort of perfectly.

taby

Author

1,527

July 10, 2023 07:07 PM

I found a different code, but it only gets it right like 70% of the time, which is still no good.

Is is the data that makes it so underwhelming?

class Net(torch.nn.Module):
    def __init__(self, num_channels, num_output_components, all_train_files_len):
        super().__init__()
        self.model = torch.nn.Sequential(
            #Input = 3 x 32 x 32, Output = 32 x 32 x 32
            torch.nn.Conv2d(in_channels = num_channels, out_channels = 32, kernel_size = 3, padding = 1), 
            torch.nn.ReLU(),
            #Input = 32 x 32 x 32, Output = 32 x 16 x 16
            torch.nn.MaxPool2d(kernel_size=2),
  
            #Input = 32 x 16 x 16, Output = 64 x 16 x 16
            torch.nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 3, padding = 1),
            torch.nn.ReLU(),
            #Input = 64 x 16 x 16, Output = 64 x 8 x 8
            torch.nn.MaxPool2d(kernel_size=2),
              
            #Input = 64 x 8 x 8, Output = 64 x 8 x 8
            torch.nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = 3, padding = 1),
            torch.nn.ReLU(),
            #Input = 64 x 8 x 8, Output = 64 x 4 x 4
            torch.nn.MaxPool2d(kernel_size=2),
  
            torch.nn.Flatten(),
            torch.nn.Linear(64*4*4, all_train_files_len),
            torch.nn.ReLU(),
            torch.nn.Linear(all_train_files_len, num_output_components)
        )
  
    def forward(self, x):
        return self.model(x)

Binary vs one hot

This topic is closed to new replies.

Popular Topics

Recommended Tutorials

Binary vs one hot

This topic is closed to new replies.

Popular Topics

Recommended Tutorials

Reticulating splines