Muhammad al-Xorazmiy nomidagi Toshkent axborot texnologiyalari universiteti Kiberxavfsizlik fakulteti

O’zbekiston Respublikasi Raqamli texnologiyalar vazirligi

Mashinali o’qitishga kirish
Mustaqil ish

Guruh: 715-21
Bajarildi: Sultonov Diyor
Fan o’qituvchisi: Ochilov Mannon

Toshkent 2023


import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
from random import shuffle
from tqdm import tqdm
from PIL import Image
import pandas as pd

# Tabular example: train a random forest on population.csv.
# These imports are local to this cell because the names were used here
# without being imported anywhere earlier in the file.
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load the data.
data = pd.read_csv('population.csv')
X = data.drop(['Class'], axis=1)  # features: everything except the label column
y = data['Class']  # labels

# Split the data into training and test sets (80% / 20%).
seed = 1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

# Train the model; the number of trees in the forest is set to 100.
clf = RandomForestClassifier(n_estimators=100, random_state=seed)
# NOTE(review): the export kept only the tail ", y_train)" of this call;
# restored as a fit on the training split - confirm.
clf.fit(X_train, y_train)

# Dataset root on Google Drive: /content/drive/MyDrive/Room/images/images
from google.colab import drive

# NOTE(review): the mount call itself was lost in the export; the recorded
# output "Mounted at /content/drive" implies this mount point - confirm.
drive.mount('/content/drive')
# Recorded output: Mounted at /content/drive

# Directories holding the training images of each class.
train_messy = "/content/drive/MyDrive/Room/images/images/train/messy"
train_clean = "/content/drive/MyDrive/Room/images/images/train/clean"

# Directories holding the validation images of each class.
test_messy = "/content/drive/MyDrive/Room/images/images/val/messy"
test_clean = "/content/drive/MyDrive/Room/images/images/val/clean"

# Every image is resized to image_size x image_size pixels.
image_size = 128

# NOTE(review): the original line also carried the argument tails of two
# calls whose function name was lost (presumably image previews - confirm):
#   (...)("/content/drive/MyDrive/Room/images/images/train/messy/76.png")
#   (...)("/content/drive/MyDrive/Room/images/images/train/clean/76.png")

# Walk both training folders; after each loop only the LAST image read
# remains in np_img / np_img2, and those two are displayed side by side.
for image in tqdm(os.listdir(train_messy)):
    path = os.path.join(train_messy, image)
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (image_size, image_size)).flatten()
    # NOTE(review): this assignment was missing from the export; np_img is
    # used by the plot below, so it is restored here - confirm.
    np_img = np.asarray(img)

for image2 in tqdm(os.listdir(train_clean)):
    path = os.path.join(train_clean, image2)
    img2 = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    img2 = cv2.resize(img2, (image_size, image_size)).flatten()
    np_img2 = np.asarray(img2)  # restored for the same reason as np_img

# The images were flattened above, so reshape back to 2-D for display.
plt.subplot(1, 2, 1)
plt.imshow(np_img.reshape(image_size, image_size))
plt.subplot(1, 2, 2)
plt.imshow(np_img2.reshape(image_size, image_size))
plt.title("Messy and Clean Rooms in GrayScale")
# Recorded notebook output:
# 100%|██████████| 96/96 [01:15<00:00, 1.27it/s]
# 100%|██████████| 96/96 [01:12<00:00, 1.33it/s]
# Text(0.5, 1.0, 'Messy and Clean Rooms in GrayScale')

def train_data():
    """Load every training image as a grayscale image_size x image_size array.

    Reads all files in the module-level ``train_messy`` directory, then all
    files in ``train_clean``, and stacks them in that order.

    Returns:
        numpy array stacked along axis 0: messy images first, clean after.
    """
    train_data_messy = []
    for image1 in tqdm(os.listdir(train_messy)):
        # Use the loop variable: the original joined the stale global
        # `image`, which loaded the same file on every iteration.
        path = os.path.join(train_messy, image1)
        img1 = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img1 = cv2.resize(img1, (image_size, image_size))
        train_data_messy.append(img1)

    train_data_clean = []
    for image2 in tqdm(os.listdir(train_clean)):
        path = os.path.join(train_clean, image2)
        img2 = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img2 = cv2.resize(img2, (image_size, image_size))
        train_data_clean.append(img2)

    return np.concatenate(
        (np.asarray(train_data_messy), np.asarray(train_data_clean)), axis=0
    )
def test_data():
    """Load every validation image as a grayscale image_size x image_size array.

    Reads all files in the module-level ``test_messy`` directory, then all
    files in ``test_clean``, and stacks them in that order.

    Returns:
        numpy array stacked along axis 0: messy images first, clean after.
    """
    test_data_messy = []
    for image1 in tqdm(os.listdir(test_messy)):
        path = os.path.join(test_messy, image1)
        img1 = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img1 = cv2.resize(img1, (image_size, image_size))
        # The export lost the append, so the list stayed empty.
        test_data_messy.append(img1)

    test_data_clean = []
    for image2 in tqdm(os.listdir(test_clean)):
        path = os.path.join(test_clean, image2)
        img2 = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img2 = cv2.resize(img2, (image_size, image_size))
        test_data_clean.append(img2)

    return np.concatenate(
        (np.asarray(test_data_messy), np.asarray(test_data_clean)), axis=0
    )
# Load both image sets. These assignments rebind the loader function names
# to the loaded arrays, so the loaders cannot be called again afterwards.
train_data = train_data()
test_data = test_data()
# Recorded notebook output:
# 100%|██████████| 96/96 [00:00<00:00, 162.29it/s]
# 100%|██████████| 96/96 [00:00<00:00, 161.09it/s]
# 100%|██████████| 10/10 [00:07<00:00, 1.29it/s]
# 100%|██████████| 10/10 [00:07<00:00, 1.30it/s]
# array([[[219, 220, 228, ..., 145, 145, 145], ..., [ 44, 45, 53, ..., 18, 21, 17]],
#        ...,
#        [[171, 173, 173, ..., 131, 149, 154], ..., [ 48, 48, 97, ..., 107, 111, 117]]], dtype=uint8)
# (the displayed images within each class were identical in that run)

# NOTE(review): the definitions of x_data, norm_min and norm_max were lost
# in the export. Reconstructed from the recorded shape (212, 128, 128) =
# 192 training + 20 validation images, and from the min-max formula below
# - confirm.
x_data = np.concatenate((train_data, test_data), axis=0)
norm_min = np.min(x_data)
norm_max = np.max(x_data)
x_data = (x_data-norm_min)/(norm_max-norm_min)

# Recorded notebook output: (20, 128, 128)

# Labels follow the stacking order of the loaders: messy = 1, clean = 0.
z1 = np.zeros(96)
o1 = np.ones(96)
Y_train = np.concatenate((o1, z1), axis=0)
z = np.zeros(10)
o = np.ones(10)
Y_test = np.concatenate((o, z), axis=0)
# Recorded notebook output:
# array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

# NOTE(review): y_data was also missing; a column vector matches the
# recorded "Y shape: (212, 1)" - confirm.
y_data = np.concatenate((Y_train, Y_test), axis=0).reshape(x_data.shape[0], 1)

print("X shape: " , x_data.shape)
print("Y shape: " , y_data.shape)
# Recorded notebook output:
# X shape: (212, 128, 128)
# Y shape: (212, 1)
from sklearn.model_selection import train_test_split

# Hold out 15% of the combined data for testing.
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.15, random_state=42)

number_of_train = x_train.shape[0]
number_of_test = x_test.shape[0]

# Flatten each 2-D image into a single row of pixels.
x_train_flatten = x_train.reshape(number_of_train, x_train.shape[1]*x_train.shape[2])
x_test_flatten = x_test.reshape(number_of_test, x_test.shape[1]*x_test.shape[2])

print("X train flatten",x_train_flatten.shape)
print("X test flatten",x_test_flatten.shape)
# Recorded notebook output:
# X train flatten (180, 16384)
# X test flatten (32, 16384)

# Transpose so that columns are samples, which is the layout the
# hand-written logistic regression below indexes (x.shape[1] = samples).
x_train = x_train_flatten.T
x_test = x_test_flatten.T
y_test = y_test.T
y_train = y_train.T

print("x train: ",x_train.shape)
print("x test: ",x_test.shape)
print("y train: ",y_train.shape)
print("y test: ",y_test.shape)
# Recorded notebook output:
# x train: (16384, 180)
# x test: (16384, 32)
# y train: (1, 180)
# y test: (1, 32)
def initialize_weights_and_bias(dimension):
    """Create the starting parameters for logistic regression.

    Args:
        dimension: number of input features (rows of the weight vector).

    Returns:
        (w, b): ``w`` is a (dimension, 1) array filled with 0.01 and ``b``
        is the float 0.0.
    """
    w = np.full((dimension, 1), 0.01)
    b = 0.0
    return w, b

def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), applied element-wise.

    Args:
        z: scalar or numpy array.

    Returns:
        Value(s) between 0 and 1 with the same shape as ``z``.
    """
    y_head = 1/(1+np.exp(-z))
    return y_head

def forward_backward_propagation(w,b,x_train,y_train):
    """Compute the cross-entropy cost and its gradients for one pass.

    Args:
        w: weight column vector, one row per feature.
        b: scalar bias.
        x_train: feature matrix with one COLUMN per sample
            (``x_train.shape[1]`` is used as the sample count).
        y_train: 0/1 labels laid out as a row, one entry per sample.

    Returns:
        (cost, gradients): ``cost`` is the mean loss over the samples and
        ``gradients`` is a dict with keys "derivative_weight" and
        "derivative_bias".
    """
    sample_count = x_train.shape[1]

    # forward propagation
    # NOTE(review): the export truncated this call to "z =,x_train) + b";
    # restored as np.dot(w.T, x_train), the product consistent with a
    # column-vector w and column-wise samples - confirm.
    z = np.dot(w.T, x_train) + b
    y_head = sigmoid(z)
    # Keep probabilities strictly inside (0, 1): a saturated sigmoid gives
    # log(0), which produced the recorded "divide by zero" warning and the
    # NaN cost at iteration 0.
    eps = 1e-15
    y_safe = np.clip(y_head, eps, 1 - eps)
    loss = -y_train*np.log(y_safe)-(1-y_train)*np.log(1-y_safe)
    cost = (np.sum(loss))/sample_count

    # backward propagation (uses the unclipped predictions)
    derivative_weight = (np.dot(x_train,((y_head-y_train).T)))/sample_count
    derivative_bias = np.sum(y_head-y_train)/sample_count
    gradients = {"derivative_weight": derivative_weight,"derivative_bias": derivative_bias}
    return cost,gradients
def update(w, b, x_train, y_train, learning_rate,number_of_iterarion):
    """Run gradient descent and return the learned parameters.

    Args:
        w: initial weight column vector.
        b: initial scalar bias.
        x_train: feature matrix, one column per sample.
        y_train: 0/1 labels as a row.
        learning_rate: step size applied to both gradients.
        number_of_iterarion: number of gradient steps to take.

    Returns:
        (parameters, gradients, cost_list): ``parameters`` is a dict with
        keys "weight" and "bias"; ``gradients`` is the dict from the last
        step (empty when no step was taken); ``cost_list`` holds the cost
        of every iteration.
    """
    cost_list = []
    cost_list2 = []
    index = []
    gradients = {}  # stays empty if the loop body never runs

    for i in range(number_of_iterarion):
        cost,gradients = forward_backward_propagation(w,b,x_train,y_train)
        # NOTE(review): the appends and the plot call were lost in the
        # export (the lists were created but never filled); restored so
        # that the returned cost_list is meaningful - confirm.
        cost_list.append(cost)

        w = w - learning_rate * gradients["derivative_weight"]
        b = b - learning_rate * gradients["derivative_bias"]

        if i % 100 == 0:
            cost_list2.append(cost)
            index.append(i)
            print ("Cost after iteration %i: %f" %(i, cost))

    parameters = {"weight": w,"bias": b}

    # Learning curve sampled every 100 iterations.
    plt.plot(index, cost_list2)
    plt.xlabel("Number of Iterarion")
    plt.ylabel("Cost")
    plt.show()
    return parameters, gradients, cost_list

def predict(w,b,x_test):
    """Classify each column of ``x_test`` as 0 or 1.

    Args:
        w: weight column vector.
        b: scalar bias.
        x_test: feature matrix, one column per sample.

    Returns:
        Float array of shape (1, x_test.shape[1]); an entry is 1 when the
        predicted probability is above 0.5 and 0 otherwise.
    """
    # NOTE(review): the export truncated this call to
    # "sigmoid(,x_test)+b)"; restored as np.dot(w.T, x_test) - confirm.
    z = sigmoid(np.dot(w.T, x_test) + b)

    Y_prediction = np.zeros((1,x_test.shape[1]))

    for i in range(z.shape[1]):
        if z[0,i]<= 0.5:
            Y_prediction[0,i] = 0
        else:
            # The export dropped this `else`, which made every prediction 1.
            Y_prediction[0,i] = 1

    return Y_prediction

def logistic_regression(x_train, y_train, x_test, y_test, learning_rate , num_iterations):
    """Train the hand-written logistic regression and print its accuracy.

    Args:
        x_train, x_test: feature matrices with one column per sample.
        y_train, y_test: 0/1 labels as rows.
        learning_rate: gradient-descent step size.
        num_iterations: number of gradient steps.

    Prints test and train accuracy as percentages; returns nothing.
    """
    # One weight per feature (rows of x_train).
    dimension = x_train.shape[0]
    w,b = initialize_weights_and_bias(dimension)

    parameters, gradients, cost_list = update(w, b, x_train, y_train, learning_rate,num_iterations)

    y_prediction_test = predict(parameters["weight"],parameters["bias"],x_test)
    y_prediction_train = predict(parameters["weight"],parameters["bias"],x_train)

    # With 0/1 labels the mean absolute difference is the error rate.
    print("Test Accuracy: {} %".format(round(100 - np.mean(np.abs(y_prediction_test - y_test)) * 100,2)))
    print("Train Accuracy: {} %".format(round(100 - np.mean(np.abs(y_prediction_train - y_train)) * 100,2)))
# Train and evaluate the hand-written model.
logistic_regression(x_train, y_train, x_test, y_test,learning_rate = 0.01, num_iterations = 1500)
# Recorded notebook output (kept as comments so the file stays valid Python):
# :14: RuntimeWarning: divide by zero encountered in log
# loss = -y_train*np.log(y_head)-(1-y_train)*np.log(1-y_head)
# :14: RuntimeWarning: invalid value encountered in multiply
# loss = -y_train*np.log(y_head)-(1-y_train)*np.log(1-y_head)
# Cost after iteration 0: nan
# Cost after iteration 100: 0.023990
# Cost after iteration 200: 0.013663
# Cost after iteration 300: 0.009604
# Cost after iteration 400: 0.007400
# Cost after iteration 500: 0.006012
# Cost after iteration 600: 0.005057
# Cost after iteration 700: 0.004361
# Cost after iteration 800: 0.003831
# Cost after iteration 900: 0.003415
# Cost after iteration 1000: 0.003079
# Cost after iteration 1100: 0.002803
# Cost after iteration 1200: 0.002571
# Cost after iteration 1300: 0.002375
# Cost after iteration 1400: 0.002206
#
# Test Accuracy: 90.62 %
# Train Accuracy: 100.0 %
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# NOTE(review): the grid definition and the search construction/fit were
# lost in the export (only the "estimator: LogisticRegression" widget text
# and the prints survived). The grid below is a minimal reconstruction that
# contains the recorded best parameters; the original grid and cv setting
# are unknown - confirm.
grid = {"C": np.logspace(-3, 3, 7), "penalty": ["l2"]}
log_reg_cv = GridSearchCV(LogisticRegression(), grid)
# sklearn expects samples as rows and a 1-D label vector.
log_reg_cv.fit(x_train.T, y_train.ravel())

print("best hyperparameters: ", log_reg_cv.best_params_)
print("accuracy: ", log_reg_cv.best_score_)
# Recorded notebook output:
# best hyperparameters: {'C': 0.001, 'penalty': 'l2'}
# accuracy: 0.961111111111111

# Refit a single model with the selected hyperparameters.
log_reg = LogisticRegression(C=0.001,penalty="l2")
log_reg.fit(x_train.T, y_train.ravel())

# Score the model trained on the TRAINING data. The original refit on the
# test set before scoring it, so the recorded test accuracy was not a
# held-out measurement.
print("test accuracy: {} ".format(log_reg.score(x_test.T, y_test.ravel())))
print("train accuracy: {} ".format(log_reg.score(x_train.T, y_train.ravel())))
# Recorded notebook output (from the original run, with the refit):
# test accuracy: 0.96875
# train accuracy: 0.9666666666666667
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# NOTE(review): the line defining y_pred was lost in the export. The
# recorded matrix sums to 180 (the training-set size) and its accuracy
# (174/180) equals the recorded train accuracy of log_reg, so predictions
# of log_reg on the training data are assumed - confirm.
y_pred = log_reg.predict(x_train.T)
result = confusion_matrix(y_train[0], y_pred)
print(result)
# Recorded notebook output:
# [[87 1]
# [ 5 87]]
# Preprocess one extra image the same way as the dataset: read as
# grayscale, resize to image_size x image_size, then min-max scale.
# NOTE(review): new_test_path is not defined anywhere in the visible
# source - it must be set to the image's file path before this runs.
test_img = cv2.imread(new_test_path, cv2.IMREAD_GRAYSCALE)
test_img = cv2.resize(test_img, (image_size, image_size))

# NOTE(review): norm_min / norm_max have no visible definition either;
# presumably the min and max used to scale x_data - confirm.
test_img_1 = (test_img-norm_min)/(norm_max-norm_min)


from sklearn.neighbors import KNeighborsClassifier

classifier = KNeighborsClassifier(n_neighbors = 127)
# NOTE(review): the export kept only the tail ",y_train.T)" of the fit
# call; restored with samples as rows and a 1-D label vector - confirm.
classifier.fit(x_train.T, y_train.ravel())

# NOTE(review): the prediction line was lost. The recorded matrix sums to
# 32 (the test-set size), so predictions on the test data are assumed
# - confirm.
y_pred = classifier.predict(x_test.T)
result = confusion_matrix(y_test[0], y_pred)
print(result)
# Recorded notebook output:
# [[18 0]
# [ 1 13]]
