본문 바로가기

Study/class note

딥러닝 / 사자, 고양이 분류기 만들기

1. 구글 드라이브 마운트

# Mount Google Drive in the Colab runtime at /content/drive so that the files
# under MyDrive can be accessed through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

2. 압축파일 해제

# Extract the cat and lion archives into separate folders on Drive.
# -qq silences unzip's output; -d names the directory to extract into.
!unzip -qq /content/drive/MyDrive/animal/cat.zip  -d /content/drive/MyDrive/animal/image_cat
!unzip -qq /content/drive/MyDrive/animal/lion.zip -d /content/drive/MyDrive/animal/image_lion

3. 데이터 리사이즈, 넘버링 변환

# 데이터 분류
import cv2
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os
import random

# Number of images taken from each class for the train and test folders.
train_size = 1200
test_size = 50

path = '/content/drive/MyDrive/animal/image_cat'
path2 = '/content/drive/MyDrive/animal/image_lion'

train_dir = '/content/drive/MyDrive/animal/train_image/'
test_dir = '/content/drive/MyDrive/animal/test_image/'

# cv2.imwrite signals failure only through its return value, so make sure the
# destination folders exist before anything is written.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Draw train_size + test_size distinct file names per class (replace=False).
file_list = os.listdir(path)
sample = np.random.choice(file_list, train_size + test_size, False)

file_list2 = os.listdir(path2)
sample2 = np.random.choice(file_list2, train_size + test_size, False)

# Output numbering: cats take 1..train_size (train) and 1..test_size (test);
# lions take the following train_size / test_size numbers in each folder.
for src_dir, names, train_offset, test_offset in (
    (path, sample, 0, 0),
    (path2, sample2, train_size, test_size),
):
    for i, k in enumerate(names, 1):
        src = src_dir + '/' + k
        img = cv2.imread(src)
        if img is None:
            # A skipped image would shift every later number, so stop with a
            # clear message instead of failing inside cv2.resize.
            raise ValueError('could not read image: ' + src)
        resize_img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_CUBIC)

        # The first train_size samples go to the train folder, the rest to test.
        if i <= train_size:
            dst = train_dir + str(i + train_offset) + '.jpg'
        else:
            dst = test_dir + str(i - train_size + test_offset) + '.jpg'

        if not cv2.imwrite(dst, resize_img):
            raise OSError('could not write image: ' + dst)

+) 수정 코드 (두 클래스를 하나의 반복문으로 처리)

# 데이터 분류
import cv2
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os
import random

# Number of images taken from each class for the train and test folders.
train_size = 1200
test_size = 50

path = '/content/drive/MyDrive/animal/image_cat'
path2 = '/content/drive/MyDrive/animal/image_lion'

train_dir = '/content/drive/MyDrive/animal/train_image/'
test_dir = '/content/drive/MyDrive/animal/test_image/'

# cv2.imwrite does not create missing folders, so create them up front.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

file_list = os.listdir(path)
file_list2 = os.listdir(path2)

# Draw train_size + test_size distinct file names per class (replace=False).
sample = np.random.choice(file_list, train_size + test_size, False)
sample2 = np.random.choice(file_list2, train_size + test_size, False)

# Walk both samples in lockstep. i is the 1-based output number; iterating the
# names directly (instead of indexing with i) uses every sampled file and
# never runs past the end of the arrays.
for i, (name1, name2) in enumerate(zip(sample, sample2), 1):
    # Class 1 (cat)
    img1 = cv2.imread(path + '/' + name1)
    resize_img = cv2.resize(img1, (32, 32), interpolation=cv2.INTER_CUBIC)

    # Class 2 (lion) -- read from the lion folder, path2
    img2 = cv2.imread(path2 + '/' + name2)
    resize_img2 = cv2.resize(img2, (32, 32), interpolation=cv2.INTER_CUBIC)

    if i <= train_size:
        # Train folder: cats are 1..train_size, lions follow after them.
        cv2.imwrite(train_dir + str(i) + '.jpg', resize_img)
        cv2.imwrite(train_dir + str(i + train_size) + '.jpg', resize_img2)
    else:
        # Test folder: cats are 1..test_size, lions follow after them.
        num = i - train_size
        cv2.imwrite(test_dir + str(num) + '.jpg', resize_img)
        cv2.imwrite(test_dir + str(num + test_size) + '.jpg', resize_img2)

 

4. 정답 데이터 생성

# Create the train label file: one label per line, train_size rows of
# 1 (cat) followed by train_size rows of 0 (lion).
path = "/content/drive/MyDrive/animal/train_label.csv"

# The with-block closes the file even if a write fails part-way through.
with open(path, 'w') as file:
    file.writelines(['1\n'] * train_size + ['0\n'] * train_size)

# Create the test label file with the same layout, test_size rows per class.
path = "/content/drive/MyDrive/animal/test_label.csv"

with open(path, 'w') as file:
    file.writelines(['1\n'] * test_size + ['0\n'] * test_size)

5. 이미지 로드 함수 생성

#함수 생성
import os
import re
import cv2
import numpy as np
 
def image_load(path):
    """Load the numbered ``.jpg`` images in *path*, ordered by their number.

    Each directory entry is reduced to the digits in its name; the images
    ``<path>/<number>.jpg`` are then read with ``cv2.imread`` in ascending
    numeric order.

    Args:
        path: Directory that holds files named ``<number>.jpg``.

    Returns:
        A NumPy array built from the list of loaded images (an empty array
        when the directory has no numbered entries).
    """
    file_name = []

    for entry in os.listdir(path):
        # Keep only the digits of the entry name.
        digits = re.sub('[^0-9]', '', entry)
        if not digits:
            # Entries without any digit (for example a '.ipynb_checkpoints'
            # folder) are not numbered images; int('') would raise ValueError.
            continue
        file_name.append(int(digits))

    # Numeric sort, so that 10.jpg comes after 9.jpg rather than after 1.jpg.
    file_name.sort()

    image = [cv2.imread(path + '/' + str(k) + '.jpg') for k in file_name]

    return np.array(image)

6. 정답데이터 로드 함수 생성

import csv
 
def label_load(path):
    """Read integer labels from the first column of a CSV file.

    Args:
        path: Path of a CSV file with one label per row.

    Returns:
        A 1-D NumPy array holding one int per non-empty row, in file order.

    Raises:
        ValueError: If a first-column value cannot be converted with ``int``.
    """
    # newline='' is the mode the csv module recommends for its file objects;
    # the with-block guarantees the file is closed, even on a parse error.
    with open(path, newline='') as file:
        # csv.reader yields [] for a blank line; skip those instead of
        # failing on row[0].
        labellist = [int(row[0]) for row in csv.reader(file) if row]

    return np.array(labellist)

7. 데이터 로드

# 1. Load the data.

# Locations of the image folders and the label CSV files on Drive.
train_image = '/content/drive/MyDrive/animal/train_image/'
test_image = '/content/drive/MyDrive/animal/test_image/'
train_label = '/content/drive/MyDrive/animal/train_label.csv'
test_label = '/content/drive/MyDrive/animal/test_label.csv'

# Load images and labels, train split first and then the test split.
x_train, y_train = image_load(train_image), label_load(train_label)
x_test, y_test = image_load(test_image), label_load(test_label)

# Print the array shapes as a quick check of what was loaded.
for loaded in (x_train, x_test, y_train, y_test):
    print(loaded.shape)

8. 정규화

# 1. 필요한 패키지
import tensorflow as tf   # 텐써 플로우 2.0 
from tensorflow.keras.models import Sequential  # 모델을 구성하기 위한 모듈
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, Conv2D, MaxPooling2D ,Flatten
from tensorflow.keras.utils import to_categorical # one encoding 하는 모듈
 
# Fix TensorFlow's random seed.
tf.random.set_seed(777)

# 2. Normalization: force the (N, 32, 32, 3) layout and divide by 255.
x_train, x_test = (
    images.reshape(-1, 32, 32, 3) / 255 for images in (x_train, x_test)
)

# 3. Prepare the labels: one-hot encode each integer class label
#    (e.g. 4 ---> 0 0 0 0 1 0 0 0 0 0).
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# Print the shapes after preprocessing.
for prepared in (x_train, x_test, y_train, y_test):
    print(prepared.shape)

9. 모델 구성

# 4. Build the model: four Conv2D + MaxPooling2D stages, then two
#    Dense(200) blocks and a 2-unit softmax output layer.
model = Sequential()

# Convolutional stages; only the first layer declares the input shape.
for stage, n_filters in enumerate((200, 200, 200, 100)):
    first_layer_args = {'input_shape': (32, 32, 3)} if stage == 0 else {}
    model.add(Conv2D(n_filters, activation='relu', kernel_size=(7, 7),
                     padding='same', **first_layer_args))
    model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model.add(Dropout(0.15))

# Fully connected head: Dense -> Dropout -> BatchNormalization, twice.
model.add(Flatten())
for _ in range(2):
    model.add(Dense(200, activation='relu'))
    model.add(Dropout(0.3))
    model.add(BatchNormalization())

# One output unit per class.
model.add(Dense(2, activation='softmax'))
model.summary()
 
# 5. Configure the model: Adam optimizer, categorical cross-entropy loss, and
#    accuracy reported during training.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

# Optional: set the learning rate to 0.01 before fitting.
# from tensorflow.keras import backend as K
# K.set_value(model.optimizer.learning_rate, 0.01)
# print("Learning rate before second fit : ", model.optimizer.learning_rate.numpy())

# 6. Train the model; the test split is passed as validation data.
history = model.fit(
    x_train,
    y_train,
    batch_size=200,
    validation_data=(x_test, y_test),
    epochs=150,
)

# 7. Evaluate the model on the test split.
model.evaluate(x_test, y_test)

# Per-epoch accuracy curves recorded by fit().
recorded = history.history
train_acc_list = recorded['acc']
test_acc_list = recorded['val_acc']

 
import matplotlib.pyplot as plt

# Plot train and test accuracy against the epoch index.
epoch_axis = np.arange(len(train_acc_list))
curves = (
    (train_acc_list, {'label': 'train acc'}),
    (test_acc_list, {'label': 'test acc', 'linestyle': '--'}),
)
for acc_values, line_style in curves:
    plt.plot(epoch_axis, acc_values, **line_style)

plt.ylim(0, 1)
plt.legend(loc='lower right')
plt.title('CNN - CAT & LION classification')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.show()

# Save the model:
# model.save('/content/drive/MyDrive/leaf/leaf_model2.h5')

 

반응형