2. Artificial Intelligence/Stanford Univ. CS231n

[Stanford Univ: CS231n] Spring 2025 Assignment1. Q4(Higher Level Representations: Image Features)

뉴하늘 2025. 4. 30. 12:39
728x90

본 포스팅은 Stanford University School of Engineering의 CS231n: Convolutional Neural Networks for Visual Recognition을 수강하고 공부한 내용을 정리하기 위한 포스팅입니다.
 
https://github.com/cs231n/cs231n.github.io/blob/master/assignments/2025/assignment1.md

 

cs231n.github.io/assignments/2025/assignment1.md at master · cs231n/cs231n.github.io

Public facing notes page. Contribute to cs231n/cs231n.github.io development by creating an account on GitHub.

github.com

 

https://github.com/KwonKiHyeok/cs231n

 

GitHub - KwonKiHyeok/CS231n: This repository contains my solutions to the assignments of the CS231n course offered by Stanford U

This repository contains my solutions to the assignments of the CS231n course offered by Stanford University (Spring 2025). - KwonKiHyeok/CS231n

github.com

 

Q4: Higher Level Representations: Image Features

 

from __future__ import print_function
from builtins import zip
from builtins import range
from past.builtins import xrange

import matplotlib
import numpy as np
from scipy.ndimage import uniform_filter


def extract_features(imgs, feature_fns, verbose=False):
    """
    Run every feature function on every image and pack the results into one
    matrix with one row per image.

    Inputs:
    - imgs: array whose first axis indexes the N images (e.g. N x H x W x C).
      Each image is passed through .squeeze() before being handed to a
      feature function.
    - feature_fns: List of k feature functions. The ith function takes a single
      image and must return a one-dimensional array of length F_i.
    - verbose: Boolean; if true, print progress every 1000 images.

    Returns:
    An array of shape (N, F_1 + ... + F_k); row n holds the concatenated
    feature vectors of image n. For N == 0 an empty 1-D array is returned.
    """
    n_imgs = imgs.shape[0]
    if n_imgs == 0:
        return np.array([])

    # Probe with the first image: this tells us how wide each feature is, and
    # we keep the vectors so image 0 is not processed twice.
    widths = []
    probe = []
    for fn in feature_fns:
        vec = fn(imgs[0].squeeze())
        assert len(vec.shape) == 1, "Feature functions must be one-dimensional"
        widths.append(vec.size)
        probe.append(vec)

    # With all widths known, the whole output can be allocated up front.
    out = np.zeros((n_imgs, sum(widths)))
    out[0] = np.hstack(probe).T

    # Remaining images: write each feature into its own column slice.
    for row in range(1, n_imgs):
        lo = 0
        for fn, width in zip(feature_fns, widths):
            hi = lo + width
            out[row, lo:hi] = fn(imgs[row].squeeze())
            lo = hi
        if verbose and row % 1000 == 999:
            print("Done extracting features for %d / %d images" % (row + 1, n_imgs))

    return out


def rgb2gray(rgb):
    """Convert RGB image to grayscale

      Uses the ITU-R BT.601 luma weights (0.299, 0.587, 0.114). The weights sum
      to 1, so a pure-white pixel keeps its original intensity.

      Parameters:
        rgb : RGB image; the last axis holds the channels and only the first
              three are used (an alpha channel, if present, is ignored)

      Returns:
        gray : grayscale image with the channel axis removed

    """
    # The blue weight was previously 0.144 (a typo for 0.114), which made the
    # weights sum to 1.03 and pushed bright pixels above the input range.
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])


def hog_feature(im):
    """Compute Histogram of Gradient (HOG) feature for an image

         Modified from skimage.feature.hog
         http://pydoc.net/Python/scikits-image/0.4.2/skimage.feature.hog

       Reference:
         Histograms of Oriented Gradients for Human Detection
         Navneet Dalal and Bill Triggs, CVPR 2005

      Parameters:
        im : an input grayscale or rgb image. A 3-D array is converted with
             rgb2gray; anything else is promoted to at least 2-D and used
             as-is.

      Returns:
        feat: Histogram of Gradient (HOG) feature, a 1-D array of length
              n_cellsx * n_cellsy * 9 (8x8-pixel cells, 9 orientation bins)

    """

    # convert rgb to grayscale if needed
    if im.ndim == 3:
        image = rgb2gray(im)
    else:
        # The NumPy function is `atleast_2d`; the former `np.at_least_2d`
        # does not exist and raised AttributeError for grayscale input.
        image = np.atleast_2d(im)

    sx, sy = image.shape  # image size
    orientations = 9  # number of gradient bins
    cx, cy = (8, 8)  # pixels per cell

    # Forward differences; the last column / row has no neighbour and stays 0.
    gx = np.zeros(image.shape)
    gy = np.zeros(image.shape)
    gx[:, :-1] = np.diff(image, n=1, axis=1)  # compute gradient on x-direction
    gy[:-1, :] = np.diff(image, n=1, axis=0)  # compute gradient on y-direction
    grad_mag = np.sqrt(gx ** 2 + gy ** 2)  # gradient magnitude
    # Orientation in degrees, shifted by +90. Only values inside the bins
    # below (0 exclusive up to 180) contribute to the histogram.
    grad_ori = np.arctan2(gy, (gx + 1e-15)) * (180 / np.pi) + 90  # gradient orientation

    n_cellsx = int(np.floor(sx / cx))  # number of cells in x
    n_cellsy = int(np.floor(sy / cy))  # number of cells in y
    # compute orientations integral images
    orientation_histogram = np.zeros((n_cellsx, n_cellsy, orientations))
    for i in range(orientations):
        # create new integral image for this orientation
        # isolate orientations in this range
        temp_ori = np.where(grad_ori < 180 / orientations * (i + 1), grad_ori, 0)
        temp_ori = np.where(grad_ori >= 180 / orientations * i, temp_ori, 0)
        # select magnitudes for those orientations
        cond2 = temp_ori > 0
        temp_mag = np.where(cond2, grad_mag, 0)
        # Average the selected magnitudes over each cell-sized window and
        # sample one value per cell at the cell centres.
        # NOTE(review): the transposed assignment appears to line up only when
        # the cell grid is square and the image sides are multiples of the
        # cell size (true for 32x32 inputs) -- confirm before other sizes.
        orientation_histogram[:, :, i] = uniform_filter(temp_mag, size=(cx, cy))[
            round(cx / 2) :: cx, round(cy / 2) :: cy
        ].T

    return orientation_histogram.ravel()


def color_histogram_hsv(im, nbin=10, xmin=0, xmax=255, normalized=True):
    """
    Compute color histogram for an image using hue.

    Inputs:
    - im: H x W x C array of pixel data for an RGB image.
    - nbin: Number of histogram bins. (default: 10)
    - xmin: Minimum pixel value (default: 0)
    - xmax: Maximum pixel value (default: 255)
    - normalized: Whether to normalize the histogram (default: True)

    Returns:
      1D vector of length nbin giving the color histogram over the hue of the
      input image. With normalized=True each entry is the fraction of pixels
      falling in that bin; with normalized=False it is the raw pixel count.
    """
    bins = np.linspace(xmin, xmax, nbin + 1)
    # rgb_to_hsv is fed values scaled by 1 / xmax; the result is scaled back
    # so the hue channel spans the same range as the bin edges.
    hsv = matplotlib.colors.rgb_to_hsv(im / xmax) * xmax
    imhist, bin_edges = np.histogram(hsv[:, :, 0], bins=bins, density=normalized)
    if normalized:
        # density=True yields a probability density; multiplying by the bin
        # width turns it into per-bin probability mass. Raw counts must not be
        # scaled this way (previously they were multiplied by the bin width).
        imhist = imhist * np.diff(bin_edges)

    # return histogram
    return imhist


# ~~START DELETE~~
# These are some other features that we implemented to play around, but aren't
# distributing to students.
def color_histogram(im, nbin=10, xmin=0, xmax=255, normalized=True):
    """Compute color histogram feature for an image

      Parameters:
        im : a numpy array of grayscale (2-D) or rgb (3-D) image
        nbin : number of histogram bins (default: 10)
        xmin : minimum pixel value (default: 0)
        xmax : maximum pixel value (default: 255)
        normalized : bool flag to normalize the histogram

      Returns:
        feat : color histogram feature. For a 2-D image, a vector of length
               nbin; for a 3-D image, the histograms of the first three
               channels concatenated (length 3 * nbin); for any other rank,
               an empty array. With normalized=True each histogram holds the
               fraction of pixels per bin, otherwise the raw counts.

    """
    bins = np.linspace(xmin, xmax, nbin + 1)

    def channel_hist(values):
        # One histogram with the same convention for every channel:
        # per-bin probability mass when normalized, raw counts otherwise.
        # (Previously the grayscale branch returned a density while the RGB
        # branch returned mass, and raw counts were scaled by the bin width.)
        hist, edges = np.histogram(values, bins=bins, density=normalized)
        return hist * np.diff(edges) if normalized else hist

    ndim = im.ndim
    # grayscale image
    if ndim == 2:
        return channel_hist(im)
    # rgb image
    if ndim == 3:
        return np.concatenate([channel_hist(im[:, :, k]) for k in range(3)])
    # unknown image type
    return np.array([])


def color_histogram_spatial(img, levels=3, nbin=4):
    """
    Color histogram over a spatial pyramid.

    For each level g = 1 .. levels the image is cut into a g x g grid
    (np.array_split along axis 0, then axis 1) and color_histogram_cross is
    evaluated on every tile. All tile histograms are concatenated, coarsest
    level first, into a single 1-D feature vector.
    """
    pyramid = [
        color_histogram_cross(tile, nbin=nbin)
        for grid in range(1, levels + 1)
        for band in np.array_split(img, grid, axis=0)
        for tile in np.array_split(band, grid, axis=1)
    ]
    return np.hstack(pyramid)


def color_histogram_cross(img, nbin=5, normalized=True):
    """
    RGB color histogram where our bins are 3 dimensional.

    Inputs:
    - img: H x W x C array; every pixel is treated as one C-dimensional sample.
    - nbin: Number of bins along each channel axis (default: 5)
    - normalized: If true, return a probability density instead of counts
      (default: True)

    Returns:
      Flattened joint histogram of length nbin ** C.
    """
    height, width, channels = img.shape
    # One row per pixel, one column per channel.
    colors = np.reshape(img, (height * width, channels))
    # `normed=` was removed from np.histogramdd in NumPy 1.24; `density=` is
    # the supported spelling of the same option.
    return np.histogramdd(colors, bins=nbin, density=normalized)[0].flatten()


# ~~END DELETE~~

 

Train Softmax classifier on features

# Use the validation set to tune the learning rate and regularization strength

from cs231n.classifiers.linear_classifier import Softmax

# Hyperparameter grid: every (learning rate, regularization) pair is trained.
# NOTE(review): the output posted under this cell also lists lr = 5e-1, which
# is not in this list -- presumably the list was edited after that run; confirm.
learning_rates = [2.5e-1, 1e-1, 7.5e-2, 5e-2]
regularization_strengths = [1e-3, 1e-2, 1e-4]

results = {}  # (lr, reg) -> (training accuracy, validation accuracy)
best_val = -1
best_softmax = None

################################################################################
# TODO:                                                                        #
# Use the validation set to set the learning rate and regularization strength. #
# This should be identical to the validation that you did for the Softmax; save#
# the best trained classifier in best_softmax. If you carefully tune the model,#
# you should be able to get accuracy of above 0.42 on the validation set.      #
################################################################################

for lr in learning_rates:
    for reg in regularization_strengths:
        # A fresh classifier per setting, so runs do not share weights.
        softmax = Softmax()
        softmax.train(
            X_train_feats, y_train, lr, reg,
            num_iters=1000, batch_size=200, verbose=True,
        )

        y_train_pred = softmax.predict(X_train_feats)
        train_accuracy = np.mean(y_train == y_train_pred)

        y_val_pred = softmax.predict(X_val_feats)
        val_accuracy = np.mean(y_val == y_val_pred)
        results[(lr, reg)] = (train_accuracy, val_accuracy)

        # Model selection uses validation accuracy only.
        if val_accuracy > best_val:
            best_val, best_softmax = val_accuracy, softmax

# Print out results.
for (lr, reg), (train_accuracy, val_accuracy) in sorted(results.items()):
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
        lr, reg, train_accuracy, val_accuracy))

print('best validation accuracy achieved: %f' % best_val)
lr 5.000000e-02 reg 1.000000e-04 train accuracy: 0.522469 val accuracy: 0.518000
lr 5.000000e-02 reg 1.000000e-03 train accuracy: 0.522551 val accuracy: 0.511000
lr 5.000000e-02 reg 1.000000e-02 train accuracy: 0.521020 val accuracy: 0.514000
lr 7.500000e-02 reg 1.000000e-04 train accuracy: 0.522286 val accuracy: 0.515000
lr 7.500000e-02 reg 1.000000e-03 train accuracy: 0.522000 val accuracy: 0.520000
lr 7.500000e-02 reg 1.000000e-02 train accuracy: 0.519694 val accuracy: 0.517000
lr 1.000000e-01 reg 1.000000e-04 train accuracy: 0.524878 val accuracy: 0.524000
lr 1.000000e-01 reg 1.000000e-03 train accuracy: 0.521592 val accuracy: 0.518000
lr 1.000000e-01 reg 1.000000e-02 train accuracy: 0.517612 val accuracy: 0.515000
lr 2.500000e-01 reg 1.000000e-04 train accuracy: 0.516122 val accuracy: 0.505000
lr 2.500000e-01 reg 1.000000e-03 train accuracy: 0.515673 val accuracy: 0.507000
lr 2.500000e-01 reg 1.000000e-02 train accuracy: 0.508061 val accuracy: 0.506000
lr 5.000000e-01 reg 1.000000e-04 train accuracy: 0.501776 val accuracy: 0.494000
lr 5.000000e-01 reg 1.000000e-03 train accuracy: 0.502837 val accuracy: 0.491000
lr 5.000000e-01 reg 1.000000e-02 train accuracy: 0.491918 val accuracy: 0.470000
best validation accuracy achieved: 0.524000
# Score the selected Softmax model on the held-out test set; the assignment
# expects at least 0.42 here.
y_test_pred = best_softmax.predict(X_test_feats)
test_accuracy = np.mean(y_test_pred == y_test)
print(test_accuracy)
0.504

 

 

# An important way to gain intuition about how an algorithm works is to
# visualize the mistakes that it makes. In this visualization, we show examples
# of images that are misclassified by our current system. The first column
# shows images that our system labeled as "plane" but whose true label is
# something other than "plane".

examples_per_class = 8
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
for cls, cls_name in enumerate(classes):
    # Test images predicted as `cls` whose true label is a different class.
    idxs = np.where((y_test != cls) & (y_test_pred == cls))[0]
    # Sampling without replacement raises ValueError when fewer than
    # examples_per_class candidates exist, so only subsample when there are
    # enough; otherwise show every false positive there is (possibly none).
    if len(idxs) >= examples_per_class:
        idxs = np.random.choice(idxs, examples_per_class, replace=False)
    for i, idx in enumerate(idxs):
        # Grid layout: one column per predicted class, one row per example.
        plt.subplot(examples_per_class, len(classes), i * len(classes) + cls + 1)
        plt.imshow(X_test[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            plt.title(cls_name)
plt.show()

 

 

Neural Network on image features

# Preprocessing: drop the last column (the bias dimension) from every split.
# Run this cell only ONCE -- each extra run strips one more column.
print(X_train_feats.shape)
X_train_feats, X_val_feats, X_test_feats = (
    split[:, :-1] for split in (X_train_feats, X_val_feats, X_test_feats)
)

print(X_train_feats.shape)

from cs231n.classifiers.fc_net import TwoLayerNet
from cs231n.solver import Solver

input_dim = X_train_feats.shape[1]
hidden_dim = 500
num_classes = 10

# Dataset splits bundled into the dict that is handed to Solver.
data = {
    'X_train': X_train_feats,
    'y_train': y_train,
    'X_val': X_val_feats,
    'y_val': y_val,
    'X_test': X_test_feats,
    'y_test': y_test,
}

best_net = None

################################################################################
# TODO: Train a two-layer neural network on image features. You may want to    #
# cross-validate various parameters as in previous sections. Store your best   #
# model in the best_net variable.                                              #
################################################################################

learning_rates = [2.5e-1, 1e-1, 7.5e-2, 5e-2]
regularization_strengths = [1e-3, 1e-2, 1e-4]

results = {}  # (lr, reg) -> best validation accuracy seen by the solver
best_val = -1
# Only best_net is tracked here; the trained `best_softmax` from the Softmax
# section is deliberately left untouched (it used to be reset to None by a
# copy-pasted line).

for lr in learning_rates:
    for reg in regularization_strengths:
        # Pass `reg` by keyword. NOTE(review): in the CS231n starter code the
        # fourth positional parameter of TwoLayerNet is `weight_scale`, so the
        # previous positional call would have tuned the init scale and left
        # regularization at its default -- verify against your fc_net.py.
        net = TwoLayerNet(input_dim, hidden_dim, num_classes, reg=reg)
        solver = Solver(
            net, data,
            num_epochs=10, batch_size=200,
            update_rule='sgd',
            optim_config={
                'learning_rate': lr,
            },
        )

        solver.train()
        # Record the solver's best validation accuracy for this setting.
        results[(lr, reg)] = solver.best_val_acc

        # Save if validation accuracy is the best
        if results[(lr, reg)] > best_val:
            best_val = results[(lr, reg)]
            best_net = net

# Print out results.
for lr, reg in sorted(results):
    val_accuracy = results[(lr, reg)]
    print('lr %e reg %e val accuracy: %f' % (lr, reg, val_accuracy))
lr 5.000000e-02 reg 1.000000e-04 val accuracy: 0.536000
lr 5.000000e-02 reg 1.000000e-03 val accuracy: 0.535000
lr 5.000000e-02 reg 1.000000e-02 val accuracy: 0.569000
lr 7.500000e-02 reg 1.000000e-04 val accuracy: 0.548000
lr 7.500000e-02 reg 1.000000e-03 val accuracy: 0.559000
lr 7.500000e-02 reg 1.000000e-02 val accuracy: 0.595000
lr 1.000000e-01 reg 1.000000e-04 val accuracy: 0.579000
lr 1.000000e-01 reg 1.000000e-03 val accuracy: 0.590000
lr 1.000000e-01 reg 1.000000e-02 val accuracy: 0.609000
lr 2.500000e-01 reg 1.000000e-04 val accuracy: 0.595000
lr 2.500000e-01 reg 1.000000e-03 val accuracy: 0.595000
lr 2.500000e-01 reg 1.000000e-02 val accuracy: 0.604000
# Evaluate the selected network on the test split. The assignment expects
# more than 58% accuracy here; above 60% is reachable with careful tuning.

y_test_pred = np.argmax(
    best_net.loss(data['X_test']),  # called without labels, as in the original
    axis=1,
)
test_acc = np.mean(y_test_pred == data['y_test'])
print(test_acc)
0.585
728x90