机器学习实验5

首先尝试使用粒子群算法求解正则项的系数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import numpy as np
import matplotlib.pyplot as plt

def read_data(addr):
    """Load a CSV file and build a polynomial feature matrix.

    Each of the first 8 columns is expanded into powers 0..4 and appended
    after an initial bias column of ones.  NOTE(review): power 0 produces a
    constant column per feature, so the design matrix carries several
    redundant bias columns — confirm this is intentional.

    Returns (X, Y, n); Y is None unless the path contains "train".
    """
    raw = np.loadtxt(addr, delimiter=',')
    n = raw.shape[0]
    cols = [np.ones([n, 1])]
    for feat in range(8):
        for p in range(5):
            cols.append(np.expand_dims(np.power(raw[:, feat], p), axis=1))
    X = np.concatenate(cols, axis=1)
    Y = np.expand_dims(raw[:, -1], axis=1) if "train" in addr else None
    return (X, Y, n)

# def cost_gradient(W, X, Y, n, lambd): # L2 正则
# Y_hat = 1 / (1 + np.exp(-X @ W))
# cost_first_term = -np.sum(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)) / (2 * n)
# reg_term = lambd / 2 * np.sum(W[1:] ** 2)
# j = cost_first_term + reg_term
# G = np.zeros_like(W)
# G = (X.T @ (Y_hat - Y)) / n
# G[1:] += lambd * W[1:]
# return (j, G)

def cost_gradient(W, X, Y, n, lambd):  # L1 regularization
    """Cross-entropy cost with an L1 penalty, and its (sub)gradient.

    Args:
        W: (d, 1) weight vector; W[0] is the bias and is not regularized.
        X: (n, d) design matrix.
        Y: (n, 1) binary labels in {0, 1}.
        n: number of samples.
        lambd: L1 regularization strength.

    Returns:
        (j, G): scalar cost and the (d, 1) gradient of that cost.
    """
    Y_hat = 1 / (1 + np.exp(-X @ W))
    # BUGFIX: the cost was divided by 2n while the gradient used n, so J did
    # not measure the function actually being minimized; use /n for both.
    data_cost = -np.sum(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)) / n
    # L1 penalty on everything except the bias term.
    reg_cost = lambd * np.sum(np.abs(W[1:]))
    j = data_cost + reg_cost
    # (removed a dead `G = np.zeros_like(W)` that was immediately overwritten)
    G = (X.T @ (Y_hat - Y)) / n
    # Subgradient of the L1 penalty (np.sign gives 0 at W == 0).
    G[1:] += lambd * np.sign(W[1:])
    return (j, G)

def train(W, X, Y, lr, n, iterations, lambd):
    """Run fixed-step gradient descent for `iterations` steps.

    Returns (W, J, err): final weights, the per-iteration cost history,
    and the final training error rate.
    """
    J = np.zeros([iterations, 1])
    for i in range(iterations):
        (J[i], G) = cost_gradient(W, X, Y, n, lambd)
        W = W - lr * G
    # Evaluate the error once, after training: computing it inside the loop
    # adds a full O(n*d) pass per step without changing the returned value.
    err = error(W, X, Y)
    return (W, J, err)

def error(W, X, Y):
    """Misclassification rate of the logistic model W on (X, Y).

    BUGFIX: the original thresholded with < 0.5 and > 0.5 separately,
    leaving samples with Y_hat exactly 0.5 unclassified (and therefore
    always counted as errors); >= 0.5 maps the boundary to class 1.
    """
    Y_hat = 1 / (1 + np.exp(-X @ W))
    predictions = (Y_hat >= 0.5).astype(float)
    return 1 - np.mean(np.equal(predictions, Y))

def predict(W):
    """Classify the test set with weights W and write predict.csv."""
    (X, _, n) = read_data("test_data.csv")
    Y_hat = 1 / (1 + np.exp(-X @ W))
    # >= 0.5 so the decision boundary itself is classified (as class 1)
    # instead of being left at 0.5 and written out as a probability.
    labels = (Y_hat >= 0.5).astype(float)
    # Use the actual sample count rather than a hard-coded 200.
    idx = np.expand_dims(np.arange(1, n + 1), axis=1)
    np.savetxt("predict.csv", np.concatenate([idx, labels], axis=1),
               header="Index,ID", comments='', delimiter=',')

class Particle:
    """One PSO particle: a candidate position with its velocity and the
    best position/cost it has personally observed so far."""

    def __init__(self, num_dimensions):
        # Small positive start, since the position encodes a lambda value.
        self.position = np.random.uniform(0, 0.05, size=num_dimensions)
        self.velocity = np.random.uniform(-0.001, 0.001, size=num_dimensions)
        self.best_position = self.position.copy()
        self.best_cost = float('inf')

    def update_position(self):
        # Take one step along the current velocity.
        self.position = self.position + self.velocity

    def update_best(self, cost_function):
        # Adopt the current position if it beats the personal best.
        cost = cost_function(self.position)
        if cost < self.best_cost:
            self.best_position = self.position.copy()
            self.best_cost = cost

def particle_swarm_optimization(X, Y, n):
    """Search for the L1 regularization strength lambda with PSO.

    Each particle's 1-D position is a candidate lambda; its cost is the
    training error of a logistic model trained with that lambda.

    Returns (best_lambda, best_error).
    """
    swarm_size = 10
    max_iters = 30
    dims = 1
    inertia, c_personal, c_global = 0.5, 1.5, 1.5

    swarm = [Particle(dims) for _ in range(swarm_size)]
    global_best_position = swarm[0].best_position.copy()
    global_best_cost = float('inf')

    def cost_function(lambd):
        # Train a fresh model and score the candidate by training error.
        W = np.random.random([X.shape[1], 1])
        (W, J, err) = train(W, X, Y, 0.0001, n, 100000, lambd)
        return err

    for _ in range(max_iters):
        for particle in swarm:
            # Standard PSO velocity/position update.
            r1, r2 = np.random.random(), np.random.random()
            particle.velocity = (
                inertia * particle.velocity
                + c_personal * r1 * (particle.best_position - particle.position)
                + c_global * r2 * (global_best_position - particle.position)
            )
            particle.update_position()
            # Refresh the personal best, then the global best.
            particle.update_best(cost_function)
            print("当前粒子位置:", particle.best_position)
            print("当前粒子误差:", particle.best_cost)
            if particle.best_cost < global_best_cost:
                global_best_position = particle.best_position.copy()
                global_best_cost = particle.best_cost

    return global_best_position[0], global_best_cost

# Load the training data and search for the best regularization strength.
X, Y, n = read_data("train.csv")
best_lambda, best_cost = particle_swarm_optimization(X, Y, n)
print(f"Best lambda: {best_lambda}, Best cost: {best_cost}")

知道了大概的位置后,尝试带入构造好的模型进行求解

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt

def read_data(addr):
    """Read a CSV file and expand the first 8 columns into powers 0..4.

    Returns (X, Y, n); Y is only populated when the path contains "train".
    """
    data = np.loadtxt(addr, delimiter=',')
    n = data.shape[0]

    ###### You may modify this section to change the model
    feature_blocks = [np.ones([n, 1])]
    feature_blocks += [
        np.expand_dims(np.power(data[:, i], power), axis=1)
        for i in range(8)
        for power in range(5)
    ]
    X = np.concatenate(feature_blocks, axis=1)

    Y = None
    if "train" in addr:
        Y = np.expand_dims(data[:, -1], axis=1)

    return (X, Y, n)

def cost_gradient(W, X, Y, n, lambd):  # L1 regularization
    """L1-regularized cross-entropy cost and its (sub)gradient.

    Args:
        W: (d, 1) weights; W[0] is the unregularized bias.
        X: (n, d) design matrix.
        Y: (n, 1) binary labels in {0, 1}.
        n: sample count.
        lambd: L1 regularization strength.

    Returns:
        (j, G): scalar cost and the matching (d, 1) gradient.
    """
    Y_hat = 1 / (1 + np.exp(-X @ W))
    # BUGFIX: cost used /(2n) while the gradient used /n; normalize both
    # by n so J tracks the function the gradient actually descends.
    data_cost = -np.sum(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)) / n
    # L1 penalty, bias excluded.
    reg_cost = lambd * np.sum(np.abs(W[1:]))
    j = data_cost + reg_cost
    G = (X.T @ (Y_hat - Y)) / n
    # L1 subgradient (np.sign is 0 at W == 0).
    G[1:] += lambd * np.sign(W[1:])
    return (j, G)


def train(W, X, Y, lr, n, iterations, lambd):
    """Gradient descent with a fixed learning rate for `iterations` steps.

    Returns (W, J, err): final weights, per-iteration cost history, and
    the final training error rate.
    """
    J = np.zeros([iterations, 1])

    for i in range(iterations):
        (J[i], G) = cost_gradient(W, X, Y, n, lambd)
        W = W - lr * G

    # Error is evaluated once after the loop; per-iteration evaluation
    # would cost an extra O(n*d) pass per step for the same return value.
    err = error(W, X, Y)

    return (W, J, err)

def error(W, X, Y):
    """Misclassification rate of the logistic model W on (X, Y).

    BUGFIX: thresholding with separate < 0.5 / > 0.5 left Y_hat == 0.5
    unclassified (always counted wrong); >= 0.5 assigns the boundary to
    class 1.
    """
    Y_hat = 1 / (1 + np.exp(-X @ W))
    predictions = (Y_hat >= 0.5).astype(float)
    return 1 - np.mean(np.equal(predictions, Y))

def predict(W):
    """Classify the test set with weights W and write predict.csv."""
    (X, _, n) = read_data("test_data.csv")

    Y_hat = 1 / (1 + np.exp(-X @ W))
    # >= 0.5 so the boundary value is classified (as 1) instead of being
    # left at 0.5 and written to the CSV as a raw probability.
    labels = (Y_hat >= 0.5).astype(float)

    # Index by the actual sample count instead of a hard-coded 200.
    idx = np.expand_dims(np.arange(1, n + 1), axis=1)
    np.savetxt("predict.csv", np.concatenate([idx, labels], axis=1),
               header="Index,ID", comments='', delimiter=',')

# Final training run with the lambda found by the PSO search above.
iterations = 100000 ###### Training loops
lr = 0.0001 ###### Learning rate
lambd = 0.0349708 # obtained via the particle swarm optimization search

# Build the polynomial feature matrix and start from random weights.
(X, Y, n) = read_data("train.csv")
W = np.random.random([X.shape[1], 1])

# Train with L1-regularized gradient descent; err is the training error rate.
(W,J,err) = train(W, X, Y, lr, n, iterations, lambd)
print(err)

# Plot the cost history over all iterations.
plt.figure()
plt.plot(range(iterations), J)

# Write predictions for the test set to predict.csv.
predict(W)

最后效果大概在误差 0.035 左右,公榜正确率为 0.96


机器学习实验5
http://jrhu0048.github.io/2024/10/10/ji-qi-xue-xi/ji-qi-xue-xi-shi-yan-5/
作者
JR.HU
发布于
2024年10月10日
更新于
2024年10月15日
许可协议