机器学习实验5

首先尝试使用粒子群算法求解正则项的系数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import numpy as np
import matplotlib.pyplot as plt

def read_data(addr):
    """Load a CSV file and build a polynomial feature matrix.

    Each of the first 8 columns is expanded into powers 0..4 and appended
    after an initial bias column of ones.  NOTE(review): power 0 produces a
    constant column per feature, so the design matrix carries several
    redundant bias columns — confirm this is intentional.

    Returns (X, Y, n); Y is None unless the path contains "train".
    """
    raw = np.loadtxt(addr, delimiter=',')
    n = raw.shape[0]
    cols = [np.ones([n, 1])]
    for feat in range(8):
        for p in range(5):
            cols.append(np.expand_dims(np.power(raw[:, feat], p), axis=1))
    X = np.concatenate(cols, axis=1)
    Y = np.expand_dims(raw[:, -1], axis=1) if "train" in addr else None
    return (X, Y, n)

# def cost_gradient(W, X, Y, n, lambd): # L2 正则
# Y_hat = 1 / (1 + np.exp(-X @ W))
# cost_first_term = -np.sum(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)) / (2 * n)
# reg_term = lambd / 2 * np.sum(W[1:] ** 2)
# j = cost_first_term + reg_term
# G = np.zeros_like(W)
# G = (X.T @ (Y_hat - Y)) / n
# G[1:] += lambd * W[1:]
# return (j, G)

def cost_gradient(W, X, Y, n, lambd):  # L1 regularization
    """Cross-entropy cost with an L1 penalty, and its (sub)gradient.

    Args:
        W: (d, 1) weight vector; W[0] is the bias and is not regularized.
        X: (n, d) design matrix.
        Y: (n, 1) binary labels in {0, 1}.
        n: number of samples.
        lambd: L1 regularization strength.

    Returns:
        (j, G): scalar cost and the (d, 1) gradient of that cost.
    """
    Y_hat = 1 / (1 + np.exp(-X @ W))
    # BUGFIX: the cost was divided by 2n while the gradient used n, so J did
    # not measure the function actually being minimized; use /n for both.
    data_cost = -np.sum(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)) / n
    # L1 penalty on everything except the bias term.
    reg_cost = lambd * np.sum(np.abs(W[1:]))
    j = data_cost + reg_cost
    # (removed a dead `G = np.zeros_like(W)` that was immediately overwritten)
    G = (X.T @ (Y_hat - Y)) / n
    # Subgradient of the L1 penalty (np.sign gives 0 at W == 0).
    G[1:] += lambd * np.sign(W[1:])
    return (j, G)

def train(W, X, Y, lr, n, iterations, lambd):
    """Run fixed-step gradient descent for `iterations` steps.

    Returns (W, J, err): final weights, the per-iteration cost history,
    and the final training error rate.
    """
    J = np.zeros([iterations, 1])
    for i in range(iterations):
        (J[i], G) = cost_gradient(W, X, Y, n, lambd)
        W = W - lr * G
    # Evaluate the error once, after training: computing it inside the loop
    # adds a full O(n*d) pass per step without changing the returned value.
    err = error(W, X, Y)
    return (W, J, err)

def error(W, X, Y):
    """Misclassification rate of the logistic model W on (X, Y).

    BUGFIX: the original thresholded with < 0.5 and > 0.5 separately,
    leaving samples with Y_hat exactly 0.5 unclassified (and therefore
    always counted as errors); >= 0.5 maps the boundary to class 1.
    """
    Y_hat = 1 / (1 + np.exp(-X @ W))
    predictions = (Y_hat >= 0.5).astype(float)
    return 1 - np.mean(np.equal(predictions, Y))

def predict(W):
    """Classify the test set with weights W and write predict.csv."""
    (X, _, n) = read_data("test_data.csv")
    Y_hat = 1 / (1 + np.exp(-X @ W))
    # >= 0.5 so the decision boundary itself is classified (as class 1)
    # instead of being left at 0.5 and written out as a probability.
    labels = (Y_hat >= 0.5).astype(float)
    # Use the actual sample count rather than a hard-coded 200.
    idx = np.expand_dims(np.arange(1, n + 1), axis=1)
    np.savetxt("predict.csv", np.concatenate([idx, labels], axis=1),
               header="Index,ID", comments='', delimiter=',')

class Particle:
    """One PSO particle: a candidate position with its velocity and the
    best position/cost it has personally observed so far."""

    def __init__(self, num_dimensions):
        # Small positive start, since the position encodes a lambda value.
        self.position = np.random.uniform(0, 0.05, size=num_dimensions)
        self.velocity = np.random.uniform(-0.001, 0.001, size=num_dimensions)
        self.best_position = self.position.copy()
        self.best_cost = float('inf')

    def update_position(self):
        # Take one step along the current velocity.
        self.position = self.position + self.velocity

    def update_best(self, cost_function):
        # Adopt the current position if it beats the personal best.
        cost = cost_function(self.position)
        if cost < self.best_cost:
            self.best_position = self.position.copy()
            self.best_cost = cost

def particle_swarm_optimization(X, Y, n):
    """Search for the L1 regularization strength lambda with PSO.

    Each particle's 1-D position is a candidate lambda; its cost is the
    training error of a logistic model trained with that lambda.

    Returns (best_lambda, best_error).
    """
    swarm_size = 10
    max_iters = 30
    dims = 1
    inertia, c_personal, c_global = 0.5, 1.5, 1.5

    swarm = [Particle(dims) for _ in range(swarm_size)]
    global_best_position = swarm[0].best_position.copy()
    global_best_cost = float('inf')

    def cost_function(lambd):
        # Train a fresh model and score the candidate by training error.
        W = np.random.random([X.shape[1], 1])
        (W, J, err) = train(W, X, Y, 0.0001, n, 100000, lambd)
        return err

    for _ in range(max_iters):
        for particle in swarm:
            # Standard PSO velocity/position update.
            r1, r2 = np.random.random(), np.random.random()
            particle.velocity = (
                inertia * particle.velocity
                + c_personal * r1 * (particle.best_position - particle.position)
                + c_global * r2 * (global_best_position - particle.position)
            )
            particle.update_position()
            # Refresh the personal best, then the global best.
            particle.update_best(cost_function)
            print("当前粒子位置:", particle.best_position)
            print("当前粒子误差:", particle.best_cost)
            if particle.best_cost < global_best_cost:
                global_best_position = particle.best_position.copy()
                global_best_cost = particle.best_cost

    return global_best_position[0], global_best_cost

# Load the training data and search for the best regularization strength.
X, Y, n = read_data("train.csv")
best_lambda, best_cost = particle_swarm_optimization(X, Y, n)
print(f"Best lambda: {best_lambda}, Best cost: {best_cost}")

知道了大概的位置后,尝试带入构造好的模型进行求解

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt

def read_data(addr):
    """Read a CSV file and expand the first 8 columns into powers 0..4.

    Returns (X, Y, n); Y is only populated when the path contains "train".
    """
    data = np.loadtxt(addr, delimiter=',')
    n = data.shape[0]

    ###### You may modify this section to change the model
    feature_blocks = [np.ones([n, 1])]
    feature_blocks += [
        np.expand_dims(np.power(data[:, i], power), axis=1)
        for i in range(8)
        for power in range(5)
    ]
    X = np.concatenate(feature_blocks, axis=1)

    Y = None
    if "train" in addr:
        Y = np.expand_dims(data[:, -1], axis=1)

    return (X, Y, n)

def cost_gradient(W, X, Y, n, lambd):  # L1 regularization
    """L1-regularized cross-entropy cost and its (sub)gradient.

    Args:
        W: (d, 1) weights; W[0] is the unregularized bias.
        X: (n, d) design matrix.
        Y: (n, 1) binary labels in {0, 1}.
        n: sample count.
        lambd: L1 regularization strength.

    Returns:
        (j, G): scalar cost and the matching (d, 1) gradient.
    """
    Y_hat = 1 / (1 + np.exp(-X @ W))
    # BUGFIX: cost used /(2n) while the gradient used /n; normalize both
    # by n so J tracks the function the gradient actually descends.
    data_cost = -np.sum(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)) / n
    # L1 penalty, bias excluded.
    reg_cost = lambd * np.sum(np.abs(W[1:]))
    j = data_cost + reg_cost
    G = (X.T @ (Y_hat - Y)) / n
    # L1 subgradient (np.sign is 0 at W == 0).
    G[1:] += lambd * np.sign(W[1:])
    return (j, G)


def train(W, X, Y, lr, n, iterations, lambd):
    """Gradient descent with a fixed learning rate for `iterations` steps.

    Returns (W, J, err): final weights, per-iteration cost history, and
    the final training error rate.
    """
    J = np.zeros([iterations, 1])

    for i in range(iterations):
        (J[i], G) = cost_gradient(W, X, Y, n, lambd)
        W = W - lr * G

    # Error is evaluated once after the loop; per-iteration evaluation
    # would cost an extra O(n*d) pass per step for the same return value.
    err = error(W, X, Y)

    return (W, J, err)

def error(W, X, Y):
    """Misclassification rate of the logistic model W on (X, Y).

    BUGFIX: thresholding with separate < 0.5 / > 0.5 left Y_hat == 0.5
    unclassified (always counted wrong); >= 0.5 assigns the boundary to
    class 1.
    """
    Y_hat = 1 / (1 + np.exp(-X @ W))
    predictions = (Y_hat >= 0.5).astype(float)
    return 1 - np.mean(np.equal(predictions, Y))

def predict(W):
    """Classify the test set with weights W and write predict.csv."""
    (X, _, n) = read_data("test_data.csv")

    Y_hat = 1 / (1 + np.exp(-X @ W))
    # >= 0.5 so the boundary value is classified (as 1) instead of being
    # left at 0.5 and written to the CSV as a raw probability.
    labels = (Y_hat >= 0.5).astype(float)

    # Index by the actual sample count instead of a hard-coded 200.
    idx = np.expand_dims(np.arange(1, n + 1), axis=1)
    np.savetxt("predict.csv", np.concatenate([idx, labels], axis=1),
               header="Index,ID", comments='', delimiter=',')

# Final training run with the lambda found by the PSO search above.
iterations = 100000 ###### Training loops
lr = 0.0001 ###### Learning rate
lambd = 0.0349708 # obtained via the particle swarm optimization search

# Build the polynomial feature matrix and start from random weights.
(X, Y, n) = read_data("train.csv")
W = np.random.random([X.shape[1], 1])

# Train with L1-regularized gradient descent; err is the training error rate.
(W,J,err) = train(W, X, Y, lr, n, iterations, lambd)
print(err)

# Plot the cost history over all iterations.
plt.figure()
plt.plot(range(iterations), J)

# Write predictions for the test set to predict.csv.
predict(W)

最后效果大概在误差 0.035 左右,公榜正确率为 0.96


机器学习实验5
http://jrhu0048.github.io/2024/10/10/ji-qi-xue-xi/ji-qi-xue-xi-shi-yan-5/
作者
JR.HU
发布于
2024年10月10日
更新于
2024年10月15日
许可协议