一、什么是深度学习?
1. 深度学习的定义
深度学习是机器学习的一个子领域,它受人脑神经元连接方式的启发,使用多层非线性变换来学习数据的层次化表示。其核心思想是让机器自动地从数据中学习特征表示,而不是依赖人工设计的特征。
# 深度学习与传统机器学习的对比
import matplotlib.pyplot as plt
# 可视化对比
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
# 传统机器学习
axes[0].text(0.5, 0.5, '特征工程 → 模型训练\n\n人工特征设计\n+ 传统算法\n= 有限表现力',
ha='center', va='center', fontsize=12, fontweight='bold')
axes[0].set_title('传统机器学习', fontsize=14, fontweight='bold')
axes[0].axis('off')
# 深度学习
axes[1].text(0.5, 0.5, '原始数据 → 深度学习\n\n自动特征学习\n+ 多层神经网络\n= 强大表现力',
ha='center', va='center', fontsize=12, fontweight='bold')
axes[1].set_title('深度学习', fontsize=14, fontweight='bold')
axes[1].axis('off')
plt.suptitle('传统机器学习 vs 深度学习', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()
2. 深度学习的历史里程碑
# 深度学习发展时间线
timeline_data = {
'1943': 'McCulloch & Pitts提出人工神经元模型',
'1958': 'Frank Rosenblatt发明感知机',
'1969': 'Minsky & Papert指出感知机的局限性',
    '1986': 'Rumelhart、Hinton与Williams系统阐述并推广反向传播算法',
'1998': 'Yann LeCun提出LeNet-5(卷积神经网络)',
'2006': 'Geoffrey Hinton提出深度信念网络',
'2012': 'AlexNet在ImageNet比赛中大获成功',
'2014': '生成对抗网络(GAN)被提出',
'2015': 'ResNet解决深度网络训练难题',
'2017': 'Transformer架构革命性突破',
'2020': 'GPT-3展现强大语言能力',
'2022': '扩散模型引领图像生成革命'
}
print("深度学习发展里程碑:")
for year, event in timeline_data.items():
print(f"{year}: {event}")
二、神经网络基础
1. 人工神经元模型
import numpy as np
class ArtificialNeuron:
"""实现基本的人工神经元"""
def __init__(self, input_size, activation='sigmoid'):
"""
初始化神经元
Args:
input_size: 输入特征数量
activation: 激活函数类型
"""
# 初始化权重和偏置
self.weights = np.random.randn(input_size) * 0.1
self.bias = np.random.randn() * 0.1
self.activation_type = activation
def activate(self, x):
"""前向传播计算"""
# 线性组合
z = np.dot(x, self.weights) + self.bias
# 应用激活函数
if self.activation_type == 'sigmoid':
return self._sigmoid(z)
elif self.activation_type == 'relu':
return self._relu(z)
elif self.activation_type == 'tanh':
return self._tanh(z)
else:
return z # 线性激活
def _sigmoid(self, x):
"""Sigmoid激活函数"""
return 1 / (1 + np.exp(-x))
def _relu(self, x):
"""ReLU激活函数"""
return np.maximum(0, x)
def _tanh(self, x):
"""Tanh激活函数"""
return np.tanh(x)
def __call__(self, x):
"""使神经元可调用"""
return self.activate(x)
# 使用示例
neuron = ArtificialNeuron(3, activation='sigmoid')
inputs = np.array([0.5, -0.2, 0.8])
output = neuron(inputs)
print(f"神经元输出: {output:.4f}")
2. 常用激活函数及其特性
import numpy as np
import matplotlib.pyplot as plt
def plot_activation_functions():
"""绘制常用激活函数"""
x = np.linspace(-5, 5, 100)
# 定义激活函数
functions = {
'Sigmoid': lambda x: 1 / (1 + np.exp(-x)),
'ReLU': lambda x: np.maximum(0, x),
'Leaky ReLU': lambda x: np.where(x > 0, x, 0.01 * x),
'Tanh': lambda x: np.tanh(x),
'Swish': lambda x: x * (1 / (1 + np.exp(-x))),
'ELU': lambda x: np.where(x > 0, x, np.exp(x) - 1),
'Softplus': lambda x: np.log(1 + np.exp(x))
}
# 创建子图
fig, axes = plt.subplots(2, 4, figsize=(15, 8))
axes = axes.ravel()
for idx, (name, func) in enumerate(functions.items()):
ax = axes[idx]
y = func(x)
ax.plot(x, y, 'b-', linewidth=2)
ax.axhline(y=0, color='k', linestyle='-', alpha=0.3)
ax.axvline(x=0, color='k', linestyle='-', alpha=0.3)
ax.grid(True, alpha=0.3)
ax.set_title(name, fontsize=12, fontweight='bold')
# 添加特性标注
if name == 'Sigmoid':
ax.text(0, 0.5, '输出范围: (0,1)\n平滑、可微\n存在梯度消失问题',
fontsize=9, ha='center')
elif name == 'ReLU':
ax.text(0, 2.5, '稀疏激活\n计算高效\n存在"死神经元"问题',
fontsize=9, ha='center')
    # 隐藏多余的空白子图(共8个子图位,只有7个激活函数)
    axes[-1].axis('off')
    # 调整布局
    plt.tight_layout()
    plt.suptitle('常用激活函数', fontsize=16, fontweight='bold', y=1.02)
plt.show()
# 绘制激活函数
plot_activation_functions()
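上图中为Sigmoid和ReLU标注了"梯度消失"和"死神经元"问题。作为补充,下面用几行NumPy代码粗略演示这两个现象:Sigmoid的导数最大只有0.25,且在大输入下迅速趋近于0;ReLU在负区间导数恒为0。数值仅作示意。
import numpy as np

def sigmoid_grad(z):
    """Sigmoid的导数: s(z) * (1 - s(z)),最大值仅为0.25"""
    s = 1 / (1 + np.exp(-z))
    return s * (1 - s)

def relu_grad(z):
    """ReLU的导数:正区间为1,负区间为0(对应"死神经元"问题)"""
    return 1.0 if z > 0 else 0.0

for z in [0.0, 2.0, 5.0, 10.0, -5.0]:
    print(f"z = {z:>5}: sigmoid' = {sigmoid_grad(z):.6f}, relu' = {relu_grad(z):.0f}")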
三、深度学习架构类型
1. 前馈神经网络(FNN)
class FeedForwardNeuralNetwork:
"""实现简单的前馈神经网络"""
def __init__(self, layer_sizes, activations=None):
"""
初始化神经网络
Args:
layer_sizes: 每层神经元数量列表,如[10, 20, 5, 1]
activations: 每层的激活函数列表
"""
self.layer_sizes = layer_sizes
self.num_layers = len(layer_sizes) - 1
# 初始化权重和偏置
self.weights = []
self.biases = []
for i in range(self.num_layers):
# Xavier/Glorot初始化
scale = np.sqrt(2.0 / (layer_sizes[i] + layer_sizes[i+1]))
w = np.random.randn(layer_sizes[i], layer_sizes[i+1]) * scale
b = np.zeros(layer_sizes[i+1])
self.weights.append(w)
self.biases.append(b)
# 设置激活函数
if activations is None:
activations = ['relu'] * (self.num_layers - 1) + ['sigmoid']
self.activations = activations
def forward(self, X):
"""前向传播"""
self.activations_history = [X]
self.z_history = []
a = X
for i in range(self.num_layers):
z = np.dot(a, self.weights[i]) + self.biases[i]
self.z_history.append(z)
if self.activations[i] == 'sigmoid':
a = 1 / (1 + np.exp(-z))
elif self.activations[i] == 'relu':
a = np.maximum(0, z)
elif self.activations[i] == 'tanh':
a = np.tanh(z)
else:
a = z # 线性激活
self.activations_history.append(a)
return a
def predict(self, X):
"""预测"""
return self.forward(X)
def summary(self):
"""打印网络结构信息"""
print("=" * 50)
print("神经网络结构摘要")
print("=" * 50)
print(f"层数: {self.num_layers}")
print(f"神经元配置: {self.layer_sizes}")
print(f"激活函数: {self.activations}")
total_params = 0
for i, (w, b) in enumerate(zip(self.weights, self.biases)):
params = w.size + b.size
total_params += params
print(f"层 {i+1}: {w.shape[0]} → {w.shape[1]} "
f"(权重: {w.shape}, 偏置: {b.shape}) "
f"参数数: {params:,}")
print(f"总参数数: {total_params:,}")
print("=" * 50)
# 创建并测试神经网络
nn = FeedForwardNeuralNetwork(
layer_sizes=[10, 20, 15, 1],
activations=['relu', 'relu', 'sigmoid']
)
nn.summary()
# 测试前向传播
X_test = np.random.randn(5, 10) # 5个样本,10个特征
output = nn.predict(X_test)
print(f"\n输入形状: {X_test.shape}")
print(f"输出形状: {output.shape}")
2. 卷积神经网络(CNN)
class SimpleCNN:
"""实现简化的卷积神经网络"""
def __init__(self, input_shape=(28, 28, 1)):
"""初始化CNN"""
self.input_shape = input_shape
self.layers = []
def add_conv_layer(self, filters=32, kernel_size=3, activation='relu'):
"""添加卷积层"""
layer_info = {
'type': 'conv',
'filters': filters,
'kernel_size': kernel_size,
'activation': activation
}
self.layers.append(layer_info)
return self
def add_pooling_layer(self, pool_size=2, stride=2):
"""添加池化层"""
layer_info = {
'type': 'pool',
'pool_size': pool_size,
'stride': stride
}
self.layers.append(layer_info)
return self
def add_dense_layer(self, units, activation='relu'):
"""添加全连接层"""
layer_info = {
'type': 'dense',
'units': units,
'activation': activation
}
self.layers.append(layer_info)
return self
def add_flatten_layer(self):
"""添加展平层"""
self.layers.append({'type': 'flatten'})
return self
def forward(self, X):
"""前向传播(简化版)"""
# 模拟卷积操作
output = X
for layer in self.layers:
if layer['type'] == 'conv':
# 简化的卷积操作
output = self._conv_forward(output, layer)
elif layer['type'] == 'pool':
# 简化的池化操作
output = self._pool_forward(output, layer)
elif layer['type'] == 'flatten':
# 展平操作
output = output.reshape(output.shape[0], -1)
elif layer['type'] == 'dense':
# 全连接层
output = self._dense_forward(output, layer)
return output
def _conv_forward(self, X, layer_params):
"""简化的卷积前向传播"""
# 在实际实现中,这里会有实际的卷积计算
# 这里我们返回一个简化的结果
batch_size, height, width, channels = X.shape
filters = layer_params['filters']
# 简化的输出形状计算
kernel_size = layer_params['kernel_size']
output_height = height - kernel_size + 1
output_width = width - kernel_size + 1
return np.random.randn(batch_size, output_height, output_width, filters)
def _pool_forward(self, X, layer_params):
"""简化的池化前向传播"""
batch_size, height, width, channels = X.shape
pool_size = layer_params['pool_size']
# 简化的输出形状计算
output_height = height // pool_size
output_width = width // pool_size
return np.random.randn(batch_size, output_height, output_width, channels)
def _dense_forward(self, X, layer_params):
"""简化的全连接前向传播"""
units = layer_params['units']
activation = layer_params['activation']
# 线性变换
output = np.random.randn(X.shape[0], units)
        # 激活函数
        if activation == 'relu':
            output = np.maximum(0, output)
        elif activation == 'sigmoid':
            output = 1 / (1 + np.exp(-output))
        elif activation == 'softmax':
            # 与add_dense_layer(units=10, activation='softmax')对应
            exp_out = np.exp(output - np.max(output, axis=1, keepdims=True))
            output = exp_out / np.sum(exp_out, axis=1, keepdims=True)
        return output
def summary(self):
"""打印网络结构信息"""
print("=" * 60)
print("卷积神经网络结构摘要")
print("=" * 60)
print(f"输入形状: {self.input_shape}")
current_shape = self.input_shape
for i, layer in enumerate(self.layers):
layer_type = layer['type'].upper()
if layer_type == 'CONV':
filters = layer['filters']
kernel_size = layer['kernel_size']
print(f"层 {i+1}: 卷积层 "
f"({current_shape[0]}x{current_shape[1]}x{current_shape[2]}) → "
f"过滤器: {filters}, 核大小: {kernel_size}x{kernel_size}")
# 更新形状(简化计算)
current_shape = (
current_shape[0] - kernel_size + 1,
current_shape[1] - kernel_size + 1,
filters
)
elif layer_type == 'POOL':
pool_size = layer['pool_size']
print(f"层 {i+1}: 池化层 "
f"({current_shape[0]}x{current_shape[1]}x{current_shape[2]}) → "
f"池化大小: {pool_size}x{pool_size}")
# 更新形状
current_shape = (
current_shape[0] // pool_size,
current_shape[1] // pool_size,
current_shape[2]
)
elif layer_type == 'FLATTEN':
flattened_size = np.prod(current_shape)
print(f"层 {i+1}: 展平层 "
f"({current_shape[0]}x{current_shape[1]}x{current_shape[2]}) → "
f"({flattened_size},)")
current_shape = (flattened_size,)
elif layer_type == 'DENSE':
units = layer['units']
print(f"层 {i+1}: 全连接层 "
f"({current_shape[0] if isinstance(current_shape, tuple) else current_shape} ) → "
f"({units},)")
current_shape = (units,)
print("=" * 60)
# 创建并测试CNN
cnn = SimpleCNN(input_shape=(28, 28, 1))
cnn.add_conv_layer(filters=32, kernel_size=3, activation='relu')
cnn.add_pooling_layer(pool_size=2)
cnn.add_conv_layer(filters=64, kernel_size=3, activation='relu')
cnn.add_pooling_layer(pool_size=2)
cnn.add_flatten_layer()
cnn.add_dense_layer(units=128, activation='relu')
cnn.add_dense_layer(units=10, activation='softmax')
cnn.summary()
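需要说明的是,上面SimpleCNN的_conv_forward只是返回随机数的占位实现。下面给出一个用NumPy手写的最小单通道2D卷积示意(valid模式、步长为1、无padding),只为说明卷积"滑动窗口 + 逐元素相乘求和"的计算过程,并非高效实现;其中的边缘检测核仅作演示。
import numpy as np

def conv2d_single_channel(image, kernel):
    """最小的单通道2D卷积示意:valid模式,步长为1"""
    ih, iw = image.shape
    kh, kw = kernel.shape
    oh, ow = ih - kh + 1, iw - kw + 1
    output = np.zeros((oh, ow))
    for i in range(oh):
        for j in range(ow):
            # 取出与卷积核对齐的局部区域,逐元素相乘后求和
            region = image[i:i+kh, j:j+kw]
            output[i, j] = np.sum(region * kernel)
    return output

# 用一个简单的竖直边缘检测核做演示
image = np.random.rand(6, 6)
edge_kernel = np.array([[1, 0, -1],
                        [1, 0, -1],
                        [1, 0, -1]])
feature_map = conv2d_single_channel(image, edge_kernel)
print(f"输入形状: {image.shape}, 输出形状: {feature_map.shape}")  # (6, 6) -> (4, 4)
输出尺寸为 (H - K + 1) × (W - K + 1),与上面summary()中的形状计算一致。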
3. 循环神经网络(RNN)
class SimpleRNN:
"""实现简化的循环神经网络"""
def __init__(self, input_size, hidden_size, output_size):
"""初始化RNN"""
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
# 初始化参数
# 输入到隐藏层的权重
self.W_xh = np.random.randn(input_size, hidden_size) * 0.01
# 隐藏层到隐藏层的权重
self.W_hh = np.random.randn(hidden_size, hidden_size) * 0.01
# 隐藏层到输出层的权重
self.W_hy = np.random.randn(hidden_size, output_size) * 0.01
# 偏置项
self.b_h = np.zeros(hidden_size)
self.b_y = np.zeros(output_size)
# 缓存
self.history = {}
def forward(self, X):
"""
前向传播
Args:
X: 输入序列,形状为(seq_length, batch_size, input_size)
Returns:
输出序列
"""
seq_length, batch_size, _ = X.shape
# 初始化隐藏状态
h = np.zeros((batch_size, self.hidden_size))
# 存储历史值
self.history['h_states'] = [h]
self.history['inputs'] = X
# 存储输出
outputs = []
for t in range(seq_length):
# 当前时间步的输入
x_t = X[t]
# 更新隐藏状态
h = np.tanh(np.dot(x_t, self.W_xh) + np.dot(h, self.W_hh) + self.b_h)
self.history['h_states'].append(h)
# 计算输出
y_t = np.dot(h, self.W_hy) + self.b_y
outputs.append(y_t)
# 堆叠所有时间步的输出
return np.stack(outputs)
def backward(self, d_outputs):
"""反向传播(简化版)"""
# 在实际实现中,这里会有详细的反向传播计算
# 这里我们返回梯度的简化版本
gradients = {
'dW_xh': np.random.randn(*self.W_xh.shape) * 0.01,
'dW_hh': np.random.randn(*self.W_hh.shape) * 0.01,
'dW_hy': np.random.randn(*self.W_hy.shape) * 0.01,
'db_h': np.random.randn(*self.b_h.shape) * 0.01,
'db_y': np.random.randn(*self.b_y.shape) * 0.01
}
return gradients
def summary(self):
"""打印网络结构信息"""
print("=" * 50)
print("循环神经网络结构摘要")
print("=" * 50)
print(f"输入大小: {self.input_size}")
print(f"隐藏层大小: {self.hidden_size}")
print(f"输出大小: {self.output_size}")
print(f"总参数数: {self.W_xh.size + self.W_hh.size + self.W_hy.size + self.b_h.size + self.b_y.size:,}")
print("=" * 50)
def generate_sequence(self, seed, length=20):
"""生成序列(简化版)"""
# 在实际实现中,这里会有序列生成逻辑
generated = [seed]
h = np.zeros((1, self.hidden_size))
for i in range(length):
# 简化的序列生成
x = generated[-1]
h = np.tanh(np.dot(x, self.W_xh) + np.dot(h, self.W_hh) + self.b_h)
y = np.dot(h, self.W_hy) + self.b_y
# 添加一些随机性
next_item = y + np.random.randn(*y.shape) * 0.1
generated.append(next_item)
return np.stack(generated)
# 创建并测试RNN
rnn = SimpleRNN(input_size=10, hidden_size=20, output_size=5)
rnn.summary()
# 测试前向传播
seq_length = 15
batch_size = 8
X_test = np.random.randn(seq_length, batch_size, 10)
output = rnn.forward(X_test)
print(f"\n输入形状: {X_test.shape}")
print(f"输出形状: {output.shape}")
四、深度学习框架比较
1. 主要框架对比
import pandas as pd
# 创建框架对比表格
frameworks_data = {
'框架': ['TensorFlow', 'PyTorch', 'Keras', 'MXNet', 'JAX', 'PaddlePaddle'],
'发布年份': [2015, 2016, 2015, 2015, 2018, 2016],
'开发者': ['Google', 'Facebook', 'François Chollet', 'Amazon', 'Google', '百度'],
'主要语言': ['Python/C++', 'Python/C++', 'Python', 'Python/C++', 'Python', 'Python'],
'易用性': [3, 5, 5, 3, 4, 4],
'灵活性': [5, 5, 3, 5, 5, 4],
'部署能力': [5, 4, 3, 5, 4, 5],
'社区规模': [5, 5, 4, 3, 3, 3],
'主要优势': [
'生产部署、生态完善',
'动态图、研究友好',
'API简洁、快速原型',
'分布式训练、多语言',
'函数式、自动微分',
'中文文档、国产框架'
]
}
frameworks_df = pd.DataFrame(frameworks_data)
# 格式化输出
print("深度学习框架对比")
print("=" * 100)
print(frameworks_df.to_string(index=False))
print("\n" + "=" * 100)
# 创建选择指南
print("\n选择指南:")
print("1. TensorFlow: 适合生产部署,企业级应用,需要强大生态系统")
print("2. PyTorch: 适合学术研究,快速原型开发,动态计算图")
print("3. Keras: 适合初学者,快速上手,高级API")
print("4. MXNet: 适合分布式训练,多语言支持")
print("5. JAX: 适合函数式编程,数值计算,自动微分")
print("6. PaddlePaddle: 国产框架,中文文档丰富,工业级应用")
2. 框架安装与简单示例
def get_framework_setup_guide():
"""获取框架安装和使用指南"""
guides = {
'TensorFlow': {
'安装': 'pip install tensorflow',
'导入': 'import tensorflow as tf',
'简单示例': '''# 创建简单的神经网络
model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='relu'),
tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])''',
'最新版本': '2.x (2023年)'
},
'PyTorch': {
'安装': 'pip install torch torchvision',
'导入': 'import torch',
'简单示例': '''# 创建简单的神经网络
class Net(torch.nn.Module):
def __init__(self):
super().__init__()
self.fc1 = torch.nn.Linear(784, 64)
self.fc2 = torch.nn.Linear(64, 10)
def forward(self, x):
x = torch.relu(self.fc1(x))
x = self.fc2(x)
return x''',
'最新版本': '2.0+ (2023年)'
},
'Keras': {
'安装': 'pip install keras',
'导入': 'from keras import layers, models',
'简单示例': '''# 创建简单的神经网络
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(784,)))
model.add(layers.Dense(10, activation='softmax'))
model.compile(optimizer='rmsprop',
loss='categorical_crossentropy',
metrics=['accuracy'])''',
'最新版本': '3.0 (2023年)'
}
}
return guides
# 打印框架指南
guides = get_framework_setup_guide()
for framework, info in guides.items():
print(f"\n{'='*60}")
print(f"{framework} 快速指南")
print(f"{'='*60}")
for key, value in info.items():
print(f"{key}: {value}")
五、深度学习的关键概念
1. 损失函数
import numpy as np
import matplotlib.pyplot as plt
class LossFunctions:
"""实现常见的损失函数"""
@staticmethod
def mse(y_true, y_pred):
"""均方误差 (Mean Squared Error)"""
return np.mean((y_true - y_pred) ** 2)
@staticmethod
def mae(y_true, y_pred):
"""平均绝对误差 (Mean Absolute Error)"""
return np.mean(np.abs(y_true - y_pred))
@staticmethod
def binary_crossentropy(y_true, y_pred, epsilon=1e-7):
"""二分类交叉熵"""
y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
@staticmethod
def categorical_crossentropy(y_true, y_pred, epsilon=1e-7):
"""多分类交叉熵"""
y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
return -np.mean(np.sum(y_true * np.log(y_pred), axis=-1))
@staticmethod
def huber_loss(y_true, y_pred, delta=1.0):
"""Huber损失 (结合MSE和MAE的优点)"""
error = y_true - y_pred
abs_error = np.abs(error)
quadratic = np.minimum(abs_error, delta)
linear = abs_error - quadratic
return np.mean(0.5 * quadratic ** 2 + delta * linear)
@staticmethod
def contrastive_loss(y_true, y_pred, margin=1.0):
"""对比损失 (用于度量学习)"""
positive_distance = y_true * y_pred ** 2
negative_distance = (1 - y_true) * np.maximum(margin - y_pred, 0) ** 2
return np.mean(positive_distance + negative_distance)
@staticmethod
def visualize_loss_functions():
"""可视化损失函数"""
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.ravel()
x = np.linspace(-3, 3, 100)
y_true = 0 # 假设真实值为0
# MSE
y_pred = x
loss = (y_true - y_pred) ** 2
axes[0].plot(x, loss, 'b-', linewidth=2)
axes[0].set_title('MSE (均方误差)', fontweight='bold')
axes[0].set_xlabel('预测值')
axes[0].set_ylabel('损失')
axes[0].grid(True, alpha=0.3)
# MAE
loss = np.abs(y_true - y_pred)
axes[1].plot(x, loss, 'r-', linewidth=2)
axes[1].set_title('MAE (平均绝对误差)', fontweight='bold')
axes[1].set_xlabel('预测值')
axes[1].grid(True, alpha=0.3)
# Huber Loss
delta = 1.0
loss = np.where(np.abs(x) <= delta, 0.5 * x ** 2, delta * (np.abs(x) - 0.5 * delta))
axes[2].plot(x, loss, 'g-', linewidth=2)
axes[2].set_title('Huber损失', fontweight='bold')
axes[2].set_xlabel('预测值')
axes[2].grid(True, alpha=0.3)
# Binary Crossentropy (假设真实值为1)
y_true_binary = 1
y_pred_binary = 1 / (1 + np.exp(-x)) # Sigmoid变换
loss = - (y_true_binary * np.log(y_pred_binary) +
(1 - y_true_binary) * np.log(1 - y_pred_binary))
axes[3].plot(x, loss, 'm-', linewidth=2)
axes[3].set_title('二分类交叉熵 (y_true=1)', fontweight='bold')
axes[3].set_xlabel('预测值 (z)')
axes[3].grid(True, alpha=0.3)
# 对比损失
distance = np.abs(x)
margin = 1.0
y_true_contrastive = np.ones_like(x) # 假设是正样本对
loss = y_true_contrastive * distance ** 2
axes[4].plot(x, loss, 'c-', linewidth=2, label='正样本')
y_true_contrastive = np.zeros_like(x) # 假设是负样本对
loss = (1 - y_true_contrastive) * np.maximum(margin - distance, 0) ** 2
axes[4].plot(x, loss, 'y-', linewidth=2, label='负样本')
axes[4].set_title('对比损失', fontweight='bold')
axes[4].set_xlabel('距离')
axes[4].legend()
axes[4].grid(True, alpha=0.3)
# 损失函数应用场景
axes[5].axis('off')
axes[5].text(0.5, 0.5,
'损失函数选择指南:\n\n'
'• MSE: 回归问题,对异常值敏感\n'
'• MAE: 回归问题,对异常值鲁棒\n'
'• Huber: 结合MSE和MAE的优点\n'
'• Binary CE: 二分类问题\n'
'• Categorical CE: 多分类问题\n'
'• Contrastive: 度量学习,相似度计算',
ha='center', va='center', fontsize=11,
bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
plt.suptitle('深度学习常用损失函数', fontsize=16, fontweight='bold', y=1.02)
plt.tight_layout()
plt.show()
# 测试损失函数
loss_funcs = LossFunctions()
# 测试数据
y_true = np.array([1, 0, 1, 0])
y_pred = np.array([0.9, 0.2, 0.8, 0.3])
print("损失函数计算结果:")
print(f"MSE: {loss_funcs.mse(y_true, y_pred):.4f}")
print(f"MAE: {loss_funcs.mae(y_true, y_pred):.4f}")
print(f"Binary Crossentropy: {loss_funcs.binary_crossentropy(y_true, y_pred):.4f}")
# 可视化损失函数
LossFunctions.visualize_loss_functions()
2. 优化器
class OptimizerComparison:
"""优化器比较和可视化"""
@staticmethod
def visualize_optimization_path():
"""可视化不同优化器的优化路径"""
# 定义测试函数 (Rosenbrock函数,有全局最小值)
def rosenbrock(x, y):
return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2
# 生成网格
x = np.linspace(-2, 2, 100)
y = np.linspace(-1, 3, 100)
X, Y = np.meshgrid(x, y)
Z = rosenbrock(X, Y)
# 优化器模拟
optimizers = {
'SGD': {
'lr': 0.01,
'momentum': 0.0
},
'SGD with Momentum': {
'lr': 0.01,
'momentum': 0.9
},
'Adam': {
'lr': 0.01,
'beta1': 0.9,
'beta2': 0.999
},
'RMSprop': {
'lr': 0.01,
'rho': 0.9
},
'Adagrad': {
'lr': 0.1
}
}
# 创建图形
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.ravel()
for idx, (name, params) in enumerate(optimizers.items()):
ax = axes[idx]
# 绘制等高线
ax.contour(X, Y, Z, levels=np.logspace(-1, 3, 20), alpha=0.5)
# 初始化参数
x_pos, y_pos = -1.5, 2.5
# 存储轨迹
trajectory = [(x_pos, y_pos)]
# 模拟优化过程
for step in range(100):
                # 计算Rosenbrock函数的梯度
                grad_x = -2 * (1 - x_pos) - 400 * x_pos * (y_pos - x_pos ** 2)
                grad_y = 200 * (y_pos - x_pos ** 2)
                # 梯度裁剪,避免SGD等在陡峭区域数值发散
                grad_x = np.clip(grad_x, -10, 10)
                grad_y = np.clip(grad_y, -10, 10)
                # 应用不同优化器更新规则
                if name == 'SGD':
                    x_pos -= params['lr'] * grad_x
                    y_pos -= params['lr'] * grad_y
                elif name == 'SGD with Momentum':
                    # 简化的动量实现
                    if step == 0:
                        vx, vy = 0, 0
                    vx = params['momentum'] * vx + params['lr'] * grad_x
                    vy = params['momentum'] * vy + params['lr'] * grad_y
                    x_pos -= vx
                    y_pos -= vy
                elif name == 'Adam':
                    # 简化的Adam实现
                    if step == 0:
                        m1x, m1y = 0, 0
                        m2x, m2y = 0, 0
                    m1x = params['beta1'] * m1x + (1 - params['beta1']) * grad_x
                    m1y = params['beta1'] * m1y + (1 - params['beta1']) * grad_y
                    m2x = params['beta2'] * m2x + (1 - params['beta2']) * grad_x ** 2
                    m2y = params['beta2'] * m2y + (1 - params['beta2']) * grad_y ** 2
                    # 偏置校正
                    m1x_hat = m1x / (1 - params['beta1'] ** (step + 1))
                    m1y_hat = m1y / (1 - params['beta1'] ** (step + 1))
                    m2x_hat = m2x / (1 - params['beta2'] ** (step + 1))
                    m2y_hat = m2y / (1 - params['beta2'] ** (step + 1))
                    x_pos -= params['lr'] * m1x_hat / (np.sqrt(m2x_hat) + 1e-8)
                    y_pos -= params['lr'] * m1y_hat / (np.sqrt(m2y_hat) + 1e-8)
                elif name == 'RMSprop':
                    # 简化的RMSprop实现:维护梯度平方的滑动平均
                    if step == 0:
                        sx, sy = 0, 0
                    sx = params['rho'] * sx + (1 - params['rho']) * grad_x ** 2
                    sy = params['rho'] * sy + (1 - params['rho']) * grad_y ** 2
                    x_pos -= params['lr'] * grad_x / (np.sqrt(sx) + 1e-8)
                    y_pos -= params['lr'] * grad_y / (np.sqrt(sy) + 1e-8)
                elif name == 'Adagrad':
                    # 简化的Adagrad实现:累积历史梯度平方
                    if step == 0:
                        gx_acc, gy_acc = 0, 0
                    gx_acc += grad_x ** 2
                    gy_acc += grad_y ** 2
                    x_pos -= params['lr'] * grad_x / (np.sqrt(gx_acc) + 1e-8)
                    y_pos -= params['lr'] * grad_y / (np.sqrt(gy_acc) + 1e-8)
trajectory.append((x_pos, y_pos))
# 绘制轨迹
trajectory = np.array(trajectory)
ax.plot(trajectory[:, 0], trajectory[:, 1], 'ro-', linewidth=2, markersize=3)
ax.plot(trajectory[0, 0], trajectory[0, 1], 'go', markersize=8, label='起点')
ax.plot(trajectory[-1, 0], trajectory[-1, 1], 'bo', markersize=8, label='终点')
ax.set_title(name, fontweight='bold')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.legend()
ax.grid(True, alpha=0.3)
# 优化器选择指南
axes[5].axis('off')
axes[5].text(0.5, 0.5,
'优化器选择指南:\n\n'
'• SGD: 简单,收敛慢,可能震荡\n'
'• SGD+Momentum: 减少震荡,加速收敛\n'
'• Adam: 自适应学习率,通常表现好\n'
'• RMSprop: 适合非平稳目标\n'
'• Adagrad: 适合稀疏数据\n\n'
'一般推荐: Adam (默认选择)',
ha='center', va='center', fontsize=11,
bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.5))
plt.suptitle('优化器比较:在Rosenbrock函数上的优化路径',
fontsize=16, fontweight='bold', y=1.02)
plt.tight_layout()
plt.show()
@staticmethod
def optimizer_summary():
"""优化器特性总结"""
optimizers_info = {
'SGD': {
'公式': 'θ = θ - η·∇J(θ)',
'优点': '简单,理论基础强',
'缺点': '收敛慢,易震荡',
'适用场景': '凸优化问题'
},
'Momentum': {
'公式': 'v = βv + η·∇J(θ)\nθ = θ - v',
'优点': '加速收敛,减少震荡',
'缺点': '需要调节β参数',
'适用场景': '深度学习训练'
},
'Adam': {
'公式': '复杂,结合动量和自适应学习率',
'优点': '自适应学习率,通常表现优异',
'缺点': '内存占用稍大',
'适用场景': '深度学习(默认推荐)'
},
'RMSprop': {
'公式': 'E[g²] = ρE[g²] + (1-ρ)g²\nθ = θ - η·g/√(E[g²]+ε)',
'优点': '自适应学习率,适合非平稳目标',
'缺点': '需要调节ρ参数',
'适用场景': 'RNN训练'
},
'Adagrad': {
'公式': 'G = G + g⊙g\nθ = θ - η·g/√(G+ε)',
'优点': '自适应学习率,适合稀疏数据',
'缺点': '学习率单调递减',
'适用场景': '稀疏特征学习'
}
}
print("=" * 80)
print("深度学习优化器总结")
print("=" * 80)
for name, info in optimizers_info.items():
print(f"\n{name}:")
print(f" 公式: {info['公式']}")
print(f" 优点: {info['优点']}")
print(f" 缺点: {info['缺点']}")
print(f" 适用场景: {info['适用场景']}")
print("\n" + "=" * 80)
# 显示优化器信息
OptimizerComparison.optimizer_summary()
OptimizerComparison.visualize_optimization_path()
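上面的总结中,Adam的公式一栏只写了"复杂"。下面按Kingma & Ba (2015)论文中的更新规则给出一个最小的Adam更新函数示意,并用它最小化一个简单的二次函数;函数名adam_step与示例超参数均为本文自拟,仅作演示。
import numpy as np

def adam_step(theta, grad, state, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    """对参数theta执行一步Adam更新,state保存一阶/二阶矩和步数"""
    state['t'] += 1
    state['m'] = beta1 * state['m'] + (1 - beta1) * grad       # 一阶矩(动量)
    state['v'] = beta2 * state['v'] + (1 - beta2) * grad ** 2  # 二阶矩(自适应学习率)
    m_hat = state['m'] / (1 - beta1 ** state['t'])             # 偏置校正
    v_hat = state['v'] / (1 - beta2 ** state['t'])
    return theta - lr * m_hat / (np.sqrt(v_hat) + eps)

# 用Adam最小化 f(θ) = ||θ||²,其梯度为 2θ
theta = np.array([3.0, -2.0])
state = {'m': np.zeros_like(theta), 'v': np.zeros_like(theta), 't': 0}
for i in range(2000):
    grad = 2 * theta
    theta = adam_step(theta, grad, state, lr=0.05)
print(f"优化后的θ: {theta}")  # 应接近 [0, 0]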
六、深度学习训练技巧
1. 正则化技术
class RegularizationTechniques:
"""深度学习正则化技术"""
@staticmethod
def l1_regularization(weights, lambda_l1):
"""L1正则化 (Lasso)"""
return lambda_l1 * np.sum(np.abs(weights))
@staticmethod
def l2_regularization(weights, lambda_l2):
"""L2正则化 (Ridge)"""
return lambda_l2 * np.sum(weights ** 2)
@staticmethod
def elastic_net(weights, lambda_l1, lambda_l2):
"""弹性网络 (结合L1和L2)"""
return (lambda_l1 * np.sum(np.abs(weights)) +
lambda_l2 * np.sum(weights ** 2))
@staticmethod
def dropout(activations, dropout_rate, training=True):
"""Dropout正则化"""
if not training:
return activations
# 生成Dropout掩码
mask = np.random.binomial(1, 1 - dropout_rate, size=activations.shape)
# 应用Dropout并缩放
activations = activations * mask / (1 - dropout_rate)
return activations
@staticmethod
def batch_normalization(x, gamma=1, beta=0, epsilon=1e-5):
"""批量归一化 (简化版)"""
# 计算批次的均值和方差
mean = np.mean(x, axis=0)
variance = np.var(x, axis=0)
# 归一化
x_norm = (x - mean) / np.sqrt(variance + epsilon)
# 缩放和偏移
return gamma * x_norm + beta
@staticmethod
def data_augmentation_examples():
"""数据增强示例"""
techniques = {
'图像数据增强': [
'随机旋转 (±30度)',
'随机缩放 (0.8-1.2倍)',
'随机裁剪',
'随机水平翻转',
'颜色抖动 (亮度、对比度、饱和度)',
'随机噪声添加'
],
'文本数据增强': [
'同义词替换',
'随机插入',
'随机交换',
'随机删除',
'回译 (翻译成其他语言再译回)',
'EDA (Easy Data Augmentation)'
],
'时间序列增强': [
'时间扭曲',
'窗口滑动',
'随机缩放',
'添加噪声',
'通道混洗 (多变量时)'
]
}
print("数据增强技术:")
print("=" * 60)
for category, methods in techniques.items():
print(f"\n{category}:")
for method in methods:
print(f" • {method}")
print("\n" + "=" * 60)
@staticmethod
def early_stopping_callback(patience=10, min_delta=0.001):
"""早停回调函数"""
class EarlyStopping:
def __init__(self, patience=patience, min_delta=min_delta):
self.patience = patience
self.min_delta = min_delta
self.best_loss = float('inf')
self.counter = 0
self.should_stop = False
def __call__(self, current_loss):
if current_loss < self.best_loss - self.min_delta:
self.best_loss = current_loss
self.counter = 0
print(f"损失改善: {current_loss:.4f}")
return False
else:
self.counter += 1
print(f"早停计数: {self.counter}/{self.patience}")
if self.counter >= self.patience:
self.should_stop = True
print("达到早停条件,停止训练")
return self.should_stop
return EarlyStopping()
# 测试正则化技术
reg = RegularizationTechniques()
# 测试Dropout
activations = np.array([[1.0, 2.0, 3.0],
[4.0, 5.0, 6.0]])
dropout_rate = 0.5
print("Dropout示例:")
print(f"原始激活值:\n{activations}")
print(f"Dropout后 (训练模式):\n{reg.dropout(activations, dropout_rate, training=True)}")
print(f"Dropout后 (推理模式):\n{reg.dropout(activations, dropout_rate, training=False)}")
# 显示数据增强技术
reg.data_augmentation_examples()
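early_stopping_callback在上面定义后没有实际演示。下面用一条人工构造的、先下降后停滞的验证损失序列展示它的调用方式(损失数值为虚构,仅说明触发早停的过程)。
# 构造一条先下降、后不再改善的验证损失序列
fake_val_losses = [1.0, 0.8, 0.65, 0.6, 0.58, 0.58, 0.579, 0.578, 0.578, 0.578]

early_stopping = reg.early_stopping_callback(patience=3, min_delta=0.01)
for epoch, val_loss in enumerate(fake_val_losses):
    print(f"轮次 {epoch+1}: 验证损失 = {val_loss:.4f}")
    if early_stopping(val_loss):
        print(f"在第 {epoch+1} 轮提前停止训练")
        break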
2. 超参数调优
class HyperparameterTuning:
"""深度学习超参数调优"""
@staticmethod
def learning_rate_scheduler():
"""学习率调度器"""
schedulers = {
'固定学习率': {
'描述': '整个训练过程使用固定学习率',
'适用场景': '简单任务,小数据集',
'代码示例': 'lr = 0.001'
},
'阶梯下降': {
'描述': '在指定轮次降低学习率',
'适用场景': '大多数深度学习任务',
                '代码示例': '''if epoch > 0 and epoch % 30 == 0:
    lr *= 0.1'''
},
'余弦退火': {
'描述': '学习率按余弦函数从高到低变化',
'适用场景': '需要跳出局部最优的任务',
'公式': 'lr = lr_min + 0.5*(lr_max-lr_min)*(1+cos(epoch/T_max*π))'
},
'循环学习率': {
'描述': '学习率在最小和最大值之间循环变化',
'适用场景': '提高模型泛化能力',
'代码示例': '''cycle = epoch % cycle_length
lr = lr_min + 0.5*(lr_max-lr_min)*(1+cos(cycle/cycle_length*π))'''
},
'热重启': {
'描述': '周期性重启学习率,每次重启后逐渐降低峰值',
'适用场景': '复杂任务,需要精细调优',
'优势': '结合大范围探索和精细调优'
}
}
print("学习率调度策略:")
print("=" * 80)
for name, info in schedulers.items():
print(f"\n{name}:")
print(f" 描述: {info['描述']}")
print(f" 适用场景: {info['适用场景']}")
if '公式' in info:
print(f" 公式: {info['公式']}")
if '代码示例' in info:
print(f" 代码示例: {info['代码示例']}")
print("\n" + "=" * 80)
@staticmethod
def visualize_learning_rates():
"""可视化不同学习率调度策略"""
epochs = 100
# 不同调度策略
strategies = {
'固定学习率': [0.001] * epochs,
'阶梯下降': [],
'指数衰减': [],
'余弦退火': [],
'循环学习率': []
}
# 生成学习率序列
for epoch in range(epochs):
# 阶梯下降 (每30轮降低10倍)
lr = 0.001
if epoch >= 30:
lr *= 0.1
if epoch >= 60:
lr *= 0.1
strategies['阶梯下降'].append(lr)
# 指数衰减
strategies['指数衰减'].append(0.001 * np.exp(-0.05 * epoch))
# 余弦退火
T_max = 50
lr_min = 0.0001
lr_max = 0.01
strategies['余弦退火'].append(
lr_min + 0.5 * (lr_max - lr_min) *
(1 + np.cos(epoch / T_max * np.pi))
)
# 循环学习率
cycle_length = 20
cycle = epoch % cycle_length
strategies['循环学习率'].append(
0.0001 + 0.5 * (0.01 - 0.0001) *
(1 + np.cos(cycle / cycle_length * np.pi))
)
# 绘制图形
plt.figure(figsize=(12, 8))
        for name, lr_sequence in strategies.items():
            plt.plot(lr_sequence, linewidth=2, label=name)
plt.xlabel('训练轮次', fontsize=12)
plt.ylabel('学习率', fontsize=12)
plt.title('不同学习率调度策略比较', fontsize=16, fontweight='bold')
plt.legend(fontsize=10)
plt.grid(True, alpha=0.3)
plt.yscale('log') # 对数尺度
plt.tight_layout()
plt.show()
@staticmethod
def hyperparameter_search_space():
"""深度学习超参数搜索空间"""
search_space = {
'学习率': {
'范围': [1e-5, 1e-1],
'推荐值': [1e-3, 3e-4, 1e-4],
'搜索策略': '对数均匀采样',
'备注': '最重要的超参数'
},
'批量大小': {
'范围': [16, 256],
'推荐值': [32, 64, 128],
'搜索策略': '均匀采样 (2的幂次)',
'备注': 'GPU内存允许的情况下尽量大'
},
'网络深度': {
'范围': [2, 20],
'推荐值': [3, 5, 8, 12],
'搜索策略': '均匀采样',
'备注': '根据任务复杂度选择'
},
'Dropout率': {
'范围': [0.0, 0.5],
'推荐值': [0.2, 0.3, 0.5],
'搜索策略': '均匀采样',
'备注': '正则化强度,防止过拟合'
},
'权重衰减': {
'范围': [0.0, 0.1],
'推荐值': [1e-4, 1e-5, 0.0],
'搜索策略': '对数均匀采样',
'备注': 'L2正则化强度'
},
'优化器': {
'选项': ['Adam', 'SGD', 'RMSprop', 'Adagrad'],
'推荐值': 'Adam',
'搜索策略': '类别采样',
'备注': 'Adam通常是默认选择'
}
}
print("深度学习超参数搜索空间:")
print("=" * 100)
for param, info in search_space.items():
print(f"\n{param}:")
for key, value in info.items():
print(f" {key}: {value}")
print("\n" + "=" * 100)
print("\n调优策略建议:")
print("1. 先调学习率,固定其他参数")
print("2. 然后调批量大小和网络结构")
print("3. 最后调正则化相关参数")
print("4. 使用贝叶斯优化或随机搜索")
print("5. 早停防止过拟合")
# 显示超参数调优信息
tuning = HyperparameterTuning()
tuning.hyperparameter_search_space()
tuning.learning_rate_scheduler()
tuning.visualize_learning_rates()
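上面的调优策略建议中提到了"对数均匀采样"和"随机搜索",下面用一个最小的随机搜索示意把两者串起来:从前述搜索空间随机采样若干组超参数,并用一个虚构的评分函数代替真实的训练与验证(sample_hyperparameters、fake_validation_score均为本文自拟,实际使用时应替换为完整的训练/评估流程)。
import numpy as np

def sample_hyperparameters(rng):
    """从搜索空间中随机采样一组超参数"""
    return {
        'learning_rate': 10 ** rng.uniform(-5, -1),        # 对数均匀采样
        'batch_size': int(2 ** rng.integers(4, 9)),        # 16~256之间2的幂次
        'dropout_rate': rng.uniform(0.0, 0.5),
        'optimizer': str(rng.choice(['Adam', 'SGD', 'RMSprop']))
    }

def fake_validation_score(params):
    """虚构的评分函数,实际中应训练模型并返回验证集准确率"""
    return float(np.random.rand())

rng = np.random.default_rng(42)
best_score, best_params = -np.inf, None
for trial in range(10):
    params = sample_hyperparameters(rng)
    score = fake_validation_score(params)
    if score > best_score:
        best_score, best_params = score, params
print(f"最佳得分: {best_score:.4f}")
print(f"最佳超参数: {best_params}")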
七、实践项目:手写数字识别
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
class MNISTDigitRecognition:
"""手写数字识别实践项目"""
def __init__(self):
"""初始化"""
self.X_train = None
self.y_train = None
self.X_test = None
self.y_test = None
self.model = None
def load_data(self):
"""加载MNIST数据集"""
print("加载MNIST数据集...")
        # 使用fetch_openml加载MNIST数据集(as_frame=False直接返回NumPy数组,便于后面按行索引)
        mnist = fetch_openml('mnist_784', version=1, as_frame=False, parser='auto')
        X = mnist.data.astype('float32') / 255.0  # 像素值归一化到[0, 1]
        y = mnist.target.astype(int)
# 划分训练集和测试集
self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
X, y, test_size=0.2, random_state=42, stratify=y
)
print(f"训练集大小: {self.X_train.shape}")
print(f"测试集大小: {self.X_test.shape}")
print(f"类别分布: {np.bincount(self.y_train)}")
return self
def visualize_samples(self, n_samples=10):
"""可视化样本"""
plt.figure(figsize=(15, 6))
for i in range(n_samples):
plt.subplot(2, n_samples//2, i+1)
image = self.X_train[i].reshape(28, 28)
plt.imshow(image, cmap='gray')
plt.title(f"标签: {self.y_train[i]}")
plt.axis('off')
plt.suptitle('MNIST手写数字样本', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()
def create_simple_model(self):
"""创建简单的神经网络模型"""
class SimpleNN:
"""简单的全连接神经网络"""
def __init__(self, input_size=784, hidden_size=128, output_size=10):
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
# 初始化参数
self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2.0 / input_size)
self.b1 = np.zeros(hidden_size)
self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2.0 / hidden_size)
self.b2 = np.zeros(output_size)
# 缓存
self.cache = {}
def relu(self, x):
"""ReLU激活函数"""
return np.maximum(0, x)
def softmax(self, x):
"""Softmax激活函数"""
exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
return exp_x / np.sum(exp_x, axis=1, keepdims=True)
def forward(self, X):
"""前向传播"""
# 第一层
z1 = np.dot(X, self.W1) + self.b1
a1 = self.relu(z1)
# 第二层
z2 = np.dot(a1, self.W2) + self.b2
a2 = self.softmax(z2)
# 缓存中间结果
self.cache = {'X': X, 'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2}
return a2
def backward(self, X, y, learning_rate=0.01):
"""反向传播"""
m = X.shape[0]
# 从缓存中获取前向传播的结果
z1 = self.cache['z1']
a1 = self.cache['a1']
a2 = self.cache['a2']
# 将y转换为one-hot编码
y_onehot = np.zeros((m, self.output_size))
y_onehot[np.arange(m), y] = 1
# 计算输出层的梯度
dz2 = a2 - y_onehot
dW2 = np.dot(a1.T, dz2) / m
db2 = np.sum(dz2, axis=0) / m
# 计算隐藏层的梯度
da1 = np.dot(dz2, self.W2.T)
dz1 = da1 * (z1 > 0) # ReLU的梯度
dW1 = np.dot(X.T, dz1) / m
db1 = np.sum(dz1, axis=0) / m
# 更新参数
self.W2 -= learning_rate * dW2
self.b2 -= learning_rate * db2
self.W1 -= learning_rate * dW1
self.b1 -= learning_rate * db1
def predict(self, X):
"""预测"""
probas = self.forward(X)
return np.argmax(probas, axis=1)
def evaluate(self, X, y):
"""评估模型"""
y_pred = self.predict(X)
accuracy = np.mean(y_pred == y)
return accuracy
def summary(self):
"""打印模型信息"""
print("=" * 50)
print("简单神经网络模型")
print("=" * 50)
print(f"输入大小: {self.input_size}")
print(f"隐藏层大小: {self.hidden_size}")
print(f"输出大小: {self.output_size}")
total_params = (self.W1.size + self.b1.size +
self.W2.size + self.b2.size)
print(f"总参数数: {total_params:,}")
print("=" * 50)
self.model = SimpleNN()
return self.model
def train_model(self, epochs=10, batch_size=64, learning_rate=0.01):
"""训练模型"""
n_samples = self.X_train.shape[0]
n_batches = n_samples // batch_size
print(f"开始训练...")
print(f"训练样本数: {n_samples}")
print(f"批次大小: {batch_size}")
print(f"批次数: {n_batches}")
print(f"训练轮次: {epochs}")
train_losses = []
train_accuracies = []
test_accuracies = []
for epoch in range(epochs):
epoch_loss = 0
epoch_accuracy = 0
# 打乱数据
indices = np.random.permutation(n_samples)
X_shuffled = self.X_train[indices]
y_shuffled = self.y_train[indices]
for batch in range(n_batches):
# 获取当前批次数据
start = batch * batch_size
end = start + batch_size
X_batch = X_shuffled[start:end]
y_batch = y_shuffled[start:end]
# 前向传播
y_pred = self.model.forward(X_batch)
# 计算损失(交叉熵)
m = X_batch.shape[0]
y_onehot = np.zeros((m, 10))
y_onehot[np.arange(m), y_batch] = 1
loss = -np.mean(np.sum(y_onehot * np.log(y_pred + 1e-8), axis=1))
epoch_loss += loss
# 计算准确率
batch_pred = np.argmax(y_pred, axis=1)
batch_acc = np.mean(batch_pred == y_batch)
epoch_accuracy += batch_acc
# 反向传播和参数更新
self.model.backward(X_batch, y_batch, learning_rate)
# 计算平均损失和准确率
avg_loss = epoch_loss / n_batches
avg_accuracy = epoch_accuracy / n_batches
# 测试集准确率
test_acc = self.model.evaluate(self.X_test, self.y_test)
train_losses.append(avg_loss)
train_accuracies.append(avg_accuracy)
test_accuracies.append(test_acc)
print(f"轮次 {epoch+1}/{epochs}: "
f"训练损失={avg_loss:.4f}, "
f"训练准确率={avg_accuracy:.4f}, "
f"测试准确率={test_acc:.4f}")
# 可视化训练过程
self.plot_training_history(train_losses, train_accuracies, test_accuracies)
return train_losses, train_accuracies, test_accuracies
def plot_training_history(self, train_losses, train_accuracies, test_accuracies):
"""绘制训练历史"""
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
# 损失曲线
axes[0].plot(train_losses, 'b-', linewidth=2, label='训练损失')
axes[0].set_xlabel('训练轮次')
axes[0].set_ylabel('损失')
axes[0].set_title('训练损失曲线')
axes[0].legend()
axes[0].grid(True, alpha=0.3)
# 准确率曲线
axes[1].plot(train_accuracies, 'g-', linewidth=2, label='训练准确率')
axes[1].plot(test_accuracies, 'r-', linewidth=2, label='测试准确率')
axes[1].set_xlabel('训练轮次')
axes[1].set_ylabel('准确率')
axes[1].set_title('准确率曲线')
axes[1].legend()
axes[1].grid(True, alpha=0.3)
plt.suptitle('MNIST手写数字识别训练历史', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()
def show_predictions(self, n_samples=15):
"""展示预测结果"""
# 随机选择测试样本
indices = np.random.choice(len(self.X_test), n_samples, replace=False)
X_sample = self.X_test[indices]
y_sample = self.y_test[indices]
# 预测
y_pred = self.model.predict(X_sample)
# 可视化
plt.figure(figsize=(15, 10))
n_cols = 5
n_rows = int(np.ceil(n_samples / n_cols))
for i, idx in enumerate(indices):
plt.subplot(n_rows, n_cols, i+1)
image = X_sample[i].reshape(28, 28)
plt.imshow(image, cmap='gray')
# 标记正确/错误
is_correct = y_pred[i] == y_sample[i]
color = 'green' if is_correct else 'red'
plt.title(f"真实: {y_sample[i]}\n预测: {y_pred[i]}", color=color)
plt.axis('off')
accuracy = np.mean(y_pred == y_sample)
plt.suptitle(f'预测结果 (准确率: {accuracy:.2%})', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()
# 打印混淆矩阵(简化)
print("\n预测结果统计:")
print(f"样本数: {n_samples}")
print(f"正确数: {np.sum(y_pred == y_sample)}")
print(f"错误数: {np.sum(y_pred != y_sample)}")
print(f"准确率: {accuracy:.2%}")
# 运行MNIST手写数字识别项目
mnist_project = MNISTDigitRecognition()
# 加载数据
mnist_project.load_data()
# 可视化样本
mnist_project.visualize_samples(10)
# 创建模型
model = mnist_project.create_simple_model()
model.summary()
# 训练模型
train_losses, train_accuracies, test_accuracies = mnist_project.train_model(
epochs=20,
batch_size=128,
learning_rate=0.01
)
# 展示预测结果
mnist_project.show_predictions(15)
深度学习正在改变世界。从图像识别到自然语言处理,从自动驾驶到医疗诊断,深度学习的应用无处不在。虽然深度学习技术看起来很复杂,但通过系统的学习和实践,你也可以掌握这项强大的技术。