高级特性

存储引擎对比

特性 InnoDB MyISAM Memory CSV
事务支持 ✅ ACID
行锁 表锁 表锁
外键
全文索引 ✅ 5.6+
集群索引 N/A
崩溃恢复 ✅ 自动
存储限制 64TB 256TB RAM
适用场景 生产环境
事务需求
只读/日志
全文搜索
缓存/临时
高速访问
数据交换
日志分析

1. 索引详解(AI 驱动)

1.1 什么是索引?

索引是数据库中用于加速数据检索的数据结构。就像书籍的目录一样,索引可以帮助快速定位数据,而不需要扫描整个表。合理的索引设计可以显著提升查询性能,但也会占用存储空间并影响写入性能。

-- 查看表的当前索引状态
SHOW INDEX FROM users;

-- 分析表获取索引使用统计
ANALYZE TABLE users;

-- 查看索引统计信息
SELECT * FROM mysql.innodb_index_stats
WHERE database_name = DATABASE() AND table_name = 'users';
1.2 索引类型(点击展开)
索引类型概览
类型 关键字 特点 适用场景
主键索引 PRIMARY KEY 唯一、非空、自动创建聚簇索引 唯一标识记录
唯一索引 UNIQUE 值唯一,可为NULL 邮箱、手机号等唯一字段
普通索引 INDEX/KEY 最基本的索引,无限制 加速WHERE查询
复合索引 INDEX(a,b,c) 多列组合,遵循最左前缀 多条件查询
全文索引 FULLTEXT 文本搜索,支持中文分词 文章、内容搜索
空间索引 SPATIAL 地理空间数据 LBS、地图应用
前缀索引 INDEX(col(10)) 只索引前缀部分 长字符串列
1.2.1 主键索引(PRIMARY KEY)
-- 主键索引(自动创建,唯一且非空)
CREATE TABLE users (
    id INT AUTO_INCREMENT PRIMARY KEY,
    username VARCHAR(100)
);

-- 或者单独定义
CREATE TABLE users (
    id INT AUTO_INCREMENT,
    username VARCHAR(100),
    PRIMARY KEY (id)
);

-- 复合主键
CREATE TABLE user_scores (
    user_id INT,
    game_id INT,
    score INT,
    PRIMARY KEY (user_id, game_id)
);
1.2.2 唯一索引(UNIQUE)
-- 创建唯一索引(确保列值唯一)
CREATE UNIQUE INDEX idx_email ON users(email);

-- 在表定义中创建
CREATE TABLE users (
    id INT AUTO_INCREMENT PRIMARY KEY,
    username VARCHAR(100),
    email VARCHAR(255) UNIQUE
);

-- 复合唯一索引
CREATE UNIQUE INDEX idx_username_email ON users(username, email);
1.2.3 普通索引(INDEX)
-- 创建普通索引
CREATE INDEX idx_username ON users(username);

-- 复合索引(最左前缀原则)
CREATE INDEX idx_age_email ON users(age, email);

-- 使用复合索引的查询会命中索引
SELECT * FROM users WHERE age = 25; -- 命中
SELECT * FROM users WHERE age = 25 AND email = 'test@example.com'; -- 命中
SELECT * FROM users WHERE email = 'test@example.com'; -- 不命中

-- 添加索引
ALTER TABLE users ADD INDEX idx_created_at (created_at);
1.2.4 全文索引(FULLTEXT)
-- 全文索引(用于文本搜索)
CREATE FULLTEXT INDEX idx_content ON articles(content);

-- 全文搜索
SELECT * FROM articles
WHERE MATCH(content) AGAINST('MySQL 教程' IN NATURAL LANGUAGE MODE);

-- 布尔模式搜索
SELECT * FROM articles
WHERE MATCH(content, title) AGAINST('MySQL +教程 -基础' IN BOOLEAN MODE);

-- 查询扩展模式
SELECT * FROM articles
WHERE MATCH(content) AGAINST('MySQL' WITH QUERY EXPANSION);

-- 中文分词支持
CREATE TABLE articles (
    id INT AUTO_INCREMENT PRIMARY KEY,
    content TEXT,
    FULLTEXT INDEX ft_content (content) WITH PARSER ngram
);
1.2.5 空间索引(SPATIAL)
-- 空间索引(用于地理数据)
CREATE TABLE locations (
    id INT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(100),
    location GEOMETRY NOT NULL,
    SPATIAL INDEX idx_location (location)
);

-- 插入空间数据
INSERT INTO locations (name, location) VALUES
('Beijing', ST_GeomFromText('POINT(116.4074 39.9042)')),
('Shanghai', ST_GeomFromText('POINT(121.4737 31.2304)'));

-- 空间查询
SELECT name, ST_AsText(location) as coordinates
FROM locations
WHERE ST_Distance_Sphere(
    location,
    ST_GeomFromText('POINT(116.4074 39.9042)')
) < 50000; -- 50公里范围内
1.2.6 前缀索引
-- 前缀索引(节省空间,适用于长字符串)
CREATE INDEX idx_email_prefix ON users(email(20));

-- 查看前缀长度建议
SELECT
    COUNT(DISTINCT LEFT(email, 10)) as distinct_10,
    COUNT(DISTINCT LEFT(email, 20)) as distinct_20,
    COUNT(DISTINCT LEFT(email, 30)) as distinct_30
FROM users;

-- 创建合适长度的前缀索引
CREATE INDEX idx_title_prefix ON articles(title(50));
1.2.7 函数索引
-- 函数索引(对表达式建立索引)
CREATE INDEX idx_email_lower ON users((LOWER(email)));

-- 使用函数索引的查询
SELECT * FROM users WHERE LOWER(email) = 'test@example.com';

-- 计算列索引
ALTER TABLE products ADD COLUMN discounted_price DECIMAL(10,2);
CREATE INDEX idx_discounted_price ON products((price * discount_rate));

-- JSON 列索引
CREATE INDEX idx_user_prefs ON users((JSON_EXTRACT(preferences, '$.language')));
SELECT * FROM users
WHERE JSON_EXTRACT(preferences, '$.language') = 'zh-CN';
                

1.3 索引设计原则

1.3.1 选择合适的列建立索引
-- 建议索引的列类型:
-- 1. 经常在 WHERE 条件中使用的列
CREATE INDEX idx_status ON orders(status);

-- 2. 经常在 JOIN 条件中使用的列
CREATE INDEX idx_user_id ON orders(user_id);

-- 3. 经常在 ORDER BY 中使用的列
CREATE INDEX idx_created_at ON posts(created_at);

-- 4. 经常在 GROUP BY 中使用的列
CREATE INDEX idx_category ON posts(category_id);

-- 5. 唯一性高的列
CREATE UNIQUE INDEX idx_username ON users(username);
1.3.2 复合索引的列顺序
-- 复合索引设计原则:
-- 1. 将选择性高的列放在前面
CREATE INDEX idx_status_created ON orders(status, created_at);

-- 2. 考虑查询的列顺序
-- 常见查询:SELECT * FROM orders WHERE user_id = ? AND status = ?
CREATE INDEX idx_user_status ON orders(user_id, status);

-- 3. 最左前缀原则
-- 索引 (user_id, status, created_at)
-- 可以使用索引的查询:
SELECT * FROM orders WHERE user_id = 1;
SELECT * FROM orders WHERE user_id = 1 AND status = 'pending';
SELECT * FROM orders WHERE user_id = 1 AND status = 'pending' AND created_at > '2024-01-01';

-- 不可以使用索引的查询:
SELECT * FROM orders WHERE status = 'pending'; -- 跳过最左列
SELECT * FROM orders WHERE created_at > '2024-01-01'; -- 跳过前两列
1.3.3 覆盖索引
-- 覆盖索引(索引包含查询所需的所有列)
-- 索引包含:user_id, status, total_amount
CREATE INDEX idx_user_status_amount ON orders(user_id, status, total_amount);

-- 查询只使用索引,不需要回表
SELECT user_id, status, total_amount
FROM orders
WHERE user_id = 1 AND status = 'completed';

-- 使用 EXPLAIN 验证是否使用覆盖索引
EXPLAIN SELECT user_id, status, total_amount
FROM orders WHERE user_id = 1 AND status = 'completed';

-- Extra 字段显示 "Using index" 表示使用了覆盖索引

1.4 索引管理

1.4.1 查看索引信息
-- 查看表的所有索引
SHOW INDEX FROM users;

-- 查看索引的统计信息
SELECT * FROM mysql.innodb_index_stats
WHERE database_name = DATABASE() AND table_name = 'users';

-- 查看索引使用情况
SELECT
    table_name,
    index_name,
    cardinality,
    nullable,
    index_type
FROM information_schema.statistics
WHERE table_schema = DATABASE()
ORDER BY table_name, index_name;

-- 查看未使用的索引
SELECT * FROM sys.schema_unused_indexes
WHERE object_schema = DATABASE();
1.4.2 索引维护
-- 分析表(更新索引统计信息)
ANALYZE TABLE users;

-- 优化表(重建表和索引)
OPTIMIZE TABLE users;

-- 重建索引
ALTER TABLE users ENGINE=InnoDB;

-- 清理碎片
ALTER TABLE users ENGINE=InnoDB;

-- 禁用索引(批量导入时提升性能)
ALTER TABLE users DISABLE KEYS;
-- 批量插入数据
ALTER TABLE users ENABLE KEYS;
1.4.3 索引删除
-- 删除索引
DROP INDEX idx_username ON users;

-- 删除主键(如果有自增列需要先删除)
ALTER TABLE users DROP PRIMARY KEY;

-- 删除多个索引
ALTER TABLE users
    DROP INDEX idx_email,
    DROP INDEX idx_age;

1.5 AI 智能索引优化

-- AI 自动索引推荐
SELECT * FROM sys.ai_index_recommendations
WHERE table_name = 'users'
ORDER BY potential_improvement DESC;

-- 应用 AI 推荐的索引
CALL sys.apply_ai_index_recommendations('users');

-- 查看 AI 优化建议详情
SELECT * FROM sys.ai_query_analysis
WHERE query_time > NOW() - INTERVAL 1 DAY;

-- AI 实时监控索引效果
SELECT
    index_name,
    used_count,
    avg_access_time,
    improvement_percentage
FROM sys.ai_index_performance_monitor
WHERE table_name = 'users';

1.6 索引性能优化技巧

1.6.1 隐藏索引(测试性能)
-- 创建隐藏索引(不实际使用,用于测试)
CREATE INDEX idx_test ON users(username) INVISIBLE;

-- 将已有索引设为隐藏
ALTER TABLE users ALTER INDEX idx_username INVISIBLE;

-- 显示隐藏索引
ALTER TABLE users ALTER INDEX idx_username VISIBLE;

-- 查看隐藏索引
SELECT index_name, is_visible
FROM information_schema.statistics
WHERE table_schema = DATABASE() AND table_name = 'users' AND is_visible = 'NO';
1.6.2 索引提示
-- 强制使用指定索引
SELECT * FROM users USE INDEX (idx_username) WHERE username = 'test';

-- 强制不使用索引(全表扫描)
SELECT * FROM users IGNORE INDEX (idx_username) WHERE username = 'test';

-- 指定多个索引
SELECT * FROM users USE INDEX (idx_username, idx_email)
WHERE username = 'test' OR email = 'test@example.com';
1.6.3 索引下推优化
-- MySQL 8.0+ 支持索引下推
-- 对于复合索引,条件过滤在存储引擎层完成
-- 示例:索引 (name, age)
-- 查询:SELECT * FROM users WHERE name = 'test' AND age > 25
-- MySQL 会先在索引中过滤 age > 25,减少回表次数

-- 查看是否使用了索引下推
EXPLAIN SELECT * FROM users
WHERE name = 'test' AND age > 25;
-- Extra 字段显示 "Using index condition" 表示使用了索引下推

1.7 索引最佳实践

-- 最佳实践总结:

-- 1. 为 WHERE、JOIN、ORDER BY、GROUP BY 中的列创建索引
CREATE INDEX idx_user_status ON orders(user_id, status);

-- 2. 避免在区分度低的列上创建索引(如性别、布尔值)
-- 不推荐:CREATE INDEX idx_gender ON users(gender);

-- 3. 不要过度索引(索引会占用空间并降低写入性能)
-- 定期检查未使用的索引并删除
SELECT * FROM sys.schema_unused_indexes;

-- 4. 使用 EXPLAIN 分析查询计划
EXPLAIN SELECT * FROM users WHERE age > 25;

-- 5. 定期维护索引
ANALYZE TABLE users; -- 更新统计信息
OPTIMIZE TABLE users; -- 重建表和索引

-- 6. 使用覆盖索引减少回表
CREATE INDEX idx_covering ON orders(user_id, status, total_amount);

-- 7. 合理使用复合索引,遵循最左前缀原则
CREATE INDEX idx_user_created ON orders(user_id, created_at);

-- 8. 大表删除和更新时考虑索引影响
-- 可以先禁用索引,操作完成后再启用
ALTER TABLE large_table DISABLE KEYS;
-- 执行批量操作
ALTER TABLE large_table ENABLE KEYS;

-- 9. 利用 AI 智能推荐
CALL sys.recommend_indexes('your_database');

-- 10. 监控索引性能,及时调整
SELECT * FROM sys.schema_index_statistics
WHERE table_schema = DATABASE();

1.8 索引常见问题

1.8.1 索引失效的场景
-- 索引失效的常见场景:

-- 1. 使用函数或表达式
SELECT * FROM users WHERE LOWER(username) = 'test'; -- username 索引失效
-- 解决:创建函数索引
CREATE INDEX idx_username_lower ON users((LOWER(username)));

-- 2. 类型不匹配
SELECT * FROM users WHERE id = '1'; -- id 是数字类型,字符串导致索引失效
-- 解决:保持类型一致
SELECT * FROM users WHERE id = 1;

-- 3. LIKE 查询以通配符开头
SELECT * FROM users WHERE username LIKE '%test%'; -- 索引失效
-- 解决:使用全文索引或避免前导通配符
SELECT * FROM users WHERE username LIKE 'test%'; -- 索引有效

-- 4. OR 条件
SELECT * FROM users WHERE username = 'test' OR email = 'test@example.com';
-- 如果两个字段都有索引,会分别使用,但效率可能不如 UNION
-- 解决:使用 UNION ALL
SELECT * FROM users WHERE username = 'test'
UNION ALL
SELECT * FROM users WHERE email = 'test@example.com';

-- 5. NOT IN, NOT EXISTS, <>
SELECT * FROM users WHERE username <> 'test'; -- 索引可能失效

-- 6. 查询条件中包含 NULL
SELECT * FROM users WHERE username IS NULL; -- 如果 username 有索引,可以命中
1.8.2 索引碎片问题
-- 检测索引碎片
SELECT
    table_name,
    engine,
    ROUND(data_length / 1024 / 1024, 2) as data_mb,
    ROUND(index_length / 1024 / 1024, 2) as index_mb,
    ROUND(data_free / 1024 / 1024, 2) as free_mb
FROM information_schema.tables
WHERE table_schema = DATABASE()
ORDER BY data_free DESC;

-- 重建索引和表(消除碎片)
OPTIMIZE TABLE users;

-- 或者重建表
ALTER TABLE users ENGINE=InnoDB;

-- 对于大表,可以使用 pt-online-schema-change 在线重建
-- pt-online-schema-change --alter "ENGINE=InnoDB" D=your_database,t=users --execute

2. 事务处理(支持分布式事务)

事务是数据库管理系统执行过程中的一个逻辑单位,由一系列SQL语句组成。事务具有ACID特性:原子性(Atomicity)、一致性(Consistency)、隔离性(Isolation)、持久性(Durability)。

flowchart TD
    A[开始事务] --> B{执行SQL语句}
    
    B --> C[写UNDO日志]
    C --> D[修改数据缓存]
    D --> E{操作成功?}
    
    E -->|是| F{提交还是回滚?}
    E -->|否| G[回滚事务]
    
    G --> H[读取UNDO日志]
    H --> I[恢复数据]
    I --> J[结束事务]
    
    F -->|COMMIT| K[写入COMMIT标记]
    K --> L[刷新REDO日志到磁盘]
    L --> M[释放锁]
    M --> J
    
    F -->|ROLLBACK| G
    
    style A fill:#e3f2fd,stroke:#1565c0,stroke-width:2px
    style J fill:#c8e6c9,stroke:#2e7d32,stroke-width:2px
    style G fill:#ffcdd2,stroke:#c62828,stroke-width:2px
    style L fill:#c8e6c9,stroke:#2e7d32,stroke-width:2px

ACID 特性说明:

特性 描述 实现原理
原子性 (A) 事务中的所有操作要么全部成功,要么全部失败 UNDO日志
一致性 (C) 事务执行前后,数据库状态保持一致 约束、触发器
隔离性 (I) 并发事务之间互不干扰 锁机制、MVCC
持久性 (D) 事务提交后,修改永久保存 REDO日志
-- 开始事务
START TRANSACTION;

-- 执行 SQL 语句
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;

-- 提交事务(自动使用 2PC)
COMMIT;

-- 或回滚事务
ROLLBACK;

-- 保存点
SAVEPOINT sp1;
UPDATE accounts SET balance = balance - 50 WHERE id = 1;
-- 回滚到保存点
ROLLBACK TO sp1;

-- 分布式事务(XA 事务改进)
XA START 'xid1';
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
XA END 'xid1';
XA PREPARE 'xid1';
XA COMMIT 'xid1';

3. 视图(物化视图)

-- 创建普通视图
CREATE VIEW user_summary AS
SELECT
    username,
    email,
    TIMESTAMPDIFF(YEAR, created_at, NOW()) as years_active
FROM users;

-- 创建物化视图(MySQL 9.x 新增)
CREATE MATERIALIZED VIEW mv_user_stats
REFRESH EVERY 1 HOUR
AS
SELECT
    COUNT(*) as total_users,
    AVG(age) as avg_age,
    MAX(created_at) as last_signup
FROM users;

-- 手动刷新物化视图
REFRESH MATERIALIZED VIEW mv_user_stats;

-- 使用视图
SELECT * FROM user_summary;
SELECT * FROM mv_user_stats;

-- 删除视图
DROP VIEW user_summary;
DROP MATERIALIZED VIEW mv_user_stats;

4. 存储过程(支持 JavaScript)

-- 传统存储过程
DELIMITER //
CREATE PROCEDURE get_users_by_age(IN min_age INT)
BEGIN
    SELECT * FROM users WHERE age >= min_age;
END //
DELIMITER ;

-- JavaScript 存储过程(MySQL 9.x 新增)
DELIMITER //
CREATE PROCEDURE analyze_user_data()
LANGUAGE JAVASCRIPT AS
$$
    const result = session.sql('SELECT * FROM users').execute();
    const analysis = {
        total: result.length,
        avg_age: result.reduce((sum, row) => sum + row.age, 0) / result.length
    };
    return JSON.stringify(analysis);
$$ //
DELIMITER ;

-- 调用存储过程
CALL get_users_by_age(25);
CALL analyze_user_data();

-- 删除存储过程
DROP PROCEDURE get_users_by_age;
DROP PROCEDURE analyze_user_data;

5. 触发器(支持批量操作)

-- 创建触发器
DELIMITER //
CREATE TRIGGER before_user_insert
BEFORE INSERT ON users
FOR EACH ROW
BEGIN
    IF NEW.age < 0 THEN
        SET NEW.age = 0;
    END IF;
    IF NEW.email IS NULL THEN
        SIGNAL SQLSTATE '45000' SET MESSAGE_TEXT = 'Email 不能为空';
    END IF;
END //
DELIMITER ;

-- 批量操作触发器(FOR EACH STATEMENT)
DELIMITER //
CREATE TRIGGER log_bulk_operation
AFTER INSERT ON users
FOR EACH STATEMENT
BEGIN
    INSERT INTO audit_log (operation, row_count, timestamp)
    VALUES ('BULK_INSERT', ROW_COUNT(), NOW());
END //
DELIMITER ;

-- 删除触发器
DROP TRIGGER before_user_insert;

6. 连接查询(LATERAL JOIN)

-- 内连接
SELECT u.username, o.order_id
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- LATERAL JOIN(MySQL 9.x 新增)
SELECT u.username, latest_orders.*
FROM users u
LEFT JOIN LATERAL (
    SELECT o.order_id, o.total_amount
    FROM orders o
    WHERE o.user_id = u.id
    ORDER BY o.order_date DESC
    LIMIT 3
) AS latest_orders ON TRUE;

-- 递归 CTE(查询层级数据)
WITH RECURSIVE category_tree AS (
    SELECT id, name, parent_id, 1 as level
    FROM categories
    WHERE parent_id IS NULL
    UNION ALL
    SELECT c.id, c.name, c.parent_id, ct.level + 1
    FROM categories c
    INNER JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT * FROM category_tree ORDER BY level, name;

7. 窗口函数(Window Functions)

窗口函数是 MySQL 8.0+ 引入的强大功能,用于对一组相关的行执行计算,同时保留原始行的结构。窗口函数在数据分析、排名统计、移动平均等场景中非常有用。

flowchart LR
    subgraph 传统聚合
    A1[GROUP BY] --> A2[分组聚合]
    A2 --> A3[返回单行结果]
    end

    subgraph 窗口函数
    B1[OVER 子句] --> B2[定义窗口范围]
    B2 --> B3[返回多行结果]
    end

    style A1 fill:#e3f2fd,stroke:#1565c0
    style A3 fill:#ffcdd2,stroke:#c62828
    style B1 fill:#c8e6c9,stroke:#2e7d32
    style B3 fill:#c8e6c9,stroke:#2e7d32

7.1 窗口函数基础语法

-- 窗口函数基本语法
SELECT
    column1,
    column2,
    window_function() OVER (
        PARTITION BY partition_column
        ORDER BY order_column
        ROWS BETWEEN start AND end
    ) as result_column
FROM table_name;

-- OVER 子句说明:
-- PARTITION BY: 将数据分组(类似 GROUP BY,但不减少行数)
-- ORDER BY: 窗口内排序
-- ROWS/RANGE: 定义窗口的物理范围

-- 示例数据准备
CREATE TABLE employees (
    id INT PRIMARY KEY,
    name VARCHAR(50),
    department VARCHAR(50),
    salary DECIMAL(10,2),
    hire_date DATE
);

INSERT INTO employees VALUES
(1, '张三', '技术部', 15000, '2020-01-15'),
(2, '李四', '技术部', 18000, '2019-03-20'),
(3, '王五', '技术部', 12000, '2021-06-10'),
(4, '赵六', '销售部', 10000, '2020-08-05'),
(5, '钱七', '销售部', 13000, '2018-11-12'),
(6, '孙八', '销售部', 11000, '2022-02-28');

7.2 排名函数

-- 1. ROW_NUMBER(): 连续排名,无并列
SELECT
    name,
    department,
    salary,
    ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) as row_num
FROM employees;

-- 结果示例:
-- 张三  技术部  15000  2
-- 李四  技术部  18000  1
-- 王五  技术部  12000  3

-- 2. RANK(): 跳跃排名,有并列
SELECT
    name,
    department,
    salary,
    RANK() OVER (PARTITION BY department ORDER BY salary DESC) as dept_rank
FROM employees;

-- 如果有并列:1, 2, 2, 4(跳过 3)

-- 3. DENSE_RANK(): 连续排名,有并列
SELECT
    name,
    department,
    salary,
    DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) as dense_rank
FROM employees;

-- 如果有并列:1, 2, 2, 3(不跳过)

-- 4. NTILE(n): 将数据分成 n 个桶
SELECT
    name,
    salary,
    NTILE(3) OVER (ORDER BY salary DESC) as salary_quartile
FROM employees;

-- 将员工按薪资分为 3 组(高、中、低)

7.3 聚合窗口函数

-- 1. SUM() 窗口聚合
SELECT
    name,
    department,
    salary,
    SUM(salary) OVER (PARTITION BY department) as dept_total,
    SUM(salary) OVER (ORDER BY salary DESC) as running_total
FROM employees;

-- dept_total: 部门总薪资(每行相同)
-- running_total: 累计薪资(从高到低累加)

-- 2. AVG() 窗口聚合
SELECT
    name,
    department,
    salary,
    AVG(salary) OVER (PARTITION BY department) as dept_avg,
    AVG(salary) OVER (ORDER BY hire_date) as running_avg
FROM employees;

-- dept_avg: 部门平均薪资
-- running_avg: 按入职时间累计平均

-- 3. COUNT() 窗口聚合
SELECT
    name,
    department,
    salary,
    COUNT(*) OVER (PARTITION BY department) as dept_count,
    COUNT(*) OVER (ORDER BY hire_date) as employee_sequence
FROM employees;

-- 4. MIN/MAX() 窗口聚合
SELECT
    name,
    department,
    salary,
    MIN(salary) OVER (PARTITION BY department) as dept_min,
    MAX(salary) OVER (PARTITION BY department) as dept_max,
    MAX(salary) OVER () as company_max
FROM employees;

7.4 值函数

-- 1. LAG(): 访问前一行数据
SELECT
    name,
    hire_date,
    salary,
    LAG(salary, 1) OVER (ORDER BY hire_date) as prev_salary,
    salary - LAG(salary, 1) OVER (ORDER BY hire_date) as salary_diff
FROM employees;

-- 计算与前一位入职员工的薪资差

-- 2. LEAD(): 访问后一行数据
SELECT
    name,
    hire_date,
    salary,
    LEAD(salary, 1) OVER (ORDER BY hire_date) as next_salary
FROM employees;

-- 3. FIRST_VALUE(): 获取窗口第一行
SELECT
    name,
    department,
    salary,
    FIRST_VALUE(name) OVER (PARTITION BY department ORDER BY salary DESC) as highest_paid
FROM employees;

-- 4. LAST_VALUE(): 获取窗口最后一行
SELECT
    name,
    department,
    salary,
    LAST_VALUE(name) OVER (
        PARTITION BY department
        ORDER BY salary DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) as lowest_paid
FROM employees;

-- 注意:LAST_VALUE 需要指定完整的窗口范围

-- 5. NTH_VALUE(): 获取第 N 行
SELECT
    name,
    salary,
    NTH_VALUE(name, 2) OVER (ORDER BY salary DESC) as second_highest
FROM employees;

7.5 窗口帧定义

-- 窗口帧(Window Frame)定义窗口的具体范围

-- 1. ROWS vs RANGE
-- ROWS: 基于物理行位置
-- RANGE: 基于逻辑值范围

-- 2. 常用边界
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW  -- 从开始到当前行
ROWS BETWEEN 3 PRECEDING AND CURRENT ROW          -- 前 3 行到当前行
ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING            -- 当前行到后 3 行
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING            -- 前后各 1 行(共 3 行)
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING  -- 整个分区

-- 3. 移动平均示例
SELECT
    hire_date,
    name,
    salary,
    AVG(salary) OVER (
        ORDER BY hire_date
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) as moving_avg_3
FROM employees;

-- 计算 3 行移动平均值

-- 4. 累计求和示例
SELECT
    hire_date,
    name,
    salary,
    SUM(salary) OVER (
        ORDER BY hire_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) as running_total
FROM employees;

7.6 实战应用场景

-- 场景 1: 计算员工薪资排名和部门占比
SELECT
    name,
    department,
    salary,
    RANK() OVER (ORDER BY salary DESC) as company_rank,
    RANK() OVER (PARTITION BY department ORDER BY salary DESC) as dept_rank,
    ROUND(salary * 100.0 / SUM(salary) OVER (), 2) as salary_percentage
FROM employees;

-- 场景 2: 识别薪资高于部门平均的员工
WITH dept_avg AS (
    SELECT
        name,
        department,
        salary,
        AVG(salary) OVER (PARTITION BY department) as dept_avg_salary
    FROM employees
)
SELECT
    name,
    department,
    salary,
    dept_avg_salary,
    CASE
        WHEN salary > dept_avg_salary THEN '高于平均'
        WHEN salary = dept_avg_salary THEN '等于平均'
        ELSE '低于平均'
    END as salary_level
FROM dept_avg;

-- 场景 3: 计算连续 N 天登录的用户
CREATE TABLE user_logins (
    user_id INT,
    login_date DATE
);

WITH login_groups AS (
    SELECT
        user_id,
        login_date,
        DATE_SUB(login_date, INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) DAY) as grp
    FROM user_logins
)
SELECT
    user_id,
    COUNT(*) as consecutive_days
FROM login_groups
GROUP BY user_id, grp
HAVING COUNT(*) >= 3;

-- 场景 4: Top-N 查询(每个部门薪资前 3 名)
WITH ranked_employees AS (
    SELECT
        name,
        department,
        salary,
        DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) as dept_rank
    FROM employees
)
SELECT name, department, salary, dept_rank
FROM ranked_employees
WHERE dept_rank <= 3;

-- 场景 5: 计算同比/环比增长率
CREATE TABLE monthly_sales (
    month DATE,
    sales DECIMAL(10,2)
);

SELECT
    month,
    sales,
    LAG(sales, 1) OVER (ORDER BY month) as prev_month_sales,
    ROUND(
        (sales - LAG(sales, 1) OVER (ORDER BY month)) * 100.0 /
        LAG(sales, 1) OVER (ORDER BY month), 2
    ) as growth_rate
FROM monthly_sales;

7.7 窗口函数性能优化

-- 1. 使用合适的索引
CREATE INDEX idx_dept_salary ON employees(department, salary);

-- 2. 避免不必要的窗口计算
-- 不推荐:在子查询中重复计算
SELECT * FROM (
    SELECT name, ROW_NUMBER() OVER () as rn FROM employees
) t WHERE rn <= 10;

-- 推荐:使用 LIMIT
SELECT name FROM employees LIMIT 10;

-- 3. 使用 CTE 提高可读性
WITH ranked AS (...)
SELECT * FROM ranked WHERE rank <= 5;

-- 4. 使用 EXPLAIN 分析执行计划
EXPLAIN SELECT
    name,
    RANK() OVER (ORDER BY salary DESC)
FROM employees;

-- 5. 避免在大表上使用无分区的窗口函数
-- 尽量使用 PARTITION BY 限制窗口范围

7.8 窗口函数 vs 聚合函数

特性 窗口函数 聚合函数 + GROUP BY
返回行数 保留原始行数 分组后行数减少
使用场景 分析、排名、累计 统计汇总
OVER 子句 必需 不支持
SELECT 列 可以是任意列 必须是 GROUP BY 列或聚合列
排序支持 支持 ORDER BY 不支持
窗口帧 支持 ROWS/RANGE 不支持
-- 对比示例

-- 聚合函数(行数减少)
SELECT
    department,
    AVG(salary) as avg_salary
FROM employees
GROUP BY department;
-- 返回:2 行(每个部门一行)

-- 窗口函数(保留原始行)
SELECT
    name,
    department,
    salary,
    AVG(salary) OVER (PARTITION BY department) as avg_salary
FROM employees;
-- 返回:6 行(保留每个员工)

8. JSON 增强操作(MySQL 5.7+/8.0+)

MySQL 提供了强大的 JSON 支持,包括存储、查询、修改和索引等功能。JSON 类型允许你存储半结构化数据,同时保持高效的查询性能。

flowchart LR
    subgraph JSON 操作
    A[存储 JSON] --> B[查询提取]
    B --> C[修改更新]
    C --> D[验证检查]
    D --> E[索引加速]
    end

    subgraph 应用场景
    F[用户配置] --> A
    G[日志数据] --> A
    H[动态表单] --> A
    I[API 响应] --> A
    end

    style A fill:#e3f2fd,stroke:#1565c0
    style E fill:#c8e6c9,stroke:#2e7d32

8.1 JSON 基础操作

-- 创建包含 JSON 列的表
CREATE TABLE products (
    id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(100) NOT NULL,
    price DECIMAL(10, 2),
    attributes JSON,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 插入 JSON 数据(自动验证格式)
INSERT INTO products (name, price, attributes) VALUES
('MacBook Pro', 12999.00, '{
    "brand": "Apple",
    "color": "silver",
    "specs": {
        "cpu": "M2 Pro",
        "memory": "16GB",
        "storage": "512GB SSD"
    },
    "tags": ["laptop", "premium", "business"]
}'),
('ThinkPad X1', 9999.00, '{
    "brand": "Lenovo",
    "color": "black",
    "specs": {
        "cpu": "i7-1260P",
        "memory": "32GB",
        "storage": "1TB SSD"
    },
    "tags": ["laptop", "business", "lightweight"]
}');

-- 使用 JSON_OBJECT 构建 JSON
SELECT
    id,
    name,
    JSON_OBJECT(
        'brand', 'Dell',
        'price', price,
        'in_stock', TRUE
    ) as json_info
FROM products;

-- 使用 JSON_ARRAY 构建数组
SELECT JSON_ARRAY('red', 'green', 'blue') as colors;

-- 验证 JSON 格式
SELECT JSON_VALID('{"name": "test"}') as valid;  -- 返回 1
SELECT JSON_VALID('{name: test}') as valid;      -- 返回 0

8.2 JSON 查询操作

-- 1. JSON_EXTRACT - 提取值(支持路径表达式)
SELECT
    name,
    JSON_EXTRACT(attributes, '$.brand') as brand,
    JSON_EXTRACT(attributes, '$.specs.cpu') as cpu,
    JSON_EXTRACT(attributes, '$.tags[0]') as first_tag
FROM products;

-- 使用 -> 操作符(简写形式)
SELECT
    name,
    attributes->>'$.brand' as brand,        -- ->> 返回纯文本
    attributes->'$.specs.memory' as memory   -- -> 返回 JSON 字符串
FROM products;

-- 2. JSON_CONTAINS - 检查是否包含某值
SELECT * FROM products
WHERE JSON_CONTAINS(attributes->'$.tags', '"laptop"');

-- 3. JSON_CONTAINS_PATH - 检查路径是否存在
SELECT
    name,
    JSON_CONTAINS_PATH(attributes, 'one', '$.specs.cpu') as has_cpu,
    JSON_CONTAINS_PATH(attributes, 'all', '$.specs.cpu', '$.specs.gpu') as has_all
FROM products;

-- 4. JSON_SEARCH - 搜索值
SELECT
    name,
    JSON_SEARCH(attributes, 'one', 'business') as path
FROM products;
-- 返回:$.tags[1] 或 $.tags[2]

-- 5. JSON_KEYS - 获取所有键
SELECT JSON_KEYS(attributes) as all_keys FROM products;
SELECT JSON_KEYS(attributes, '$.specs') as spec_keys FROM products;

-- 6. JSON_LENGTH - 获取长度
SELECT
    name,
    JSON_LENGTH(attributes) as total_keys,
    JSON_LENGTH(attributes->'$.tags') as tag_count
FROM products;

-- 7. JSON_TYPE - 获取 JSON 值类型
SELECT JSON_TYPE(attributes->'$.brand') as type;  -- 返回 STRING
SELECT JSON_TYPE(attributes->'$.tags') as type;   -- 返回 ARRAY

8.3 JSON 修改操作

-- 1. JSON_SET - 设置值(存在则更新,不存在则添加)
UPDATE products
SET attributes = JSON_SET(
    attributes,
    '$.price_updated', TRUE,
    '$.discount', 0.15
)
WHERE id = 1;

-- 2. JSON_INSERT - 插入值(只添加不更新)
UPDATE products
SET attributes = JSON_INSERT(
    attributes,
    '$.warehouse', 'Beijing',
    '$.stock_count', 50
)
WHERE id = 1;

-- 3. JSON_REPLACE - 替换值(只更新不添加)
UPDATE products
SET attributes = JSON_REPLACE(
    attributes,
    '$.price', 11999.00
)
WHERE id = 1;

-- 4. JSON_REMOVE - 删除值
UPDATE products
SET attributes = JSON_REMOVE(
    attributes,
    '$.discount',
    '$.price_updated'
)
WHERE id = 1;

-- 5. JSON_ARRAY_APPEND - 追加到数组
UPDATE products
SET attributes = JSON_ARRAY_APPEND(
    attributes,
    '$.tags', 'new_arrival'
)
WHERE id = 1;

-- 6. JSON_ARRAY_INSERT - 插入到数组指定位置
UPDATE products
SET attributes = JSON_ARRAY_INSERT(
    attributes,
    '$.tags[1]', 'featured'
)
WHERE id = 1;

-- 7. JSON_MERGE_PATCH - 合并 JSON(覆盖模式)
UPDATE products
SET attributes = JSON_MERGE_PATCH(
    attributes,
    '{"warranty": "2 years", "return_policy": "30 days"}'
)
WHERE id = 1;

-- 8. JSON_MERGE_PRESERVE - 合并 JSON(保留模式)
SELECT JSON_MERGE_PRESERVE(
    '{"tags": ["laptop"]}',
    '{"tags": ["business"]}'
) as merged;
-- 返回:{"tags": ["laptop", "business"]}

8.4 JSON 聚合函数

-- 1. JSON_ARRAYAGG - 聚合为数组
SELECT
    JSON_ARRAYAGG(name) as all_products
FROM products;

-- 按品牌聚合价格数组
SELECT
    attributes->>'$.brand' as brand,
    JSON_ARRAYAGG(price) as prices
FROM products
GROUP BY attributes->>'$.brand';

-- 2. JSON_OBJECTAGG - 聚合为对象
SELECT JSON_OBJECTAGG(id, name) as product_map
FROM products;

-- 构建嵌套 JSON 结构
SELECT
    JSON_OBJECT(
        'brand', attributes->>'$.brand',
        'products', JSON_ARRAYAGG(
            JSON_OBJECT(
                'id', id,
                'name', name,
                'price', price
            )
        )
    ) as brand_products
FROM products
GROUP BY attributes->>'$.brand';

-- 3. 组合使用示例
CREATE TABLE orders (
    id INT PRIMARY KEY,
    customer_id INT,
    product_id INT,
    quantity INT,
    order_date DATE
);

INSERT INTO orders VALUES
(1, 100, 1, 2, '2024-01-15'),
(2, 100, 2, 1, '2024-01-16'),
(3, 101, 1, 3, '2024-01-17');

-- 为每个客户生成订单 JSON 报告
SELECT
    customer_id,
    JSON_OBJECT(
        'total_orders', COUNT(*),
        'total_quantity', SUM(quantity),
        'orders', JSON_ARRAYAGG(
            JSON_OBJECT(
                'order_id', id,
                'product_id', product_id,
                'quantity', quantity,
                'date', order_date
            )
        )
    ) as customer_report
FROM orders
GROUP BY customer_id;

8.5 JSON 索引优化

-- 1. 虚拟列 + 索引(推荐方式)
ALTER TABLE products
ADD COLUMN brand VARCHAR(50)
GENERATED ALWAYS AS (attributes->>'$.brand') VIRTUAL;

CREATE INDEX idx_brand ON products(brand);

-- 查询自动使用索引
SELECT * FROM products WHERE brand = 'Apple';

-- 2. 多列索引
ALTER TABLE products
ADD COLUMN cpu VARCHAR(50)
GENERATED ALWAYS AS (attributes->>'$.specs.cpu') VIRTUAL;

CREATE INDEX idx_brand_cpu ON products(brand, cpu);

-- 3. 前缀索引(长字符串)
ALTER TABLE products
ADD COLUMN description VARCHAR(500)
GENERATED ALWAYS AS (attributes->>'$.description') VIRTUAL;

CREATE INDEX idx_desc_prefix ON products(description(50));

-- 4. 查看 JSON 列的索引使用情况
EXPLAIN SELECT * FROM products
WHERE attributes->>'$.brand' = 'Apple';

-- 5. 使用覆盖索引优化 JSON 查询
CREATE INDEX idx_covering ON products(brand, price, name);

SELECT brand, price, name FROM products WHERE brand = 'Apple';
-- Extra 显示 "Using index" 表示使用覆盖索引

8.6 JSON 实战应用场景

-- 场景 1: 动态表单数据存储
CREATE TABLE form_submissions (
    id INT PRIMARY KEY AUTO_INCREMENT,
    form_id INT,
    user_id INT,
    responses JSON,
    submitted_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

INSERT INTO form_submissions (form_id, user_id, responses) VALUES
(1, 100, '{
    "q1": "非常满意",
    "q2": "界面设计",
    "q3": ["易用", "美观", "快速"],
    "rating": 5
}');

-- 查询评分大于 4 的提交
SELECT * FROM form_submissions
WHERE responses->>'$.rating' > 4;

-- 场景 2: 日志数据存储
CREATE TABLE application_logs (
    id BIGINT PRIMARY KEY AUTO_INCREMENT,
    log_level VARCHAR(20),
    log_data JSON,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

INSERT INTO application_logs (log_level, log_data) VALUES
('ERROR', '{
    "error_code": "DB001",
    "message": "Connection timeout",
    "stack_trace": "...",
    "user_id": 123,
    "request_id": "abc-123"
}');

-- 查询特定错误码的日志
SELECT COUNT(*) FROM application_logs
WHERE log_data->>'$.error_code' = 'DB001';

-- 场景 3: 电商商品多规格
CREATE TABLE product_variants (
    id INT PRIMARY KEY,
    product_id INT,
    variant_data JSON
);

INSERT INTO product_variants VALUES
(1, 100, '{
    "sku": "MBP-M2-16-512-SV",
    "options": {
        "color": "Silver",
        "memory": "16GB",
        "storage": "512GB"
    },
    "price": 12999,
    "stock": 50
}');

-- 查询特定配置的商品
SELECT * FROM product_variants
WHERE variant_data->>'$.options.memory' = '16GB'
AND variant_data->>'$.options.color' = 'Silver';

-- 场景 4: 用户画像标签系统
CREATE TABLE user_profiles (
    user_id INT PRIMARY KEY,
    profile_data JSON
);

INSERT INTO user_profiles VALUES
(1, '{
    "demographics": {
        "age": 30,
        "gender": "M",
        "city": "Beijing"
    },
    "interests": ["technology", "music", "travel"],
    "purchase_history": {
        "total_orders": 25,
        "avg_order_value": 500,
        "last_purchase": "2024-01-15"
    },
    "preferences": {
        "newsletter": true,
        "sms_notifications": false
    }
}');

-- 查询北京地区对科技感兴趣的用户
SELECT user_id FROM user_profiles
WHERE profile_data->>'$.demographics.city' = 'Beijing'
AND JSON_SEARCH(profile_data->'$.interests', 'one', 'technology') IS NOT NULL;

-- 场景 5: API 响应缓存
CREATE TABLE api_cache (
    endpoint VARCHAR(200) PRIMARY KEY,
    response_data JSON,
    cache_key VARCHAR(100),
    expires_at TIMESTAMP
);

INSERT INTO api_cache VALUES
('/api/users/1', '{"id": 1, "name": "John", "email": "john@example.com"}', 'user_1', NOW() + INTERVAL 1 HOUR);

-- 查询未过期的缓存
SELECT response_data FROM api_cache
WHERE endpoint = '/api/users/1'
AND expires_at > NOW();

8.7 JSON 最佳实践

场景 推荐方案 说明
频繁查询的字段 虚拟列 + 索引 提取为虚拟列并创建索引
灵活配置数据 JSON 类型 用户偏好、系统配置
动态表单 JSON 类型 字段不固定的场景
日志存储 JSON 类型 结构化日志数据
频繁更新的字段 普通列 避免 JSON 频繁解析开销
需要外键约束 普通列 JSON 不支持外键
大数据量查询 普通列 + 索引 JSON 查询性能较低
-- JSON 使用注意事项:

-- 1. JSON 验证失败会报错
INSERT INTO products (name, attributes)
VALUES ('Test', '{invalid json}');  -- ❌ 报错

-- 2. JSON 路径区分大小写
SELECT attributes->'$.Brand' FROM products;  -- 返回 NULL
SELECT attributes->'$.brand' FROM products;  -- ✅ 正确

-- 3. 使用 ->> 获取纯文本值
SELECT attributes->'$.brand' FROM products;   -- 返回 "Apple"(带引号)
SELECT attributes->>'$.brand' FROM products;  -- 返回 Apple(纯文本)

-- 4. JSON 比较需要类型转换
SELECT * FROM products
WHERE attributes->>'$.specs.memory' = '16GB';  -- ✅ 字符串比较

-- 5. 批量更新 JSON 使用 JSON_MERGE_PATCH
UPDATE products
SET attributes = JSON_MERGE_PATCH(
    attributes,
    '{"updated": true, "update_time": "2024-01-15"}'
);