高级特性
存储引擎对比
| 特性 | InnoDB | MyISAM | Memory | CSV |
|---|---|---|---|---|
| 事务支持 | ✅ ACID | ❌ | ❌ | ❌ |
| 行锁 | ✅ | 表锁 | ✅ | 表锁 |
| 外键 | ✅ | ❌ | ❌ | ❌ |
| 全文索引 | ✅ 5.6+ | ✅ | ❌ | ❌ |
| 集群索引 | ✅ | ❌ | N/A | ❌ |
| 崩溃恢复 | ✅ 自动 | ❌ | ❌ | ❌ |
| 存储限制 | 64TB | 256TB | RAM | 无 |
| 适用场景 | 生产环境 事务需求 |
只读/日志 全文搜索 |
缓存/临时 高速访问 |
数据交换 日志分析 |
1. 索引详解(AI 驱动)
1.1 什么是索引?
索引是数据库中用于加速数据检索的数据结构。就像书籍的目录一样,索引可以帮助快速定位数据,而不需要扫描整个表。合理的索引设计可以显著提升查询性能,但也会占用存储空间并影响写入性能。
-- 查看表的当前索引状态
SHOW INDEX FROM users;
-- 分析表获取索引使用统计
ANALYZE TABLE users;
-- 查看索引统计信息
SELECT * FROM mysql.innodb_index_stats
WHERE database_name = DATABASE() AND table_name = 'users';
1.2 索引类型(点击展开)
索引类型概览
| 类型 | 关键字 | 特点 | 适用场景 |
|---|---|---|---|
| 主键索引 | PRIMARY KEY | 唯一、非空、自动创建聚簇索引 | 唯一标识记录 |
| 唯一索引 | UNIQUE | 值唯一,可为NULL | 邮箱、手机号等唯一字段 |
| 普通索引 | INDEX/KEY | 最基本的索引,无限制 | 加速WHERE查询 |
| 复合索引 | INDEX(a,b,c) | 多列组合,遵循最左前缀 | 多条件查询 |
| 全文索引 | FULLTEXT | 文本搜索,支持中文分词 | 文章、内容搜索 |
| 空间索引 | SPATIAL | 地理空间数据 | LBS、地图应用 |
| 前缀索引 | INDEX(col(10)) | 只索引前缀部分 | 长字符串列 |
1.2.1 主键索引(PRIMARY KEY)
-- 主键索引(自动创建,唯一且非空)
CREATE TABLE users (
id INT AUTO_INCREMENT PRIMARY KEY,
username VARCHAR(100)
);
-- 或者单独定义
CREATE TABLE users (
id INT AUTO_INCREMENT,
username VARCHAR(100),
PRIMARY KEY (id)
);
-- 复合主键
CREATE TABLE user_scores (
user_id INT,
game_id INT,
score INT,
PRIMARY KEY (user_id, game_id)
);
1.2.2 唯一索引(UNIQUE)
-- 创建唯一索引(确保列值唯一)
CREATE UNIQUE INDEX idx_email ON users(email);
-- 在表定义中创建
CREATE TABLE users (
id INT AUTO_INCREMENT PRIMARY KEY,
username VARCHAR(100),
email VARCHAR(255) UNIQUE
);
-- 复合唯一索引
CREATE UNIQUE INDEX idx_username_email ON users(username, email);
1.2.3 普通索引(INDEX)
-- 创建普通索引
CREATE INDEX idx_username ON users(username);
-- 复合索引(最左前缀原则)
CREATE INDEX idx_age_email ON users(age, email);
-- 使用复合索引的查询会命中索引
SELECT * FROM users WHERE age = 25; -- 命中
SELECT * FROM users WHERE age = 25 AND email = 'test@example.com'; -- 命中
SELECT * FROM users WHERE email = 'test@example.com'; -- 不命中
-- 添加索引
ALTER TABLE users ADD INDEX idx_created_at (created_at);
1.2.4 全文索引(FULLTEXT)
-- 全文索引(用于文本搜索)
CREATE FULLTEXT INDEX idx_content ON articles(content);
-- 全文搜索
SELECT * FROM articles
WHERE MATCH(content) AGAINST('MySQL 教程' IN NATURAL LANGUAGE MODE);
-- 布尔模式搜索
SELECT * FROM articles
WHERE MATCH(content, title) AGAINST('MySQL +教程 -基础' IN BOOLEAN MODE);
-- 查询扩展模式
SELECT * FROM articles
WHERE MATCH(content) AGAINST('MySQL' WITH QUERY EXPANSION);
-- 中文分词支持
CREATE TABLE articles (
id INT AUTO_INCREMENT PRIMARY KEY,
content TEXT,
FULLTEXT INDEX ft_content (content) WITH PARSER ngram
);
1.2.5 空间索引(SPATIAL)
-- 空间索引(用于地理数据)
CREATE TABLE locations (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(100),
location GEOMETRY NOT NULL,
SPATIAL INDEX idx_location (location)
);
-- 插入空间数据
INSERT INTO locations (name, location) VALUES
('Beijing', ST_GeomFromText('POINT(116.4074 39.9042)')),
('Shanghai', ST_GeomFromText('POINT(121.4737 31.2304)'));
-- 空间查询
SELECT name, ST_AsText(location) as coordinates
FROM locations
WHERE ST_Distance_Sphere(
location,
ST_GeomFromText('POINT(116.4074 39.9042)')
) < 50000; -- 50公里范围内
1.2.6 前缀索引
-- 前缀索引(节省空间,适用于长字符串)
CREATE INDEX idx_email_prefix ON users(email(20));
-- 查看前缀长度建议
SELECT
COUNT(DISTINCT LEFT(email, 10)) as distinct_10,
COUNT(DISTINCT LEFT(email, 20)) as distinct_20,
COUNT(DISTINCT LEFT(email, 30)) as distinct_30
FROM users;
-- 创建合适长度的前缀索引
CREATE INDEX idx_title_prefix ON articles(title(50));
1.2.7 函数索引
-- 函数索引(对表达式建立索引)
CREATE INDEX idx_email_lower ON users((LOWER(email)));
-- 使用函数索引的查询
SELECT * FROM users WHERE LOWER(email) = 'test@example.com';
-- 计算列索引
ALTER TABLE products ADD COLUMN discounted_price DECIMAL(10,2);
CREATE INDEX idx_discounted_price ON products((price * discount_rate));
-- JSON 列索引
CREATE INDEX idx_user_prefs ON users((JSON_EXTRACT(preferences, '$.language')));
SELECT * FROM users
WHERE JSON_EXTRACT(preferences, '$.language') = 'zh-CN';
1.3 索引设计原则
1.3.1 选择合适的列建立索引
-- 建议索引的列类型:
-- 1. 经常在 WHERE 条件中使用的列
CREATE INDEX idx_status ON orders(status);
-- 2. 经常在 JOIN 条件中使用的列
CREATE INDEX idx_user_id ON orders(user_id);
-- 3. 经常在 ORDER BY 中使用的列
CREATE INDEX idx_created_at ON posts(created_at);
-- 4. 经常在 GROUP BY 中使用的列
CREATE INDEX idx_category ON posts(category_id);
-- 5. 唯一性高的列
CREATE UNIQUE INDEX idx_username ON users(username);
1.3.2 复合索引的列顺序
-- 复合索引设计原则:
-- 1. 将选择性高的列放在前面
CREATE INDEX idx_status_created ON orders(status, created_at);
-- 2. 考虑查询的列顺序
-- 常见查询:SELECT * FROM orders WHERE user_id = ? AND status = ?
CREATE INDEX idx_user_status ON orders(user_id, status);
-- 3. 最左前缀原则
-- 索引 (user_id, status, created_at)
-- 可以使用索引的查询:
SELECT * FROM orders WHERE user_id = 1;
SELECT * FROM orders WHERE user_id = 1 AND status = 'pending';
SELECT * FROM orders WHERE user_id = 1 AND status = 'pending' AND created_at > '2024-01-01';
-- 不可以使用索引的查询:
SELECT * FROM orders WHERE status = 'pending'; -- 跳过最左列
SELECT * FROM orders WHERE created_at > '2024-01-01'; -- 跳过前两列
1.3.3 覆盖索引
-- 覆盖索引(索引包含查询所需的所有列)
-- 索引包含:user_id, status, total_amount
CREATE INDEX idx_user_status_amount ON orders(user_id, status, total_amount);
-- 查询只使用索引,不需要回表
SELECT user_id, status, total_amount
FROM orders
WHERE user_id = 1 AND status = 'completed';
-- 使用 EXPLAIN 验证是否使用覆盖索引
EXPLAIN SELECT user_id, status, total_amount
FROM orders WHERE user_id = 1 AND status = 'completed';
-- Extra 字段显示 "Using index" 表示使用了覆盖索引
1.4 索引管理
1.4.1 查看索引信息
-- 查看表的所有索引
SHOW INDEX FROM users;
-- 查看索引的统计信息
SELECT * FROM mysql.innodb_index_stats
WHERE database_name = DATABASE() AND table_name = 'users';
-- 查看索引使用情况
SELECT
table_name,
index_name,
cardinality,
nullable,
index_type
FROM information_schema.statistics
WHERE table_schema = DATABASE()
ORDER BY table_name, index_name;
-- 查看未使用的索引
SELECT * FROM sys.schema_unused_indexes
WHERE object_schema = DATABASE();
1.4.2 索引维护
-- 分析表(更新索引统计信息)
ANALYZE TABLE users;
-- 优化表(重建表和索引)
OPTIMIZE TABLE users;
-- 重建索引
ALTER TABLE users ENGINE=InnoDB;
-- 清理碎片
ALTER TABLE users ENGINE=InnoDB;
-- 禁用索引(批量导入时提升性能)
ALTER TABLE users DISABLE KEYS;
-- 批量插入数据
ALTER TABLE users ENABLE KEYS;
1.4.3 索引删除
-- 删除索引
DROP INDEX idx_username ON users;
-- 删除主键(如果有自增列需要先删除)
ALTER TABLE users DROP PRIMARY KEY;
-- 删除多个索引
ALTER TABLE users
DROP INDEX idx_email,
DROP INDEX idx_age;
1.5 AI 智能索引优化
-- AI 自动索引推荐
SELECT * FROM sys.ai_index_recommendations
WHERE table_name = 'users'
ORDER BY potential_improvement DESC;
-- 应用 AI 推荐的索引
CALL sys.apply_ai_index_recommendations('users');
-- 查看 AI 优化建议详情
SELECT * FROM sys.ai_query_analysis
WHERE query_time > NOW() - INTERVAL 1 DAY;
-- AI 实时监控索引效果
SELECT
index_name,
used_count,
avg_access_time,
improvement_percentage
FROM sys.ai_index_performance_monitor
WHERE table_name = 'users';
1.6 索引性能优化技巧
1.6.1 隐藏索引(测试性能)
-- 创建隐藏索引(不实际使用,用于测试)
CREATE INDEX idx_test ON users(username) INVISIBLE;
-- 将已有索引设为隐藏
ALTER TABLE users ALTER INDEX idx_username INVISIBLE;
-- 显示隐藏索引
ALTER TABLE users ALTER INDEX idx_username VISIBLE;
-- 查看隐藏索引
SELECT index_name, is_visible
FROM information_schema.statistics
WHERE table_schema = DATABASE() AND table_name = 'users' AND is_visible = 'NO';
1.6.2 索引提示
-- 强制使用指定索引
SELECT * FROM users USE INDEX (idx_username) WHERE username = 'test';
-- 强制不使用索引(全表扫描)
SELECT * FROM users IGNORE INDEX (idx_username) WHERE username = 'test';
-- 指定多个索引
SELECT * FROM users USE INDEX (idx_username, idx_email)
WHERE username = 'test' OR email = 'test@example.com';
1.6.3 索引下推优化
-- MySQL 8.0+ 支持索引下推
-- 对于复合索引,条件过滤在存储引擎层完成
-- 示例:索引 (name, age)
-- 查询:SELECT * FROM users WHERE name = 'test' AND age > 25
-- MySQL 会先在索引中过滤 age > 25,减少回表次数
-- 查看是否使用了索引下推
EXPLAIN SELECT * FROM users
WHERE name = 'test' AND age > 25;
-- Extra 字段显示 "Using index condition" 表示使用了索引下推
1.7 索引最佳实践
-- 最佳实践总结:
-- 1. 为 WHERE、JOIN、ORDER BY、GROUP BY 中的列创建索引
CREATE INDEX idx_user_status ON orders(user_id, status);
-- 2. 避免在区分度低的列上创建索引(如性别、布尔值)
-- 不推荐:CREATE INDEX idx_gender ON users(gender);
-- 3. 不要过度索引(索引会占用空间并降低写入性能)
-- 定期检查未使用的索引并删除
SELECT * FROM sys.schema_unused_indexes;
-- 4. 使用 EXPLAIN 分析查询计划
EXPLAIN SELECT * FROM users WHERE age > 25;
-- 5. 定期维护索引
ANALYZE TABLE users; -- 更新统计信息
OPTIMIZE TABLE users; -- 重建表和索引
-- 6. 使用覆盖索引减少回表
CREATE INDEX idx_covering ON orders(user_id, status, total_amount);
-- 7. 合理使用复合索引,遵循最左前缀原则
CREATE INDEX idx_user_created ON orders(user_id, created_at);
-- 8. 大表删除和更新时考虑索引影响
-- 可以先禁用索引,操作完成后再启用
ALTER TABLE large_table DISABLE KEYS;
-- 执行批量操作
ALTER TABLE large_table ENABLE KEYS;
-- 9. 利用 AI 智能推荐
CALL sys.recommend_indexes('your_database');
-- 10. 监控索引性能,及时调整
SELECT * FROM sys.schema_index_statistics
WHERE table_schema = DATABASE();
1.8 索引常见问题
1.8.1 索引失效的场景
-- 索引失效的常见场景:
-- 1. 使用函数或表达式
SELECT * FROM users WHERE LOWER(username) = 'test'; -- username 索引失效
-- 解决:创建函数索引
CREATE INDEX idx_username_lower ON users((LOWER(username)));
-- 2. 类型不匹配
SELECT * FROM users WHERE id = '1'; -- id 是数字类型,字符串导致索引失效
-- 解决:保持类型一致
SELECT * FROM users WHERE id = 1;
-- 3. LIKE 查询以通配符开头
SELECT * FROM users WHERE username LIKE '%test%'; -- 索引失效
-- 解决:使用全文索引或避免前导通配符
SELECT * FROM users WHERE username LIKE 'test%'; -- 索引有效
-- 4. OR 条件
SELECT * FROM users WHERE username = 'test' OR email = 'test@example.com';
-- 如果两个字段都有索引,会分别使用,但效率可能不如 UNION
-- 解决:使用 UNION ALL
SELECT * FROM users WHERE username = 'test'
UNION ALL
SELECT * FROM users WHERE email = 'test@example.com';
-- 5. NOT IN, NOT EXISTS, <>
SELECT * FROM users WHERE username <> 'test'; -- 索引可能失效
-- 6. 查询条件中包含 NULL
SELECT * FROM users WHERE username IS NULL; -- 如果 username 有索引,可以命中
1.8.2 索引碎片问题
-- 检测索引碎片
SELECT
table_name,
engine,
ROUND(data_length / 1024 / 1024, 2) as data_mb,
ROUND(index_length / 1024 / 1024, 2) as index_mb,
ROUND(data_free / 1024 / 1024, 2) as free_mb
FROM information_schema.tables
WHERE table_schema = DATABASE()
ORDER BY data_free DESC;
-- 重建索引和表(消除碎片)
OPTIMIZE TABLE users;
-- 或者重建表
ALTER TABLE users ENGINE=InnoDB;
-- 对于大表,可以使用 pt-online-schema-change 在线重建
-- pt-online-schema-change --alter "ENGINE=InnoDB" D=your_database,t=users --execute
2. 事务处理(支持分布式事务)
事务是数据库管理系统执行过程中的一个逻辑单位,由一系列SQL语句组成。事务具有ACID特性:原子性(Atomicity)、一致性(Consistency)、隔离性(Isolation)、持久性(Durability)。
flowchart TD
A[开始事务] --> B{执行SQL语句}
B --> C[写UNDO日志]
C --> D[修改数据缓存]
D --> E{操作成功?}
E -->|是| F{提交还是回滚?}
E -->|否| G[回滚事务]
G --> H[读取UNDO日志]
H --> I[恢复数据]
I --> J[结束事务]
F -->|COMMIT| K[写入COMMIT标记]
K --> L[刷新REDO日志到磁盘]
L --> M[释放锁]
M --> J
F -->|ROLLBACK| G
style A fill:#e3f2fd,stroke:#1565c0,stroke-width:2px
style J fill:#c8e6c9,stroke:#2e7d32,stroke-width:2px
style G fill:#ffcdd2,stroke:#c62828,stroke-width:2px
style L fill:#c8e6c9,stroke:#2e7d32,stroke-width:2px
ACID 特性说明:
| 特性 | 描述 | 实现原理 |
|---|---|---|
| 原子性 (A) | 事务中的所有操作要么全部成功,要么全部失败 | UNDO日志 |
| 一致性 (C) | 事务执行前后,数据库状态保持一致 | 约束、触发器 |
| 隔离性 (I) | 并发事务之间互不干扰 | 锁机制、MVCC |
| 持久性 (D) | 事务提交后,修改永久保存 | REDO日志 |
-- 开始事务
START TRANSACTION;
-- 执行 SQL 语句
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
-- 提交事务(自动使用 2PC)
COMMIT;
-- 或回滚事务
ROLLBACK;
-- 保存点
SAVEPOINT sp1;
UPDATE accounts SET balance = balance - 50 WHERE id = 1;
-- 回滚到保存点
ROLLBACK TO sp1;
-- 分布式事务(XA 事务改进)
XA START 'xid1';
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
XA END 'xid1';
XA PREPARE 'xid1';
XA COMMIT 'xid1';
3. 视图(物化视图)
-- 创建普通视图
CREATE VIEW user_summary AS
SELECT
username,
email,
TIMESTAMPDIFF(YEAR, created_at, NOW()) as years_active
FROM users;
-- 创建物化视图(MySQL 9.x 新增)
CREATE MATERIALIZED VIEW mv_user_stats
REFRESH EVERY 1 HOUR
AS
SELECT
COUNT(*) as total_users,
AVG(age) as avg_age,
MAX(created_at) as last_signup
FROM users;
-- 手动刷新物化视图
REFRESH MATERIALIZED VIEW mv_user_stats;
-- 使用视图
SELECT * FROM user_summary;
SELECT * FROM mv_user_stats;
-- 删除视图
DROP VIEW user_summary;
DROP MATERIALIZED VIEW mv_user_stats;
4. 存储过程(支持 JavaScript)
-- 传统存储过程
DELIMITER //
CREATE PROCEDURE get_users_by_age(IN min_age INT)
BEGIN
SELECT * FROM users WHERE age >= min_age;
END //
DELIMITER ;
-- JavaScript 存储过程(MySQL 9.x 新增)
DELIMITER //
CREATE PROCEDURE analyze_user_data()
LANGUAGE JAVASCRIPT AS
$$
const result = session.sql('SELECT * FROM users').execute();
const analysis = {
total: result.length,
avg_age: result.reduce((sum, row) => sum + row.age, 0) / result.length
};
return JSON.stringify(analysis);
$$ //
DELIMITER ;
-- 调用存储过程
CALL get_users_by_age(25);
CALL analyze_user_data();
-- 删除存储过程
DROP PROCEDURE get_users_by_age;
DROP PROCEDURE analyze_user_data;
5. 触发器(支持批量操作)
-- 创建触发器
DELIMITER //
CREATE TRIGGER before_user_insert
BEFORE INSERT ON users
FOR EACH ROW
BEGIN
IF NEW.age < 0 THEN
SET NEW.age = 0;
END IF;
IF NEW.email IS NULL THEN
SIGNAL SQLSTATE '45000' SET MESSAGE_TEXT = 'Email 不能为空';
END IF;
END //
DELIMITER ;
-- 批量操作触发器(FOR EACH STATEMENT)
DELIMITER //
CREATE TRIGGER log_bulk_operation
AFTER INSERT ON users
FOR EACH STATEMENT
BEGIN
INSERT INTO audit_log (operation, row_count, timestamp)
VALUES ('BULK_INSERT', ROW_COUNT(), NOW());
END //
DELIMITER ;
-- 删除触发器
DROP TRIGGER before_user_insert;
6. 连接查询(LATERAL JOIN)
-- 内连接
SELECT u.username, o.order_id
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- LATERAL JOIN(MySQL 9.x 新增)
SELECT u.username, latest_orders.*
FROM users u
LEFT JOIN LATERAL (
SELECT o.order_id, o.total_amount
FROM orders o
WHERE o.user_id = u.id
ORDER BY o.order_date DESC
LIMIT 3
) AS latest_orders ON TRUE;
-- 递归 CTE(查询层级数据)
WITH RECURSIVE category_tree AS (
SELECT id, name, parent_id, 1 as level
FROM categories
WHERE parent_id IS NULL
UNION ALL
SELECT c.id, c.name, c.parent_id, ct.level + 1
FROM categories c
INNER JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT * FROM category_tree ORDER BY level, name;
7. 窗口函数(Window Functions)
窗口函数是 MySQL 8.0+ 引入的强大功能,用于对一组相关的行执行计算,同时保留原始行的结构。窗口函数在数据分析、排名统计、移动平均等场景中非常有用。
flowchart LR
subgraph 传统聚合
A1[GROUP BY] --> A2[分组聚合]
A2 --> A3[返回单行结果]
end
subgraph 窗口函数
B1[OVER 子句] --> B2[定义窗口范围]
B2 --> B3[返回多行结果]
end
style A1 fill:#e3f2fd,stroke:#1565c0
style A3 fill:#ffcdd2,stroke:#c62828
style B1 fill:#c8e6c9,stroke:#2e7d32
style B3 fill:#c8e6c9,stroke:#2e7d32
7.1 窗口函数基础语法
-- 窗口函数基本语法
SELECT
column1,
column2,
window_function() OVER (
PARTITION BY partition_column
ORDER BY order_column
ROWS BETWEEN start AND end
) as result_column
FROM table_name;
-- OVER 子句说明:
-- PARTITION BY: 将数据分组(类似 GROUP BY,但不减少行数)
-- ORDER BY: 窗口内排序
-- ROWS/RANGE: 定义窗口的物理范围
-- 示例数据准备
CREATE TABLE employees (
id INT PRIMARY KEY,
name VARCHAR(50),
department VARCHAR(50),
salary DECIMAL(10,2),
hire_date DATE
);
INSERT INTO employees VALUES
(1, '张三', '技术部', 15000, '2020-01-15'),
(2, '李四', '技术部', 18000, '2019-03-20'),
(3, '王五', '技术部', 12000, '2021-06-10'),
(4, '赵六', '销售部', 10000, '2020-08-05'),
(5, '钱七', '销售部', 13000, '2018-11-12'),
(6, '孙八', '销售部', 11000, '2022-02-28');
7.2 排名函数
-- 1. ROW_NUMBER(): 连续排名,无并列
SELECT
name,
department,
salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) as row_num
FROM employees;
-- 结果示例:
-- 张三 技术部 15000 2
-- 李四 技术部 18000 1
-- 王五 技术部 12000 3
-- 2. RANK(): 跳跃排名,有并列
SELECT
name,
department,
salary,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) as dept_rank
FROM employees;
-- 如果有并列:1, 2, 2, 4(跳过 3)
-- 3. DENSE_RANK(): 连续排名,有并列
SELECT
name,
department,
salary,
DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) as dense_rank
FROM employees;
-- 如果有并列:1, 2, 2, 3(不跳过)
-- 4. NTILE(n): 将数据分成 n 个桶
SELECT
name,
salary,
NTILE(3) OVER (ORDER BY salary DESC) as salary_quartile
FROM employees;
-- 将员工按薪资分为 3 组(高、中、低)
7.3 聚合窗口函数
-- 1. SUM() 窗口聚合
SELECT
name,
department,
salary,
SUM(salary) OVER (PARTITION BY department) as dept_total,
SUM(salary) OVER (ORDER BY salary DESC) as running_total
FROM employees;
-- dept_total: 部门总薪资(每行相同)
-- running_total: 累计薪资(从高到低累加)
-- 2. AVG() 窗口聚合
SELECT
name,
department,
salary,
AVG(salary) OVER (PARTITION BY department) as dept_avg,
AVG(salary) OVER (ORDER BY hire_date) as running_avg
FROM employees;
-- dept_avg: 部门平均薪资
-- running_avg: 按入职时间累计平均
-- 3. COUNT() 窗口聚合
SELECT
name,
department,
salary,
COUNT(*) OVER (PARTITION BY department) as dept_count,
COUNT(*) OVER (ORDER BY hire_date) as employee_sequence
FROM employees;
-- 4. MIN/MAX() 窗口聚合
SELECT
name,
department,
salary,
MIN(salary) OVER (PARTITION BY department) as dept_min,
MAX(salary) OVER (PARTITION BY department) as dept_max,
MAX(salary) OVER () as company_max
FROM employees;
7.4 值函数
-- 1. LAG(): 访问前一行数据
SELECT
name,
hire_date,
salary,
LAG(salary, 1) OVER (ORDER BY hire_date) as prev_salary,
salary - LAG(salary, 1) OVER (ORDER BY hire_date) as salary_diff
FROM employees;
-- 计算与前一位入职员工的薪资差
-- 2. LEAD(): 访问后一行数据
SELECT
name,
hire_date,
salary,
LEAD(salary, 1) OVER (ORDER BY hire_date) as next_salary
FROM employees;
-- 3. FIRST_VALUE(): 获取窗口第一行
SELECT
name,
department,
salary,
FIRST_VALUE(name) OVER (PARTITION BY department ORDER BY salary DESC) as highest_paid
FROM employees;
-- 4. LAST_VALUE(): 获取窗口最后一行
SELECT
name,
department,
salary,
LAST_VALUE(name) OVER (
PARTITION BY department
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) as lowest_paid
FROM employees;
-- 注意:LAST_VALUE 需要指定完整的窗口范围
-- 5. NTH_VALUE(): 获取第 N 行
SELECT
name,
salary,
NTH_VALUE(name, 2) OVER (ORDER BY salary DESC) as second_highest
FROM employees;
7.5 窗口帧定义
-- 窗口帧(Window Frame)定义窗口的具体范围
-- 1. ROWS vs RANGE
-- ROWS: 基于物理行位置
-- RANGE: 基于逻辑值范围
-- 2. 常用边界
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -- 从开始到当前行
ROWS BETWEEN 3 PRECEDING AND CURRENT ROW -- 前 3 行到当前行
ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING -- 当前行到后 3 行
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING -- 前后各 1 行(共 3 行)
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING -- 整个分区
-- 3. 移动平均示例
SELECT
hire_date,
name,
salary,
AVG(salary) OVER (
ORDER BY hire_date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) as moving_avg_3
FROM employees;
-- 计算 3 行移动平均值
-- 4. 累计求和示例
SELECT
hire_date,
name,
salary,
SUM(salary) OVER (
ORDER BY hire_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) as running_total
FROM employees;
7.6 实战应用场景
-- 场景 1: 计算员工薪资排名和部门占比
SELECT
name,
department,
salary,
RANK() OVER (ORDER BY salary DESC) as company_rank,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) as dept_rank,
ROUND(salary * 100.0 / SUM(salary) OVER (), 2) as salary_percentage
FROM employees;
-- 场景 2: 识别薪资高于部门平均的员工
WITH dept_avg AS (
SELECT
name,
department,
salary,
AVG(salary) OVER (PARTITION BY department) as dept_avg_salary
FROM employees
)
SELECT
name,
department,
salary,
dept_avg_salary,
CASE
WHEN salary > dept_avg_salary THEN '高于平均'
WHEN salary = dept_avg_salary THEN '等于平均'
ELSE '低于平均'
END as salary_level
FROM dept_avg;
-- 场景 3: 计算连续 N 天登录的用户
CREATE TABLE user_logins (
user_id INT,
login_date DATE
);
WITH login_groups AS (
SELECT
user_id,
login_date,
DATE_SUB(login_date, INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) DAY) as grp
FROM user_logins
)
SELECT
user_id,
COUNT(*) as consecutive_days
FROM login_groups
GROUP BY user_id, grp
HAVING COUNT(*) >= 3;
-- 场景 4: Top-N 查询(每个部门薪资前 3 名)
WITH ranked_employees AS (
SELECT
name,
department,
salary,
DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) as dept_rank
FROM employees
)
SELECT name, department, salary, dept_rank
FROM ranked_employees
WHERE dept_rank <= 3;
-- 场景 5: 计算同比/环比增长率
CREATE TABLE monthly_sales (
month DATE,
sales DECIMAL(10,2)
);
SELECT
month,
sales,
LAG(sales, 1) OVER (ORDER BY month) as prev_month_sales,
ROUND(
(sales - LAG(sales, 1) OVER (ORDER BY month)) * 100.0 /
LAG(sales, 1) OVER (ORDER BY month), 2
) as growth_rate
FROM monthly_sales;
7.7 窗口函数性能优化
-- 1. 使用合适的索引
CREATE INDEX idx_dept_salary ON employees(department, salary);
-- 2. 避免不必要的窗口计算
-- 不推荐:在子查询中重复计算
SELECT * FROM (
SELECT name, ROW_NUMBER() OVER () as rn FROM employees
) t WHERE rn <= 10;
-- 推荐:使用 LIMIT
SELECT name FROM employees LIMIT 10;
-- 3. 使用 CTE 提高可读性
WITH ranked AS (...)
SELECT * FROM ranked WHERE rank <= 5;
-- 4. 使用 EXPLAIN 分析执行计划
EXPLAIN SELECT
name,
RANK() OVER (ORDER BY salary DESC)
FROM employees;
-- 5. 避免在大表上使用无分区的窗口函数
-- 尽量使用 PARTITION BY 限制窗口范围
7.8 窗口函数 vs 聚合函数
| 特性 | 窗口函数 | 聚合函数 + GROUP BY |
|---|---|---|
| 返回行数 | 保留原始行数 | 分组后行数减少 |
| 使用场景 | 分析、排名、累计 | 统计汇总 |
| OVER 子句 | 必需 | 不支持 |
| SELECT 列 | 可以是任意列 | 必须是 GROUP BY 列或聚合列 |
| 排序支持 | 支持 ORDER BY | 不支持 |
| 窗口帧 | 支持 ROWS/RANGE | 不支持 |
-- 对比示例
-- 聚合函数(行数减少)
SELECT
department,
AVG(salary) as avg_salary
FROM employees
GROUP BY department;
-- 返回:2 行(每个部门一行)
-- 窗口函数(保留原始行)
SELECT
name,
department,
salary,
AVG(salary) OVER (PARTITION BY department) as avg_salary
FROM employees;
-- 返回:6 行(保留每个员工)
8. JSON 增强操作(MySQL 5.7+/8.0+)
MySQL 提供了强大的 JSON 支持,包括存储、查询、修改和索引等功能。JSON 类型允许你存储半结构化数据,同时保持高效的查询性能。
flowchart LR
subgraph JSON 操作
A[存储 JSON] --> B[查询提取]
B --> C[修改更新]
C --> D[验证检查]
D --> E[索引加速]
end
subgraph 应用场景
F[用户配置] --> A
G[日志数据] --> A
H[动态表单] --> A
I[API 响应] --> A
end
style A fill:#e3f2fd,stroke:#1565c0
style E fill:#c8e6c9,stroke:#2e7d32
8.1 JSON 基础操作
-- 创建包含 JSON 列的表
CREATE TABLE products (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(100) NOT NULL,
price DECIMAL(10, 2),
attributes JSON,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 插入 JSON 数据(自动验证格式)
INSERT INTO products (name, price, attributes) VALUES
('MacBook Pro', 12999.00, '{
"brand": "Apple",
"color": "silver",
"specs": {
"cpu": "M2 Pro",
"memory": "16GB",
"storage": "512GB SSD"
},
"tags": ["laptop", "premium", "business"]
}'),
('ThinkPad X1', 9999.00, '{
"brand": "Lenovo",
"color": "black",
"specs": {
"cpu": "i7-1260P",
"memory": "32GB",
"storage": "1TB SSD"
},
"tags": ["laptop", "business", "lightweight"]
}');
-- 使用 JSON_OBJECT 构建 JSON
SELECT
id,
name,
JSON_OBJECT(
'brand', 'Dell',
'price', price,
'in_stock', TRUE
) as json_info
FROM products;
-- 使用 JSON_ARRAY 构建数组
SELECT JSON_ARRAY('red', 'green', 'blue') as colors;
-- 验证 JSON 格式
SELECT JSON_VALID('{"name": "test"}') as valid; -- 返回 1
SELECT JSON_VALID('{name: test}') as valid; -- 返回 0
8.2 JSON 查询操作
-- 1. JSON_EXTRACT - 提取值(支持路径表达式)
SELECT
name,
JSON_EXTRACT(attributes, '$.brand') as brand,
JSON_EXTRACT(attributes, '$.specs.cpu') as cpu,
JSON_EXTRACT(attributes, '$.tags[0]') as first_tag
FROM products;
-- 使用 -> 操作符(简写形式)
SELECT
name,
attributes->>'$.brand' as brand, -- ->> 返回纯文本
attributes->'$.specs.memory' as memory -- -> 返回 JSON 字符串
FROM products;
-- 2. JSON_CONTAINS - 检查是否包含某值
SELECT * FROM products
WHERE JSON_CONTAINS(attributes->'$.tags', '"laptop"');
-- 3. JSON_CONTAINS_PATH - 检查路径是否存在
SELECT
name,
JSON_CONTAINS_PATH(attributes, 'one', '$.specs.cpu') as has_cpu,
JSON_CONTAINS_PATH(attributes, 'all', '$.specs.cpu', '$.specs.gpu') as has_all
FROM products;
-- 4. JSON_SEARCH - 搜索值
SELECT
name,
JSON_SEARCH(attributes, 'one', 'business') as path
FROM products;
-- 返回:$.tags[1] 或 $.tags[2]
-- 5. JSON_KEYS - 获取所有键
SELECT JSON_KEYS(attributes) as all_keys FROM products;
SELECT JSON_KEYS(attributes, '$.specs') as spec_keys FROM products;
-- 6. JSON_LENGTH - 获取长度
SELECT
name,
JSON_LENGTH(attributes) as total_keys,
JSON_LENGTH(attributes->'$.tags') as tag_count
FROM products;
-- 7. JSON_TYPE - 获取 JSON 值类型
SELECT JSON_TYPE(attributes->'$.brand') as type; -- 返回 STRING
SELECT JSON_TYPE(attributes->'$.tags') as type; -- 返回 ARRAY
8.3 JSON 修改操作
-- 1. JSON_SET - 设置值(存在则更新,不存在则添加)
UPDATE products
SET attributes = JSON_SET(
attributes,
'$.price_updated', TRUE,
'$.discount', 0.15
)
WHERE id = 1;
-- 2. JSON_INSERT - 插入值(只添加不更新)
UPDATE products
SET attributes = JSON_INSERT(
attributes,
'$.warehouse', 'Beijing',
'$.stock_count', 50
)
WHERE id = 1;
-- 3. JSON_REPLACE - 替换值(只更新不添加)
UPDATE products
SET attributes = JSON_REPLACE(
attributes,
'$.price', 11999.00
)
WHERE id = 1;
-- 4. JSON_REMOVE - 删除值
UPDATE products
SET attributes = JSON_REMOVE(
attributes,
'$.discount',
'$.price_updated'
)
WHERE id = 1;
-- 5. JSON_ARRAY_APPEND - 追加到数组
UPDATE products
SET attributes = JSON_ARRAY_APPEND(
attributes,
'$.tags', 'new_arrival'
)
WHERE id = 1;
-- 6. JSON_ARRAY_INSERT - 插入到数组指定位置
UPDATE products
SET attributes = JSON_ARRAY_INSERT(
attributes,
'$.tags[1]', 'featured'
)
WHERE id = 1;
-- 7. JSON_MERGE_PATCH - 合并 JSON(覆盖模式)
UPDATE products
SET attributes = JSON_MERGE_PATCH(
attributes,
'{"warranty": "2 years", "return_policy": "30 days"}'
)
WHERE id = 1;
-- 8. JSON_MERGE_PRESERVE - 合并 JSON(保留模式)
SELECT JSON_MERGE_PRESERVE(
'{"tags": ["laptop"]}',
'{"tags": ["business"]}'
) as merged;
-- 返回:{"tags": ["laptop", "business"]}
8.4 JSON 聚合函数
-- 1. JSON_ARRAYAGG - 聚合为数组
SELECT
JSON_ARRAYAGG(name) as all_products
FROM products;
-- 按品牌聚合价格数组
SELECT
attributes->>'$.brand' as brand,
JSON_ARRAYAGG(price) as prices
FROM products
GROUP BY attributes->>'$.brand';
-- 2. JSON_OBJECTAGG - 聚合为对象
SELECT JSON_OBJECTAGG(id, name) as product_map
FROM products;
-- 构建嵌套 JSON 结构
SELECT
JSON_OBJECT(
'brand', attributes->>'$.brand',
'products', JSON_ARRAYAGG(
JSON_OBJECT(
'id', id,
'name', name,
'price', price
)
)
) as brand_products
FROM products
GROUP BY attributes->>'$.brand';
-- 3. 组合使用示例
CREATE TABLE orders (
id INT PRIMARY KEY,
customer_id INT,
product_id INT,
quantity INT,
order_date DATE
);
INSERT INTO orders VALUES
(1, 100, 1, 2, '2024-01-15'),
(2, 100, 2, 1, '2024-01-16'),
(3, 101, 1, 3, '2024-01-17');
-- 为每个客户生成订单 JSON 报告
SELECT
customer_id,
JSON_OBJECT(
'total_orders', COUNT(*),
'total_quantity', SUM(quantity),
'orders', JSON_ARRAYAGG(
JSON_OBJECT(
'order_id', id,
'product_id', product_id,
'quantity', quantity,
'date', order_date
)
)
) as customer_report
FROM orders
GROUP BY customer_id;
8.5 JSON 索引优化
-- 1. 虚拟列 + 索引(推荐方式)
ALTER TABLE products
ADD COLUMN brand VARCHAR(50)
GENERATED ALWAYS AS (attributes->>'$.brand') VIRTUAL;
CREATE INDEX idx_brand ON products(brand);
-- 查询自动使用索引
SELECT * FROM products WHERE brand = 'Apple';
-- 2. 多列索引
ALTER TABLE products
ADD COLUMN cpu VARCHAR(50)
GENERATED ALWAYS AS (attributes->>'$.specs.cpu') VIRTUAL;
CREATE INDEX idx_brand_cpu ON products(brand, cpu);
-- 3. 前缀索引(长字符串)
ALTER TABLE products
ADD COLUMN description VARCHAR(500)
GENERATED ALWAYS AS (attributes->>'$.description') VIRTUAL;
CREATE INDEX idx_desc_prefix ON products(description(50));
-- 4. 查看 JSON 列的索引使用情况
EXPLAIN SELECT * FROM products
WHERE attributes->>'$.brand' = 'Apple';
-- 5. 使用覆盖索引优化 JSON 查询
CREATE INDEX idx_covering ON products(brand, price, name);
SELECT brand, price, name FROM products WHERE brand = 'Apple';
-- Extra 显示 "Using index" 表示使用覆盖索引
8.6 JSON 实战应用场景
-- 场景 1: 动态表单数据存储
CREATE TABLE form_submissions (
id INT PRIMARY KEY AUTO_INCREMENT,
form_id INT,
user_id INT,
responses JSON,
submitted_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
INSERT INTO form_submissions (form_id, user_id, responses) VALUES
(1, 100, '{
"q1": "非常满意",
"q2": "界面设计",
"q3": ["易用", "美观", "快速"],
"rating": 5
}');
-- 查询评分大于 4 的提交
SELECT * FROM form_submissions
WHERE responses->>'$.rating' > 4;
-- 场景 2: 日志数据存储
CREATE TABLE application_logs (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
log_level VARCHAR(20),
log_data JSON,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
INSERT INTO application_logs (log_level, log_data) VALUES
('ERROR', '{
"error_code": "DB001",
"message": "Connection timeout",
"stack_trace": "...",
"user_id": 123,
"request_id": "abc-123"
}');
-- 查询特定错误码的日志
SELECT COUNT(*) FROM application_logs
WHERE log_data->>'$.error_code' = 'DB001';
-- 场景 3: 电商商品多规格
CREATE TABLE product_variants (
id INT PRIMARY KEY,
product_id INT,
variant_data JSON
);
INSERT INTO product_variants VALUES
(1, 100, '{
"sku": "MBP-M2-16-512-SV",
"options": {
"color": "Silver",
"memory": "16GB",
"storage": "512GB"
},
"price": 12999,
"stock": 50
}');
-- 查询特定配置的商品
SELECT * FROM product_variants
WHERE variant_data->>'$.options.memory' = '16GB'
AND variant_data->>'$.options.color' = 'Silver';
-- 场景 4: 用户画像标签系统
CREATE TABLE user_profiles (
user_id INT PRIMARY KEY,
profile_data JSON
);
INSERT INTO user_profiles VALUES
(1, '{
"demographics": {
"age": 30,
"gender": "M",
"city": "Beijing"
},
"interests": ["technology", "music", "travel"],
"purchase_history": {
"total_orders": 25,
"avg_order_value": 500,
"last_purchase": "2024-01-15"
},
"preferences": {
"newsletter": true,
"sms_notifications": false
}
}');
-- 查询北京地区对科技感兴趣的用户
SELECT user_id FROM user_profiles
WHERE profile_data->>'$.demographics.city' = 'Beijing'
AND JSON_SEARCH(profile_data->'$.interests', 'one', 'technology') IS NOT NULL;
-- 场景 5: API 响应缓存
CREATE TABLE api_cache (
endpoint VARCHAR(200) PRIMARY KEY,
response_data JSON,
cache_key VARCHAR(100),
expires_at TIMESTAMP
);
INSERT INTO api_cache VALUES
('/api/users/1', '{"id": 1, "name": "John", "email": "john@example.com"}', 'user_1', NOW() + INTERVAL 1 HOUR);
-- 查询未过期的缓存
SELECT response_data FROM api_cache
WHERE endpoint = '/api/users/1'
AND expires_at > NOW();
8.7 JSON 最佳实践
| 场景 | 推荐方案 | 说明 |
|---|---|---|
| 频繁查询的字段 | 虚拟列 + 索引 | 提取为虚拟列并创建索引 |
| 灵活配置数据 | JSON 类型 | 用户偏好、系统配置 |
| 动态表单 | JSON 类型 | 字段不固定的场景 |
| 日志存储 | JSON 类型 | 结构化日志数据 |
| 频繁更新的字段 | 普通列 | 避免 JSON 频繁解析开销 |
| 需要外键约束 | 普通列 | JSON 不支持外键 |
| 大数据量查询 | 普通列 + 索引 | JSON 查询性能较低 |
-- JSON 使用注意事项:
-- 1. JSON 验证失败会报错
INSERT INTO products (name, attributes)
VALUES ('Test', '{invalid json}'); -- ❌ 报错
-- 2. JSON 路径区分大小写
SELECT attributes->'$.Brand' FROM products; -- 返回 NULL
SELECT attributes->'$.brand' FROM products; -- ✅ 正确
-- 3. 使用 ->> 获取纯文本值
SELECT attributes->'$.brand' FROM products; -- 返回 "Apple"(带引号)
SELECT attributes->>'$.brand' FROM products; -- 返回 Apple(纯文本)
-- 4. JSON 比较需要类型转换
SELECT * FROM products
WHERE attributes->>'$.specs.memory' = '16GB'; -- ✅ 字符串比较
-- 5. 批量更新 JSON 使用 JSON_MERGE_PATCH
UPDATE products
SET attributes = JSON_MERGE_PATCH(
attributes,
'{"updated": true, "update_time": "2024-01-15"}'
);