SCST命令处理流程详解

概述

SCST (SCSI Target Subsystem for Linux) 的核心是一个精心设计的命令处理引擎。本文将深入剖析SCST如何从接收SCSI命令到完成响应的整个处理流程,包括状态机转换、线程模型、执行上下文切换等关键机制。

命令生命周期概览

一个SCSI命令在SCST中的完整生命周期可以分为以下几个阶段:

sequenceDiagram
    participant I as Initiator
    participant TD as Target Driver
    participant SC as SCST Core
    participant DH as Device Handler
    participant BE as Backend Storage

    I->>TD: SCSI Command (CDB)
    TD->>SC: scst_rx_cmd()
    activate SC
    SC->>SC: Allocate scst_cmd
    SC->>SC: LUN translation
    SC->>DH: parse()
    DH-->>SC: Parse result

    alt Write Command
        SC->>TD: rdy_to_xfer()
        TD->>I: R2T (Ready To Transfer)
        I->>TD: Write Data
        TD->>SC: scst_rx_data()
    end

    SC->>DH: exec()
    DH->>BE: I/O operation
    BE-->>DH: I/O complete
    DH->>SC: scst_cmd_done()
    SC->>DH: dev_done()

    SC->>TD: xmit_response()
    TD->>I: SCSI Response
    deactivate SC

    SC->>SC: Free scst_cmd

命令接收:scst_rx_cmd()

接口定义

1
2
3
4
5
6
/**
 * scst_rx_cmd() - hand a newly received SCSI command to the SCST core.
 * @sess:    session the command belongs to
 * @lun:     buffer holding the logical unit number
 * @lun_len: length of @lun
 * @cdb:     SCSI Command Descriptor Block
 * @cdb_len: length of @cdb
 * @atomic:  true when the caller runs in atomic context
 *
 * Return: pointer to the command descriptor created for this CDB.
 */
struct scst_cmd *scst_rx_cmd(struct scst_session *sess,
const uint8_t *lun,
int lun_len,
const uint8_t *cdb,
unsigned int cdb_len,
bool atomic);

参数说明:

  • sess - 命令所属的会话
  • lun - 逻辑单元号(Logical Unit Number)
  • lun_len - lun缓冲区的长度
  • cdb - SCSI命令描述块(Command Descriptor Block)
  • cdb_len - CDB长度
  • atomic - 是否在原子上下文调用

处理流程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/**
 * scst_rx_cmd() - create an SCST command for a CDB received by a target driver.
 * @sess:    session the command belongs to
 * @lun:     buffer holding the logical unit number
 * @lun_len: length of @lun
 * @cdb:     SCSI Command Descriptor Block to copy into the command
 * @cdb_len: number of CDB bytes to copy
 * @atomic:  true when the caller runs in atomic context; selects GFP_ATOMIC
 *
 * The new command starts in SCST_CMD_STATE_INIT_WAIT, is counted in
 * sess->num_cmds and is linked on sess->sess_cmd_list.
 *
 * Return: the new command, or NULL if the allocation failed.
 */
struct scst_cmd *scst_rx_cmd(struct scst_session *sess,
			     const uint8_t *lun, int lun_len,
			     const uint8_t *cdb, unsigned int cdb_len,
			     bool atomic)
{
	struct scst_cmd *cmd;
	unsigned long flags;

	/* 1. Allocate the command; never sleep when the caller is atomic. */
	cmd = scst_alloc_cmd(sess->tgt->tgtt, atomic ? GFP_ATOMIC : GFP_KERNEL);
	if (!cmd)
		return NULL;

	/* 2. Basic bookkeeping. */
	cmd->sess = sess;
	cmd->tgt = sess->tgt;
	cmd->atomic = atomic;

	/*
	 * 3. Remember the LUN so that the later LUN translation step has
	 * something to translate; previously both arguments were dropped.
	 */
	cmd->lun = scst_unpack_lun(lun, lun_len);

	/*
	 * 4. Copy the CDB.
	 * NOTE(review): cdb_len is not validated against the capacity of
	 * cmd->cdb here; confirm the caller guarantees it fits.
	 */
	memcpy(cmd->cdb, cdb, cdb_len);
	cmd->cdb_len = cdb_len;

	/* 5. The command waits for scst_cmd_init_done(). */
	cmd->state = SCST_CMD_STATE_INIT_WAIT;

	/* 6. Account the command in the session. */
	atomic_inc(&sess->num_cmds);

	/*
	 * 7. Link it on the session list. The caller may already have
	 * interrupts disabled (atomic == true), so save and restore the IRQ
	 * state instead of unconditionally re-enabling interrupts on unlock.
	 */
	spin_lock_irqsave(&sess->sess_list_lock, flags);
	list_add_tail(&cmd->sess_cmd_list_entry, &sess->sess_cmd_list);
	spin_unlock_irqrestore(&sess->sess_list_lock, flags);

	return cmd;
}

命令状态机详解

状态机架构

SCST的命令状态机分为两类状态:

  1. Active States(主动状态) - SCST主动推进处理
  2. Passive States(被动状态) - 等待外部事件
stateDiagram-v2
    direction LR

    [*] --> INIT_WAIT: 命令到达

    state "被动状态" as Passive {
        INIT_WAIT --> INIT: scst_cmd_init_done()
        INIT --> DATA_WAIT: WRITE命令
        DATA_WAIT --> REAL_EXEC: scst_rx_data()
        REAL_EXEC --> EXEC_WAIT: 提交I/O
        EXEC_WAIT --> DEV_DONE: I/O完成
    }

    state "主动状态" as Active {
        INIT --> PARSE: LUN解析完成
        PARSE --> PREPARE_SPACE: CDB解析
        PREPARE_SPACE --> RDY_TO_XFER: WRITE数据缓冲
        PREPARE_SPACE --> TGT_PRE_EXEC: READ路径
        RDY_TO_XFER --> DATA_WAIT: rdy_to_xfer()调用
        TGT_PRE_EXEC --> EXEC_CHECK_SN: 预执行完成
        EXEC_CHECK_SN --> REAL_EXEC: SN检查通过
        DEV_DONE --> PRE_XMIT_RESP: dev_done()
        PRE_XMIT_RESP --> XMIT_RESP: 检查通过
        XMIT_RESP --> XMIT_WAIT: xmit_response()
    }

    XMIT_WAIT --> FINISHED: 传输完成
    FINISHED --> [*]

关键状态转换

1. INIT_WAIT → INIT

Target驱动调用scst_cmd_init_done()触发:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
void scst_cmd_init_done(struct scst_cmd *cmd,
enum scst_exec_context pref_context)
{
unsigned long flags;

TRACE_ENTRY();

/* 设置优先执行上下文 */
cmd->pref_context = pref_context;

/* 状态转换 */
cmd->state = SCST_CMD_STATE_INIT;

/* 加入命令处理队列 */
spin_lock_irqsave(&cmd->cmd_threads->cmd_list_lock, flags);
list_add_tail(&cmd->cmd_list_entry, &cmd->cmd_threads->active_cmd_list);
spin_unlock_irqrestore(&cmd->cmd_threads->cmd_list_lock, flags);

/* 唤醒处理线程 */
wake_up(&cmd->cmd_threads->cmd_list_waitQ);

TRACE_EXIT();
}

2. INIT → PARSE

LUN转换完成后,进入CDB解析状态:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
static int scst_cmd_thread(void *arg)
{
struct scst_cmd_thread_t *thr = arg;

while (!kthread_should_stop()) {
wait_event_interruptible(thr->cmd_list_waitQ,
!list_empty(&thr->active_cmd_list) ||
kthread_should_stop());

while ((cmd = get_next_cmd(thr)) != NULL) {
switch (cmd->state) {
case SCST_CMD_STATE_INIT:
/* LUN转换 */
scst_translate_lun(cmd);
cmd->state = SCST_CMD_STATE_PARSE;
/* fall through */

case SCST_CMD_STATE_PARSE:
/* 调用设备处理器的parse()回调 */
res = cmd->dev->handler->parse(cmd);
if (res == SCST_CMD_STATE_NEED_THREAD_CTX) {
/* 需要切换到线程上下文 */
scst_schedule_cmd_for_thread_exec(cmd);
} else {
cmd->state = SCST_CMD_STATE_PREPARE_SPACE;
}
break;

/* ... 其他状态处理 ... */
}
}
}
}

3. PARSE → PREPARE_SPACE

CDB解析完成后,分配数据缓冲区:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
static void scst_prepare_space(struct scst_cmd *cmd)
{
int res;

/* 检查是否需要数据缓冲区 */
if (cmd->bufflen == 0) {
cmd->state = SCST_CMD_STATE_TGT_PRE_EXEC;
return;
}

/* 分配scatter-gather列表 */
res = scst_alloc_space(cmd);
if (res == -ENOMEM) {
/* 内存不足,稍后重试 */
scst_set_busy(cmd);
cmd->state = SCST_CMD_STATE_FINISHED;
return;
}

/* WRITE命令需要先接收数据 */
if (cmd->data_direction == SCST_DATA_WRITE) {
cmd->state = SCST_CMD_STATE_RDY_TO_XFER;
} else {
cmd->state = SCST_CMD_STATE_TGT_PRE_EXEC;
}
}

4. RDY_TO_XFER → DATA_WAIT (WRITE路径)

通知initiator可以发送数据:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
static void scst_rdy_to_xfer(struct scst_cmd *cmd)
{
int res;

/* 调用target驱动的rdy_to_xfer()回调 */
res = cmd->tgt->tgtt->rdy_to_xfer(cmd);

switch (res) {
case SCST_TGT_RES_SUCCESS:
/* 进入等待数据状态 */
cmd->state = SCST_CMD_STATE_DATA_WAIT;
break;

case SCST_TGT_RES_QUEUE_FULL:
/* 队列满,稍后重试 */
scst_set_busy(cmd);
cmd->state = SCST_CMD_STATE_FINISHED;
break;

default:
/* 错误处理 */
scst_set_cmd_error(cmd, SCST_LOAD_SENSE(scst_sense_hardw_error));
cmd->state = SCST_CMD_STATE_FINISHED;
break;
}
}

5. EXEC_WAIT → DEV_DONE

I/O完成后的回调路径:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
void scst_cmd_done(struct scst_cmd *cmd, enum scst_exec_context pref_context)
{
unsigned long flags;

TRACE_ENTRY();

/* 标记命令已完成 */
cmd->completed = 1;

/* 更新执行上下文偏好 */
cmd->pref_context = pref_context;

/* 状态转换 */
cmd->state = SCST_CMD_STATE_DEV_DONE;

/* 加入活跃命令列表 */
spin_lock_irqsave(&cmd->cmd_threads->cmd_list_lock, flags);
list_add_tail(&cmd->cmd_list_entry, &cmd->cmd_threads->active_cmd_list);
spin_unlock_irqrestore(&cmd->cmd_threads->cmd_list_lock, flags);

/* 唤醒处理线程 */
wake_up(&cmd->cmd_threads->cmd_list_waitQ);

TRACE_EXIT();
}

线程模型

Per-CPU线程池

SCST使用per-CPU线程池来处理命令,提升CPU亲和性:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * Shared state of a pool of command-processing threads: the list of
 * commands ready for processing, the lock protecting it and the wait queue
 * the worker threads sleep on.
 */
struct scst_cmd_threads {
/* Array of per-CPU info entries; indexed by CPU id in scst_get_cmd_thread() */
struct scst_percpu_info *percpu_infos;
/* NOTE(review): presumably the number of threads in the pool - confirm */
int nr_threads;

/* Commands ready to be processed; guarded by cmd_list_lock */
struct list_head active_cmd_list;
spinlock_t cmd_list_lock;

/* Wait queue woken whenever a command is added to active_cmd_list */
wait_queue_head_t cmd_list_waitQ;

/* If non-zero, worker threads switch to SCHED_FIFO with this priority */
int cmd_thread_priority;
};

/* Per-CPU entry pairing a CPU with its command-processing thread. */
struct scst_percpu_info {
/* Thread assigned to this CPU */
struct scst_cmd_thread_t *cmd_thread;

/* CPU id */
int cpu;
};

线程分配策略:

1
2
3
4
5
6
7
8
/*
 * scst_get_cmd_thread() - pick the processing thread of the current CPU.
 *
 * Return: the cmd_thread stored in the per-CPU entry of the CPU this
 * function happens to run on.
 */
static struct scst_cmd_thread_t *scst_get_cmd_thread(struct scst_cmd *cmd)
{
	/*
	 * The CPU id is only a placement hint, so it does not matter if the
	 * task migrates right after reading it. raw_smp_processor_id() is
	 * therefore used: smp_processor_id() must not be called with
	 * preemption enabled, and this function may run in process context.
	 */
	int cpu = raw_smp_processor_id();

	return cmd->cmd_threads->percpu_infos[cpu].cmd_thread;
}

线程主循环

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
static int scst_cmd_thread(void *arg)
{
struct scst_cmd_thread_t *thr = arg;
struct scst_cmd *cmd;

TRACE_ENTRY();

current->flags |= PF_NOFREEZE;

/* 设置线程优先级 */
if (thr->cmd_threads->cmd_thread_priority) {
struct sched_param param = {
.sched_priority = thr->cmd_threads->cmd_thread_priority
};
sched_setscheduler(current, SCHED_FIFO, &param);
}

/* 主处理循环 */
while (!kthread_should_stop()) {
/* 等待命令到达 */
wait_event_interruptible(
thr->cmd_threads->cmd_list_waitQ,
!list_empty(&thr->cmd_threads->active_cmd_list) ||
kthread_should_stop());

/* 处理所有待处理命令 */
while ((cmd = scst_get_next_cmd(thr)) != NULL) {
scst_process_cmd_state(cmd);
}
}

TRACE_EXIT();
return 0;
}

执行上下文管理

上下文类型

1
2
3
4
5
/* Execution contexts in which a command can be processed. */
enum scst_exec_context {
SCST_CONTEXT_DIRECT, /* caller's context (may be interrupt context) */
SCST_CONTEXT_TASKLET, /* tasklet context */
SCST_CONTEXT_THREAD /* kernel thread context */
};

上下文切换机制

当设备处理器返回SCST_CMD_STATE_NEED_THREAD_CTX时:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * scst_process_cmd_state() - advance a command by one step of the state
 * machine (only the PARSE step is shown).
 *
 * If the handler's parse() asks for thread context while the command is
 * still marked atomic, the command is re-queued for a worker thread and
 * processing stops here; in every other case it moves to PREPARE_SPACE.
 */
static void scst_process_cmd_state(struct scst_cmd *cmd)
{
	int rc;

	switch (cmd->state) {
	case SCST_CMD_STATE_PARSE:
		rc = cmd->dev->handler->parse(cmd);

		/* Only an atomic caller actually needs the context switch. */
		if (rc == SCST_CMD_STATE_NEED_THREAD_CTX && cmd->atomic) {
			scst_schedule_cmd_for_thread_exec(cmd);
			return; /* a worker thread will resume the command */
		}

		cmd->state = SCST_CMD_STATE_PREPARE_SPACE;
		break;

	/* ... other states ... */
	}
}

上下文切换实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
static void scst_schedule_cmd_for_thread_exec(struct scst_cmd *cmd)
{
unsigned long flags;

/* 标记为非原子上下文 */
cmd->atomic = 0;

/* 设置线程上下文偏好 */
cmd->pref_context = SCST_CONTEXT_THREAD;

/* 加入线程队列 */
spin_lock_irqsave(&cmd->cmd_threads->cmd_list_lock, flags);
list_add_tail(&cmd->cmd_list_entry, &cmd->cmd_threads->active_cmd_list);
spin_unlock_irqrestore(&cmd->cmd_threads->cmd_list_lock, flags);

/* 唤醒线程 */
wake_up(&cmd->cmd_threads->cmd_list_waitQ);
}

命令序列号管理

SCST使用序列号(Serial Number)来维护命令执行顺序:

序列号结构

1
2
3
4
5
6
7
8
9
10
11
12
13
/* Ordering state used to execute commands in serial-number (SN) order. */
struct scst_order_data {
/* Next serial number that is allowed to execute */
atomic_t expected_sn;

/* Count of commands executing without SN ordering */
atomic_t num_free_running_cmds;

/* Lock taken around the SN comparison and the deferred list update */
spinlock_t sn_lock;

/* Commands deferred until their serial number comes up */
struct list_head deferred_cmd_list;
};

序列号检查

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
 * scst_check_sn() - decide whether a command may execute now.
 *
 * SIMPLE and HEAD_OF_QUEUE commands bypass ordering: they are counted as
 * free-running and go straight to REAL_EXEC. Any other command executes
 * only if its serial number equals expected_sn (which is then advanced);
 * otherwise it is parked on the deferred list in SN_WAIT.
 */
static void scst_check_sn(struct scst_cmd *cmd)
{
	struct scst_order_data *ord = cmd->tgt_dev->order_data;
	unsigned long irq_state;
	const bool unordered =
		cmd->queue_type == SCST_CMD_QUEUE_SIMPLE ||
		cmd->queue_type == SCST_CMD_QUEUE_HEAD_OF_QUEUE;

	if (unordered) {
		atomic_inc(&ord->num_free_running_cmds);
		cmd->state = SCST_CMD_STATE_REAL_EXEC;
		return;
	}

	/* Ordered path: compare and update under the SN lock. */
	spin_lock_irqsave(&ord->sn_lock, irq_state);

	if (cmd->sn != atomic_read(&ord->expected_sn)) {
		/* Not this command's turn yet: defer it. */
		list_add_tail(&cmd->sn_cmd_list_entry, &ord->deferred_cmd_list);
		cmd->state = SCST_CMD_STATE_SN_WAIT;
	} else {
		/* In order: execute and advance the expected SN. */
		cmd->state = SCST_CMD_STATE_REAL_EXEC;
		atomic_inc(&ord->expected_sn);
	}

	spin_unlock_irqrestore(&ord->sn_lock, irq_state);
}

READ命令完整流程示例

以下是一个READ(10)命令的完整处理流程:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
/* 1. Target驱动接收命令 */
static int iscsi_rx_cmd(struct iscsi_cmnd *cmnd)
{
struct scst_cmd *scst_cmd;

/* 调用SCST接收命令 */
scst_cmd = scst_rx_cmd(cmnd->conn->session->scst_sess,
cmnd->lun, cmnd->lun_len,
cmnd->scsi_cmd, cmnd->scsi_cmd_len,
true); /* 在原子上下文 */

cmnd->scst_cmd = scst_cmd;

/* 通知SCST命令初始化完成 */
scst_cmd_init_done(scst_cmd, SCST_CONTEXT_THREAD);

return 0;
}

/* 2. 设备处理器解析READ命令 */
/*
 * vdisk_parse_read() - parse a READ(10) CDB.
 *
 * Extracts the 32-bit big-endian LBA at CDB bytes 2..5 and the 16-bit
 * big-endian transfer length at bytes 7..8, then fills in the LBA, data
 * direction, buffer length (blocks * block size) and op_flags.
 *
 * Return: SCST_CMD_STATE_DEFAULT.
 */
static int vdisk_parse_read(struct scst_cmd *cmd)
{
	uint64_t lba = get_unaligned_be32(&cmd->cdb[2]);
	uint32_t transfer_len = get_unaligned_be16(&cmd->cdb[7]);

	/*
	 * Store the LBA: vdisk_exec_read() computes the file offset from
	 * cmd->lba, and the decoded value used to be thrown away.
	 */
	cmd->lba = lba;

	cmd->data_direction = SCST_DATA_READ;
	cmd->bufflen = transfer_len * cmd->dev->block_size;

	cmd->op_flags = SCST_TRANSFER_LEN_TYPE_FIXED;

	return SCST_CMD_STATE_DEFAULT;
}

/* 3. SCST分配数据缓冲区 */
static int scst_alloc_space(struct scst_cmd *cmd)
{
/* 从SGV池分配scatter-gather缓冲区 */
cmd->sg = sgv_pool_alloc(cmd->tgt_dev->pool,
cmd->bufflen,
GFP_KERNEL,
&cmd->sg_cnt);

return cmd->sg ? 0 : -ENOMEM;
}

/* 4. 设备处理器执行READ */
static int vdisk_exec_read(struct scst_cmd *cmd)
{
struct scst_vdisk_dev *virt_dev = cmd->dev->dh_priv;
loff_t offset = cmd->lba * cmd->dev->block_size;

/* 直接I/O读取 */
if (virt_dev->fd) {
/* 从文件读取 */
kernel_read(virt_dev->fd, cmd->sg, cmd->bufflen, &offset);
} else {
/* 从块设备读取 */
blockio_exec_read(cmd, virt_dev->bdev, offset);
}

/* I/O完成后调用 */
cmd->completed = 1;
scst_cmd_done(cmd, SCST_CONTEXT_THREAD);

return SCST_EXEC_COMPLETED;
}

/* 5. Target驱动发送响应 */
/* Defined below; declared here because it is referenced before its body. */
static void iscsi_data_out_end(struct iscsi_cmnd *cmnd);

/*
 * iscsi_xmit_response() - send the data and SCSI response of a command.
 *
 * Builds the response for the iSCSI command stored in scst_cmd->tgt_priv,
 * registers iscsi_data_out_end() as the completion callback and sends the
 * command's scatter-gather buffer.
 *
 * Return: SCST_TGT_RES_SUCCESS.
 */
static int iscsi_xmit_response(struct scst_cmd *scst_cmd)
{
	struct iscsi_cmnd *cmnd = scst_cmd->tgt_priv;

	/* Build the SCSI Response PDU. */
	iscsi_cmnd_create_response(cmnd);

	/* Invoked once the data has been sent. */
	cmnd->data_out_cb = iscsi_data_out_end;

	/* Send data and response. */
	iscsi_send_data_rsp(cmnd, scst_cmd->sg, scst_cmd->bufflen);

	return SCST_TGT_RES_SUCCESS;
}

/* 6. 传输完成后 */
/*
 * iscsi_data_out_end() - transmission-finished callback; tells SCST that
 * the target driver is done with the command.
 */
static void iscsi_data_out_end(struct iscsi_cmnd *cmnd)
{
	struct scst_cmd *finished = cmnd->scst_cmd;

	scst_tgt_cmd_done(finished, SCST_CONTEXT_TASKLET);
}

/* 7. SCST释放资源 */
/*
 * scst_finish_cmd() - release everything owned by a finished command.
 *
 * Returns the scatter-gather buffer to its pool, unlinks the command from
 * the session list, drops the session's command count and frees the
 * command itself.
 */
static void scst_finish_cmd(struct scst_cmd *cmd)
{
	unsigned long flags;

	/* Give the data buffer back to the SGV pool. */
	if (cmd->sg)
		sgv_pool_free(cmd->sg, cmd->sg_cnt, cmd->tgt_dev->pool);

	/*
	 * scst_rx_cmd() takes sess_list_lock with interrupts disabled and
	 * may run in atomic context, so the lock must be taken IRQ-safe here
	 * as well; a plain spin_lock() could deadlock against it.
	 */
	spin_lock_irqsave(&cmd->sess->sess_list_lock, flags);
	list_del(&cmd->sess_cmd_list_entry);
	spin_unlock_irqrestore(&cmd->sess->sess_list_lock, flags);

	/* One command fewer in this session. */
	atomic_dec(&cmd->sess->num_cmds);

	/* Finally free the command descriptor. */
	scst_free_cmd(cmd);
}

性能优化技术

1. 命令批处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 * scst_process_cmd_batch() - grab all queued commands at once and process
 * them without holding the list lock.
 *
 * The pool's active list is spliced onto a private list under the lock;
 * each command is then unlinked and handed to scst_process_cmd_state().
 */
static void scst_process_cmd_batch(struct scst_cmd_thread_t *thr)
{
	struct scst_cmd *cmd, *next;
	unsigned long flags;
	LIST_HEAD(local_list);

	/*
	 * Every producer takes cmd_list_lock with spin_lock_irqsave(), so
	 * the consumer must disable interrupts too.
	 */
	spin_lock_irqsave(&thr->cmd_threads->cmd_list_lock, flags);
	list_splice_init(&thr->cmd_threads->active_cmd_list, &local_list);
	spin_unlock_irqrestore(&thr->cmd_threads->cmd_list_lock, flags);

	/*
	 * Processing may re-queue the command through cmd_list_entry or free
	 * it, so use the _safe iterator and unlink each entry before it is
	 * processed.
	 */
	list_for_each_entry_safe(cmd, next, &local_list, cmd_list_entry) {
		list_del(&cmd->cmd_list_entry);
		scst_process_cmd_state(cmd);
	}
}

2. 零拷贝优化

直接使用bio的页面,避免额外拷贝:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
static void scst_bio_to_sg(struct scst_cmd *cmd, struct bio *bio)
{
struct bio_vec bvec;
struct bvec_iter iter;
int i = 0;

bio_for_each_segment(bvec, bio, iter) {
/* 直接引用bio的页面 */
sg_set_page(&cmd->sg[i++],
bvec.bv_page,
bvec.bv_len,
bvec.bv_offset);
}

cmd->sg_cnt = i;
}

3. CPU亲和性

命令在同一CPU上处理,提升缓存命中率:

1
2
3
4
5
6
7
8
9
10
/*
 * scst_set_cmd_cpu_affinity() - bind a command to the current CPU.
 *
 * Records the CPU the command was seen on and assigns that CPU's
 * processing thread to it.
 */
static void scst_set_cmd_cpu_affinity(struct scst_cmd *cmd)
{
	/*
	 * Only a locality hint is needed, so a stale value after migration
	 * is harmless. raw_smp_processor_id() avoids calling
	 * smp_processor_id() with preemption enabled.
	 */
	int cpu = raw_smp_processor_id();

	/* Remember which CPU the command was assigned on. */
	cmd->cpu = cpu;

	/* Use that CPU's processing thread. */
	cmd->cmd_thr = cmd->cmd_threads->percpu_infos[cpu].cmd_thread;
}

错误处理

Sense数据设置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 * scst_set_cmd_error() - fail a command with CHECK CONDITION and sense data.
 * @cmd:  command to fail
 * @key:  sense key, stored in byte 2
 * @asc:  additional sense code, stored in byte 12
 * @ascq: additional sense code qualifier, stored in byte 13
 *
 * Builds 18 bytes of sense data with response code 0x70 and additional
 * sense length 0x0a, and sets the command status to CHECK CONDITION.
 */
void scst_set_cmd_error(struct scst_cmd *cmd, int key, int asc, int ascq)
{
	enum { SENSE_LEN = 18 };

	/*
	 * Clear all reported bytes first: only five of the 18 are written
	 * below, and the rest would otherwise carry stale contents.
	 */
	memset(cmd->sense, 0, SENSE_LEN);

	cmd->sense[0] = 0x70;	/* Response Code */
	cmd->sense[2] = key;	/* Sense Key */
	cmd->sense[7] = 0x0a;	/* Additional Sense Length */
	cmd->sense[12] = asc;	/* ASC */
	cmd->sense[13] = ascq;	/* ASCQ */

	cmd->sense_valid_len = SENSE_LEN;

	/* Report CHECK CONDITION so the initiator fetches the sense data. */
	cmd->status = SAM_STAT_CHECK_CONDITION;
}

命令超时处理

1
2
3
4
5
6
7
8
9
10
11
/*
 * scst_cmd_timeout_handler() - timer callback fired when a command has been
 * outstanding for too long.
 *
 * Logs the command, its state and its age in milliseconds, then aborts it.
 */
static void scst_cmd_timeout_handler(struct timer_list *t)
{
	struct scst_cmd *cmd = from_timer(cmd, t, timeout_timer);

	/*
	 * jiffies_to_msecs() returns unsigned int, which matches %u and
	 * avoids the overflow-prone manual "* 1000 / HZ" that was printed
	 * with a signed %ld.
	 */
	PRINT_ERROR("Command timeout: cmd %p, state %d, age %u ms",
		    cmd, cmd->state,
		    jiffies_to_msecs(jiffies - cmd->start_time));

	/* Abort the command. */
	scst_abort_cmd(cmd, NULL, false, false);
}

总结

SCST的命令处理流程展示了一个高性能存储系统的典型设计:

  1. 精确的状态机 - 清晰定义每个处理阶段
  2. 灵活的上下文管理 - 平衡性能与功能需求
  3. 高效的线程模型 - Per-CPU线程池提升并发性能
  4. 序列号管理 - 保证命令执行顺序正确性
  5. 零拷贝传输 - 最小化数据移动开销

理解这些机制对于调试SCST问题、优化性能以及开发新的target驱动或设备处理器都至关重要。

在下一篇文章中,我们将深入分析SCST的框架设计,包括target驱动接口、设备处理器接口以及扩展机制。

参考资料

  • SCST源代码:scst/src/scst_lib.c
  • 命令处理核心:scst/src/scst_targ.c
  • iSCSI Target驱动:iscsi-scst/kernel/iscsi.c