概述
SCST (SCSI Target Subsystem for Linux) 的核心是一个精心设计的命令处理引擎。本文将深入剖析SCST如何从接收SCSI命令到完成响应的整个处理流程,包括状态机转换、线程模型、执行上下文切换等关键机制。
命令生命周期概览
一个SCSI命令在SCST中的完整生命周期可以分为以下几个阶段:
sequenceDiagram
participant I as Initiator
participant TD as Target Driver
participant SC as SCST Core
participant DH as Device Handler
participant BE as Backend Storage
I->>TD: SCSI Command (CDB)
TD->>SC: scst_rx_cmd()
activate SC
SC->>SC: Allocate scst_cmd
SC->>SC: LUN translation
SC->>DH: parse()
DH-->>SC: Parse result
alt Write Command
SC->>TD: rdy_to_xfer()
TD->>I: R2T (Ready To Transfer)
I->>TD: Write Data
TD->>SC: scst_rx_data()
end
SC->>DH: exec()
DH->>BE: I/O operation
BE-->>DH: I/O complete
DH->>SC: scst_cmd_done()
SC->>DH: dev_done()
SC->>TD: xmit_response()
TD->>I: SCSI Response
deactivate SC
SC->>SC: Free scst_cmd
命令接收:scst_rx_cmd()
接口定义
1 2 3 4 5 6
/**
 * scst_rx_cmd() - hand a newly received SCSI command to the SCST core
 * @sess:    session the command belongs to
 * @lun:     logical unit number bytes
 * @lun_len: length of the @lun buffer
 * @cdb:     SCSI command descriptor block
 * @cdb_len: length of @cdb
 * @atomic:  true when the caller is running in atomic context
 *
 * Return: the command descriptor created for this request.
 */
struct scst_cmd *scst_rx_cmd(struct scst_session *sess,
			     const uint8_t *lun, int lun_len,
			     const uint8_t *cdb, unsigned int cdb_len,
			     bool atomic);
|
参数说明:
sess - 命令所属的会话
lun - 逻辑单元号(Logical Unit Number)
lun_len - lun缓冲区的长度(字节)
cdb - SCSI命令描述块(Command Descriptor Block)
cdb_len - CDB长度
atomic - 是否在原子上下文调用
处理流程
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
| struct scst_cmd *scst_rx_cmd(struct scst_session *sess, const uint8_t *lun, int lun_len, const uint8_t *cdb, unsigned int cdb_len, bool atomic) { struct scst_cmd *cmd;
cmd = scst_alloc_cmd(sess->tgt->tgtt, atomic ? GFP_ATOMIC : GFP_KERNEL); if (!cmd) return NULL;
cmd->sess = sess; cmd->tgt = sess->tgt; cmd->atomic = atomic;
memcpy(cmd->cdb, cdb, cdb_len); cmd->cdb_len = cdb_len;
cmd->state = SCST_CMD_STATE_INIT_WAIT;
atomic_inc(&sess->num_cmds);
spin_lock_irq(&sess->sess_list_lock); list_add_tail(&cmd->sess_cmd_list_entry, &sess->sess_cmd_list); spin_unlock_irq(&sess->sess_list_lock);
return cmd; }
|
命令状态机详解
状态机架构
SCST的命令状态机分为两类状态:
- Active States(主动状态) - SCST主动推进处理
- Passive States(被动状态) - 等待外部事件
stateDiagram-v2
direction LR
[*] --> INIT_WAIT: 命令到达
state "被动状态" as Passive {
INIT_WAIT --> INIT: scst_cmd_init_done()
INIT --> DATA_WAIT: WRITE命令
DATA_WAIT --> REAL_EXEC: scst_rx_data()
REAL_EXEC --> EXEC_WAIT: 提交I/O
EXEC_WAIT --> DEV_DONE: I/O完成
}
state "主动状态" as Active {
INIT --> PARSE: LUN解析完成
PARSE --> PREPARE_SPACE: CDB解析
PREPARE_SPACE --> RDY_TO_XFER: WRITE数据缓冲
PREPARE_SPACE --> TGT_PRE_EXEC: READ路径
RDY_TO_XFER --> DATA_WAIT: rdy_to_xfer()调用
TGT_PRE_EXEC --> EXEC_CHECK_SN: 预执行完成
EXEC_CHECK_SN --> REAL_EXEC: SN检查通过
DEV_DONE --> PRE_XMIT_RESP: dev_done()
PRE_XMIT_RESP --> XMIT_RESP: 检查通过
XMIT_RESP --> XMIT_WAIT: xmit_response()
}
XMIT_WAIT --> FINISHED: 传输完成
FINISHED --> [*]
关键状态转换
1. INIT_WAIT → INIT
Target驱动调用scst_cmd_init_done()触发:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
| void scst_cmd_init_done(struct scst_cmd *cmd, enum scst_exec_context pref_context) { unsigned long flags;
TRACE_ENTRY();
cmd->pref_context = pref_context;
cmd->state = SCST_CMD_STATE_INIT;
spin_lock_irqsave(&cmd->cmd_threads->cmd_list_lock, flags); list_add_tail(&cmd->cmd_list_entry, &cmd->cmd_threads->active_cmd_list); spin_unlock_irqrestore(&cmd->cmd_threads->cmd_list_lock, flags);
wake_up(&cmd->cmd_threads->cmd_list_waitQ);
TRACE_EXIT(); }
|
2. INIT → PARSE
LUN转换完成后,进入CDB解析状态:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
| static int scst_cmd_thread(void *arg) { struct scst_cmd_thread_t *thr = arg;
while (!kthread_should_stop()) { wait_event_interruptible(thr->cmd_list_waitQ, !list_empty(&thr->active_cmd_list) || kthread_should_stop());
while ((cmd = get_next_cmd(thr)) != NULL) { switch (cmd->state) { case SCST_CMD_STATE_INIT: scst_translate_lun(cmd); cmd->state = SCST_CMD_STATE_PARSE;
case SCST_CMD_STATE_PARSE: res = cmd->dev->handler->parse(cmd); if (res == SCST_CMD_STATE_NEED_THREAD_CTX) { scst_schedule_cmd_for_thread_exec(cmd); } else { cmd->state = SCST_CMD_STATE_PREPARE_SPACE; } break;
} } } }
|
3. PARSE → PREPARE_SPACE
CDB解析完成后,分配数据缓冲区:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
| static void scst_prepare_space(struct scst_cmd *cmd) { int res;
if (cmd->bufflen == 0) { cmd->state = SCST_CMD_STATE_TGT_PRE_EXEC; return; }
res = scst_alloc_space(cmd); if (res == -ENOMEM) { scst_set_busy(cmd); cmd->state = SCST_CMD_STATE_FINISHED; return; }
if (cmd->data_direction == SCST_DATA_WRITE) { cmd->state = SCST_CMD_STATE_RDY_TO_XFER; } else { cmd->state = SCST_CMD_STATE_TGT_PRE_EXEC; } }
|
4. RDY_TO_XFER → DATA_WAIT (WRITE路径)
通知initiator可以发送数据:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
| static void scst_rdy_to_xfer(struct scst_cmd *cmd) { int res;
res = cmd->tgt->tgtt->rdy_to_xfer(cmd);
switch (res) { case SCST_TGT_RES_SUCCESS: cmd->state = SCST_CMD_STATE_DATA_WAIT; break;
case SCST_TGT_RES_QUEUE_FULL: scst_set_busy(cmd); cmd->state = SCST_CMD_STATE_FINISHED; break;
default: scst_set_cmd_error(cmd, SCST_LOAD_SENSE(scst_sense_hardw_error)); cmd->state = SCST_CMD_STATE_FINISHED; break; } }
|
5. EXEC_WAIT → DEV_DONE
I/O完成后的回调路径:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
| void scst_cmd_done(struct scst_cmd *cmd, enum scst_exec_context pref_context) { unsigned long flags;
TRACE_ENTRY();
cmd->completed = 1;
cmd->pref_context = pref_context;
cmd->state = SCST_CMD_STATE_DEV_DONE;
spin_lock_irqsave(&cmd->cmd_threads->cmd_list_lock, flags); list_add_tail(&cmd->cmd_list_entry, &cmd->cmd_threads->active_cmd_list); spin_unlock_irqrestore(&cmd->cmd_threads->cmd_list_lock, flags);
wake_up(&cmd->cmd_threads->cmd_list_waitQ);
TRACE_EXIT(); }
|
线程模型
Per-CPU线程池
SCST使用per-CPU线程池来处理命令,提升CPU亲和性:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
| struct scst_cmd_threads { struct scst_percpu_info *percpu_infos; int nr_threads;
struct list_head active_cmd_list; spinlock_t cmd_list_lock;
wait_queue_head_t cmd_list_waitQ;
int cmd_thread_priority; };
/* Per-CPU slot of a thread pool (one element of percpu_infos[]). */
struct scst_percpu_info {
	/* Worker thread selected for commands handled on this CPU. */
	struct scst_cmd_thread_t *cmd_thread;

	/* NOTE(review): presumably the CPU id this slot describes - confirm. */
	int cpu;
};
|
线程分配策略:
1 2 3 4 5 6 7 8
| static struct scst_cmd_thread_t *scst_get_cmd_thread(struct scst_cmd *cmd) { struct scst_cmd_threads *cmd_threads = cmd->cmd_threads; int cpu = smp_processor_id();
return cmd_threads->percpu_infos[cpu].cmd_thread; }
|
线程主循环
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
| static int scst_cmd_thread(void *arg) { struct scst_cmd_thread_t *thr = arg; struct scst_cmd *cmd;
TRACE_ENTRY();
current->flags |= PF_NOFREEZE;
if (thr->cmd_threads->cmd_thread_priority) { struct sched_param param = { .sched_priority = thr->cmd_threads->cmd_thread_priority }; sched_setscheduler(current, SCHED_FIFO, ¶m); }
while (!kthread_should_stop()) { wait_event_interruptible( thr->cmd_threads->cmd_list_waitQ, !list_empty(&thr->cmd_threads->active_cmd_list) || kthread_should_stop());
while ((cmd = scst_get_next_cmd(thr)) != NULL) { scst_process_cmd_state(cmd); } }
TRACE_EXIT(); return 0; }
|
执行上下文管理
上下文类型
1 2 3 4 5
/*
 * Preferred execution context for the next processing step of a command.
 * NOTE(review): the per-value descriptions are inferred from the names -
 * confirm against the SCST headers.
 */
enum scst_exec_context {
	SCST_CONTEXT_DIRECT  = 0,	/* presumably: run in the caller's context */
	SCST_CONTEXT_TASKLET = 1,	/* presumably: defer to a tasklet */
	SCST_CONTEXT_THREAD  = 2	/* presumably: hand over to a worker thread */
};
|
上下文切换机制
当设备处理器的parse()返回SCST_CMD_STATE_NEED_THREAD_CTX,且命令当前处于原子上下文(cmd->atomic为真)时,SCST会把该命令转交给线程上下文继续处理:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
| static void scst_process_cmd_state(struct scst_cmd *cmd) { int res;
switch (cmd->state) { case SCST_CMD_STATE_PARSE: res = cmd->dev->handler->parse(cmd);
if (res == SCST_CMD_STATE_NEED_THREAD_CTX) { if (cmd->atomic) { scst_schedule_cmd_for_thread_exec(cmd); return; } }
cmd->state = SCST_CMD_STATE_PREPARE_SPACE; break;
} }
|
上下文切换实现
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
| static void scst_schedule_cmd_for_thread_exec(struct scst_cmd *cmd) { unsigned long flags;
cmd->atomic = 0;
cmd->pref_context = SCST_CONTEXT_THREAD;
spin_lock_irqsave(&cmd->cmd_threads->cmd_list_lock, flags); list_add_tail(&cmd->cmd_list_entry, &cmd->cmd_threads->active_cmd_list); spin_unlock_irqrestore(&cmd->cmd_threads->cmd_list_lock, flags);
wake_up(&cmd->cmd_threads->cmd_list_waitQ); }
|
命令序列号管理
SCST使用序列号(Serial Number)来维护命令执行顺序:
序列号结构
1 2 3 4 5 6 7 8 9 10 11 12 13
| struct scst_order_data { atomic_t expected_sn;
atomic_t num_free_running_cmds;
spinlock_t sn_lock;
struct list_head deferred_cmd_list; };
|
序列号检查
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
| static void scst_check_sn(struct scst_cmd *cmd) { struct scst_order_data *order_data = cmd->tgt_dev->order_data; unsigned long flags;
if (cmd->queue_type == SCST_CMD_QUEUE_SIMPLE || cmd->queue_type == SCST_CMD_QUEUE_HEAD_OF_QUEUE) { atomic_inc(&order_data->num_free_running_cmds); cmd->state = SCST_CMD_STATE_REAL_EXEC; return; }
spin_lock_irqsave(&order_data->sn_lock, flags);
if (cmd->sn == atomic_read(&order_data->expected_sn)) { cmd->state = SCST_CMD_STATE_REAL_EXEC; atomic_inc(&order_data->expected_sn); } else { list_add_tail(&cmd->sn_cmd_list_entry, &order_data->deferred_cmd_list); cmd->state = SCST_CMD_STATE_SN_WAIT; }
spin_unlock_irqrestore(&order_data->sn_lock, flags); }
|
READ命令完整流程示例
以下是一个READ(10)命令的完整处理流程:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
| static int iscsi_rx_cmd(struct iscsi_cmnd *cmnd) { struct scst_cmd *scst_cmd;
scst_cmd = scst_rx_cmd(cmnd->conn->session->scst_sess, cmnd->lun, cmnd->lun_len, cmnd->scsi_cmd, cmnd->scsi_cmd_len, true);
cmnd->scst_cmd = scst_cmd;
scst_cmd_init_done(scst_cmd, SCST_CONTEXT_THREAD);
return 0; }
/**
 * vdisk_parse_read() - decode a READ(10) CDB into the command
 * @cmd: command whose cdb[] holds the READ(10) descriptor
 *
 * Reads the 32-bit big-endian LBA from CDB bytes 2..5 and the 16-bit
 * big-endian transfer length (in blocks) from bytes 7..8, then fills in the
 * LBA, data direction, buffer length in bytes and op_flags.
 *
 * Return: SCST_CMD_STATE_DEFAULT.
 */
static int vdisk_parse_read(struct scst_cmd *cmd)
{
	uint64_t lba = get_unaligned_be32(&cmd->cdb[2]);
	uint32_t transfer_len = get_unaligned_be16(&cmd->cdb[7]);

	/* vdisk_exec_read() derives the file offset from cmd->lba. */
	cmd->lba = lba;

	cmd->data_direction = SCST_DATA_READ;
	cmd->bufflen = transfer_len * cmd->dev->block_size;

	cmd->op_flags = SCST_TRANSFER_LEN_TYPE_FIXED;

	return SCST_CMD_STATE_DEFAULT;
}
static int scst_alloc_space(struct scst_cmd *cmd) { cmd->sg = sgv_pool_alloc(cmd->tgt_dev->pool, cmd->bufflen, GFP_KERNEL, &cmd->sg_cnt);
return cmd->sg ? 0 : -ENOMEM; }
static int vdisk_exec_read(struct scst_cmd *cmd) { struct scst_vdisk_dev *virt_dev = cmd->dev->dh_priv; loff_t offset = cmd->lba * cmd->dev->block_size;
if (virt_dev->fd) { kernel_read(virt_dev->fd, cmd->sg, cmd->bufflen, &offset); } else { blockio_exec_read(cmd, virt_dev->bdev, offset); }
cmd->completed = 1; scst_cmd_done(cmd, SCST_CONTEXT_THREAD);
return SCST_EXEC_COMPLETED; }
/**
 * iscsi_xmit_response() - send the data and response of a finished command
 * @scst_cmd: SCST command whose tgt_priv points at the iSCSI command
 *
 * Builds the iSCSI response, installs iscsi_data_out_end() as the
 * completion callback and starts sending the command's data buffer.
 *
 * Return: SCST_TGT_RES_SUCCESS.
 */
static int iscsi_xmit_response(struct scst_cmd *scst_cmd)
{
	struct iscsi_cmnd *req = scst_cmd->tgt_priv;

	iscsi_cmnd_create_response(req);

	/* Invoked once the data has gone out; see iscsi_data_out_end(). */
	req->data_out_cb = iscsi_data_out_end;

	iscsi_send_data_rsp(req, scst_cmd->sg, scst_cmd->bufflen);

	return SCST_TGT_RES_SUCCESS;
}
/**
 * iscsi_data_out_end() - transmit-completion callback of an iSCSI command
 * @cmnd: iSCSI command whose response has been sent
 *
 * Tells SCST that the target driver is done with the associated command,
 * requesting tasklet context for the follow-up processing.
 */
static void iscsi_data_out_end(struct iscsi_cmnd *cmnd)
{
	struct scst_cmd *finished = cmnd->scst_cmd;

	scst_tgt_cmd_done(finished, SCST_CONTEXT_TASKLET);
}
/**
 * scst_finish_cmd() - release everything owned by a finished command
 * @cmd: command in the FINISHED state
 *
 * Returns the data buffer (if any) to the SGV pool, unlinks the command
 * from its session's command list, drops the session's command count and
 * frees the command.
 */
static void scst_finish_cmd(struct scst_cmd *cmd)
{
	unsigned long flags;

	if (cmd->sg)
		sgv_pool_free(cmd->sg, cmd->sg_cnt, cmd->tgt_dev->pool);

	/*
	 * sess_list_lock is taken with interrupts disabled where commands
	 * are added, and completion may be requested in tasklet context, so
	 * use the IRQ-safe variant here too to avoid a deadlock.
	 */
	spin_lock_irqsave(&cmd->sess->sess_list_lock, flags);
	list_del(&cmd->sess_cmd_list_entry);
	spin_unlock_irqrestore(&cmd->sess->sess_list_lock, flags);

	atomic_dec(&cmd->sess->num_cmds);

	scst_free_cmd(cmd);
}
|
性能优化技术
1. 命令批处理
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
| static void scst_process_cmd_batch(struct scst_cmd_thread_t *thr) { struct scst_cmd *cmd; LIST_HEAD(local_list);
spin_lock(&thr->cmd_threads->cmd_list_lock); list_splice_init(&thr->cmd_threads->active_cmd_list, &local_list); spin_unlock(&thr->cmd_threads->cmd_list_lock);
list_for_each_entry(cmd, &local_list, cmd_list_entry) { scst_process_cmd_state(cmd); } }
|
2. 零拷贝优化
直接使用bio的页面,避免额外拷贝:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
| static void scst_bio_to_sg(struct scst_cmd *cmd, struct bio *bio) { struct bio_vec bvec; struct bvec_iter iter; int i = 0;
bio_for_each_segment(bvec, bio, iter) { sg_set_page(&cmd->sg[i++], bvec.bv_page, bvec.bv_len, bvec.bv_offset); }
cmd->sg_cnt = i; }
|
3. CPU亲和性
命令在同一CPU上处理,提升缓存命中率:
1 2 3 4 5 6 7 8 9 10
| static void scst_set_cmd_cpu_affinity(struct scst_cmd *cmd) { int cpu = smp_processor_id();
cmd->cpu = cpu;
cmd->cmd_thr = cmd->cmd_threads->percpu_infos[cpu].cmd_thread; }
|
错误处理
Sense数据设置
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| void scst_set_cmd_error(struct scst_cmd *cmd, int key, int asc, int ascq) { cmd->sense[0] = 0x70; cmd->sense[2] = key; cmd->sense[7] = 0x0a; cmd->sense[12] = asc; cmd->sense[13] = ascq;
cmd->sense_valid_len = 18;
cmd->status = SAM_STAT_CHECK_CONDITION; }
|
命令超时处理
1 2 3 4 5 6 7 8 9 10 11
| static void scst_cmd_timeout_handler(struct timer_list *t) { struct scst_cmd *cmd = from_timer(cmd, t, timeout_timer);
PRINT_ERROR("Command timeout: cmd %p, state %d, age %ld ms", cmd, cmd->state, (jiffies - cmd->start_time) * 1000 / HZ);
scst_abort_cmd(cmd, NULL, false, false); }
|
总结
SCST的命令处理流程展示了一个高性能存储系统的典型设计:
- 精确的状态机 - 清晰定义每个处理阶段
- 灵活的上下文管理 - 平衡性能与功能需求
- 高效的线程模型 - Per-CPU线程池提升并发性能
- 序列号管理 - 保证命令执行顺序正确性
- 零拷贝传输 - 最小化数据移动开销
理解这些机制对于调试SCST问题、优化性能以及开发新的target驱动或设备处理器都至关重要。
在下一篇文章中,我们将深入分析SCST的框架设计,包括target驱动接口、设备处理器接口以及扩展机制。
参考资料
- SCST源代码:
scst/src/scst_lib.c
- 命令处理核心:
scst/src/scst_targ.c(线程创建与模块初始化位于 scst/src/scst_main.c)
- iSCSI Target驱动:
iscsi-scst/kernel/iscsi.c