1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/*
 * Per-device state.
 *
 * Member comments marked "presumably" or "NOTE(review)" are inferred from
 * the member names alone and should be confirmed against the code that
 * reads and writes them.
 */
struct ioc {
struct rq_qos rqos; /* embedded rq_qos object (first member) */

bool enabled;

/* NOTE(review): units below are inferred from the _us / _ns suffixes */
struct ioc_params params;
struct ioc_margins margins;
u32 period_us;
u32 timer_slack_ns;
u64 vrate_min; /* presumably the lower bound for the vtime rate - confirm */
u64 vrate_max; /* presumably the upper bound for the vtime rate - confirm */

spinlock_t lock; /* NOTE(review): which members this protects is not visible here */
struct timer_list timer;
struct list_head active_iocgs; /* active cgroups */
struct ioc_pcpu_stat __percpu *pcpu_stat; /* per-CPU statistics */

enum ioc_running running;
atomic64_t vtime_rate;
u64 vtime_base_rate;
s64 vtime_err; /* signed, unlike the other vtime members */

/* NOTE(review): presumably guards the two period_at* members - confirm */
seqcount_spinlock_t period_seqcount;
u64 period_at; /* wallclock starttime */
u64 period_at_vtime; /* vtime starttime */

atomic64_t cur_period; /* inc'd each period */
int busy_level; /* saturation history */

bool weights_updated;
atomic_t hweight_gen; /* for lazy hweights */

/* debt forgiveness */
u64 dfgv_period_at;
u64 dfgv_period_rem;
u64 dfgv_usage_us_sum;

/* NOTE(review): "autop" presumably refers to automatic parameter selection */
u64 autop_too_fast_at;
u64 autop_too_slow_at;
int autop_idx;
/* single-bit flags; presumably set when the user supplied the setting */
bool user_qos_params:1;
bool user_cost_model:1;
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
/*
 * Per device-cgroup pair state (referred to as an "iocg" in the comments
 * below).
 *
 * The struct ends in a flexible array member (`ancestors`), so it must be
 * allocated with room for that array and `ancestors` must remain the last
 * member.
 */
struct ioc_gq {
struct blkg_policy_data pd; /* embedded policy data (first member) */
struct ioc *ioc; /* presumably the device this iocg belongs to - confirm */

/*
 * An iocg can get its weight from two sources - an explicit
 * per-device-cgroup configuration or the default weight of the
 * cgroup. `cfg_weight` is the explicit per-device-cgroup
 * configuration. `weight` is the effective considering both
 * sources.
 *
 * When an idle cgroup becomes active its `active` goes from 0 to
 * `weight`. `inuse` is the surplus adjusted active weight.
 * `active` and `inuse` are used to calculate `hweight_active` and
 * `hweight_inuse`.
 *
 * `last_inuse` remembers `inuse` while an iocg is idle to persist
 * surplus adjustments.
 *
 * `inuse` may be adjusted dynamically during a period. `saved_*` are
 * used to determine and track adjustments.
 */

u32 cfg_weight;
u32 weight;
u32 active;
u32 inuse;

u32 last_inuse;
s64 saved_margin;

sector_t cursor; /* to detect randio */

/*
 * `vtime` is this iocg's vtime cursor which progresses as IOs are
 * issued. If lagging behind device vtime, the delta represents
 * the currently available IO budget. If running ahead, the
 * overage.
 *
 * `done_vtime` is the same but progressed on completion rather
 * than issue. The delta behind `vtime` represents the cost of
 * currently in-flight IOs.
 */
atomic64_t vtime;
atomic64_t done_vtime;
u64 abs_vdebt;

/* current delay in effect and when it started */
u64 delay;
u64 delay_at;

/*
 * The period this iocg was last active in. Used for deactivation
 * and invalidating `vtime`.
 */
atomic64_t active_period;
struct list_head active_list;

/* see __propagate_weights() and current_hweight() for details */
u64 child_active_sum;
u64 child_inuse_sum;
u64 child_adjusted_sum;
int hweight_gen;
u32 hweight_active;
u32 hweight_inuse;
u32 hweight_donating;
u32 hweight_after_donation;

/* NOTE(review): list linkage; the lists these join are not visible here */
struct list_head walk_list;
struct list_head surplus_list;

struct wait_queue_head waitq;
struct hrtimer waitq_timer;

/* timestamp at the latest activation */
u64 activated_at;

/* statistics */
struct iocg_pcpu_stat __percpu *pcpu_stat;
struct iocg_stat stat;
struct iocg_stat last_stat;
u64 last_stat_abs_vusage;
u64 usage_delta_us;
u64 wait_since;
u64 indebt_since;
u64 indelay_since;

/* this iocg's depth in the hierarchy and ancestors including self */
int level;
struct ioc_gq *ancestors[]; /* flexible array member; must stay last */
};

Y_4k / Y_b = a + b * X_4k / X_b

Y_4k = a * X_4k * Y_b / X_b, so a = Y_4k * X_b / (X_4k * Y_b)

SSD IOPS: a_8k = 70k * 8k / (4k * 51k) = 2.745

a_16k = 70k * 16k / (29k * 4k) = 9.655

y = a + bx + cx^2

IOPS = y/x = 70M/x + 60 - 2 * x

BPS = 70M + 60x - 2x^2

270M = a + b * 4k + c * 16M

400M = a + b * 8k + c * 64M

460M = a + b * 16k + c * 256M

130M = 4k * b+ c * 48M

60M = 8k * b + c * 192M

200M = - c * 96M (twice the 130M equation minus the 60M equation, which eliminates b)

c= -2.08

b = 57.42k

a = 460M - 918.72M + 532.48M = 74M

(y_4k, 4k).