- d0ca268 Initial commit of blktrace
Initial commit of blktrace userspace tools 包含下列几个文件:
├── blkparse.c
├── blktrace.c
├── blktrace.h
├── Makefile
└── README
README
% blktrace <dev>
--- run task to generate load to be traced ---
<SIGINT to kill>
--- Generates:
<dev>_dat.[0..ncpus] : Contains binary pdu data
<dev>_out.[0..ncpus] : Contains binary trace data
% blkparse <dev> <ncpus>
--- Generates:
<dev>_log.[0..ncpus] : Contains formatted trace data
Merged formatted trace data to stdout
Trace Categories
/* Number of bits BLK_TC_ACT() shifts a category mask to the left. */
#define BLK_TC_SHIFT (16)
/*
 * Trace categories. Each category is a single bit in a 16-bit mask;
 * BLK_TC_ACT() moves the mask above the action number in an action word.
 */
enum {
BLK_TC_READ = 1 << 0, /* reads */
BLK_TC_WRITE = 1 << 1, /* writes */
BLK_TC_BARRIER = 1 << 2, /* barrier */
BLK_TC_SYNC = 1 << 3, /* sync */
BLK_TC_QUEUE = 1 << 4, /* queueing/merging */
BLK_TC_REQUEUE = 1 << 5, /* requeueing */
BLK_TC_ISSUE = 1 << 6, /* issue */
BLK_TC_COMPLETE = 1 << 7, /* completions */
BLK_TC_FS = 1 << 8, /* fs requests */
BLK_TC_PC = 1 << 9, /* pc requests */
BLK_TC_END = 1 << 15, /* only 16-bits, reminder */
};
Trace Actions
/* Shift a BLK_TC_* category mask up by BLK_TC_SHIFT bits. */
#define BLK_TC_ACT(act) ((act) << BLK_TC_SHIFT)

/*
 * Basic trace action numbers, numbered consecutively starting at 1.
 * The BLK_TA_* macros combine each of these with a category mask.
 */
enum {
	__BLK_TA_QUEUE		= 1,	/* queued */
	__BLK_TA_BACKMERGE	= 2,	/* back merged to existing rq */
	__BLK_TA_FRONTMERGE	= 3,	/* front merge to existing rq */
	__BLK_TA_GETRQ		= 4,	/* allocated new request */
	__BLK_TA_SLEEPRQ	= 5,	/* sleeping on rq allocation */
	__BLK_TA_REQUEUE	= 6,	/* request requeued */
	__BLK_TA_ISSUE		= 7,	/* sent to driver */
	__BLK_TA_COMPLETE	= 8,	/* completed by driver */
};
/*
 * Trace actions in full: the action number in the low bits OR'ed with its
 * category mask shifted up by BLK_TC_ACT(). Additionally, the read or write
 * category bit is masked in (dump_trace_fs() tests the write bit).
 *
 * NOTE(review): BLK_TA_REQUEUE is tagged with BLK_TC_QUEUE even though a
 * BLK_TC_REQUEUE category exists - confirm whether that is intended.
 */
#define BLK_TA_QUEUE (__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_BACKMERGE (__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_FRONTMERGE (__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_GETRQ (__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_SLEEPRQ (__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_REQUEUE (__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_ISSUE (__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE))
#define BLK_TA_COMPLETE (__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE))
struct blk_io_trace - 从 relayfs 获取的一条 message 开始部分对应一个 struct blk_io_trace 记录
/* Magic value held in the upper 24 bits of blk_io_trace.magic (bytes 0x65 0x61 0x74 are ASCII "eat"). */
#define BLK_IO_TRACE_MAGIC (0x65617400)
/* True when the upper 24 bits of t->magic equal BLK_IO_TRACE_MAGIC; the low byte is ignored. */
#define CHECK_MAGIC(t) (((t)->magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
/* NOTE(review): presumably the format version expected in the low byte of magic - confirm. */
#define SUPPORTED_VERSION (0x02)
/*
 * Fixed-size header found at the start of every trace message.
 * Field notes marked "presumably" are inferred from names only; confirm
 * against the code that produces the records.
 */
struct blk_io_trace {
__u32 magic; /* validated with CHECK_MAGIC() */
__u32 sequence; /* presumably a per-record sequence number */
__u64 time; /* presumably a timestamp; units not shown here */
__u64 sector; /* presumably the starting sector of the request */
__u32 bytes; /* byte count; fed to the account_*() helpers by dump_trace_fs() */
__u32 action; /* low 16 bits: __BLK_TA_* number; upper bits: BLK_TC_ACT() category mask */
__u32 pid; /* presumably the pid of the task that caused the event */
__u16 error; /* presumably an error code for the request */
__u16 pdu_len; /* presumably the length of the pdu data that accompanies this record */
};
IOCTL
/* Argument block passed to the BLKSTARTTRACE ioctl. */
struct blk_user_trace_setup {
char name[32]; /* NOTE(review): never set by the caller shown in these notes; presumably filled in by the kernel - confirm */
__u16 act_mask; /* presumably a mask of BLK_TC_* categories to trace - confirm */
__u32 buf_size; /* size of each buffer, presumably in bytes; blktrace passes BUF_SIZE */
__u32 buf_nr; /* presumably the number of buffers; blktrace passes BUF_NR */
};
/* Start tracing; read/write ioctl taking a struct blk_user_trace_setup. */
#define BLKSTARTTRACE _IOWR(0x12,115,struct blk_user_trace_setup)
/* Stop tracing; takes no argument. */
#define BLKSTOPTRACE _IO(0x12,116)
blktrace - block queue tracing application
Step 1. start_trace
/* Size of each trace buffer requested from the kernel (128 KiB). */
#define BUF_SIZE (128 * 1024)
/* Number of trace buffers requested from the kernel. */
#define BUF_NR (4)
struct blk_user_trace_setup buts;
/*
 * Clear the whole request before filling it in. memset() takes
 * (dest, fill byte, length); with the last two arguments swapped the
 * length is 0, nothing is cleared, and name/act_mask reach the ioctl
 * uninitialised.
 */
memset(&buts, 0, sizeof(buts));
buts.buf_size = BUF_SIZE;
buts.buf_nr = BUF_NR;
ioctl(devfd, BLKSTARTTRACE, &buts);
Step 2. extract 从 relayfs 读取 messages
为每个 online CPU 创建一个 thread 执行 extract()
Generates:
<dev>_dat.[0..ncpus] : Contains binary pdu data
<dev>_out.[0..ncpus] : Contains binary trace data
从 relayfs 获取的一条 message 开始部分对应一个 struct blk_io_trace 记录
Step 3. stop_trace
ioctl(devfd, BLKSTOPTRACE)
停止 blktrace
Step 4. show_stats
打印总结信息
CPU<N>: <nr of event processed for CPU_N> events
Total: <nr of event processed for all CPUs> events
blkparse - 将 blktrace 获取的 messages 根据 blk_io_trace header 信息进行分类统计
/*
 * Log one pc-request trace record.
 *
 * The low 16 bits of t->action select the action. Queue, get-request,
 * sleep-request and requeue records are passed to log_generic(); issue
 * and complete records are passed to log_pc(). Any other action is
 * reported on stderr and is not counted in events.
 */
void dump_trace_pc(struct blk_io_trace *t)
{
	const unsigned int act = t->action & 0xffff;
	int generic = 1;	/* cleared for the actions handled by log_pc() */
	char tag;

	switch (act) {
	case __BLK_TA_QUEUE:
		tag = 'Q';
		break;
	case __BLK_TA_GETRQ:
		tag = 'G';
		break;
	case __BLK_TA_SLEEPRQ:
		tag = 'S';
		break;
	case __BLK_TA_REQUEUE:
		tag = 'R';
		break;
	case __BLK_TA_ISSUE:
		tag = 'D';
		generic = 0;
		break;
	case __BLK_TA_COMPLETE:
		tag = 'C';
		generic = 0;
		break;
	default:
		fprintf(stderr, "Bad pc action %x\n", t->action);
		return;
	}

	if (generic)
		log_generic(t, tag);
	else
		log_pc(t, tag);

	events++;
}
/*
 * Account for and log one fs-request trace record.
 *
 * The low 16 bits of t->action select the action. Queue, merge and
 * complete records are first passed to the matching account_*() helper
 * together with the write flag and t->bytes; every recognised record is
 * then handed to its log_*() routine and counted in events. Any other
 * action is reported on stderr and is not counted.
 */
void dump_trace_fs(struct blk_io_trace *t)
{
	/* Non-zero when the write category bit is set in the action word. */
	const int wr = t->action & BLK_TC_ACT(BLK_TC_WRITE);
	const unsigned int act = t->action & 0xffff;

	switch (act) {
	case __BLK_TA_QUEUE:
		account_q(wr, t->bytes);
		log_queue(t, 'Q');
		break;
	case __BLK_TA_BACKMERGE:
	case __BLK_TA_FRONTMERGE:
		/* Both merge directions share accounting; only the tag differs. */
		account_m(wr, t->bytes);
		log_merge(t, act == __BLK_TA_BACKMERGE ? 'M' : 'F');
		break;
	case __BLK_TA_GETRQ:
	case __BLK_TA_SLEEPRQ:
		log_generic(t, act == __BLK_TA_GETRQ ? 'G' : 'S');
		break;
	case __BLK_TA_REQUEUE:
		log_queue(t, 'R');
		break;
	case __BLK_TA_ISSUE:
		log_issue(t, 'D');
		break;
	case __BLK_TA_COMPLETE:
		account_c(wr, t->bytes);
		log_complete(t, 'C');
		break;
	default:
		fprintf(stderr, "Bad fs action %x\n", t->action);
		return;
	}

	events++;
}