xv6 驱动和中断——输入输出

在 xv6 启动时,shell 会向屏幕打印 $ 表示等待用户输入;当用户从键盘输入 ls 时,屏幕会显示这两个字符。在这些设备工作的过程中,xv6 内核究竟扮演了什么角色,又是如何实现的呢?

设备驱动

设备驱动是内核中的代码,用于管理特定硬件设备。驱动通常分为 top 和 bottom 两部分:top 通常是内核希望与设备沟通时调用的,而 bottom 则是设备向内核发起中断时的处理程序。

当内核希望与设备进行交互时,read 和 write 等系统调用最终会执行到驱动程序的 top 部分,来让设备执行操作;当操作完成后,设备产生中断,驱动的 bottom 部分开始执行,唤醒等待该设备的进程,并让设备进行新的工作。

初始化设置

内核的 main 首先调用 consoleinit 对控制台进行初始化,该函数调用 uartinit 设置 UART 的传输波特率,重置其缓冲区,最后开启接收中断和发送完成中断。接下来,consoleinit 分别将 CONSOLE 设备的 read 和 write 函数设置为 consoleread 和 consolewrite。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// kernel/console.c
// Bring up the console: set up cons.lock, initialise the UART,
// and install this driver's handlers in devsw[] so that read()
// and write() on the CONSOLE device reach consoleread() and
// consolewrite().
void
consoleinit(void)
{
  initlock(&cons.lock, "cons");
  uartinit();

  // hook the CONSOLE entry of the device table up to this driver.
  devsw[CONSOLE].write = consolewrite;
  devsw[CONSOLE].read = consoleread;
}
// kernel/uart.c
// Configure the UART: set the baud rate and frame format,
// reset the FIFOs, turn on the transmit and receive interrupts,
// and initialize uart_tx_lock.
void
uartinit(void)
{
// disable interrupts; they are re-enabled at the end of setup.
WriteReg(IER, 0x00);

// special mode to set baud rate.
WriteReg(LCR, LCR_BAUD_LATCH);

// LSB for baud rate of 38.4K.
// (written to register 0 while set-baud mode is active.)
WriteReg(0, 0x03);

// MSB for baud rate of 38.4K.
// (written to register 1 while set-baud mode is active.)
WriteReg(1, 0x00);

// leave set-baud mode,
// and set word length to 8 bits, no parity.
WriteReg(LCR, LCR_EIGHT_BITS);

// reset and enable FIFOs.
WriteReg(FCR, FCR_FIFO_ENABLE | FCR_FIFO_CLEAR);

// enable transmit and receive interrupts.
WriteReg(IER, IER_TX_ENABLE | IER_RX_ENABLE);

// lock taken by uartputc() and uartintr() around the transmit buffer.
initlock(&uart_tx_lock, "uart");
}

之后,main 会调用 plicinit 和 plicinithart,告诉 PLIC 哪些设备中断被允许,并设置当前 CPU 对哪些中断感兴趣。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// kernel/plic.c
void
plicinit(void)
{
// set desired IRQ priorities non-zero (otherwise disabled).
*(uint32*)(PLIC + UART0_IRQ*4) = 1;
*(uint32*)(PLIC + VIRTIO0_IRQ*4) = 1;
}
void
plicinithart(void)
{
int hart = cpuid();

// set enable bits for this hart's S-mode
// for the uart and virtio disk.
*(uint32*)PLIC_SENABLE(hart) = (1 << UART0_IRQ) | (1 << VIRTIO0_IRQ);

// set this hart's S-mode priority threshold to 0.
*(uint32*)PLIC_SPRIORITY(hart) = 0;
}

后续用于运行进程的 scheduler 中,intr_on 会打开 CPU 的中断。

1
2
3
4
5
6
// kernel/riscv.h
// Enable interrupts on the current CPU by setting the
// SSTATUS_SIE bit in the sstatus register.
static inline void
intr_on()
{
// read-modify-write: every other sstatus bit is left unchanged.
w_sstatus(r_sstatus() | SSTATUS_SIE);
}

shell 的启动流程

当初始化的工作完成后,内核启动第一个进程 init。可以看到 init 先是通过 mknod 创建设备 CONSOLE,接下来将文件描述符 0,1,2 都指向 CONSOLE;然后通过 fork 和 exec 运行 shell。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// user/init.c
// init: makes sure the console is open (creating the device node
// if the first open fails), duplicates it for stdout and stderr,
// then keeps a shell running, starting a new one whenever the
// current one exits.
int
main(void)
{
  int pid, wpid;

  if(open("console", O_RDWR) < 0){
    // no console node yet: create it, then open it.
    mknod("console", CONSOLE, 0);
    open("console", O_RDWR);
  }
  dup(0);  // stdout
  dup(0);  // stderr

  while(1){
    printf("init: starting sh\n");
    pid = fork();
    if(pid < 0){
      printf("init: fork failed\n");
      exit(1);
    }
    if(pid == 0){
      // child: turn into the shell; reaching the printf means exec failed.
      exec("sh", argv);
      printf("init: exec sh failed\n");
      exit(1);
    }

    // parent: keep reaping until the child that exited is the shell.
    // wait() also returns when a parentless process exits; those
    // results are ignored and the loop waits again.
    do {
      wpid = wait((int *) 0);
      if(wpid < 0){
        printf("init: wait returned an error\n");
        exit(1);
      }
    } while(wpid != pid);
    // the shell exited; go around and restart it.
  }
}

shell 先是检查文件描述符是否正确,然后就陷入 getcmd 的循环之中:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
// user/sh.c
// sh: make sure file descriptors 0-2 are open on the console,
// then read commands in a loop. "cd " is handled in the shell
// process itself; every other command runs in a forked child.
int
main(void)
{
  static char buf[100];
  int fd;

  // Keep opening the console until an open fails or returns a
  // descriptor of 3 or more; that extra descriptor is closed.
  for(;;){
    fd = open("console", O_RDWR);
    if(fd < 0)
      break;
    if(fd >= 3){
      close(fd);
      break;
    }
  }

  // Read and run input commands.
  while(getcmd(buf, sizeof(buf)) >= 0){
    int is_cd = buf[0] == 'c' && buf[1] == 'd' && buf[2] == ' ';

    if(is_cd){
      // Chdir must be called by the parent, not the child.
      buf[strlen(buf)-1] = 0;  // chop \n
      if(chdir(buf+3) < 0)
        fprintf(2, "cannot cd %s\n", buf+3);
    } else {
      // child runs the command; parent waits for it to finish.
      if(fork1() == 0)
        runcmd(parsecmd(buf));
      wait(0);
    }
  }
  exit(0);
}

查看 getcmd,发现其先是调用 write 向 CONSOLE 中打印 $,然后通过 gets 获取命令。

1
2
3
4
5
6
7
8
9
10
11
// user/sh.c
// Print the "$ " prompt on fd 2, then read one line into buf
// (capacity nbuf bytes). Returns -1 if nothing was read (EOF),
// 0 otherwise.
int
getcmd(char *buf, int nbuf)
{
  write(2, "$ ", 2);

  // clear the buffer first so that an empty read leaves buf[0] == 0.
  memset(buf, 0, nbuf);
  gets(buf, nbuf);

  return buf[0] == 0 ? -1 : 0;
}

shell 的输入输出

write

先来关注 write。我们知道 write 是系统调用,会执行到 sys_write,而 sys_write 则调用了 filewrite

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// kernel/sysfile.c
// write() system call entry point. Fetches the buffer address
// (argument 1), the byte count (argument 2) and the file for the
// descriptor in argument 0, then hands the request to filewrite().
// Returns -1 if argfd() fails.
uint64
sys_write(void)
{
  struct file *fp;
  uint64 uaddr;
  int count;

  argaddr(1, &uaddr);
  argint(2, &count);
  if(argfd(0, 0, &fp) < 0)
    return -1;

  return filewrite(fp, uaddr, count);
}

filewrite 中,先是判断要输出到的文件类型,如果是设备文件则直接调用该设备的 write

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// kernel/file.c
// Write to file f.
// addr is a user virtual address.
// returns -1 if f is not writable, or if f is a device whose
// major number is out of range or has no write handler.
// (the rest of the function is elided in this excerpt.)
int
filewrite(struct file *f, uint64 addr, int n)
{
int r, ret = 0;

if(f->writable == 0)
return -1;

if(f->type == FD_PIPE){
ret = pipewrite(f->pipe, addr, n);
} else if(f->type == FD_DEVICE){
// the major number indexes devsw[]; reject it if it is out of
// range or if that device has no write handler installed.
if(f->major < 0 || f->major >= NDEV || !devsw[f->major].write)
return -1;
// the leading 1 is the handler's user_src flag (see consolewrite()):
// addr is a user address.
ret = devsw[f->major].write(1, addr, n);
}
//...

因此,这里执行的就是我们在开头设置的 consolewrite,也即 CONSOLE 驱动的 top 部分。consolewrite 对每个字符,先是把数据从用户空间复制,然后调用 uartputc

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
//kernel/console.c
// Handler for user write()s to the console.
// Copies up to n bytes, one at a time, from src and queues each
// one for transmission with uartputc(). user_src is passed through
// to either_copyin() together with the source address.
// Returns the number of bytes sent, which is less than n if a
// copy fails.
int
consolewrite(int user_src, uint64 src, int n)
{
  int sent = 0;

  while(sent < n){
    char ch;

    if(either_copyin(&ch, user_src, src+sent, 1) == -1)
      break;
    uartputc(ch);
    sent++;
  }

  return sent;
}

read

再来看 gets,其通过系统调用 read 来读取一行字符:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// user/ulib.c
// Read a line from fd 0 into buf, one byte per read() call.
// Stops after a '\n' or '\r' (which is kept in buf), when read()
// returns less than 1, or when max-1 bytes have been stored.
// buf is always NUL-terminated; returns buf.
char*
gets(char *buf, int max)
{
  int len = 0;

  // leave room for the terminating NUL.
  while(len + 1 < max){
    char ch;

    if(read(0, &ch, 1) < 1)
      break;
    buf[len++] = ch;
    if(ch == '\n' || ch == '\r')
      break;
  }
  buf[len] = '\0';
  return buf;
}

read 与 write 的调用路径是类似的,通过 sys_read 和 fileread 最终到达 consoleread。consoleread 比 consolewrite 要复杂,它从缓冲区 cons.buf 中读取整行的输入到用户空间,如果无字符可读或读完缓冲区后该行仍未结束,则会进入 sleep。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
// kernel/console.c
// user read()s from the console go here.
// copy (up to) a whole input line to dst.
// user_dst indicates whether dst is a user
// or kernel address.
// returns the number of bytes copied, or -1 if the
// calling process was killed while waiting for input.
//
int
consoleread(int user_dst, uint64 dst, int n)
{
uint target;
int c;
char cbuf;

// remember the requested size; the result is target minus
// whatever is still left in n.
target = n;
acquire(&cons.lock);
while(n > 0){
// wait until interrupt handler has put some
// input into cons.buffer.
while(cons.r == cons.w){
if(killed(myproc())){
release(&cons.lock);
return -1;
}
// sleep on &cons.r; consoleintr() calls wakeup(&cons.r)
// after it advances cons.w.
sleep(&cons.r, &cons.lock);
}

// cons.r only ever counts up here; it is reduced modulo the
// buffer size when used as an index.
c = cons.buf[cons.r++ % INPUT_BUF_SIZE];

if(c == C('D')){ // end-of-file
if(n < target){
// Save ^D for next time, to make sure
// caller gets a 0-byte result.
cons.r--;
}
break;
}

// copy the input byte to the user-space buffer.
cbuf = c;
if(either_copyout(user_dst, dst, &cbuf, 1) == -1)
break;

dst++;
--n;

if(c == '\n'){
// a whole line has arrived, return to
// the user-level read().
break;
}
}
release(&cons.lock);

return target - n;
}

UART 的工作原理

字符的输出

现在我们知道,shell 的输入输出最终通过内核中 CONSOLE 驱动的 top 部分进行,接下来就是最底层的 UART 部分。先来看 uartputc,这里使用了一个生产者-消费者模型的环形队列,并最终通过 uartstart 发送字符给 UART,如果队列已满则会陷入 sleep 等待。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
// kernel/uart.c
// add a character to the output buffer and tell the
// UART to start sending if it isn't already.
// blocks if the output buffer is full.
// because it may block, it can't be called
// from interrupts; it's only suitable for use
// by write().
void
uartputc(int c)
{
acquire(&uart_tx_lock);

// spin forever once `panicked` is set (it is set outside this
// excerpt), so nothing more is queued.
if(panicked){
for(;;)
;
}
// uart_tx_w and uart_tx_r are free-running counters; the buffer
// is full when they differ by exactly UART_TX_BUF_SIZE.
while(uart_tx_w == uart_tx_r + UART_TX_BUF_SIZE){
// buffer is full.
// wait for uartstart() to open up space in the buffer.
sleep(&uart_tx_r, &uart_tx_lock);
}
// store at the write position, reduced modulo the buffer size.
uart_tx_buf[uart_tx_w % UART_TX_BUF_SIZE] = c;
uart_tx_w += 1;
// uartstart() requires uart_tx_lock, which is still held here.
uartstart();
release(&uart_tx_lock);
}

uartstart 会尝试向 UART 发送字符:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
// kernel/uart.c
// if the UART is idle, and a character is waiting
// in the transmit buffer, send it.
// keeps sending until the buffer is empty or the
// UART cannot accept another byte.
// caller must hold uart_tx_lock.
// called from both the top- and bottom-half.
void
uartstart()
{
while(1){
if(uart_tx_w == uart_tx_r){
// transmit buffer is empty.
return;
}

if((ReadReg(LSR) & LSR_TX_IDLE) == 0){
// the UART transmit holding register is full,
// so we cannot give it another byte.
// it will interrupt when it's ready for a new byte.
return;
}

// take the oldest queued byte and advance the read counter.
int c = uart_tx_buf[uart_tx_r % UART_TX_BUF_SIZE];
uart_tx_r += 1;

// maybe uartputc() is waiting for space in the buffer.
wakeup(&uart_tx_r);

// hand the byte to the UART's transmit holding register.
WriteReg(THR, c);
}
}

字符的输入

当用户在键盘上输入字符时,UART 会产生一个中断,PLIC 接收到中断后会路由到特定的 CPU 核,如果该核心设置了 SIE 寄存器中的 SEIE 位(supervisor 外部中断使能位),硬件执行以下操作:

  1. 清除 sstatus 寄存器中的 SIE 位以关闭中断;
  2. 设置 SEPC 为当前程序计数器;
  3. 保存当前的 mode;
  4. 将 mode 设置为 Supervisor mode;
  5. 将 PC 设置为 STVEC 的值

接下来发生的事和 trap 一致,即保存现场并进入 trap.c 的 usertrap 函数,然后判断是否为设备中断:

1
2
3
4
5
//...
else if((which_dev = devintr()) != 0){
// ok
}
//...

查看 devintr 可知,内核通过查看 scause 寄存器判断是否为外部中断,然后通过 plic_claim 获取中断;对于键盘输入的中断而言,最终会调用中断处理程序 uartintr

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
// kernel/trap.c
// check if it's an external interrupt or software interrupt,
// and handle it.
// the kind of trap is decoded from the scause register.
// returns 2 if timer interrupt,
// 1 if other device,
// 0 if not recognized.
int
devintr()
{
uint64 scause = r_scause();

// top bit of scause set, and low byte equal to 9.
if((scause & 0x8000000000000000L) &&
(scause & 0xff) == 9){
// this is a supervisor external interrupt, via PLIC.

// irq indicates which device interrupted.
int irq = plic_claim();

// dispatch to the driver that owns this IRQ; a non-zero IRQ
// with no known owner is only reported.
if(irq == UART0_IRQ){
uartintr();
} else if(irq == VIRTIO0_IRQ){
virtio_disk_intr();
} else if(irq){
printf("unexpected interrupt irq=%d\n", irq);
}

// the PLIC allows each device to raise at most one
// interrupt at a time; tell the PLIC the device is
// now allowed to interrupt again.
if(irq)
plic_complete(irq);

return 1;
} else if(scause == 0x8000000000000001L){
// software interrupt from a machine-mode timer interrupt,
// forwarded by timervec in kernelvec.S.

// only the CPU with id 0 calls clockintr().
if(cpuid() == 0){
clockintr();
}

// acknowledge the software interrupt by clearing
// the SSIP bit in sip.
w_sip(r_sip() & ~2);

return 2;
} else {
return 0;
}
}

uartintr 是 UART 驱动的 bottom 部分,用来处理收到输入字符(如键盘输入)或 UART 可以继续发送字符时产生的中断。这个函数先是通过 uartgetc 读取字符,然后通过 consoleintr(即 CONSOLE 的中断处理程序)把字符显示到 CONSOLE 中,最后调用 uartstart 发送缓冲区中待发送的字符。这里并不是中断的嵌套,因为不涉及硬件状态的转换。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// kernel/uart.c
// UART interrupt handler, called from devintr().
// The interrupt means input has arrived, the UART can take
// more output, or both, so this handles both directions:
// it passes every pending input character to consoleintr(),
// then restarts transmission of buffered output.
void
uartintr(void)
{
  int ch;

  // drain the input side; uartgetc() returns -1 when nothing is left.
  while((ch = uartgetc()) != -1)
    consoleintr(ch);

  // push out buffered characters; uartstart() needs uart_tx_lock held.
  acquire(&uart_tx_lock);
  uartstart();
  release(&uart_tx_lock);
}

consoleintr 中,CONSOLE 会对特殊字符(如 BackSpace,Ctrl+P等)进行处理,然后通过 consputc 把字符显示到屏幕上;如果读入到换行符,那么还会唤醒等待读入的 consoleread,把内容返回给 shell。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
// kernel/console.c
// the console input interrupt handler.
// uartintr() calls this for input character.
// do erase/kill processing, append to cons.buf,
// wake up consoleread() if a whole line has arrived.
//
// three indices into cons.buf are used: cons.r is where
// consoleread() reads next, cons.w marks the end of the input
// made available to consoleread(), and cons.e is where the next
// typed character is stored.
//
void
consoleintr(int c)
{
acquire(&cons.lock);

switch(c){
case C('P'): // Print process list.
procdump();
break;
case C('U'): // Kill line.
// erase back to cons.w or to the previous newline,
// whichever comes first.
while(cons.e != cons.w &&
cons.buf[(cons.e-1) % INPUT_BUF_SIZE] != '\n'){
cons.e--;
consputc(BACKSPACE);
}
break;
case C('H'): // Backspace
case '\x7f': // Delete key
// only characters after cons.w can be erased.
if(cons.e != cons.w){
cons.e--;
consputc(BACKSPACE);
}
break;
default:
// ignore NUL, and drop input once the buffer holds
// INPUT_BUF_SIZE unread characters.
if(c != 0 && cons.e-cons.r < INPUT_BUF_SIZE){
// a carriage return is stored and echoed as a newline.
c = (c == '\r') ? '\n' : c;

// echo back to the user.
consputc(c);

// store for consumption by consoleread().
cons.buf[cons.e++ % INPUT_BUF_SIZE] = c;

if(c == '\n' || c == C('D') || cons.e-cons.r == INPUT_BUF_SIZE){
// wake up consoleread() if a whole line (or end-of-file)
// has arrived.
// a full buffer also publishes the input.
cons.w = cons.e;
wakeup(&cons.r);
}
}
break;
}

release(&cons.lock);
}

那么拼图的最后一块就是 consputc 了,这个函数通过 uartputc_sync 来向屏幕输出字符,和 consolewrite 所使用的 uartputc 有所不同,同步版的 uartputc_sync 会一直自旋直到能够发送字符为止,而异步版的 uartputc 调用了 uartstart,在发现不能发送字符时会直接返回,后续能够继续发送字符时 UART 发出中断,uartintr 会重新调用 uartstart

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// kernel/console.c
// Output a single character on the uart with uartputc_sync().
// Used by printf() and for echoing typed input, but not by
// write(). BACKSPACE is expanded into three characters.
//
void
consputc(int c)
{
  if(c != BACKSPACE){
    uartputc_sync(c);
    return;
  }

  // backspace typed: step back, overwrite with a space, step back again.
  uartputc_sync('\b');
  uartputc_sync(' ');
  uartputc_sync('\b');
}

// kernel/uart.c
// alternate version of uartputc() that doesn't
// use interrupts, for use by kernel printf() and
// to echo characters. it spins waiting for the uart's
// output register to be empty.
// unlike uartputc(), it neither sleeps nor takes uart_tx_lock.
void
uartputc_sync(int c)
{
// NOTE(review): push_off()/pop_off() are defined outside this
// excerpt; presumably they turn interrupts off and back on
// around the register access - confirm in spinlock.c.
push_off();

// spin forever once `panicked` is set (it is set outside this excerpt).
if(panicked){
for(;;)
;
}

// wait for Transmit Holding Empty to be set in LSR.
while((ReadReg(LSR) & LSR_TX_IDLE) == 0)
;
WriteReg(THR, c);

pop_off();
}

xv6 驱动和中断——输入输出
https://ch3chohch3.github.io/2022/08/31/xv6_itrpt/
作者
CH3CHOHCH3
发布于
2022年8月31日
许可协议