Under Linux, select is the most commonly used of the I/O multiplexing models. select itself only checks whether a file's data is ready, so it is by no means limited to network programming; that readiness check, however, has to be supported by the underlying driver, and the core mechanism behind it is the wait queue. From the driver's point of view the other models, poll and epoll, are no different: the driver simply reports the state of its data.
So how does a driver support select? It has to implement the poll function pointer in the file_operations structure, and the implementation is very simple, essentially just a call to poll_wait. The prototypes are:
unsigned int (*poll) (struct file *, struct poll_table_struct *);
void poll_wait(struct file *filp, wait_queue_head_t *queue, poll_table * wait);
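For reference, poll_wait is only a thin inline wrapper; in kernels of this era it looks roughly like the following in include/linux/poll.h (the exact form varies slightly between kernel versions, so treat this as a sketch rather than the verbatim source):
static inline void poll_wait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
{
    /* only registers the caller on the given wait queue, via the qproc
     * callback installed by select/poll/epoll; it never sleeps itself */
    if (p && wait_address)
        p->qproc(filp, wait_address, p);
}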
Note that poll_wait, despite its name, does not actually go to sleep; it merely registers the wait queue so that the upper layer can be notified and query the state. The basic framework of a poll callback is fairly fixed. Modifying the simple program from the character-driver article, the poll callback is implemented as follows:
unsigned int simple_poll(struct file *filp, struct poll_table_struct *wait)
{
    unsigned int mask = 0;

    poll_wait(filp, &simple_queue, wait);
    if (len > 0)
        mask |= POLLIN | POLLRDNORM;
    return mask;
}
After every read, len is reset to 0; write sets len to the length of the data just written; poll therefore only needs to check len and report the device readable whenever it is greater than 0. Put together, the complete program is:
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/cdev.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/device.h>
#include <linux/poll.h>
dev_t devno;
struct class * simple_class;
static struct cdev cdev;
wait_queue_head_t simple_queue;
char test_data[255];
int len = 0;
unsigned int simple_poll(struct file *filp, struct poll_table_struct *wait)
{
    unsigned int mask = 0;

    /* register our wait queue with select/poll/epoll; this never sleeps */
    poll_wait(filp, &simple_queue, wait);
    if (len > 0)                /* unread data pending: device is readable */
        mask |= POLLIN | POLLRDNORM;
    return mask;
}
ssize_t simple_read(struct file *pfile,
        char __user *buf, size_t size, loff_t *ppos)
{
    int ret = len;

    if (ret > size)             /* never copy more than the caller asked for */
        ret = size;
    len = 0;                    /* data consumed: poll reports "not readable" again */
    if (copy_to_user(buf, test_data, ret))
        return -EFAULT;
    return ret;
}
ssize_t simple_write(struct file *pfile, const char __user *buf, size_t count, loff_t *ppos)
{
    if (count > 255)
        return -EFAULT;
    if (copy_from_user(test_data, buf, count))
        return -EFAULT;
    len = count;
    /* new data has arrived: wake anyone sleeping in select/poll on our queue */
    wake_up(&simple_queue);
    return len;
}
int simple_open(struct inode *pnode, struct file *pfile)
{
    printk(KERN_INFO "open simple\n");
    return 0;
}

int simple_release(struct inode *pnode, struct file *pfile)
{
    printk(KERN_INFO "close simple\n");
    return 0;
}
static struct file_operations simple_op =
{
    .owner   = THIS_MODULE,
    .read    = simple_read,
    .open    = simple_open,
    .release = simple_release,
    .write   = simple_write,
    .poll    = simple_poll,
};
static int __init initialization(void)
{
    int result;

    result = alloc_chrdev_region(&devno, 0, 1, "simple");
    if (result < 0)
        return result;
    /* the wait queue must be ready before user space can open the device */
    init_waitqueue_head(&simple_queue);
    cdev_init(&cdev, &simple_op);
    result = cdev_add(&cdev, devno, 1);
    if (result < 0) {
        unregister_chrdev_region(devno, 1);
        return result;
    }
    simple_class = class_create(THIS_MODULE, "simple");
    device_create(simple_class, NULL, devno, NULL, "simple");
    printk(KERN_INFO " init simple\n");
    return 0;
}
static void __exit cleanup(void)
{
    device_destroy(simple_class, devno);
    class_destroy(simple_class);
    cdev_del(&cdev);
    unregister_chrdev_region(devno, 1);
    printk(KERN_INFO " cleanup simple\n");
}
module_init(initialization);
module_exit(cleanup);
MODULE_AUTHOR("alloc cppbreak@gmail.com");
MODULE_DESCRIPTION("A simple linux kernel module");
MODULE_VERSION("V0.1");
MODULE_LICENSE("Dual BSD/GPL");
A user-space test program based on select:
#include <sys/select.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main()
{
    int fd, ret;
    char data[255];
    fd_set set;

    fd = open("/dev/simple", O_RDONLY | O_NONBLOCK);
    while (fd != -1) {
        FD_ZERO(&set);
        FD_SET(fd, &set);
        /* blocks here until the driver's poll callback reports the device readable */
        if (select(fd + 1, &set, 0, 0, 0) < 0)
            break;
        if (FD_ISSET(fd, &set)) {
            printf("simple can read\n");
            ret = read(fd, data, sizeof(data) - 1);
            if (ret < 0)
                break;
            data[ret] = 0;
            printf("\t%s\n", data);
        }
    }
    return 0;
}
The end result: after loading the driver and running the user program, select blocks; once data is written from another terminal with echo "simple" > /dev/simple, the program unblocks and prints:
[root@cpphost driver]# ./a.out
simple can read
simple
So a few simple lines of code are enough to make a device driver support the select model; all the complex and tedious work is taken care of by the kernel.
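Those same few lines also cover poll and epoll, since all three multiplexing calls reach the driver through the same f_op->poll callback. As a quick check, a user-space test based on poll(2) can be sketched like this (same /dev/simple device; this is an illustrative sketch, not part of the original test program):
#include <poll.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    char data[255];
    struct pollfd pfd;
    int ret;

    pfd.fd = open("/dev/simple", O_RDONLY | O_NONBLOCK);
    if (pfd.fd == -1)
        return 1;
    pfd.events = POLLIN;

    for (;;) {
        /* blocks until the driver's poll callback reports POLLIN */
        if (poll(&pfd, 1, -1) <= 0)
            break;
        if (pfd.revents & POLLIN) {
            ret = read(pfd.fd, data, sizeof(data) - 1);
            if (ret < 0)
                break;
            data[ret] = 0;
            printf("poll: %s\n", data);
        }
    }
    close(pfd.fd);
    return 0;
}
Writing to /dev/simple from another terminal unblocks poll() exactly as it does the select test above, with no driver changes.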
Driven by curiosity, one may wonder how the system actually handles a select request. select is a system call that eventually reaches the kernel's do_select function in fs/select.c; its structure is roughly as follows (only the skeleton is kept):
int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
{
    ...
    struct poll_wqueues table;
    poll_table *wait;
    ...
    poll_initwait(&table);
    ...
    for (;;) {
        ...
        for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
            ...
            for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
                ...
                f_op = file->f_op;
                mask = DEFAULT_POLLMASK;
                if (f_op && f_op->poll) {
                    wait_key_set(wait, in, out, bit);
                    /* here f_op->poll is called, i.e. our implementation */
                    mask = (*f_op->poll)(file, wait);
                }
                ...
                if ((mask & POLLIN_SET) && (in & bit)) {
                    res_in |= bit;
                    retval++;
                    wait = NULL;
                }
                if ((mask & POLLOUT_SET) && (out & bit)) {
                    res_out |= bit;
                    retval++;
                    wait = NULL;
                }
                if ((mask & POLLEX_SET) && (ex & bit)) {
                    res_ex |= bit;
                    retval++;
                    wait = NULL;
                }
            }
            if (res_in)
                *rinp = res_in;
            if (res_out)
                *routp = res_out;
            if (res_ex)
                *rexp = res_ex;
            /* yield the CPU between words of the bitmaps */
            cond_resched();
        }
        ...
        /* nothing ready yet: sleep until a wake-up or the timeout */
        if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
                    to, slack))
            timed_out = 1;
    }
    /* tear down the wait-queue entries and free the table */
    poll_freewait(&table);
    return retval;
}
The overall structure is clear. On entering select, a wait table (the poll_table wait) is initialized first, and the corresponding poll callback is called in a loop for every descriptor. Inside the driver, the poll callback adds this wait entry to its own wait queue, so that the moment data arrives select can be woken up promptly. After each callback returns, select inspects the returned mask to see whether the descriptor is readable or writable. Strictly speaking, cond_resched() only yields the CPU between iterations; if nothing is ready on any descriptor, the task actually goes to sleep in poll_schedule_timeout(), staying there until the timeout expires or write() wakes the wait queue.
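Reporting writability works the same way: the driver's poll callback simply ORs in the write-side bits, and do_select maps them into the write fd_set as shown above. A hypothetical sketch (the buffer_full flag does not exist in the simple driver; it is only there to illustrate the idea):
static unsigned int sample_poll(struct file *filp, struct poll_table_struct *wait)
{
    unsigned int mask = 0;

    poll_wait(filp, &simple_queue, wait);
    if (len > 0)                /* data pending: readable */
        mask |= POLLIN | POLLRDNORM;
    if (!buffer_full)           /* hypothetical flag: room to accept a write */
        mask |= POLLOUT | POLLWRNORM;
    return mask;
}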
This walk-through also confirms that select iterates over every descriptor, so its cost grows linearly as the number of descriptors increases; that is why the more advanced epoll model exists. For its implementation, see the epoll source-code analysis article.
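For completeness, the same unmodified driver can also be exercised through epoll, since epoll ultimately calls the same f_op->poll callback; a minimal user-space sketch (again illustrative, not part of the original test program):
#include <sys/epoll.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    char data[255];
    struct epoll_event ev, out;
    int fd, epfd, ret;

    fd = open("/dev/simple", O_RDONLY | O_NONBLOCK);
    epfd = epoll_create(1);
    if (fd == -1 || epfd == -1)
        return 1;

    ev.events = EPOLLIN;
    ev.data.fd = fd;
    epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);

    /* blocks until the driver wakes its wait queue and reports POLLIN */
    while (epoll_wait(epfd, &out, 1, -1) > 0) {
        ret = read(fd, data, sizeof(data) - 1);
        if (ret < 0)
            break;
        data[ret] = 0;
        printf("epoll: %s\n", data);
    }
    close(fd);
    close(epfd);
    return 0;
}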