本文记录一下 `eventfd_signal` 的实现。
Overview
```text
eventfd_signal
└── eventfd_signal_mask
    └── wake_up_locked_poll[__wake_up_locked_key]
        └── __wake_up_common
```
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * __wake_up_common (excerpt; elided parts are marked "...").
 *
 * The visible part iterates over the wait-queue entries linked on
 * wq_head->head (through each entry's `entry` list member) and, for every
 * entry that does not carry WQ_FLAG_BOOKMARK, invokes that entry's `func`
 * callback as func(curr, mode, wake_flags, key).
 *
 * How `curr` is initialised, how `ret`, `cnt`, `nr_exclusive` and `bookmark`
 * are used, and what the function returns are all in the elided parts and
 * are not shown here.
 */
| static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive, int wake_flags, void *key, wait_queue_entry_t *bookmark) { wait_queue_entry_t *curr, *next; int cnt = 0; ...
list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) { unsigned flags = curr->flags; int ret;
/* Entries flagged WQ_FLAG_BOOKMARK are skipped; their callback is not run. */
if (flags & WQ_FLAG_BOOKMARK) continue;
/* This is the call through which a waiter's registered callback runs. */
ret = curr->func(curr, mode, wake_flags, key); ... }
... }
|
由 `__wake_up_common` 的实现可知,`eventfd_signal` 最终调用了 `wait_queue_entry` 的 `func` 回调。
1 2 3 4 5 6 7 8 9
|
/*
 * One waiter registered on a wait queue.
 *
 *   flags   - per-entry flag bits; __wake_up_common tests WQ_FLAG_BOOKMARK
 *             in this field.
 *   private - opaque pointer owned by whoever registered the entry
 *             (e.g. __pollwait stores its struct poll_wqueues here).
 *   func    - wake callback; __wake_up_common calls it as
 *             func(entry, mode, wake_flags, key).
 *   entry   - list linkage used to chain the entry on the queue head's list.
 */
struct wait_queue_entry { unsigned int flags; void *private; wait_queue_func_t func; struct list_head entry; };
|
vhost_poll_wakeup
《源码解析:vhost ioeventfd与irqfd》一文中提到过 `vhost_poll_wakeup`,那么这个函数又是如何与 `eventfd_signal` 关联起来的呢?
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 * vhost_poll_init (excerpt; elided parts are marked "...").
 *
 * The visible statement installs vhost_poll_wakeup as the wake callback of
 * the wait-queue entry embedded in the vhost_poll (poll->wait). What the
 * function does with fn, mask, dev and vq is in the elided parts.
 */
| void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, __poll_t mask, struct vhost_dev *dev, struct vhost_virtqueue *vq) { ... init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); ... }
/*
 * Prepare a wait-queue entry whose wake-up action is a caller-supplied
 * callback: clears `flags`, sets `private` to NULL and stores `func` as the
 * entry's callback. The `entry` list member is not touched here; the caller
 * still has to add the entry to a queue.
 */
static inline void init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t func) { wq_entry->flags = 0; wq_entry->private = NULL; wq_entry->func = func; }
|
由上述代码片段可知,`vhost_poll_wakeup` 被设置为了 `wait_queue_entry` 的 `func` 回调。
由此可知,`eventfd_signal` 最终调用了 `vhost_poll_wakeup` 函数;因此,`vhost_poll_wakeup` 的运行上下文是 vCPU 线程(KVM 在处理 ioeventfd 写入时调用了 `eventfd_signal`,而这段 KVM 代码运行在 vCPU 线程上下文中)。
```text
ioeventfd_write
└── eventfd_signal
    └── eventfd_signal_mask
        └── wake_up_locked_poll[__wake_up_locked_key]
            └── __wake_up_common
                └── vhost_poll_wakeup
```
select/poll/epoll 中 `wait_queue_entry` 的 `func` 回调
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * __pollwait: register the caller on a file's wait queue (select/poll path).
 *
 * Steps visible in this excerpt:
 *   1. Recover the enclosing struct poll_wqueues from the poll_table `p`
 *      (p is its `pt` member).
 *   2. Obtain a poll_table_entry via poll_get_entry(); if none is returned,
 *      give up silently.
 *   3. Record the file (through get_file(filp)), the wait queue head and the
 *      event key taken from p->_key in the entry.
 *   4. Install pollwake as the entry's wake callback, store the
 *      poll_wqueues pointer in entry->wait.private (pollwake's helper reads
 *      it back), and add the entry to `wait_address`.
 *
 * NOTE(review): get_file() presumably takes a reference on filp; that is
 * not shown in this excerpt.
 */
| static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt); struct poll_table_entry *entry = poll_get_entry(pwq); if (!entry) return; entry->filp = get_file(filp); entry->wait_address = wait_address; entry->key = p->_key; init_waitqueue_func_entry(&entry->wait, pollwake); entry->wait.private = pwq; add_wait_queue(wait_address, &entry->wait); }
|
对于 `select` 和 `poll`,`wait_queue_entry` 的 `func` 回调是 `pollwake`。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
/*
 * ep_ptable_queue_proc: register an epoll item on a file's wait queue
 * (epoll counterpart of __pollwait).
 *
 * Recovers the ep_pqueue that embeds `pt`, allocates an eppoll_entry,
 * installs ep_poll_callback as its wake callback, and queues it on `whead`.
 * Failure is reported to the caller only by setting epq->epi to NULL; the
 * function itself returns void.
 */
| static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, poll_table *pt) { struct ep_pqueue *epq = container_of(pt, struct ep_pqueue, pt); struct epitem *epi = epq->epi; struct eppoll_entry *pwq;
/* Nothing to do when epq->epi is already NULL. */
if (unlikely(!epi)) return;
/* On allocation failure, clear epq->epi so the condition is visible via epq. */
pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL); if (unlikely(!pwq)) { epq->epi = NULL; return; }
/*
 * Items registered with EPOLLEXCLUSIVE are added as exclusive waiters,
 * all others as ordinary waiters. The new entry is then pushed onto the
 * front of the item's pwqlist.
 */
init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); pwq->whead = whead; pwq->base = epi; if (epi->event.events & EPOLLEXCLUSIVE) add_wait_queue_exclusive(whead, &pwq->wait); else add_wait_queue(whead, &pwq->wait); pwq->next = epi->pwqlist; epi->pwqlist = pwq; }
|
对于 `epoll`,`wait_queue_entry` 的 `func` 回调是 `ep_poll_callback`。
为了方便起见,本文只详细介绍 `pollwake`。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
|
/*
 * pollwake / __pollwake: wake callback installed by __pollwait.
 *
 * pollwake:
 *   Recovers the poll_table_entry that embeds `wait`. If `key` is non-NULL
 *   and has no bits in common with entry->key, returns 0 without waking
 *   anything. Otherwise returns the result of __pollwake().
 *
 * __pollwake:
 *   Reads the poll_wqueues pointer from wait->private, declares a local
 *   wait entry `dummy_wait` for pwq->polling_task, issues smp_wmb(), sets
 *   pwq->triggered to 1, and returns default_wake_function() applied to
 *   dummy_wait.
 *
 * NOTE(review): this excerpt spells the entry type `wait_queue_t`, whereas
 * the other excerpts in this article use `struct wait_queue_entry` /
 * `wait_queue_entry_t`; it presumably comes from an older kernel version -
 * confirm. Also, __pollwake appears after its caller here with no prior
 * declaration visible, so the excerpt order is for reading, not compiling.
 */
static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) { struct poll_table_entry *entry; entry = container_of(wait, struct poll_table_entry, wait); if (key && !((unsigned long)key & entry->key)) { return 0; } return __pollwake(wait, mode, sync, key); } static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) { struct poll_wqueues *pwq = wait->private; DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task); smp_wmb(); pwq->triggered = 1; return default_wake_function(&dummy_wait, mode, sync, key); }
/*
 * default_wake_function: forwards to try_to_wake_up(), passing the entry's
 * `private` pointer together with `mode` and `wake_flags`, and returns its
 * result. The `key` argument is not used in this excerpt.
 */
int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, void *key) { return try_to_wake_up(curr->private, mode, wake_flags); }
|
参考资料:
- linux 内核poll/select/epoll实现剖析(经典)-上