1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
|
/*
* kvm eventfd support - use eventfd objects to signal various KVM events
*
* Copyright 2009 Novell. All Rights Reserved.
*
* Author:
* Gregory Haskins <ghaskins@novell.com>
*
* This file is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/kvm_host.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
/*
* --------------------------------------------------------------------
* irqfd: Allows an fd to be used to inject an interrupt to the guest
*
* Credit goes to Avi Kivity for the original idea.
* --------------------------------------------------------------------
*/
/*
* Per-registration state binding one eventfd to one interrupt line of a VM.
* Allocated in kvm_irqfd_assign() and freed in irqfd_shutdown().
*/
struct _irqfd {
struct kvm *kvm; /* VM the interrupt is injected into */
struct eventfd_ctx *eventfd; /* eventfd context; reference dropped in irqfd_shutdown() */
int gsi; /* interrupt number handed to kvm_set_irq() */
struct list_head list; /* entry on kvm->irqfds.items; empty once deactivated */
poll_table pt; /* poll table whose queue proc is irqfd_ptable_queue_proc() */
wait_queue_head_t *wqh; /* wait-queue head recorded by irqfd_ptable_queue_proc() */
wait_queue_t wait; /* wait-queue entry whose callback is irqfd_wakeup() */
struct work_struct inject; /* work item that runs irqfd_inject() */
struct work_struct shutdown; /* work item that runs irqfd_shutdown() */
};
/*
* Workqueue on which irqfd_shutdown() jobs are queued. Created in
* irqfd_module_init() and destroyed in irqfd_module_exit().
*/
static struct workqueue_struct *irqfd_cleanup_wq;
/*
 * Work handler for the 'inject' work item of an irqfd.
 *
 * Sets the irqfd's gsi to level 1 and then back to level 0 on the owning
 * VM, with both calls made under kvm->irq_lock.
 *
 * @work: the 'inject' member embedded in a struct _irqfd
 */
static void
irqfd_inject(struct work_struct *work)
{
	struct _irqfd *entry = container_of(work, struct _irqfd, inject);
	struct kvm *vm = entry->kvm;
	int gsi = entry->gsi;

	mutex_lock(&vm->irq_lock);

	/* Raise the line, then lower it again straight away. */
	kvm_set_irq(vm, KVM_USERSPACE_IRQ_SOURCE_ID, gsi, 1);
	kvm_set_irq(vm, KVM_USERSPACE_IRQ_SOURCE_ID, gsi, 0);

	mutex_unlock(&vm->irq_lock);
}
/*
* Work handler that tears down a deactivated irqfd. It is queued on
* irqfd_cleanup_wq by irqfd_deactivate().
*
* Race-free decouple logic (ordering is critical): unhook from the
* wait-queue first, then drain any pending inject work, and only then
* drop the eventfd reference and free the object.
*
* @work: the 'shutdown' member embedded in the struct _irqfd to destroy
*/
static void
irqfd_shutdown(struct work_struct *work)
{
struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
/*
* Synchronize with the wait-queue and unhook ourselves to prevent
* further events.
*/
remove_wait_queue(irqfd->wqh, &irqfd->wait);
/*
* We know no new events will be scheduled at this point, so block
* until all previously outstanding events have completed
*/
flush_work(&irqfd->inject);
/*
* It is now safe to release the object's resources: drop the eventfd
* context reference taken in kvm_irqfd_assign() and free the irqfd.
*/
eventfd_ctx_put(irqfd->eventfd);
kfree(irqfd);
}
/*
 * Report whether the irqfd is still linked on a list, i.e. it has not
 * been deactivated yet.
 *
 * assumes kvm->irqfds.lock is held
 */
static bool
irqfd_is_active(struct _irqfd *irqfd)
{
	return !list_empty(&irqfd->list);
}
/*
 * Take the irqfd off its list (which marks it inactive) and queue its
 * shutdown work on the cleanup workqueue.
 *
 * It is a BUG to call this on an irqfd that is already inactive.
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct _irqfd *irqfd)
{
	/* An empty list entry means the irqfd was already deactivated. */
	BUG_ON(list_empty(&irqfd->list));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}
/*
 * Wait-queue callback installed by kvm_irqfd_assign(); runs when the
 * underlying eventfd is signaled or closed.
 *
 * Called with wqh->lock held and interrupts disabled
 *
 * @wait: the 'wait' member embedded in a struct _irqfd
 * @mode: unused
 * @sync: unused
 * @key:  poll event mask (POLLIN and/or POLLHUP) carried in a pointer
 *
 * Always returns 0.
 */
static int
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
	unsigned long events = (unsigned long)key;

	if (events & POLLIN) {
		/* An event has been signaled, inject an interrupt */
		schedule_work(&irqfd->inject);
	}

	if (events & POLLHUP) {
		/* The eventfd is closing, detach from KVM */
		struct kvm *kvm = irqfd->kvm;
		/*
		 * Saved IRQ state for the spinlock. Named differently from
		 * the poll mask above so neither variable shadows the other.
		 */
		unsigned long irqflags;

		spin_lock_irqsave(&kvm->irqfds.lock, irqflags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will cleanup for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, irqflags);
	}

	return 0;
}
/*
 * Poll-table queue callback registered by kvm_irqfd_assign().
 *
 * Remembers which wait-queue head the irqfd is attached to (needed later
 * by irqfd_shutdown() to unhook) and adds the irqfd's wait entry to it.
 *
 * @file: unused
 * @wqh:  wait-queue head to attach to
 * @pt:   the 'pt' member embedded in a struct _irqfd
 */
static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct _irqfd *owner = container_of(pt, struct _irqfd, pt);

	owner->wqh = wqh;
	add_wait_queue(wqh, &owner->wait);
}
/*
* Create an irqfd that injects interrupt 'gsi' into 'kvm' whenever the
* eventfd referred to by 'fd' is signaled.
*
* On success the new irqfd holds a reference on the eventfd context (it
* is dropped in irqfd_shutdown()) and is linked on kvm->irqfds.items.
*
* Returns 0 on success, -ENOMEM if the irqfd cannot be allocated, or the
* error encoded by eventfd_fget() / eventfd_ctx_fileget().
*/
static int
kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
{
struct _irqfd *irqfd;
struct file *file = NULL;
struct eventfd_ctx *eventfd = NULL;
int ret;
unsigned int events;
irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
if (!irqfd)
return -ENOMEM;
irqfd->kvm = kvm;
irqfd->gsi = gsi;
INIT_LIST_HEAD(&irqfd->list);
INIT_WORK(&irqfd->inject, irqfd_inject);
INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
/* Hold the file for the duration of setup; it is released before returning */
file = eventfd_fget(fd);
if (IS_ERR(file)) {
ret = PTR_ERR(file);
goto fail;
}
eventfd = eventfd_ctx_fileget(file);
if (IS_ERR(eventfd)) {
ret = PTR_ERR(eventfd);
goto fail;
}
irqfd->eventfd = eventfd;
/*
* Install our own custom wake-up handling so we are notified via
* a callback whenever someone signals the underlying eventfd
*/
init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
/* The poll call invokes irqfd_ptable_queue_proc(), which hooks irqfd->wait */
events = file->f_op->poll(file, &irqfd->pt);
spin_lock_irq(&kvm->irqfds.lock);
list_add_tail(&irqfd->list, &kvm->irqfds.items);
spin_unlock_irq(&kvm->irqfds.lock);
/*
* Check if there was an event already pending on the eventfd
* before we registered, and trigger it as if we didn't miss it.
*
* NOTE(review): once on the list the irqfd is visible to
* kvm_irqfd_deassign() and kvm_irqfd_release(); confirm callers are
* serialized so it cannot be torn down before this point.
*/
if (events & POLLIN)
schedule_work(&irqfd->inject);
/*
* do not drop the file until the irqfd is fully initialized, otherwise
* we might race against the POLLHUP
*/
fput(file);
return 0;
fail:
/* Undo whichever of the two references had been taken, then free the irqfd */
if (eventfd && !IS_ERR(eventfd))
eventfd_ctx_put(eventfd);
if (file && !IS_ERR(file))
fput(file);
kfree(irqfd);
return ret;
}
/*
 * Initialize the per-VM irqfd bookkeeping: an empty list of irqfds and
 * the spinlock that protects it.
 */
void
kvm_irqfd_init(struct kvm *kvm)
{
	INIT_LIST_HEAD(&kvm->irqfds.items);
	spin_lock_init(&kvm->irqfds.lock);
}
/*
 * Deactivate every irqfd on 'kvm' whose eventfd context and gsi both
 * match the supplied fd+gsi, then wait for the teardown to finish.
 *
 * Returns 0 on success, or the error encoded by eventfd_ctx_fdget() when
 * 'fd' cannot be resolved to an eventfd context.
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
{
	struct _irqfd *cur, *next;
	struct eventfd_ctx *ctx;

	ctx = eventfd_ctx_fdget(fd);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(cur, next, &kvm->irqfds.items, list) {
		if (cur->eventfd != ctx || cur->gsi != gsi)
			continue;

		irqfd_deactivate(cur);
	}

	spin_unlock_irq(&kvm->irqfds.lock);

	/* This reference was only needed for the comparison above. */
	eventfd_ctx_put(ctx);

	/*
	 * Wait for every queued shutdown job to finish, so that no further
	 * interrupt can arrive on this gsi once we return to the caller.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}
int
kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
{
if (flags & KVM_IRQFD_FLAG_DEASSIGN)
return kvm_irqfd_deassign(kvm, fd, gsi);
return kvm_irqfd_assign(kvm, fd, gsi);
}
/*
* This function is called as the kvm VM fd is being released. Shutdown all
* irqfds that still remain open
*/
void
kvm_irqfd_release(struct kvm *kvm)
{
struct _irqfd *irqfd, *tmp;
spin_lock_irq(&kvm->irqfds.lock);
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
irqfd_deactivate(irqfd);
spin_unlock_irq(&kvm->irqfds.lock);
/*
* Block until we know all outstanding shutdown jobs have completed
* since we do not take a kvm* reference.
*/
flush_workqueue(irqfd_cleanup_wq);
}
/*
 * Create the host-wide workqueue used for deferred shutdown requests
 * aggregated from all vm* instances.  A dedicated, isolated single-thread
 * queue is needed to prevent deadlock against flushing the normal
 * work-queue.
 *
 * Returns 0 on success or -ENOMEM if the workqueue cannot be created.
 */
static int __init irqfd_module_init(void)
{
	struct workqueue_struct *wq;

	wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
	irqfd_cleanup_wq = wq;

	return wq ? 0 : -ENOMEM;
}
/*
 * Module teardown: destroy the cleanup workqueue created by
 * irqfd_module_init().
 */
static void __exit irqfd_module_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}
/* Register the cleanup-workqueue setup and teardown as the module init/exit hooks */
module_init(irqfd_module_init);
module_exit(irqfd_module_exit);
|