drm: Fix spinlock race condition that can lock up the kernel

From: Mike Isely

The i915_vblank_swap() function schedules an automatic buffer swap upon receipt of the vertical sync interrupt. Such an operation is lengthy, so it can't happen in normal interrupt context; the DRM therefore implements it by scheduling the work in a kernel softirq-scheduled tasklet. In order for the buffer swap to work safely, the DRM's central lock must be taken, via a call to drm_lock_take() made from drm_locked_tasklet_func() in drm_irq.c.

The lock-taking logic uses a non-interrupt-blocking spinlock to implement the manipulations needed to take the lock. Note that a non-interrupt-blocking spinlock disables kernel preemption and atomically sets a flag, but interrupts remain enabled. This semantic is safe if ALL attempts to use the spinlock happen only from process context. However, this buffer swap happens from softirq context, which is really a form of interrupt context that WILL preempt execution even when normal thread preemption is otherwise disabled. Thus we have an unsafe situation: drm_locked_tasklet_func() can spin on a spinlock already taken by a thread in process context, and that thread will never get scheduled again because of the spinning softirq tasklet. This wedges the kernel hard.

It's a very small race window, but a race nonetheless, with a very undesirable potential outcome.

To trigger this bug, run a dual-head cloned mode configuration which uses the i915 DRM, then execute an OpenGL application which synchronizes buffer swaps against the vertical sync interrupt. In my testing, a lockup always results after running anywhere from 5 minutes to an hour and a half. I believe dual-head is needed to really trigger the problem, because then the vertical sync interrupt handling is no longer predictable (being interrupt-sourced from two different heads running at different speeds), which raises the probability of the tasklet trying to run while the userspace DRI is doing things to the GPU (and manipulating the DRM lock).

The fix is to change the relevant spinlock semantics to the interrupt-blocking form: all calls to spin_lock() on this spinlock become spin_lock_irqsave(), and the corresponding spin_unlock() calls become spin_unlock_irqrestore(). After this change I am no longer able to trigger the lockup; the longest test run so far was 6 hours (test stopped after that point).
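For illustration, the deadlock pattern reduces to the following minimal sketch (kernel-style C; demo_lock and the function names are hypothetical, not part of this patch). The first form leaves local interrupts enabled, so a tasklet fired on the same CPU can preempt the lock holder and spin forever on a lock that CPU already holds; the second form disables local interrupts across the critical section, so no softirq can run on that CPU until the lock is released:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);	/* hypothetical; stands in for dev->lock.spinlock */

/* UNSAFE if the same lock is also taken from softirq (tasklet) context:
 * spin_lock() disables preemption but leaves interrupts enabled, so a
 * tasklet arriving on this CPU while we hold the lock will spin on it
 * forever, and we never run again to release it. */
static void process_context_unsafe(void)
{
	spin_lock(&demo_lock);
	/* ... manipulate DRM lock state ... */
	spin_unlock(&demo_lock);
}

/* SAFE: spin_lock_irqsave() additionally disables local interrupts,
 * so no softirq can preempt the critical section on this CPU; other
 * CPUs still serialize on the lock as before. */
static void process_context_safe(void)
{
	unsigned long irqflags;

	spin_lock_irqsave(&demo_lock, irqflags);
	/* ... manipulate DRM lock state ... */
	spin_unlock_irqrestore(&demo_lock, irqflags);
}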
Signed-off-by: Mike Isely
---
 drivers/char/drm/drm_fops.c |    5 ++--
 drivers/char/drm/drm_lock.c |   35 +++++++++++++++++++---------------
 2 files changed, 23 insertions(+), 17 deletions(-)

diff -uprN -X linux-2.6.24.3/Documentation/dontdiff linux-2.6.24.3/drivers/char/drm/drm_fops.c linux-2.6.24.3-drm-spinlock-fix/drivers/char/drm/drm_fops.c
--- linux-2.6.24.3/drivers/char/drm/drm_fops.c	2008-02-25 18:20:20.000000000 -0600
+++ linux-2.6.24.3-drm-spinlock-fix/drivers/char/drm/drm_fops.c	2008-03-06 15:23:26.000000000 -0600
@@ -326,6 +326,7 @@ int drm_release(struct inode *inode, str
 	struct drm_file *file_priv = filp->private_data;
 	struct drm_device *dev = file_priv->head->dev;
 	int retcode = 0;
+	unsigned long irqflags;
 
 	lock_kernel();
 
@@ -357,9 +358,9 @@ int drm_release(struct inode *inode, str
 		 */
 		do{
-			spin_lock(&dev->lock.spinlock);
+			spin_lock_irqsave(&dev->lock.spinlock, irqflags);
 			locked = dev->lock.idle_has_lock;
-			spin_unlock(&dev->lock.spinlock);
+			spin_unlock_irqrestore(&dev->lock.spinlock, irqflags);
 			if (locked)
 				break;
 			schedule();
diff -uprN -X linux-2.6.24.3/Documentation/dontdiff linux-2.6.24.3/drivers/char/drm/drm_lock.c linux-2.6.24.3-drm-spinlock-fix/drivers/char/drm/drm_lock.c
--- linux-2.6.24.3/drivers/char/drm/drm_lock.c	2008-02-25 18:20:20.000000000 -0600
+++ linux-2.6.24.3-drm-spinlock-fix/drivers/char/drm/drm_lock.c	2008-03-06 15:23:34.000000000 -0600
@@ -53,6 +53,7 @@ int drm_lock(struct drm_device *dev, voi
 	DECLARE_WAITQUEUE(entry, current);
 	struct drm_lock *lock = data;
 	int ret = 0;
+	unsigned long irqflags;
 
 	++file_priv->lock_count;
 
@@ -71,9 +72,9 @@ int drm_lock(struct drm_device *dev, voi
 		return -EINVAL;
 
 	add_wait_queue(&dev->lock.lock_queue, &entry);
-	spin_lock(&dev->lock.spinlock);
+	spin_lock_irqsave(&dev->lock.spinlock, irqflags);
 	dev->lock.user_waiters++;
-	spin_unlock(&dev->lock.spinlock);
+	spin_unlock_irqrestore(&dev->lock.spinlock, irqflags);
 	for (;;) {
 		__set_current_state(TASK_INTERRUPTIBLE);
 		if (!dev->lock.hw_lock) {
@@ -95,9 +96,9 @@ int drm_lock(struct drm_device *dev, voi
 			break;
 		}
 	}
-	spin_lock(&dev->lock.spinlock);
+	spin_lock_irqsave(&dev->lock.spinlock, irqflags);
 	dev->lock.user_waiters--;
-	spin_unlock(&dev->lock.spinlock);
+	spin_unlock_irqrestore(&dev->lock.spinlock, irqflags);
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&dev->lock.lock_queue, &entry);
 
@@ -198,8 +199,9 @@ int drm_lock_take(struct drm_lock_data *
 {
 	unsigned int old, new, prev;
 	volatile unsigned int *lock = &lock_data->hw_lock->lock;
+	unsigned long irqflags;
 
-	spin_lock(&lock_data->spinlock);
+	spin_lock_irqsave(&lock_data->spinlock, irqflags);
 	do {
 		old = *lock;
 		if (old & _DRM_LOCK_HELD)
@@ -211,7 +213,7 @@ int drm_lock_take(struct drm_lock_data *
 		}
 		prev = cmpxchg(lock, old, new);
 	} while (prev != old);
-	spin_unlock(&lock_data->spinlock);
+	spin_unlock_irqrestore(&lock_data->spinlock, irqflags);
 
 	if (_DRM_LOCKING_CONTEXT(old) == context) {
 		if (old & _DRM_LOCK_HELD) {
@@ -272,15 +274,16 @@ int drm_lock_free(struct drm_lock_data *
 {
 	unsigned int old, new, prev;
 	volatile unsigned int *lock = &lock_data->hw_lock->lock;
+	unsigned long irqflags;
 
-	spin_lock(&lock_data->spinlock);
+	spin_lock_irqsave(&lock_data->spinlock, irqflags);
 	if (lock_data->kernel_waiters != 0) {
 		drm_lock_transfer(lock_data, 0);
 		lock_data->idle_has_lock = 1;
-		spin_unlock(&lock_data->spinlock);
+		spin_unlock_irqrestore(&lock_data->spinlock, irqflags);
 		return 1;
 	}
-	spin_unlock(&lock_data->spinlock);
+	spin_unlock_irqrestore(&lock_data->spinlock, irqflags);
 
 	do {
 		old = *lock;
@@ -344,19 +347,20 @@ static int drm_notifier(void *priv)
 void drm_idlelock_take(struct drm_lock_data *lock_data)
 {
 	int ret = 0;
+	unsigned long irqflags;
 
-	spin_lock(&lock_data->spinlock);
+	spin_lock_irqsave(&lock_data->spinlock, irqflags);
 	lock_data->kernel_waiters++;
 	if (!lock_data->idle_has_lock) {
 
-		spin_unlock(&lock_data->spinlock);
+		spin_unlock_irqrestore(&lock_data->spinlock, irqflags);
 		ret = drm_lock_take(lock_data, DRM_KERNEL_CONTEXT);
-		spin_lock(&lock_data->spinlock);
+		spin_lock_irqsave(&lock_data->spinlock, irqflags);
 
 		if (ret == 1)
 			lock_data->idle_has_lock = 1;
 	}
-	spin_unlock(&lock_data->spinlock);
+	spin_unlock_irqrestore(&lock_data->spinlock, irqflags);
 }
 EXPORT_SYMBOL(drm_idlelock_take);
 
@@ -364,8 +368,9 @@ void drm_idlelock_release(struct drm_loc
 {
 	unsigned int old, prev;
 	volatile unsigned int *lock = &lock_data->hw_lock->lock;
+	unsigned long irqflags;
 
-	spin_lock(&lock_data->spinlock);
+	spin_lock_irqsave(&lock_data->spinlock, irqflags);
 	if (--lock_data->kernel_waiters == 0) {
 		if (lock_data->idle_has_lock) {
 			do {
@@ -376,7 +381,7 @@ void drm_idlelock_release(struct drm_loc
 				lock_data->idle_has_lock = 0;
 		}
 	}
-	spin_unlock(&lock_data->spinlock);
+	spin_unlock_irqrestore(&lock_data->spinlock, irqflags);
 }
 EXPORT_SYMBOL(drm_idlelock_release);