From 3a76e3716b4e571f5d91a20b6afb412560599083 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sun, 17 Aug 2008 22:02:11 -0700 Subject: pkt_sched: Grab correct lock in notify_and_destroy(). From: Jarek Poplawski When we are destroying non-root qdiscs, we need to lock the root of the qdisc tree not the the qdisc itself. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c25465e5607..c8dc72e1210 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -638,9 +638,9 @@ static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid qdisc_notify(skb, n, clid, old, new); if (old) { - spin_lock_bh(&old->q.lock); + sch_tree_lock(old); qdisc_destroy(old); - spin_unlock_bh(&old->q.lock); + sch_tree_unlock(old); } } -- cgit v1.2.3 From 8608db031b4d2932d645709e2cfe8fbcd91a7305 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 18 Aug 2008 20:51:18 -0700 Subject: pkt_sched: Never schedule non-root qdiscs. Based upon initial discovery and patch by Jarek Poplawski. The qdisc watchdogs can be attached to any qdisc, not just the root, so make sure we schedule the correct one. CBQ has a similar bug. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c8dc72e1210..98c00847a3d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -426,7 +426,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) wd->qdisc->flags &= ~TCQ_F_THROTTLED; smp_wmb(); - __netif_schedule(wd->qdisc); + __netif_schedule(qdisc_root(wd->qdisc)); return HRTIMER_NORESTART; } -- cgit v1.2.3 From 25bfcd5a78a377ea4c54a3c21e44590e2fc478a6 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 18 Aug 2008 20:53:34 -0700 Subject: pkt_sched: Add lockdep annotation for qdisc locks Qdisc locks are initialized in the same function, qdisc_alloc(), so lockdep can't distinguish tx qdisc lock from rx and reports "possible recursive locking detected" when both these locks are taken eg. while using act_mirred with ifb. This looks like a false positive. Anyway, after this patch these locks will be reported more exactly. Reported-by: Denys Fedoryshchenko Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_api.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 98c00847a3d..7d7070b1eeb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -707,6 +708,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, return err; } +/* lockdep annotation is needed for ingress; egress gets it only for name */ +static struct lock_class_key qdisc_tx_lock; +static struct lock_class_key qdisc_rx_lock; + /* Allocate and initialize new qdisc. @@ -767,6 +772,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; handle = TC_H_MAKE(TC_H_INGRESS, 0); + lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock); } else { if (handle == 0) { handle = qdisc_alloc_handle(dev); @@ -774,6 +780,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (handle == 0) goto err_out3; } + lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); } sch->handle = handle; -- cgit v1.2.3 From 4d8863a29c4755a0461cd31b6865026187d6c43a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 18 Aug 2008 21:03:15 -0700 Subject: pkt_sched: Don't hold qdisc lock over qdisc_destroy(). Based upon reports by Denys Fedoryshchenko, and feedback and help from Jarek Poplawski and Herbert Xu. We always either: 1) Never made an external reference to this qdisc. or 2) Did a dev_deactivate() which purged all asynchronous references. So do not lock the qdisc when we call qdisc_destroy(), it's illegal anyways as when we drop the lock this is free'd memory. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 7d7070b1eeb..d91a2338877 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -638,11 +638,8 @@ static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid if (new || old) qdisc_notify(skb, n, clid, old, new); - if (old) { - sch_tree_lock(old); + if (old) qdisc_destroy(old); - sch_tree_unlock(old); - } } /* Graft qdisc "new" to class "classid" of qdisc "parent" or @@ -1092,16 +1089,10 @@ create_n_graft: graft: if (1) { - spinlock_t *root_lock; - err = qdisc_graft(dev, p, skb, n, clid, q, NULL); if (err) { - if (q) { - root_lock = qdisc_root_lock(q); - spin_lock_bh(root_lock); + if (q) qdisc_destroy(q); - spin_unlock_bh(root_lock); - } return err; } } -- cgit v1.2.3 From e5befbd9525d92bb074b70192eb2c69aae65fc60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 18 Aug 2008 22:30:01 -0700 Subject: pkt_sched: remove bogus block (cleanup) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ...Last block local var got just deleted. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/sched/sch_api.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d91a2338877..9372ec41ce8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1088,14 +1088,13 @@ create_n_graft: } graft: - if (1) { - err = qdisc_graft(dev, p, skb, n, clid, q, NULL); - if (err) { - if (q) - qdisc_destroy(q); - return err; - } + err = qdisc_graft(dev, p, skb, n, clid, q, NULL); + if (err) { + if (q) + qdisc_destroy(q); + return err; } + return 0; } -- cgit v1.2.3 From f3b9605d744df537dee10fd06630f35a62b343ec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 18 Aug 2008 22:33:05 -0700 Subject: Revert "pkt_sched: Add BH protection for qdisc_stab_lock." This reverts commit 1cfa26661a85549063e369e2b40275eeaa7b923c. qdisc_destroy() runs fully under RTNL again and not from softint any longer, so this change is no longer needed. Signed-off-by: David S. Miller --- net/sched/sch_api.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 9372ec41ce8..ef0efeca635 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -332,7 +332,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (!s || tsize != s->tsize || (!tab && tsize > 0)) return ERR_PTR(-EINVAL); - spin_lock_bh(&qdisc_stab_lock); + spin_lock(&qdisc_stab_lock); list_for_each_entry(stab, &qdisc_stab_list, list) { if (memcmp(&stab->szopts, s, sizeof(*s))) @@ -340,11 +340,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) continue; stab->refcnt++; - spin_unlock_bh(&qdisc_stab_lock); + spin_unlock(&qdisc_stab_lock); return stab; } - spin_unlock_bh(&qdisc_stab_lock); + spin_unlock(&qdisc_stab_lock); stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); if (!stab) @@ -355,9 +355,9 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize > 0) memcpy(stab->data, tab, tsize * sizeof(u16)); - spin_lock_bh(&qdisc_stab_lock); + spin_lock(&qdisc_stab_lock); list_add_tail(&stab->list, &qdisc_stab_list); - spin_unlock_bh(&qdisc_stab_lock); + spin_unlock(&qdisc_stab_lock); return stab; } @@ -367,14 +367,14 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (!tab) return; - spin_lock_bh(&qdisc_stab_lock); + spin_lock(&qdisc_stab_lock); if (--tab->refcnt == 0) { list_del(&tab->list); kfree(tab); } - spin_unlock_bh(&qdisc_stab_lock); + spin_unlock(&qdisc_stab_lock); } EXPORT_SYMBOL(qdisc_put_stab); -- cgit v1.2.3 From 2540e0511ea17e25831be543cdf9381e6209950d Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 21 Aug 2008 05:11:14 -0700 Subject: pkt_sched: Fix qdisc_watchdog() vs. dev_deactivate() race dev_deactivate() can skip rescheduling of a qdisc by qdisc_watchdog() or other timer calling netif_schedule() after dev_queue_deactivate(). We prevent this checking aliveness before scheduling the timer. Since during deactivation the root qdisc is available only as qdisc_sleeping additional accessor qdisc_root_sleeping() is created. With feedback from Herbert Xu Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_api.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ef0efeca635..45f442d7de4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -444,6 +444,10 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) { ktime_t time; + if (test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(wd->qdisc)->state)) + return; + wd->qdisc->flags |= TCQ_F_THROTTLED; time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_US2NS(expires)); -- cgit v1.2.3 From f6e0b239a2657ea8cb67f0d83d0bfdbfd19a481b Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Fri, 22 Aug 2008 03:24:05 -0700 Subject: pkt_sched: Fix qdisc list locking Since some qdiscs call qdisc_tree_decrease_qlen() (so qdisc_lookup()) without rtnl_lock(), adding and deleting from a qdisc list needs additional locking. This patch adds global spinlock qdisc_list_lock and wrapper functions for modifying the list. It is considered as a temporary solution until hfsc_dequeue(), netem_dequeue() and tbf_dequeue() (or qdisc_tree_decrease_qlen()) are redone. With feedback from Herbert Xu and David S. Miller. Signed-off-by: Jarek Poplawski Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/sched/sch_api.c | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 45f442d7de4..e7fb9e0d21b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -199,19 +199,53 @@ struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) return NULL; } +/* + * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen() + * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue() + */ +static DEFINE_SPINLOCK(qdisc_list_lock); + +static void qdisc_list_add(struct Qdisc *q) +{ + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + spin_lock_bh(&qdisc_list_lock); + list_add_tail(&q->list, &qdisc_root_sleeping(q)->list); + spin_unlock_bh(&qdisc_list_lock); + } +} + +void qdisc_list_del(struct Qdisc *q) +{ + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + spin_lock_bh(&qdisc_list_lock); + list_del(&q->list); + spin_unlock_bh(&qdisc_list_lock); + } +} +EXPORT_SYMBOL(qdisc_list_del); + struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { unsigned int i; + struct Qdisc *q; + + spin_lock_bh(&qdisc_list_lock); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - struct Qdisc *q, *txq_root = txq->qdisc_sleeping; + struct Qdisc *txq_root = txq->qdisc_sleeping; q = qdisc_match_from_root(txq_root, handle); if (q) - return q; + goto unlock; } - return qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); + + q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); + +unlock: + spin_unlock_bh(&qdisc_list_lock); + + return q; } static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) @@ -810,8 +844,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out3; } } - if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS)) - list_add_tail(&sch->list, &dev_queue->qdisc_sleeping->list); + + qdisc_list_add(sch); return sch; } -- cgit v1.2.3