From 6ed106549d17474ca17a16057f4c0ed4eba5a7ca Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 23 Jun 2009 06:03:08 +0000 Subject: net: use NETDEV_TX_OK instead of 0 in ndo_start_xmit() functions This patch is the result of an automatic spatch transformation to convert all ndo_start_xmit() return values of 0 to NETDEV_TX_OK. Some occurences are missed by the automatic conversion, those will be handled in a seperate patch. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_teql.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 9c002b6e053..12434b6c204 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -314,7 +314,7 @@ restart: netif_wake_queue(dev); txq->tx_packets++; txq->tx_bytes += length; - return 0; + return NETDEV_TX_OK; } __netif_tx_unlock(slave_txq); } @@ -323,7 +323,7 @@ restart: break; case 1: master->slaves = NEXT_SLAVE(q); - return 0; + return NETDEV_TX_OK; default: nores = 1; break; @@ -345,7 +345,7 @@ restart: drop: txq->tx_dropped++; dev_kfree_skb(skb); - return 0; + return NETDEV_TX_OK; } static int teql_master_open(struct net_device *dev) -- cgit v1.2.3 From bbd8a0d3a3b65d341437f8b99c828fa5cc29c739 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Thu, 6 Aug 2009 01:44:21 +0000 Subject: net: Avoid enqueuing skb for default qdiscs dev_queue_xmit enqueue's a skb and calls qdisc_run which dequeue's the skb and xmits it. In most cases, the skb that is enqueue'd is the same one that is dequeue'd (unless the queue gets stopped or multiple cpu's write to the same queue and ends in a race with qdisc_run). For default qdiscs, we can remove the redundant enqueue/dequeue and simply xmit the skb since the default qdisc is work-conserving. The patch uses a new flag - TCQ_F_CAN_BYPASS to identify the default fast queue. The controversial part of the patch is incrementing qlen when a skb is requeued - this is to avoid checks like the second line below: + } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && >> !q->gso_skb && + !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { Results of a 2 hour testing for multiple netperf sessions (1, 2, 4, 8, 12 sessions on a 4 cpu system-X). The BW numbers are aggregate Mb/s across iterations tested with this version on System-X boxes with Chelsio 10gbps cards: ---------------------------------- Size | ORG BW NEW BW | ---------------------------------- 128K | 156964 159381 | 256K | 158650 162042 | ---------------------------------- Changes from ver1: 1. Move sch_direct_xmit declaration from sch_generic.h to pkt_sched.h 2. Update qdisc basic statistics for direct xmit path. 3. Set qlen to zero in qdisc_reset. 4. Changed some function names to more meaningful ones. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 93 ++++++++++++++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 36 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 27d03816ec3..693df7ae33d 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -37,15 +37,11 @@ * - updates to tree and tree walking are only done under the rtnl mutex. */ -static inline int qdisc_qlen(struct Qdisc *q) -{ - return q->q.qlen; -} - static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { q->gso_skb = skb; q->qstats.requeues++; + q->q.qlen++; /* it's still part of the queue */ __netif_schedule(q); return 0; @@ -61,9 +57,11 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) /* check the reason of requeuing without tx lock first */ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) { q->gso_skb = NULL; - else + q->q.qlen--; + } else skb = NULL; } else { skb = q->dequeue(q); @@ -103,44 +101,23 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, } /* - * NOTE: Called under qdisc_lock(q) with locally disabled BH. - * - * __QDISC_STATE_RUNNING guarantees only one CPU can process - * this qdisc at a time. qdisc_lock(q) serializes queue accesses for - * this queue. - * - * netif_tx_lock serializes accesses to device driver. - * - * qdisc_lock(q) and netif_tx_lock are mutually exclusive, - * if one is grabbed, another must be free. - * - * Note, that this procedure can be called by a watchdog timer + * Transmit one skb, and handle the return status as required. Holding the + * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this + * function. * * Returns to the caller: * 0 - queue is empty or throttled. * >0 - queue is not empty. - * */ -static inline int qdisc_restart(struct Qdisc *q) +int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, + struct net_device *dev, struct netdev_queue *txq, + spinlock_t *root_lock) { - struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; - struct net_device *dev; - spinlock_t *root_lock; - struct sk_buff *skb; - - /* Dequeue packet */ - if (unlikely((skb = dequeue_skb(q)) == NULL)) - return 0; - - root_lock = qdisc_lock(q); /* And release qdisc */ spin_unlock(root_lock); - dev = qdisc_dev(q); - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) @@ -177,6 +154,44 @@ static inline int qdisc_restart(struct Qdisc *q) return ret; } +/* + * NOTE: Called under qdisc_lock(q) with locally disabled BH. + * + * __QDISC_STATE_RUNNING guarantees only one CPU can process + * this qdisc at a time. qdisc_lock(q) serializes queue accesses for + * this queue. + * + * netif_tx_lock serializes accesses to device driver. + * + * qdisc_lock(q) and netif_tx_lock are mutually exclusive, + * if one is grabbed, another must be free. + * + * Note, that this procedure can be called by a watchdog timer + * + * Returns to the caller: + * 0 - queue is empty or throttled. + * >0 - queue is not empty. + * + */ +static inline int qdisc_restart(struct Qdisc *q) +{ + struct netdev_queue *txq; + struct net_device *dev; + spinlock_t *root_lock; + struct sk_buff *skb; + + /* Dequeue packet */ + skb = dequeue_skb(q); + if (unlikely(!skb)) + return 0; + + root_lock = qdisc_lock(q); + dev = qdisc_dev(q); + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + + return sch_direct_xmit(skb, q, dev, txq, root_lock); +} + void __qdisc_run(struct Qdisc *q) { unsigned long start_time = jiffies; @@ -547,8 +562,11 @@ void qdisc_reset(struct Qdisc *qdisc) if (ops->reset) ops->reset(qdisc); - kfree_skb(qdisc->gso_skb); - qdisc->gso_skb = NULL; + if (qdisc->gso_skb) { + kfree_skb(qdisc->gso_skb); + qdisc->gso_skb = NULL; + qdisc->q.qlen = 0; + } } EXPORT_SYMBOL(qdisc_reset); @@ -605,6 +623,9 @@ static void attach_one_default_qdisc(struct net_device *dev, printk(KERN_INFO "%s: activation failed\n", dev->name); return; } + + /* Can by-pass the queue discipline for default qdisc */ + qdisc->flags |= TCQ_F_CAN_BYPASS; } else { qdisc = &noqueue_qdisc; } -- cgit v1.2.3 From 3a6c2b419b7768703cfb2cabdb894517c5065e33 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Aug 2009 16:07:40 +0200 Subject: netlink: constify nlmsghdr arguments Consitfy nlmsghdr arguments to a couple of functions as preparation for the next patch, which will constify the netlink message data in all nfnetlink users. Signed-off-by: Patrick McHardy --- net/sched/act_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sched') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9d03cc33b6c..2dfb3e7a040 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1011,7 +1011,7 @@ replay: } static struct nlattr * -find_dump_kind(struct nlmsghdr *n) +find_dump_kind(const struct nlmsghdr *n) { struct nlattr *tb1, *tb2[TCA_ACT_MAX+1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; -- cgit v1.2.3 From fd3ae5e8fc5e947a9f151e80a65763a24b6368a9 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Tue, 18 Aug 2009 21:55:59 +0000 Subject: Speed-up pfifo_fast lookup using a private bitmap Maintain a per-qdisc bitmap for pfifo_fast giving availability of skbs for each band. This allows faster lookup for a skb when there are no high priority skbs. Also, it helps in (rare) cases when there are no skbs on the list, where an immediate lookup is faster than iterating through the three bands. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 70 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 22 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 693df7ae33d..6f7aebd1407 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -406,18 +406,38 @@ static const u8 prio2band[TC_PRIO_MAX+1] = #define PFIFO_FAST_BANDS 3 -static inline struct sk_buff_head *prio2list(struct sk_buff *skb, - struct Qdisc *qdisc) +/* + * Private data for a pfifo_fast scheduler containing: + * - queues for the three band + * - bitmap indicating which of the bands contain skbs + */ +struct pfifo_fast_priv { + u32 bitmap; + struct sk_buff_head q[PFIFO_FAST_BANDS]; +}; + +/* + * Convert a bitmap to the first band number where an skb is queued, where: + * bitmap=0 means there are no skbs on any band. + * bitmap=1 means there is an skb on band 0. + * bitmap=7 means there are skbs on all 3 bands, etc. + */ +static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0}; + +static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, + int band) { - struct sk_buff_head *list = qdisc_priv(qdisc); - return list + prio2band[skb->priority & TC_PRIO_MAX]; + return priv->q + band; } static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) { - struct sk_buff_head *list = prio2list(skb, qdisc); + int band = prio2band[skb->priority & TC_PRIO_MAX]; + struct pfifo_fast_priv *priv = qdisc_priv(qdisc); + struct sk_buff_head *list = band2list(priv, band); if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) { + priv->bitmap |= (1 << band); qdisc->q.qlen++; return __qdisc_enqueue_tail(skb, qdisc, list); } @@ -427,14 +447,18 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) { - int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); + struct pfifo_fast_priv *priv = qdisc_priv(qdisc); + int band = bitmap2band[priv->bitmap]; - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { - if (!skb_queue_empty(list + prio)) { - qdisc->q.qlen--; - return __qdisc_dequeue_head(qdisc, list + prio); - } + if (likely(band >= 0)) { + struct sk_buff_head *list = band2list(priv, band); + struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list); + + qdisc->q.qlen--; + if (skb_queue_empty(list)) + priv->bitmap &= ~(1 << band); + + return skb; } return NULL; @@ -442,12 +466,13 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) { - int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); + struct pfifo_fast_priv *priv = qdisc_priv(qdisc); + int band = bitmap2band[priv->bitmap]; + + if (band >= 0) { + struct sk_buff_head *list = band2list(priv, band); - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { - if (!skb_queue_empty(list + prio)) - return skb_peek(list + prio); + return skb_peek(list); } return NULL; @@ -456,11 +481,12 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) static void pfifo_fast_reset(struct Qdisc* qdisc) { int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); + struct pfifo_fast_priv *priv = qdisc_priv(qdisc); for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - __qdisc_reset_queue(qdisc, list + prio); + __qdisc_reset_queue(qdisc, band2list(priv, prio)); + priv->bitmap = 0; qdisc->qstats.backlog = 0; qdisc->q.qlen = 0; } @@ -480,17 +506,17 @@ nla_put_failure: static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) { int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); + struct pfifo_fast_priv *priv = qdisc_priv(qdisc); for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - skb_queue_head_init(list + prio); + skb_queue_head_init(band2list(priv, prio)); return 0; } static struct Qdisc_ops pfifo_fast_ops __read_mostly = { .id = "pfifo_fast", - .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), + .priv_size = sizeof(struct pfifo_fast_priv), .enqueue = pfifo_fast_enqueue, .dequeue = pfifo_fast_dequeue, .peek = pfifo_fast_peek, -- cgit v1.2.3 From a453e0689a3ccf85c08cb89753d7685046248c5c Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Sun, 30 Aug 2009 22:20:28 -0700 Subject: pkt_sched: Fix resource limiting in pfifo_fast pfifo_fast_enqueue has this check: if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) { which allows each band to enqueue upto tx_queue_len skbs for a total of 3*tx_queue_len skbs. I am not sure if this was the intention of limiting in qdisc. Patch compiled and 32 simultaneous netperf testing ran fine. Also: # tc -s qdisc show dev eth2 qdisc pfifo_fast 0: root bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 Sent 16835026752 bytes 373116 pkt (dropped 0, overlimits 0 requeues 25) rate 0bit 0pps backlog 0b 0p requeues 25 Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6f7aebd1407..6128e6f2458 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -432,11 +432,11 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) { - int band = prio2band[skb->priority & TC_PRIO_MAX]; - struct pfifo_fast_priv *priv = qdisc_priv(qdisc); - struct sk_buff_head *list = band2list(priv, band); + if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { + int band = prio2band[skb->priority & TC_PRIO_MAX]; + struct pfifo_fast_priv *priv = qdisc_priv(qdisc); + struct sk_buff_head *list = band2list(priv, band); - if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) { priv->bitmap |= (1 << band); qdisc->q.qlen++; return __qdisc_enqueue_tail(skb, qdisc, list); -- cgit v1.2.3 From 6fef4c0c8eeff7de13007a5f56113475444a253d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 31 Aug 2009 19:50:41 +0000 Subject: netdev: convert pseudo-devices to netdev_tx_t Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_teql.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 12434b6c204..5a002c24723 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -268,7 +268,7 @@ static inline int teql_resolve(struct sk_buff *skb, return __teql_resolve(skb, skb_res, dev); } -static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) { struct teql_master *master = netdev_priv(dev); struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); @@ -307,7 +307,7 @@ restart: if (!netif_tx_queue_stopped(slave_txq) && !netif_tx_queue_frozen(slave_txq) && - slave_ops->ndo_start_xmit(skb, slave) == 0) { + slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { txq_trans_update(slave_txq); __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); -- cgit v1.2.3 From c9f1d0389b962521af1e2b699c8ee5e299d77b85 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:13 +0000 Subject: net_sched: fix class grafting errno codes If the parent qdisc doesn't support classes, use EOPNOTSUPP. If the parent class doesn't exist, use ENOENT. Currently EINVAL is returned in both cases. Additionally check whether grafting is supported and remove a now unnecessary graft function from sch_ingress. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 8 ++++---- net/sched/sch_ingress.c | 7 ------- 2 files changed, 4 insertions(+), 11 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 24d17ce9c29..bef2d645a36 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -728,14 +728,14 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, } else { const struct Qdisc_class_ops *cops = parent->ops->cl_ops; - err = -EINVAL; - - if (cops) { + err = -EOPNOTSUPP; + if (cops && cops->graft) { unsigned long cl = cops->get(parent, classid); if (cl) { err = cops->graft(parent, cl, new, &old); cops->put(parent, cl); - } + } else + err = -ENOENT; } if (!err) notify_and_destroy(skb, n, classid, old, new); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 4a2b7737435..ace7902b509 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -22,12 +22,6 @@ struct ingress_qdisc_data { /* ------------------------- Class/flow operations ------------------------- */ -static int ingress_graft(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old) -{ - return -EOPNOTSUPP; -} - static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) { return NULL; @@ -123,7 +117,6 @@ nla_put_failure: } static const struct Qdisc_class_ops ingress_class_ops = { - .graft = ingress_graft, .leaf = ingress_leaf, .get = ingress_get, .put = ingress_put, -- cgit v1.2.3 From 71ebe5e91947392bc276af713827eab12b6db8e4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:15 +0000 Subject: net_sched: make cls_ops->tcf_chain() optional Some qdiscs don't support attaching filters. Handle this centrally in cls_api and return a proper errno code (EOPNOTSUPP) instead of EINVAL. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/cls_api.c | 5 +++++ net/sched/sch_red.c | 6 ------ net/sched/sch_tbf.c | 6 ------ 3 files changed, 5 insertions(+), 12 deletions(-) (limited to 'net/sched') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 09cdcdfe7e9..bcfbdb4758c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -181,6 +181,9 @@ replay: if ((cops = q->ops->cl_ops) == NULL) return -EINVAL; + if (cops->tcf_chain == NULL) + return -EOPNOTSUPP; + /* Do we search for filter, attached to class? */ if (TC_H_MIN(parent)) { cl = cops->get(q, parent); @@ -433,6 +436,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) goto out; if ((cops = q->ops->cl_ops) == NULL) goto errout; + if (cops->tcf_chain == NULL) + goto errout; if (TC_H_MIN(tcm->tcm_parent)) { cl = cops->get(q, tcm->tcm_parent); if (cl == 0) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 2bdf241f631..c27b8023f07 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -331,11 +331,6 @@ static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker) } } -static struct tcf_proto **red_find_tcf(struct Qdisc *sch, unsigned long cl) -{ - return NULL; -} - static const struct Qdisc_class_ops red_class_ops = { .graft = red_graft, .leaf = red_leaf, @@ -344,7 +339,6 @@ static const struct Qdisc_class_ops red_class_ops = { .change = red_change_class, .delete = red_delete, .walk = red_walk, - .tcf_chain = red_find_tcf, .dump = red_dump_class, }; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index e22dfe85e43..28909699d24 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -433,11 +433,6 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) } } -static struct tcf_proto **tbf_find_tcf(struct Qdisc *sch, unsigned long cl) -{ - return NULL; -} - static const struct Qdisc_class_ops tbf_class_ops = { .graft = tbf_graft, @@ -447,7 +442,6 @@ static const struct Qdisc_class_ops tbf_class_ops = .change = tbf_change_class, .delete = tbf_delete, .walk = tbf_walk, - .tcf_chain = tbf_find_tcf, .dump = tbf_dump_class, }; -- cgit v1.2.3 From de6d5cdf881353f83006d5f3e28ac4fffd42145e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:16 +0000 Subject: net_sched: make cls_ops->change and cls_ops->delete optional Some schedulers don't support creating, changing or deleting classes. Make the respective callbacks optionally and consistently return -EOPNOTSUPP for unsupported operations, instead of currently either -EOPNOTSUPP, -ENOSYS or no error. In case of sch_prio and sch_multiq, the removed operations additionally checked for an invalid class. This is not necessary since the class argument can only orginate from ->get() or in case of ->change is 0 for creation of new classes, in which case ->change() incorrectly returned -ENOENT. As a side-effect, this patch fixes a possible (root-only) NULL pointer function call in sch_ingress, which didn't implement a so far mandatory ->delete() operation. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 8 ++++++-- net/sched/sch_ingress.c | 7 ------- net/sched/sch_multiq.c | 22 ---------------------- net/sched/sch_prio.c | 21 --------------------- net/sched/sch_red.c | 13 ------------- net/sched/sch_sfq.c | 7 ------- net/sched/sch_tbf.c | 13 ------------- 7 files changed, 6 insertions(+), 85 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bef2d645a36..166fcca86e7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1417,7 +1417,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) goto out; break; case RTM_DELTCLASS: - err = cops->delete(q, cl); + err = -EOPNOTSUPP; + if (cops->delete) + err = cops->delete(q, cl); if (err == 0) tclass_notify(skb, n, q, cl, RTM_DELTCLASS); goto out; @@ -1431,7 +1433,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } new_cl = cl; - err = cops->change(q, clid, pid, tca, &new_cl); + err = -EOPNOTSUPP; + if (cops->change) + err = cops->change(q, clid, pid, tca, &new_cl); if (err == 0) tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index ace7902b509..a9e646bdb60 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -42,12 +42,6 @@ static void ingress_put(struct Qdisc *sch, unsigned long cl) { } -static int ingress_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg) -{ - return 0; -} - static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) { return; @@ -120,7 +114,6 @@ static const struct Qdisc_class_ops ingress_class_ops = { .leaf = ingress_leaf, .get = ingress_get, .put = ingress_put, - .change = ingress_change, .walk = ingress_walk, .tcf_chain = ingress_find_tcf, .bind_tcf = ingress_bind_filter, diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 91273120304..a0ffe7158ff 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -348,26 +348,6 @@ static void multiq_put(struct Qdisc *q, unsigned long cl) return; } -static int multiq_change(struct Qdisc *sch, u32 handle, u32 parent, - struct nlattr **tca, unsigned long *arg) -{ - unsigned long cl = *arg; - struct multiq_sched_data *q = qdisc_priv(sch); - - if (cl - 1 > q->bands) - return -ENOENT; - return 0; -} - -static int multiq_delete(struct Qdisc *sch, unsigned long cl) -{ - struct multiq_sched_data *q = qdisc_priv(sch); - if (cl - 1 > q->bands) - return -ENOENT; - return 0; -} - - static int multiq_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { @@ -430,8 +410,6 @@ static const struct Qdisc_class_ops multiq_class_ops = { .leaf = multiq_leaf, .get = multiq_get, .put = multiq_put, - .change = multiq_change, - .delete = multiq_delete, .walk = multiq_walk, .tcf_chain = multiq_find_tcf, .bind_tcf = multiq_bind, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 94cecef7014..209a4ca4b98 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -311,25 +311,6 @@ static void prio_put(struct Qdisc *q, unsigned long cl) return; } -static int prio_change(struct Qdisc *sch, u32 handle, u32 parent, struct nlattr **tca, unsigned long *arg) -{ - unsigned long cl = *arg; - struct prio_sched_data *q = qdisc_priv(sch); - - if (cl - 1 > q->bands) - return -ENOENT; - return 0; -} - -static int prio_delete(struct Qdisc *sch, unsigned long cl) -{ - struct prio_sched_data *q = qdisc_priv(sch); - if (cl - 1 > q->bands) - return -ENOENT; - return 0; -} - - static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { @@ -392,8 +373,6 @@ static const struct Qdisc_class_ops prio_class_ops = { .leaf = prio_leaf, .get = prio_get, .put = prio_put, - .change = prio_change, - .delete = prio_delete, .walk = prio_walk, .tcf_chain = prio_find_tcf, .bind_tcf = prio_bind, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index c27b8023f07..a2c4d1aa3cb 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -308,17 +308,6 @@ static void red_put(struct Qdisc *sch, unsigned long arg) return; } -static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) -{ - return -ENOSYS; -} - -static int red_delete(struct Qdisc *sch, unsigned long cl) -{ - return -ENOSYS; -} - static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { @@ -336,8 +325,6 @@ static const struct Qdisc_class_ops red_class_ops = { .leaf = red_leaf, .get = red_get, .put = red_put, - .change = red_change_class, - .delete = red_delete, .walk = red_walk, .dump = red_dump_class, }; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 8706920a6d4..cb21380c060 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -496,12 +496,6 @@ nla_put_failure: return -1; } -static int sfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) -{ - return -EOPNOTSUPP; -} - static unsigned long sfq_get(struct Qdisc *sch, u32 classid) { return 0; @@ -560,7 +554,6 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) static const struct Qdisc_class_ops sfq_class_ops = { .get = sfq_get, - .change = sfq_change_class, .tcf_chain = sfq_find_tcf, .dump = sfq_dump_class, .dump_stats = sfq_dump_class_stats, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 28909699d24..d904167e73b 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -410,17 +410,6 @@ static void tbf_put(struct Qdisc *sch, unsigned long arg) { } -static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) -{ - return -ENOSYS; -} - -static int tbf_delete(struct Qdisc *sch, unsigned long arg) -{ - return -ENOSYS; -} - static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { @@ -439,8 +428,6 @@ static const struct Qdisc_class_ops tbf_class_ops = .leaf = tbf_leaf, .get = tbf_get, .put = tbf_put, - .change = tbf_change_class, - .delete = tbf_delete, .walk = tbf_walk, .dump = tbf_dump_class, }; -- cgit v1.2.3 From 5b9a9ccfad8553dbf7a9b17ba78bad70215ed0e2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:17 +0000 Subject: net_sched: remove some unnecessary checks in classful schedulers The class argument to the ->graft(), ->leaf(), ->dump(), ->dump_stats() all originate from either ->get() or ->walk() and are always valid. Remove unnecessary checks. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 38 +++++++++++++++++--------------------- net/sched/sch_hfsc.c | 4 +--- net/sched/sch_htb.c | 35 +++++++++++++++++------------------ net/sched/sch_multiq.c | 11 +---------- net/sched/sch_prio.c | 11 +---------- net/sched/sch_red.c | 2 -- net/sched/sch_tbf.c | 3 --- 7 files changed, 37 insertions(+), 67 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index d5798e17a83..5b132c47326 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1621,29 +1621,25 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, { struct cbq_class *cl = (struct cbq_class*)arg; - if (cl) { - if (new == NULL) { - new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, - &pfifo_qdisc_ops, - cl->common.classid); - if (new == NULL) - return -ENOBUFS; - } else { + if (new == NULL) { + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, cl->common.classid); + if (new == NULL) + return -ENOBUFS; + } else { #ifdef CONFIG_NET_CLS_ACT - if (cl->police == TC_POLICE_RECLASSIFY) - new->reshape_fail = cbq_reshape_fail; + if (cl->police == TC_POLICE_RECLASSIFY) + new->reshape_fail = cbq_reshape_fail; #endif - } - sch_tree_lock(sch); - *old = cl->q; - cl->q = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - - return 0; } - return -ENOENT; + sch_tree_lock(sch); + *old = cl->q; + cl->q = new; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); + qdisc_reset(*old); + sch_tree_unlock(sch); + + return 0; } static struct Qdisc * @@ -1651,7 +1647,7 @@ cbq_leaf(struct Qdisc *sch, unsigned long arg) { struct cbq_class *cl = (struct cbq_class*)arg; - return cl ? cl->q : NULL; + return cl->q; } static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index dad0144423d..375d64cb1a3 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1203,8 +1203,6 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, { struct hfsc_class *cl = (struct hfsc_class *)arg; - if (cl == NULL) - return -ENOENT; if (cl->level > 0) return -EINVAL; if (new == NULL) { @@ -1228,7 +1226,7 @@ hfsc_class_leaf(struct Qdisc *sch, unsigned long arg) { struct hfsc_class *cl = (struct hfsc_class *)arg; - if (cl != NULL && cl->level == 0) + if (cl->level == 0) return cl->qdisc; return NULL; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index ec4d46399d5..85acab9dc6f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1117,30 +1117,29 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, { struct htb_class *cl = (struct htb_class *)arg; - if (cl && !cl->level) { - if (new == NULL && - (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, - &pfifo_qdisc_ops, - cl->common.classid)) - == NULL) - return -ENOBUFS; - sch_tree_lock(sch); - *old = cl->un.leaf.q; - cl->un.leaf.q = new; - if (*old != NULL) { - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - } - sch_tree_unlock(sch); - return 0; + if (cl->level) + return -EINVAL; + if (new == NULL && + (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, + cl->common.classid)) == NULL) + return -ENOBUFS; + + sch_tree_lock(sch); + *old = cl->un.leaf.q; + cl->un.leaf.q = new; + if (*old != NULL) { + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); + qdisc_reset(*old); } - return -ENOENT; + sch_tree_unlock(sch); + return 0; } static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - return (cl && !cl->level) ? cl->un.leaf.q : NULL; + return !cl->level ? cl->un.leaf.q : NULL; } static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index a0ffe7158ff..069f81c9727 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -298,9 +298,6 @@ static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct multiq_sched_data *q = qdisc_priv(sch); unsigned long band = arg - 1; - if (band >= q->bands) - return -EINVAL; - if (new == NULL) new = &noop_qdisc; @@ -320,9 +317,6 @@ multiq_leaf(struct Qdisc *sch, unsigned long arg) struct multiq_sched_data *q = qdisc_priv(sch); unsigned long band = arg - 1; - if (band >= q->bands) - return NULL; - return q->queues[band]; } @@ -353,11 +347,8 @@ static int multiq_dump_class(struct Qdisc *sch, unsigned long cl, { struct multiq_sched_data *q = qdisc_priv(sch); - if (cl - 1 > q->bands) - return -ENOENT; tcm->tcm_handle |= TC_H_MIN(cl); - if (q->queues[cl-1]) - tcm->tcm_info = q->queues[cl-1]->handle; + tcm->tcm_info = q->queues[cl-1]->handle; return 0; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 209a4ca4b98..0f73c412d04 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -262,9 +262,6 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct prio_sched_data *q = qdisc_priv(sch); unsigned long band = arg - 1; - if (band >= q->bands) - return -EINVAL; - if (new == NULL) new = &noop_qdisc; @@ -284,9 +281,6 @@ prio_leaf(struct Qdisc *sch, unsigned long arg) struct prio_sched_data *q = qdisc_priv(sch); unsigned long band = arg - 1; - if (band >= q->bands) - return NULL; - return q->queues[band]; } @@ -316,11 +310,8 @@ static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff * { struct prio_sched_data *q = qdisc_priv(sch); - if (cl - 1 > q->bands) - return -ENOENT; tcm->tcm_handle |= TC_H_MIN(cl); - if (q->queues[cl-1]) - tcm->tcm_info = q->queues[cl-1]->handle; + tcm->tcm_info = q->queues[cl-1]->handle; return 0; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index a2c4d1aa3cb..072cdf442f8 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -268,8 +268,6 @@ static int red_dump_class(struct Qdisc *sch, unsigned long cl, { struct red_sched_data *q = qdisc_priv(sch); - if (cl != 1) - return -ENOENT; tcm->tcm_handle |= TC_H_MIN(1); tcm->tcm_info = q->qdisc->handle; return 0; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index d904167e73b..8fb8107ab18 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -368,9 +368,6 @@ static int tbf_dump_class(struct Qdisc *sch, unsigned long cl, { struct tbf_sched_data *q = qdisc_priv(sch); - if (cl != 1) /* only one class */ - return -ENOENT; - tcm->tcm_handle |= TC_H_MIN(1); tcm->tcm_info = q->qdisc->handle; -- cgit v1.2.3 From af356afa010f3cd2c8b8fcc3bce90f7a7b7ec02a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:18 +0000 Subject: net_sched: reintroduce dev->qdisc for use by sch_api Currently the multiqueue integration with the qdisc API suffers from a few problems: - with multiple queues, all root qdiscs use the same handle. This means they can't be exposed to userspace in a backwards compatible fashion. - all API operations always refer to queue number 0. Newly created qdiscs are automatically shared between all queues, its not possible to address individual queues or restore multiqueue behaviour once a shared qdisc has been attached. - Dumps only contain the root qdisc of queue 0, in case of non-shared qdiscs this means the statistics are incomplete. This patch reintroduces dev->qdisc, which points to the (single) root qdisc from userspace's point of view. Currently it either points to the first (non-shared) default qdisc, or a qdisc shared between all queues. The following patches will introduce a classful dummy qdisc, which will be used as root qdisc and contain the per-queue qdiscs as children. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/cls_api.c | 7 ++----- net/sched/sch_api.c | 41 ++++++++++++++++------------------------- net/sched/sch_generic.c | 25 +++++++++++-------------- 3 files changed, 29 insertions(+), 44 deletions(-) (limited to 'net/sched') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index bcfbdb4758c..6a536949cdc 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -168,8 +168,7 @@ replay: /* Find qdisc */ if (!parent) { - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); - q = dev_queue->qdisc_sleeping; + q = dev->qdisc; parent = q->handle; } else { q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent)); @@ -408,7 +407,6 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - struct netdev_queue *dev_queue; int t; int s_t; struct net_device *dev; @@ -427,9 +425,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return skb->len; - dev_queue = netdev_get_tx_queue(dev, 0); if (!tcm->tcm_parent) - q = dev_queue->qdisc_sleeping; + q = dev->qdisc; else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); if (!q) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 166fcca86e7..8aa9a0c5a9e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -207,7 +207,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) static void qdisc_list_add(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) - list_add_tail(&q->list, &qdisc_root_sleeping(q)->list); + list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list); } void qdisc_list_del(struct Qdisc *q) @@ -219,17 +219,11 @@ EXPORT_SYMBOL(qdisc_list_del); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { - unsigned int i; struct Qdisc *q; - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - struct Qdisc *txq_root = txq->qdisc_sleeping; - - q = qdisc_match_from_root(txq_root, handle); - if (q) - goto out; - } + q = qdisc_match_from_root(dev->qdisc, handle); + if (q) + goto out; q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); out: @@ -720,9 +714,14 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (new && i > 0) atomic_inc(&new->refcnt); - notify_and_destroy(skb, n, classid, old, new); + qdisc_destroy(old); } + notify_and_destroy(skb, n, classid, dev->qdisc, new); + if (new) + atomic_inc(&new->refcnt); + dev->qdisc = new ? : &noop_qdisc; + if (dev->flags & IFF_UP) dev_activate(dev); } else { @@ -974,9 +973,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) q = dev->rx_queue.qdisc_sleeping; } } else { - struct netdev_queue *dev_queue; - dev_queue = netdev_get_tx_queue(dev, 0); - q = dev_queue->qdisc_sleeping; + q = dev->qdisc; } if (!q) return -ENOENT; @@ -1044,9 +1041,7 @@ replay: q = dev->rx_queue.qdisc_sleeping; } } else { - struct netdev_queue *dev_queue; - dev_queue = netdev_get_tx_queue(dev, 0); - q = dev_queue->qdisc_sleeping; + q = dev->qdisc; } /* It may be default qdisc, ignore it */ @@ -1291,8 +1286,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; - dev_queue = netdev_get_tx_queue(dev, 0); - if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) + if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) goto done; dev_queue = &dev->rx_queue; @@ -1323,7 +1317,6 @@ done: static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); - struct netdev_queue *dev_queue; struct tcmsg *tcm = NLMSG_DATA(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; @@ -1361,7 +1354,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ - dev_queue = netdev_get_tx_queue(dev, 0); if (pid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(pid); @@ -1372,7 +1364,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } else if (qid1) { qid = qid1; } else if (qid == 0) - qid = dev_queue->qdisc_sleeping->handle; + qid = dev->qdisc->handle; /* Now qid is genuine qdisc handle consistent both with parent and child. @@ -1383,7 +1375,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) pid = TC_H_MAKE(qid, pid); } else { if (qid == 0) - qid = dev_queue->qdisc_sleeping->handle; + qid = dev->qdisc->handle; } /* OK. Locate qdisc */ @@ -1588,8 +1580,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - dev_queue = netdev_get_tx_queue(dev, 0); - if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) + if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) goto done; dev_queue = &dev->rx_queue; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6128e6f2458..a91f079fb47 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -623,19 +623,6 @@ void qdisc_destroy(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_destroy); -static bool dev_all_qdisc_sleeping_noop(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - - if (txq->qdisc_sleeping != &noop_qdisc) - return false; - } - return true; -} - static void attach_one_default_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_unused) @@ -677,6 +664,7 @@ static void transition_one_qdisc(struct net_device *dev, void dev_activate(struct net_device *dev) { + struct netdev_queue *txq; int need_watchdog; /* No queueing discipline is attached to device; @@ -685,9 +673,14 @@ void dev_activate(struct net_device *dev) virtual interfaces */ - if (dev_all_qdisc_sleeping_noop(dev)) + if (dev->qdisc == &noop_qdisc) { netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); + txq = netdev_get_tx_queue(dev, 0); + dev->qdisc = txq->qdisc_sleeping; + atomic_inc(&dev->qdisc->refcnt); + } + if (!netif_carrier_ok(dev)) /* Delay activation until next carrier-on event */ return; @@ -777,6 +770,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, void dev_init_scheduler(struct net_device *dev) { + dev->qdisc = &noop_qdisc; netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); @@ -802,5 +796,8 @@ void dev_shutdown(struct net_device *dev) { netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); + qdisc_destroy(dev->qdisc); + dev->qdisc = &noop_qdisc; + WARN_ON(timer_pending(&dev->watchdog_timer)); } -- cgit v1.2.3 From 589983cd21f4a2e4ed74a958805a90fa676845c5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:20 +0000 Subject: net_sched: move dev_graft_qdisc() to sch_generic.c It will be used in a following patch by the multiqueue qdisc. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 26 -------------------------- net/sched/sch_generic.c | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 26 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 8aa9a0c5a9e..d71f12be6e2 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -610,32 +610,6 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return i>0 ? autohandle : 0; } -/* Attach toplevel qdisc to device queue. */ - -static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, - struct Qdisc *qdisc) -{ - struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; - spinlock_t *root_lock; - - root_lock = qdisc_lock(oqdisc); - spin_lock_bh(root_lock); - - /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) - qdisc_reset(oqdisc); - - /* ... and graft new one */ - if (qdisc == NULL) - qdisc = &noop_qdisc; - dev_queue->qdisc_sleeping = qdisc; - rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); - - spin_unlock_bh(root_lock); - - return oqdisc; -} - void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) { const struct Qdisc_class_ops *cops; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a91f079fb47..e7c47ceb009 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -623,6 +623,31 @@ void qdisc_destroy(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_destroy); +/* Attach toplevel qdisc to device queue. */ +struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, + struct Qdisc *qdisc) +{ + struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; + spinlock_t *root_lock; + + root_lock = qdisc_lock(oqdisc); + spin_lock_bh(root_lock); + + /* Prune old scheduler */ + if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) + qdisc_reset(oqdisc); + + /* ... and graft new one */ + if (qdisc == NULL) + qdisc = &noop_qdisc; + dev_queue->qdisc_sleeping = qdisc; + rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); + + spin_unlock_bh(root_lock); + + return oqdisc; +} + static void attach_one_default_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_unused) -- cgit v1.2.3 From 6ec1c69a8f6492fd25722f4762721921da074c12 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 6 Sep 2009 01:58:51 -0700 Subject: net_sched: add classful multiqueue dummy scheduler This patch adds a classful dummy scheduler which can be used as root qdisc for multiqueue devices and exposes each device queue as a child class. This allows to address queues individually and graft them similar to regular classes. Additionally it presents an accumulated view of the statistics of all real root qdiscs in the dummy root. Two new callbacks are added to the qdisc_ops and qdisc_class_ops: - cl_ops->select_queue selects the tx queue number for new child classes. - qdisc_ops->attach() overrides root qdisc device grafting to attach non-shared qdiscs to the queues. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/Makefile | 2 +- net/sched/sch_api.c | 18 +++- net/sched/sch_generic.c | 32 +++++-- net/sched/sch_mq.c | 234 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 273 insertions(+), 13 deletions(-) create mode 100644 net/sched/sch_mq.c (limited to 'net/sched') diff --git a/net/sched/Makefile b/net/sched/Makefile index 54d950cd4b8..f14e71bfa58 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -2,7 +2,7 @@ # Makefile for the Linux Traffic Control Unit. # -obj-y := sch_generic.o +obj-y := sch_generic.o sch_mq.o obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o obj-$(CONFIG_NET_CLS) += cls_api.o diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d71f12be6e2..2a78d541015 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -678,6 +678,11 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (dev->flags & IFF_UP) dev_deactivate(dev); + if (new && new->ops->attach) { + new->ops->attach(new); + num_q = 0; + } + for (i = 0; i < num_q; i++) { struct netdev_queue *dev_queue = &dev->rx_queue; @@ -692,7 +697,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, } notify_and_destroy(skb, n, classid, dev->qdisc, new); - if (new) + if (new && !new->ops->attach) atomic_inc(&new->refcnt); dev->qdisc = new ? : &noop_qdisc; @@ -1095,10 +1100,16 @@ create_n_graft: q = qdisc_create(dev, &dev->rx_queue, tcm->tcm_parent, tcm->tcm_parent, tca, &err); - else - q = qdisc_create(dev, netdev_get_tx_queue(dev, 0), + else { + unsigned int ntx = 0; + + if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) + ntx = p->ops->cl_ops->select_queue(p, tcm); + + q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx), tcm->tcm_parent, tcm->tcm_handle, tca, &err); + } if (q == NULL) { if (err == -EAGAIN) goto replay; @@ -1674,6 +1685,7 @@ static int __init pktsched_init(void) { register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); + register_qdisc(&mq_qdisc_ops); proc_net_fops_create(&init_net, "psched", 0, &psched_fops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e7c47ceb009..4ae6aa562f2 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -514,7 +514,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) return 0; } -static struct Qdisc_ops pfifo_fast_ops __read_mostly = { +struct Qdisc_ops pfifo_fast_ops __read_mostly = { .id = "pfifo_fast", .priv_size = sizeof(struct pfifo_fast_priv), .enqueue = pfifo_fast_enqueue, @@ -670,6 +670,26 @@ static void attach_one_default_qdisc(struct net_device *dev, dev_queue->qdisc_sleeping = qdisc; } +static void attach_default_qdiscs(struct net_device *dev) +{ + struct netdev_queue *txq; + struct Qdisc *qdisc; + + txq = netdev_get_tx_queue(dev, 0); + + if (!netif_is_multiqueue(dev) || dev->tx_queue_len == 0) { + netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); + dev->qdisc = txq->qdisc_sleeping; + atomic_inc(&dev->qdisc->refcnt); + } else { + qdisc = qdisc_create_dflt(dev, txq, &mq_qdisc_ops, TC_H_ROOT); + if (qdisc) { + qdisc->ops->attach(qdisc); + dev->qdisc = qdisc; + } + } +} + static void transition_one_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_need_watchdog) @@ -689,7 +709,6 @@ static void transition_one_qdisc(struct net_device *dev, void dev_activate(struct net_device *dev) { - struct netdev_queue *txq; int need_watchdog; /* No queueing discipline is attached to device; @@ -698,13 +717,8 @@ void dev_activate(struct net_device *dev) virtual interfaces */ - if (dev->qdisc == &noop_qdisc) { - netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); - - txq = netdev_get_tx_queue(dev, 0); - dev->qdisc = txq->qdisc_sleeping; - atomic_inc(&dev->qdisc->refcnt); - } + if (dev->qdisc == &noop_qdisc) + attach_default_qdiscs(dev); if (!netif_carrier_ok(dev)) /* Delay activation until next carrier-on event */ diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c new file mode 100644 index 00000000000..c84dec9c8c7 --- /dev/null +++ b/net/sched/sch_mq.c @@ -0,0 +1,234 @@ +/* + * net/sched/sch_mq.c Classful multiqueue dummy scheduler + * + * Copyright (c) 2009 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +struct mq_sched { + struct Qdisc **qdiscs; +}; + +static void mq_destroy(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct mq_sched *priv = qdisc_priv(sch); + unsigned int ntx; + + if (!priv->qdiscs) + return; + for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) + qdisc_destroy(priv->qdiscs[ntx]); + kfree(priv->qdiscs); +} + +static int mq_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct net_device *dev = qdisc_dev(sch); + struct mq_sched *priv = qdisc_priv(sch); + struct netdev_queue *dev_queue; + struct Qdisc *qdisc; + unsigned int ntx; + + if (sch->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + if (!netif_is_multiqueue(dev)) + return -EOPNOTSUPP; + + /* pre-allocate qdiscs, attachment can't fail */ + priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), + GFP_KERNEL); + if (priv->qdiscs == NULL) + return -ENOMEM; + + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + dev_queue = netdev_get_tx_queue(dev, ntx); + qdisc = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops, + TC_H_MAKE(TC_H_MAJ(sch->handle), + TC_H_MIN(ntx + 1))); + if (qdisc == NULL) + goto err; + qdisc->flags |= TCQ_F_CAN_BYPASS; + priv->qdiscs[ntx] = qdisc; + } + + return 0; + +err: + mq_destroy(sch); + return -ENOMEM; +} + +static void mq_attach(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct mq_sched *priv = qdisc_priv(sch); + struct Qdisc *qdisc; + unsigned int ntx; + + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + qdisc = priv->qdiscs[ntx]; + qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc); + if (qdisc) + qdisc_destroy(qdisc); + } + kfree(priv->qdiscs); + priv->qdiscs = NULL; +} + +static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *qdisc; + unsigned int ntx; + + sch->q.qlen = 0; + memset(&sch->bstats, 0, sizeof(sch->bstats)); + memset(&sch->qstats, 0, sizeof(sch->qstats)); + + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; + spin_lock_bh(qdisc_lock(qdisc)); + sch->q.qlen += qdisc->q.qlen; + sch->bstats.bytes += qdisc->bstats.bytes; + sch->bstats.packets += qdisc->bstats.packets; + sch->qstats.qlen += qdisc->qstats.qlen; + sch->qstats.backlog += qdisc->qstats.backlog; + sch->qstats.drops += qdisc->qstats.drops; + sch->qstats.requeues += qdisc->qstats.requeues; + sch->qstats.overlimits += qdisc->qstats.overlimits; + spin_unlock_bh(qdisc_lock(qdisc)); + } + return 0; +} + +static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned long ntx = cl - 1; + + if (ntx >= dev->num_tx_queues) + return NULL; + return netdev_get_tx_queue(dev, ntx); +} + +static unsigned int mq_select_queue(struct Qdisc *sch, struct tcmsg *tcm) +{ + unsigned int ntx = TC_H_MIN(tcm->tcm_parent); + + if (!mq_queue_get(sch, ntx)) + return 0; + return ntx - 1; +} + +static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, + struct Qdisc **old) +{ + struct netdev_queue *dev_queue = mq_queue_get(sch, cl); + struct net_device *dev = qdisc_dev(sch); + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + + *old = dev_graft_qdisc(dev_queue, new); + + if (dev->flags & IFF_UP) + dev_activate(dev); + return 0; +} + +static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl) +{ + struct netdev_queue *dev_queue = mq_queue_get(sch, cl); + + return dev_queue->qdisc_sleeping; +} + +static unsigned long mq_get(struct Qdisc *sch, u32 classid) +{ + unsigned int ntx = TC_H_MIN(classid); + + if (!mq_queue_get(sch, ntx)) + return 0; + return ntx; +} + +static void mq_put(struct Qdisc *sch, unsigned long cl) +{ + return; +} + +static int mq_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct netdev_queue *dev_queue = mq_queue_get(sch, cl); + + tcm->tcm_parent = TC_H_ROOT; + tcm->tcm_handle |= TC_H_MIN(cl); + tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + return 0; +} + +static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) +{ + struct netdev_queue *dev_queue = mq_queue_get(sch, cl); + + sch = dev_queue->qdisc_sleeping; + if (gnet_stats_copy_basic(d, &sch->bstats) < 0 || + gnet_stats_copy_queue(d, &sch->qstats) < 0) + return -1; + return 0; +} + +static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned int ntx; + + if (arg->stop) + return; + + arg->count = arg->skip; + for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) { + if (arg->fn(sch, ntx + 1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static const struct Qdisc_class_ops mq_class_ops = { + .select_queue = mq_select_queue, + .graft = mq_graft, + .leaf = mq_leaf, + .get = mq_get, + .put = mq_put, + .walk = mq_walk, + .dump = mq_dump_class, + .dump_stats = mq_dump_class_stats, +}; + +struct Qdisc_ops mq_qdisc_ops __read_mostly = { + .cl_ops = &mq_class_ops, + .id = "mq", + .priv_size = sizeof(struct mq_sched), + .init = mq_init, + .destroy = mq_destroy, + .attach = mq_attach, + .dump = mq_dump, + .owner = THIS_MODULE, +}; -- cgit v1.2.3 From 23bcf634c8bc0d84607a5b863333191d58baee4c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 9 Sep 2009 18:11:23 -0700 Subject: net_sched: fix estimator lock selection for mq child qdiscs When new child qdiscs are attached to the mq qdisc, they are actually attached as root qdiscs to the device queues. The lock selection for new estimators incorrectly picks the root lock of the existing and to be replaced qdisc, which results in a use-after-free once the old qdisc has been destroyed. Mark mq qdisc instances with a new flag and treat qdiscs attached to mq as children similar to regular root qdiscs. Additionally prevent estimators from being attached to the mq qdisc itself since it only updates its byte and packet counters during dumps. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 42 ++++++++++++++++++++++++++---------------- net/sched/sch_mq.c | 1 + 2 files changed, 27 insertions(+), 16 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2a78d541015..3af106140f3 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -733,7 +733,8 @@ static struct lock_class_key qdisc_rx_lock; static struct Qdisc * qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, - u32 parent, u32 handle, struct nlattr **tca, int *errp) + struct Qdisc *p, u32 parent, u32 handle, + struct nlattr **tca, int *errp) { int err; struct nlattr *kind = tca[TCA_KIND]; @@ -810,24 +811,21 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (tca[TCA_RATE]) { spinlock_t *root_lock; + err = -EOPNOTSUPP; + if (sch->flags & TCQ_F_MQROOT) + goto err_out4; + if ((sch->parent != TC_H_ROOT) && - !(sch->flags & TCQ_F_INGRESS)) + !(sch->flags & TCQ_F_INGRESS) && + (!p || !(p->flags & TCQ_F_MQROOT))) root_lock = qdisc_root_sleeping_lock(sch); else root_lock = qdisc_lock(sch); err = gen_new_estimator(&sch->bstats, &sch->rate_est, root_lock, tca[TCA_RATE]); - if (err) { - /* - * Any broken qdiscs that would require - * a ops->reset() here? The qdisc was never - * in action so it shouldn't be necessary. - */ - if (ops->destroy) - ops->destroy(sch); - goto err_out3; - } + if (err) + goto err_out4; } qdisc_list_add(sch); @@ -843,6 +841,15 @@ err_out2: err_out: *errp = err; return NULL; + +err_out4: + /* + * Any broken qdiscs that would require a ops->reset() here? + * The qdisc was never in action so it shouldn't be necessary. + */ + if (ops->destroy) + ops->destroy(sch); + goto err_out3; } static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) @@ -867,13 +874,16 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) qdisc_put_stab(sch->stab); sch->stab = stab; - if (tca[TCA_RATE]) + if (tca[TCA_RATE]) { /* NB: ignores errors from replace_estimator because change can't be undone. */ + if (sch->flags & TCQ_F_MQROOT) + goto out; gen_replace_estimator(&sch->bstats, &sch->rate_est, qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); - + } +out: return 0; } @@ -1097,7 +1107,7 @@ create_n_graft: if (!(n->nlmsg_flags&NLM_F_CREATE)) return -ENOENT; if (clid == TC_H_INGRESS) - q = qdisc_create(dev, &dev->rx_queue, + q = qdisc_create(dev, &dev->rx_queue, p, tcm->tcm_parent, tcm->tcm_parent, tca, &err); else { @@ -1106,7 +1116,7 @@ create_n_graft: if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) ntx = p->ops->cl_ops->select_queue(p, tcm); - q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx), + q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx), p, tcm->tcm_parent, tcm->tcm_handle, tca, &err); } diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index c84dec9c8c7..dd5ee022f1f 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -64,6 +64,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) priv->qdiscs[ntx] = qdisc; } + sch->flags |= TCQ_F_MQROOT; return 0; err: -- cgit v1.2.3