diff options
Diffstat (limited to 'net/sched')
37 files changed, 1164 insertions, 629 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 6767e54155d..4f7ef0db302 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -194,6 +194,17 @@ config NET_SCH_NETEM If unsure, say N. +config NET_SCH_DRR + tristate "Deficit Round Robin scheduler (DRR)" + help + Say Y here if you want to use the Deficit Round Robin (DRR) packet + scheduling algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_drr. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT @@ -316,6 +327,17 @@ config NET_CLS_FLOW To compile this code as a module, choose M here: the module will be called cls_flow. +config NET_CLS_CGROUP + bool "Control Group Classifier" + select NET_CLS + depends on CGROUPS + ---help--- + Say Y here if you want to classify packets based on the control + cgroup of their process. + + To compile this code as a module, choose M here: the + module will be called cls_cgroup. + config NET_EMATCH bool "Extended Matches" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index e60c9925b26..54d950cd4b8 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o +obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o @@ -38,6 +39,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o +obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o obj-$(CONFIG_NET_EMATCH) += ematch.o obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8f457f1e0ac..9d03cc33b6c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -214,12 +214,14 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, } EXPORT_SYMBOL(tcf_hash_check); -struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo) +struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, + struct tc_action *a, int size, int bind, + u32 *idx_gen, struct tcf_hashinfo *hinfo) { struct tcf_common *p = kzalloc(size, GFP_KERNEL); if (unlikely(!p)) - return p; + return ERR_PTR(-ENOMEM); p->tcfc_refcnt = 1; if (bind) p->tcfc_bindcnt = 1; @@ -228,9 +230,15 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_acti p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; - if (est) - gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, - &p->tcfc_lock, est); + if (est) { + int err = gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, + &p->tcfc_lock, est); + if (err) { + kfree(p); + return ERR_PTR(err); + } + } + a->priv = (void *) p; return p; } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index ac04289da5d..e7f796aec65 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -88,8 +88,8 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind, &gact_idx_gen, &gact_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 0453d79ebf5..082c520b0de 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -136,8 +136,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, &ipt_idx_gen, &ipt_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 70341c020b6..b9aaab4e035 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -105,8 +105,8 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, return -EINVAL; pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind, &mirred_idx_gen, &mirred_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 7b39ed485bc..d885ba31156 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -68,8 +68,8 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &nat_idx_gen, &nat_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); p = to_tcf_nat(pc); ret = ACT_P_CREATED; } else { diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index d5f4e340486..96c0ed115e2 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -68,8 +68,8 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, return -EINVAL; pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &pedit_idx_gen, &pedit_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); p = to_pedit(pc); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 38015b49394..5c72a116b1a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -182,17 +182,32 @@ override: R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]); if (R_tab == NULL) goto failure; + + if (!est && (ret == ACT_P_CREATED || + !gen_estimator_active(&police->tcf_bstats, + &police->tcf_rate_est))) { + err = -EINVAL; + goto failure; + } + if (parm->peakrate.rate) { P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE]); - if (P_tab == NULL) { - qdisc_put_rtab(R_tab); + if (P_tab == NULL) goto failure; - } } } - /* No failure allowed after this point */ + spin_lock_bh(&police->tcf_lock); + if (est) { + err = gen_replace_estimator(&police->tcf_bstats, + &police->tcf_rate_est, + &police->tcf_lock, est); + if (err) + goto failure_unlock; + } + + /* No failure allowed after this point */ if (R_tab != NULL) { qdisc_put_rtab(police->tcfp_R_tab); police->tcfp_R_tab = R_tab; @@ -217,10 +232,6 @@ override: if (tb[TCA_POLICE_AVRATE]) police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]); - if (est) - gen_replace_estimator(&police->tcf_bstats, - &police->tcf_rate_est, - &police->tcf_lock, est); spin_unlock_bh(&police->tcf_lock); if (ret != ACT_P_CREATED) @@ -238,7 +249,13 @@ override: a->priv = police; return ret; +failure_unlock: + spin_unlock_bh(&police->tcf_lock); failure: + if (P_tab) + qdisc_put_rtab(P_tab); + if (R_tab) + qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) kfree(police); return err; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e7851ce92cf..8daa1ebc741 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -124,8 +124,8 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, &simp_idx_gen, &simp_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); d = to_defact(pc); ret = alloc_defdata(d, defdata); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index fe9777e77f3..4ab916b8074 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -104,8 +104,8 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, if (!pc) { pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, &skbedit_idx_gen, &skbedit_hash_info); - if (unlikely(!pc)) - return -ENOMEM; + if (IS_ERR(pc)) + return PTR_ERR(pc); d = to_skbedit(pc); ret = ACT_P_CREATED; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 16e7ac9774e..173fcc4b050 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -531,7 +531,8 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, if (src->action) { struct tc_action *act; tcf_tree_lock(tp); - act = xchg(&dst->action, src->action); + act = dst->action; + dst->action = src->action; tcf_tree_unlock(tp); if (act) tcf_action_destroy(act, TCA_ACT_UNBIND); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 956915c217d..4e2bda85411 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -102,7 +102,7 @@ static inline void basic_delete_filter(struct tcf_proto *tp, static void basic_destroy(struct tcf_proto *tp) { - struct basic_head *head = (struct basic_head *) xchg(&tp->root, NULL); + struct basic_head *head = tp->root; struct basic_filter *f, *n; list_for_each_entry_safe(f, n, &head->flist, link) { diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c new file mode 100644 index 00000000000..0d68b197598 --- /dev/null +++ b/net/sched/cls_cgroup.c @@ -0,0 +1,288 @@ +/* + * net/sched/cls_cgroup.c Control Group Classifier + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf <tgraf@suug.ch> + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/cgroup.h> +#include <net/rtnetlink.h> +#include <net/pkt_cls.h> + +struct cgroup_cls_state +{ + struct cgroup_subsys_state css; + u32 classid; +}; + +static inline struct cgroup_cls_state *net_cls_state(struct cgroup *cgrp) +{ + return (struct cgroup_cls_state *) + cgroup_subsys_state(cgrp, net_cls_subsys_id); +} + +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, + struct cgroup *cgrp) +{ + struct cgroup_cls_state *cs; + + if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL))) + return ERR_PTR(-ENOMEM); + + if (cgrp->parent) + cs->classid = net_cls_state(cgrp->parent)->classid; + + return &cs->css; +} + +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + kfree(ss); +} + +static u64 read_classid(struct cgroup *cgrp, struct cftype *cft) +{ + return net_cls_state(cgrp)->classid; +} + +static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value) +{ + if (!cgroup_lock_live_group(cgrp)) + return -ENODEV; + + net_cls_state(cgrp)->classid = (u32) value; + + cgroup_unlock(); + + return 0; +} + +static struct cftype ss_files[] = { + { + .name = "classid", + .read_u64 = read_classid, + .write_u64 = write_classid, + }, +}; + +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); +} + +struct cgroup_subsys net_cls_subsys = { + .name = "net_cls", + .create = cgrp_create, + .destroy = cgrp_destroy, + .populate = cgrp_populate, + .subsys_id = net_cls_subsys_id, +}; + +struct cls_cgroup_head +{ + u32 handle; + struct tcf_exts exts; + struct tcf_ematch_tree ematches; +}; + +static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) +{ + struct cls_cgroup_head *head = tp->root; + struct cgroup_cls_state *cs; + int ret = 0; + + /* + * Due to the nature of the classifier it is required to ignore all + * packets originating from softirq context as accessing `current' + * would lead to false results. + * + * This test assumes that all callers of dev_queue_xmit() explicitely + * disable bh. Knowing this, it is possible to detect softirq based + * calls by looking at the number of nested bh disable calls because + * softirqs always disables bh. + */ + if (softirq_count() != SOFTIRQ_OFFSET) + return -1; + + rcu_read_lock(); + cs = (struct cgroup_cls_state *) task_subsys_state(current, + net_cls_subsys_id); + if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) { + res->classid = cs->classid; + res->class = 0; + ret = tcf_exts_exec(skb, &head->exts, res); + } else + ret = -1; + + rcu_read_unlock(); + + return ret; +} + +static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) +{ + return 0UL; +} + +static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f) +{ +} + +static int cls_cgroup_init(struct tcf_proto *tp) +{ + return 0; +} + +static const struct tcf_ext_map cgroup_ext_map = { + .action = TCA_CGROUP_ACT, + .police = TCA_CGROUP_POLICE, +}; + +static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { + [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, +}; + +static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base, + u32 handle, struct nlattr **tca, + unsigned long *arg) +{ + struct nlattr *tb[TCA_CGROUP_MAX+1]; + struct cls_cgroup_head *head = tp->root; + struct tcf_ematch_tree t; + struct tcf_exts e; + int err; + + if (head == NULL) { + if (!handle) + return -EINVAL; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + + head->handle = handle; + + tcf_tree_lock(tp); + tp->root = head; + tcf_tree_unlock(tp); + } + + if (handle != head->handle) + return -ENOENT; + + err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], + cgroup_policy); + if (err < 0) + return err; + + err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map); + if (err < 0) + return err; + + err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t); + if (err < 0) + return err; + + tcf_exts_change(tp, &head->exts, &e); + tcf_em_tree_change(tp, &head->ematches, &t); + + return 0; +} + +static void cls_cgroup_destroy(struct tcf_proto *tp) +{ + struct cls_cgroup_head *head = tp->root; + + if (head) { + tcf_exts_destroy(tp, &head->exts); + tcf_em_tree_destroy(tp, &head->ematches); + kfree(head); + } +} + +static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg) +{ + return -EOPNOTSUPP; +} + +static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct cls_cgroup_head *head = tp->root; + + if (arg->count < arg->skip) + goto skip; + + if (arg->fn(tp, (unsigned long) head, arg) < 0) { + arg->stop = 1; + return; + } +skip: + arg->count++; +} + +static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct cls_cgroup_head *head = tp->root; + unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nest; + + t->tcm_handle = head->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + + if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 || + tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0) + goto nla_put_failure; + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { + .kind = "cgroup", + .init = cls_cgroup_init, + .change = cls_cgroup_change, + .classify = cls_cgroup_classify, + .destroy = cls_cgroup_destroy, + .get = cls_cgroup_get, + .put = cls_cgroup_put, + .delete = cls_cgroup_delete, + .walk = cls_cgroup_walk, + .dump = cls_cgroup_dump, + .owner = THIS_MODULE, +}; + +static int __init init_cgroup_cls(void) +{ + return register_tcf_proto_ops(&cls_cgroup_ops); +} + +static void __exit exit_cgroup_cls(void) +{ + unregister_tcf_proto_ops(&cls_cgroup_ops); +} + +module_init(init_cgroup_cls); +module_exit(exit_cgroup_cls); +MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index b0f90e593af..6d6e87585fb 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -148,7 +148,7 @@ fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f) static void fw_destroy(struct tcf_proto *tp) { - struct fw_head *head = (struct fw_head*)xchg(&tp->root, NULL); + struct fw_head *head = tp->root; struct fw_filter *f; int h; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index e3d8455eebc..bdf1f4172ee 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -260,7 +260,7 @@ route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) static void route4_destroy(struct tcf_proto *tp) { - struct route4_head *head = xchg(&tp->root, NULL); + struct route4_head *head = tp->root; int h1, h2; if (head == NULL) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 7a7bff5ded2..e806f2314b5 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -13,12 +13,6 @@ #include <net/netlink.h> #include <net/pkt_cls.h> - -/* - * Not quite sure if we need all the xchgs Alexey uses when accessing things. - * Can always add them later ... :) - */ - /* * Passing parameters to the root seems to be done more awkwardly than really * necessary. At least, u32 doesn't seem to use such dirty hacks. To be diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 246f9065ce3..05d178008cb 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -387,7 +387,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) static void u32_destroy(struct tcf_proto *tp) { struct tc_u_common *tp_c = tp->data; - struct tc_u_hnode *root_ht = xchg(&tp->root, NULL); + struct tc_u_hnode *root_ht = tp->root; WARN_ON(root_ht == NULL); @@ -479,7 +479,7 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base, err = -EINVAL; if (tb[TCA_U32_LINK]) { u32 handle = nla_get_u32(tb[TCA_U32_LINK]); - struct tc_u_hnode *ht_down = NULL; + struct tc_u_hnode *ht_down = NULL, *ht_old; if (TC_U32_KEY(handle)) goto errout; @@ -493,11 +493,12 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base, } tcf_tree_lock(tp); - ht_down = xchg(&n->ht_down, ht_down); + ht_old = n->ht_down; + n->ht_down = ht_down; tcf_tree_unlock(tp); - if (ht_down) - ht_down->refcnt--; + if (ht_old) + ht_old->refcnt--; } if (tb[TCA_U32_CLASSID]) { n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]); diff --git a/net/sched/ematch.c b/net/sched/ematch.c index e82519e548d..aab59409728 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -71,7 +71,7 @@ * * static void __exit exit_my_ematch(void) * { - * return tcf_em_unregister(&my_ops); + * tcf_em_unregister(&my_ops); * } * * module_init(init_my_ematch); @@ -154,23 +154,11 @@ EXPORT_SYMBOL(tcf_em_register); * * Returns -ENOENT if no matching ematch was found. */ -int tcf_em_unregister(struct tcf_ematch_ops *ops) +void tcf_em_unregister(struct tcf_ematch_ops *ops) { - int err = 0; - struct tcf_ematch_ops *e; - write_lock(&ematch_mod_lock); - list_for_each_entry(e, &ematch_ops, link) { - if (e == ops) { - list_del(&e->link); - goto out; - } - } - - err = -ENOENT; -out: + list_del(&ops->link); write_unlock(&ematch_mod_lock); - return err; } EXPORT_SYMBOL(tcf_em_unregister); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6ab4a2f92ca..f859dd5fabf 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -97,10 +97,9 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, Auxiliary routines: - ---requeue + ---peek - requeues once dequeued packet. It is used for non-standard or - just buggy devices, which can defer output even if netif_queue_stopped()=0. + like dequeue but without removing a packet from the queue ---reset @@ -147,8 +146,14 @@ int register_qdisc(struct Qdisc_ops *qops) if (qops->enqueue == NULL) qops->enqueue = noop_qdisc_ops.enqueue; - if (qops->requeue == NULL) - qops->requeue = noop_qdisc_ops.requeue; + if (qops->peek == NULL) { + if (qops->dequeue == NULL) { + qops->peek = noop_qdisc_ops.peek; + } else { + rc = -EINVAL; + goto out; + } + } if (qops->dequeue == NULL) qops->dequeue = noop_qdisc_ops.dequeue; @@ -199,28 +204,16 @@ struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) return NULL; } -/* - * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen() - * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue() - */ -static DEFINE_SPINLOCK(qdisc_list_lock); - static void qdisc_list_add(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { - spin_lock_bh(&qdisc_list_lock); + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) list_add_tail(&q->list, &qdisc_root_sleeping(q)->list); - spin_unlock_bh(&qdisc_list_lock); - } } void qdisc_list_del(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { - spin_lock_bh(&qdisc_list_lock); + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) list_del(&q->list); - spin_unlock_bh(&qdisc_list_lock); - } } EXPORT_SYMBOL(qdisc_list_del); @@ -229,22 +222,17 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) unsigned int i; struct Qdisc *q; - spin_lock_bh(&qdisc_list_lock); - for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); struct Qdisc *txq_root = txq->qdisc_sleeping; q = qdisc_match_from_root(txq_root, handle); if (q) - goto unlock; + goto out; } q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); - -unlock: - spin_unlock_bh(&qdisc_list_lock); - +out: return q; } @@ -892,9 +880,12 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) sch->stab = stab; if (tca[TCA_RATE]) + /* NB: ignores errors from replace_estimator + because change can't be undone. */ gen_replace_estimator(&sch->bstats, &sch->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + return 0; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 43d37256c15..2a8b83af7c4 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -62,7 +62,7 @@ struct atm_qdisc_data { struct atm_flow_data link; /* unclassified skbs go here */ struct atm_flow_data *flows; /* NB: "link" is also on this list */ - struct tasklet_struct task; /* requeue tasklet */ + struct tasklet_struct task; /* dequeue tasklet */ }; /* ------------------------- Class/flow operations ------------------------- */ @@ -102,7 +102,8 @@ static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, return -EINVAL; if (!new) new = &noop_qdisc; - *old = xchg(&flow->q, new); + *old = flow->q; + flow->q = new; if (*old) qdisc_reset(*old); return 0; @@ -480,11 +481,14 @@ static void sch_atm_dequeue(unsigned long data) * If traffic is properly shaped, this won't generate nasty * little bursts. Otherwise, it may ... (but that's okay) */ - while ((skb = flow->q->dequeue(flow->q))) { - if (!atm_may_send(flow->vcc, skb->truesize)) { - (void)flow->q->ops->requeue(skb, flow->q); + while ((skb = flow->q->ops->peek(flow->q))) { + if (!atm_may_send(flow->vcc, skb->truesize)) break; - } + + skb = qdisc_dequeue_peeked(flow->q); + if (unlikely(!skb)) + break; + pr_debug("atm_tc_dequeue: sending on class %p\n", flow); /* remove any LL header somebody else has attached */ skb_pull(skb, skb_network_offset(skb)); @@ -516,27 +520,19 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); tasklet_schedule(&p->task); - skb = p->link.q->dequeue(p->link.q); + skb = qdisc_dequeue_peeked(p->link.q); if (skb) sch->q.qlen--; return skb; } -static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) +static struct sk_buff *atm_tc_peek(struct Qdisc *sch) { struct atm_qdisc_data *p = qdisc_priv(sch); - int ret; - pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - ret = p->link.q->ops->requeue(skb, p->link.q); - if (!ret) { - sch->q.qlen++; - sch->qstats.requeues++; - } else if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - p->link.qstats.drops++; - } - return ret; + pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p); + + return p->link.q->ops->peek(p->link.q); } static unsigned int atm_tc_drop(struct Qdisc *sch) @@ -694,7 +690,7 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = { .priv_size = sizeof(struct atm_qdisc_data), .enqueue = atm_tc_enqueue, .dequeue = atm_tc_dequeue, - .requeue = atm_tc_requeue, + .peek = atm_tc_peek, .drop = atm_tc_drop, .init = atm_tc_init, .reset = atm_tc_reset, diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index 507fb488bc9..094a874b48b 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -33,6 +33,7 @@ static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = { .priv_size = 0, .enqueue = blackhole_enqueue, .dequeue = blackhole_dequeue, + .peek = blackhole_dequeue, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 03e389e8d94..9e43ed94916 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -405,40 +405,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -static int -cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - int ret; - - if ((cl = q->tx_class) == NULL) { - kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_CN; - } - q->tx_class = NULL; - - cbq_mark_toplevel(q, cl); - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = cl; - cl->q->__parent = sch; -#endif - if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - if (!cl->next_alive) - cbq_activate_class(cl); - return 0; - } - if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - cl->qstats.drops++; - } - return ret; -} - /* Overlimit actions */ /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */ @@ -1669,7 +1635,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, #endif } sch_tree_lock(sch); - *old = xchg(&cl->q, new); + *old = cl->q; + cl->q = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -1798,11 +1765,23 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t } if (tb[TCA_CBQ_RATE]) { - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]); + rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), + tb[TCA_CBQ_RTAB]); if (rtab == NULL) return -EINVAL; } + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + if (rtab) + qdisc_put_rtab(rtab); + return err; + } + } + /* Change class parameters */ sch_tree_lock(sch); @@ -1810,8 +1789,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cbq_deactivate_class(cl); if (rtab) { - rtab = xchg(&cl->R_tab, rtab); - qdisc_put_rtab(rtab); + qdisc_put_rtab(cl->R_tab); + cl->R_tab = rtab; } if (tb[TCA_CBQ_LSSOPT]) @@ -1838,10 +1817,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t sch_tree_unlock(sch); - if (tca[TCA_RATE]) - gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); return 0; } @@ -1888,6 +1863,17 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl = kzalloc(sizeof(*cl), GFP_KERNEL); if (cl == NULL) goto failure; + + if (tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + kfree(cl); + goto failure; + } + } + cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; @@ -1929,10 +1915,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t qdisc_class_hash_grow(sch, &q->clhash); - if (tca[TCA_RATE]) - gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); - *arg = (unsigned long)cl; return 0; @@ -2066,7 +2048,7 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = { .priv_size = sizeof(struct cbq_sched_data), .enqueue = cbq_enqueue, .dequeue = cbq_dequeue, - .requeue = cbq_requeue, + .peek = qdisc_peek_dequeued, .drop = cbq_drop, .init = cbq_init, .reset = cbq_reset, diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c new file mode 100644 index 00000000000..f6b4fa97df7 --- /dev/null +++ b/net/sched/sch_drr.c @@ -0,0 +1,519 @@ +/* + * net/sched/sch_drr.c Deficit Round Robin scheduler + * + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/pkt_sched.h> +#include <net/sch_generic.h> +#include <net/pkt_sched.h> +#include <net/pkt_cls.h> + +struct drr_class { + struct Qdisc_class_common common; + unsigned int refcnt; + unsigned int filter_cnt; + + struct gnet_stats_basic bstats; + struct gnet_stats_queue qstats; + struct gnet_stats_rate_est rate_est; + struct list_head alist; + struct Qdisc *qdisc; + + u32 quantum; + u32 deficit; +}; + +struct drr_sched { + struct list_head active; + struct tcf_proto *filter_list; + struct Qdisc_class_hash clhash; +}; + +static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) +{ + struct drr_sched *q = qdisc_priv(sch); + struct Qdisc_class_common *clc; + + clc = qdisc_class_find(&q->clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct drr_class, common); +} + +static void drr_purge_queue(struct drr_class *cl) +{ + unsigned int len = cl->qdisc->q.qlen; + + qdisc_reset(cl->qdisc); + qdisc_tree_decrease_qlen(cl->qdisc, len); +} + +static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { + [TCA_DRR_QUANTUM] = { .type = NLA_U32 }, +}; + +static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + struct nlattr **tca, unsigned long *arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl = (struct drr_class *)*arg; + struct nlattr *tb[TCA_DRR_MAX + 1]; + u32 quantum; + int err; + + err = nla_parse_nested(tb, TCA_DRR_MAX, tca[TCA_OPTIONS], drr_policy); + if (err < 0) + return err; + + if (tb[TCA_DRR_QUANTUM]) { + quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]); + if (quantum == 0) + return -EINVAL; + } else + quantum = psched_mtu(qdisc_dev(sch)); + + if (cl != NULL) { + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } + + sch_tree_lock(sch); + if (tb[TCA_DRR_QUANTUM]) + cl->quantum = quantum; + sch_tree_unlock(sch); + + return 0; + } + + cl = kzalloc(sizeof(struct drr_class), GFP_KERNEL); + if (cl == NULL) + return -ENOBUFS; + + cl->refcnt = 1; + cl->common.classid = classid; + cl->quantum = quantum; + cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); + if (cl->qdisc == NULL) + cl->qdisc = &noop_qdisc; + + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + qdisc_destroy(cl->qdisc); + kfree(cl); + return err; + } + } + + sch_tree_lock(sch); + qdisc_class_hash_insert(&q->clhash, &cl->common); + sch_tree_unlock(sch); + + qdisc_class_hash_grow(sch, &q->clhash); + + *arg = (unsigned long)cl; + return 0; +} + +static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) +{ + gen_kill_estimator(&cl->bstats, &cl->rate_est); + qdisc_destroy(cl->qdisc); + kfree(cl); +} + +static int drr_delete_class(struct Qdisc *sch, unsigned long arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl = (struct drr_class *)arg; + + if (cl->filter_cnt > 0) + return -EBUSY; + + sch_tree_lock(sch); + + drr_purge_queue(cl); + qdisc_class_hash_remove(&q->clhash, &cl->common); + + if (--cl->refcnt == 0) + drr_destroy_class(sch, cl); + + sch_tree_unlock(sch); + return 0; +} + +static unsigned long drr_get_class(struct Qdisc *sch, u32 classid) +{ + struct drr_class *cl = drr_find_class(sch, classid); + + if (cl != NULL) + cl->refcnt++; + + return (unsigned long)cl; +} + +static void drr_put_class(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (--cl->refcnt == 0) + drr_destroy_class(sch, cl); +} + +static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl) +{ + struct drr_sched *q = qdisc_priv(sch); + + if (cl) + return NULL; + + return &q->filter_list; +} + +static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + struct drr_class *cl = drr_find_class(sch, classid); + + if (cl != NULL) + cl->filter_cnt++; + + return (unsigned long)cl; +} + +static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + cl->filter_cnt--; +} + +static int drr_graft_class(struct Qdisc *sch, unsigned long arg, + struct Qdisc *new, struct Qdisc **old) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (new == NULL) { + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, cl->common.classid); + if (new == NULL) + new = &noop_qdisc; + } + + sch_tree_lock(sch); + drr_purge_queue(cl); + *old = cl->qdisc; + cl->qdisc = new; + sch_tree_unlock(sch); + return 0; +} + +static struct Qdisc *drr_class_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + return cl->qdisc; +} + +static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); +} + +static int drr_dump_class(struct Qdisc *sch, unsigned long arg, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct drr_class *cl = (struct drr_class *)arg; + struct nlattr *nest; + + tcm->tcm_parent = TC_H_ROOT; + tcm->tcm_handle = cl->common.classid; + tcm->tcm_info = cl->qdisc->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + NLA_PUT_U32(skb, TCA_DRR_QUANTUM, cl->quantum); + return nla_nest_end(skb, nest); + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, + struct gnet_dump *d) +{ + struct drr_class *cl = (struct drr_class *)arg; + struct tc_drr_stats xstats; + + memset(&xstats, 0, sizeof(xstats)); + if (cl->qdisc->q.qlen) + xstats.deficit = cl->deficit; + + if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || + gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0) + return -1; + + return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); +} + +static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n; + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, (unsigned long)cl, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } + } +} + +static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, + int *qerr) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct tcf_result res; + int result; + + if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { + cl = drr_find_class(sch, skb->priority); + if (cl != NULL) + return cl; + } + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return NULL; + } +#endif + cl = (struct drr_class *)res.class; + if (cl == NULL) + cl = drr_find_class(sch, res.classid); + return cl; + } + return NULL; +} + +static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + unsigned int len; + int err; + + cl = drr_classify(skb, sch, &err); + if (cl == NULL) { + if (err & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return err; + } + + len = qdisc_pkt_len(skb); + err = qdisc_enqueue(skb, cl->qdisc); + if (unlikely(err != NET_XMIT_SUCCESS)) { + if (net_xmit_drop_count(err)) { + cl->qstats.drops++; + sch->qstats.drops++; + } + return err; + } + + if (cl->qdisc->q.qlen == 1) { + list_add_tail(&cl->alist, &q->active); + cl->deficit = cl->quantum; + } + + cl->bstats.packets++; + cl->bstats.bytes += len; + sch->bstats.packets++; + sch->bstats.bytes += len; + + sch->q.qlen++; + return err; +} + +static struct sk_buff *drr_dequeue(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct sk_buff *skb; + unsigned int len; + + if (list_empty(&q->active)) + goto out; + while (1) { + cl = list_first_entry(&q->active, struct drr_class, alist); + skb = cl->qdisc->ops->peek(cl->qdisc); + if (skb == NULL) + goto out; + + len = qdisc_pkt_len(skb); + if (len <= cl->deficit) { + cl->deficit -= len; + skb = qdisc_dequeue_peeked(cl->qdisc); + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); + sch->q.qlen--; + return skb; + } + + cl->deficit += cl->quantum; + list_move_tail(&cl->alist, &q->active); + } +out: + return NULL; +} + +static unsigned int drr_drop(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + unsigned int len; + + list_for_each_entry(cl, &q->active, alist) { + if (cl->qdisc->ops->drop) { + len = cl->qdisc->ops->drop(cl->qdisc); + if (len > 0) { + sch->q.qlen--; + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); + return len; + } + } + } + return 0; +} + +static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt) +{ + struct drr_sched *q = qdisc_priv(sch); + int err; + + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; + INIT_LIST_HEAD(&q->active); + return 0; +} + +static void drr_reset_qdisc(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n; + unsigned int i; + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + if (cl->qdisc->q.qlen) + list_del(&cl->alist); + qdisc_reset(cl->qdisc); + } + } + sch->q.qlen = 0; +} + +static void drr_destroy_qdisc(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n, *next; + unsigned int i; + + tcf_destroy_chain(&q->filter_list); + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + common.hnode) + drr_destroy_class(sch, cl); + } + qdisc_class_hash_destroy(&q->clhash); +} + +static const struct Qdisc_class_ops drr_class_ops = { + .change = drr_change_class, + .delete = drr_delete_class, + .get = drr_get_class, + .put = drr_put_class, + .tcf_chain = drr_tcf_chain, + .bind_tcf = drr_bind_tcf, + .unbind_tcf = drr_unbind_tcf, + .graft = drr_graft_class, + .leaf = drr_class_leaf, + .qlen_notify = drr_qlen_notify, + .dump = drr_dump_class, + .dump_stats = drr_dump_class_stats, + .walk = drr_walk, +}; + +static struct Qdisc_ops drr_qdisc_ops __read_mostly = { + .cl_ops = &drr_class_ops, + .id = "drr", + .priv_size = sizeof(struct drr_sched), + .enqueue = drr_enqueue, + .dequeue = drr_dequeue, + .peek = qdisc_peek_dequeued, + .drop = drr_drop, + .init = drr_init_qdisc, + .reset = drr_reset_qdisc, + .destroy = drr_destroy_qdisc, + .owner = THIS_MODULE, +}; + +static int __init drr_init(void) +{ + return register_qdisc(&drr_qdisc_ops); +} + +static void __exit drr_exit(void) +{ + unregister_qdisc(&drr_qdisc_ops); +} + +module_init(drr_init); +module_exit(drr_exit); +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index ba43aab3a85..d303daa45d4 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -68,7 +68,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, } sch_tree_lock(sch); - *old = xchg(&p->q, new); + *old = p->q; + p->q = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -313,24 +314,13 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) return skb; } -static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch) +static struct sk_buff *dsmark_peek(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - - err = p->q->ops->requeue(skb, p->q); - if (err != NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(err)) - sch->qstats.drops++; - return err; - } - sch->q.qlen++; - sch->qstats.requeues++; + pr_debug("dsmark_peek(sch %p,[qdisc %p])\n", sch, p); - return NET_XMIT_SUCCESS; + return p->q->ops->peek(p->q); } static unsigned int dsmark_drop(struct Qdisc *sch) @@ -496,7 +486,7 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = { .priv_size = sizeof(struct dsmark_qdisc_data), .enqueue = dsmark_enqueue, .dequeue = dsmark_dequeue, - .requeue = dsmark_requeue, + .peek = dsmark_peek, .drop = dsmark_drop, .init = dsmark_init, .reset = dsmark_reset, diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 23d258bfe8a..92cfc9d7e3b 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -83,7 +83,7 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .priv_size = sizeof(struct fifo_sched_data), .enqueue = pfifo_enqueue, .dequeue = qdisc_dequeue_head, - .requeue = qdisc_requeue, + .peek = qdisc_peek_head, .drop = qdisc_queue_drop, .init = fifo_init, .reset = qdisc_reset_queue, @@ -98,7 +98,7 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .priv_size = sizeof(struct fifo_sched_data), .enqueue = bfifo_enqueue, .dequeue = qdisc_dequeue_head, - .requeue = qdisc_requeue, + .peek = qdisc_peek_head, .drop = qdisc_queue_drop, .init = fifo_init, .reset = qdisc_reset_queue, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index cdcd16fcfed..5f5efe4e607 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -224,7 +224,7 @@ static void dev_watchdog(unsigned long arg) char drivername[64]; WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", dev->name, netdev_drivername(dev, drivername, 64)); - dev->tx_timeout(dev); + dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + @@ -239,7 +239,7 @@ static void dev_watchdog(unsigned long arg) void __netdev_watchdog_up(struct net_device *dev) { - if (dev->tx_timeout) { + if (dev->netdev_ops->ndo_tx_timeout) { if (dev->watchdog_timeo <= 0) dev->watchdog_timeo = 5*HZ; if (!mod_timer(&dev->watchdog_timer, @@ -311,21 +311,12 @@ static struct sk_buff *noop_dequeue(struct Qdisc * qdisc) return NULL; } -static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc) -{ - if (net_ratelimit()) - printk(KERN_DEBUG "%s deferred output. It is buggy.\n", - skb->dev->name); - kfree_skb(skb); - return NET_XMIT_CN; -} - struct Qdisc_ops noop_qdisc_ops __read_mostly = { .id = "noop", .priv_size = 0, .enqueue = noop_enqueue, .dequeue = noop_dequeue, - .requeue = noop_requeue, + .peek = noop_dequeue, .owner = THIS_MODULE, }; @@ -340,7 +331,6 @@ struct Qdisc noop_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), - .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, }; @@ -351,7 +341,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .priv_size = 0, .enqueue = noop_enqueue, .dequeue = noop_dequeue, - .requeue = noop_requeue, + .peek = noop_dequeue, .owner = THIS_MODULE, }; @@ -367,7 +357,6 @@ static struct Qdisc noqueue_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), - .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .dev_queue = &noqueue_netdev_queue, }; @@ -416,10 +405,17 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) return NULL; } -static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) +static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) { - qdisc->q.qlen++; - return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc)); + int prio; + struct sk_buff_head *list = qdisc_priv(qdisc); + + for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { + if (!skb_queue_empty(list + prio)) + return skb_peek(list + prio); + } + + return NULL; } static void pfifo_fast_reset(struct Qdisc* qdisc) @@ -462,7 +458,7 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = { .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), .enqueue = pfifo_fast_enqueue, .dequeue = pfifo_fast_dequeue, - .requeue = pfifo_fast_requeue, + .peek = pfifo_fast_peek, .init = pfifo_fast_init, .reset = pfifo_fast_reset, .dump = pfifo_fast_dump, @@ -488,7 +484,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->padded = (char *) sch - (char *) p; INIT_LIST_HEAD(&sch->list); - skb_queue_head_init(&sch->requeue); skb_queue_head_init(&sch->q); sch->ops = ops; sch->enqueue = ops->enqueue; @@ -531,6 +526,9 @@ void qdisc_reset(struct Qdisc *qdisc) if (ops->reset) ops->reset(qdisc); + + kfree_skb(qdisc->gso_skb); + qdisc->gso_skb = NULL; } EXPORT_SYMBOL(qdisc_reset); @@ -557,8 +555,6 @@ void qdisc_destroy(struct Qdisc *qdisc) dev_put(qdisc_dev(qdisc)); kfree_skb(qdisc->gso_skb); - __skb_queue_purge(&qdisc->requeue); - kfree((char *) qdisc - qdisc->padded); } EXPORT_SYMBOL(qdisc_destroy); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index c1ad6b8de10..40408d595c0 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -240,26 +240,6 @@ congestion_drop: return NET_XMIT_CN; } -static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct gred_sched *t = qdisc_priv(sch); - struct gred_sched_data *q; - u16 dp = tc_index_to_dp(skb); - - if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { - if (net_ratelimit()) - printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x " - "for requeue, screwing up backlog.\n", - tc_index_to_dp(skb)); - } else { - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); - q->backlog += qdisc_pkt_len(skb); - } - - return qdisc_requeue(skb, sch); -} - static struct sk_buff *gred_dequeue(struct Qdisc* sch) { struct sk_buff *skb; @@ -602,7 +582,7 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = { .priv_size = sizeof(struct gred_sched), .enqueue = gred_enqueue, .dequeue = gred_dequeue, - .requeue = gred_requeue, + .peek = qdisc_peek_head, .drop = gred_drop, .init = gred_init, .reset = gred_reset, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index c1e77da8cd0..45c31b1a4e1 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -184,7 +184,6 @@ struct hfsc_sched struct rb_root eligible; /* eligible tree */ struct list_head droplist; /* active leaf class list (for dropping) */ - struct sk_buff_head requeue; /* requeued packet */ struct qdisc_watchdog watchdog; /* watchdog timer */ }; @@ -880,28 +879,20 @@ set_passive(struct hfsc_class *cl) */ } -/* - * hack to get length of first packet in queue. - */ static unsigned int qdisc_peek_len(struct Qdisc *sch) { struct sk_buff *skb; unsigned int len; - skb = sch->dequeue(sch); + skb = sch->ops->peek(sch); if (skb == NULL) { if (net_ratelimit()) printk("qdisc_peek_len: non work-conserving qdisc ?\n"); return 0; } len = qdisc_pkt_len(skb); - if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) { - if (net_ratelimit()) - printk("qdisc_peek_len: failed to requeue\n"); - qdisc_tree_decrease_qlen(sch, 1); - return 0; - } + return len; } @@ -1027,6 +1018,14 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } cur_time = psched_get_time(); + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } + sch_tree_lock(sch); if (rsc != NULL) hfsc_change_rsc(cl, rsc, cur_time); @@ -1043,10 +1042,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } sch_tree_unlock(sch); - if (tca[TCA_RATE]) - gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); return 0; } @@ -1072,6 +1067,16 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; + if (tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) { + kfree(cl); + return err; + } + } + if (rsc != NULL) hfsc_change_rsc(cl, rsc, 0); if (fsc != NULL) @@ -1102,9 +1107,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, qdisc_class_hash_grow(sch, &q->clhash); - if (tca[TCA_RATE]) - gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; } @@ -1211,7 +1213,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); hfsc_purge_queue(sch, cl); - *old = xchg(&cl->qdisc, new); + *old = cl->qdisc; + cl->qdisc = new; sch_tree_unlock(sch); return 0; } @@ -1440,7 +1443,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) return err; q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); - skb_queue_head_init(&q->requeue); q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; @@ -1525,7 +1527,6 @@ hfsc_reset_qdisc(struct Qdisc *sch) hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) hfsc_reset_class(cl); } - __skb_queue_purge(&q->requeue); q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); qdisc_watchdog_cancel(&q->watchdog); @@ -1550,7 +1551,6 @@ hfsc_destroy_qdisc(struct Qdisc *sch) hfsc_destroy_class(sch, cl); } qdisc_class_hash_destroy(&q->clhash); - __skb_queue_purge(&q->requeue); qdisc_watchdog_cancel(&q->watchdog); } @@ -1574,7 +1574,7 @@ static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct hfsc_class *cl; - int err; + int uninitialized_var(err); cl = hfsc_classify(skb, sch, &err); if (cl == NULL) { @@ -1617,8 +1617,6 @@ hfsc_dequeue(struct Qdisc *sch) if (sch->q.qlen == 0) return NULL; - if ((skb = __skb_dequeue(&q->requeue))) - goto out; cur_time = psched_get_time(); @@ -1642,7 +1640,7 @@ hfsc_dequeue(struct Qdisc *sch) } } - skb = cl->qdisc->dequeue(cl->qdisc); + skb = qdisc_dequeue_peeked(cl->qdisc); if (skb == NULL) { if (net_ratelimit()) printk("HFSC: Non-work-conserving qdisc ?\n"); @@ -1667,24 +1665,12 @@ hfsc_dequeue(struct Qdisc *sch) set_passive(cl); } - out: sch->flags &= ~TCQ_F_THROTTLED; sch->q.qlen--; return skb; } -static int -hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct hfsc_sched *q = qdisc_priv(sch); - - __skb_queue_head(&q->requeue, skb); - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; -} - static unsigned int hfsc_drop(struct Qdisc *sch) { @@ -1735,7 +1721,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = { .dump = hfsc_dump_qdisc, .enqueue = hfsc_enqueue, .dequeue = hfsc_dequeue, - .requeue = hfsc_requeue, + .peek = qdisc_peek_dequeued, .drop = hfsc_drop, .cl_ops = &hfsc_class_ops, .priv_size = sizeof(struct hfsc_sched), diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d14f02056ae..8a451998973 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -551,7 +551,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - int ret; + int uninitialized_var(ret); struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_classify(skb, sch, &ret); @@ -591,47 +591,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } -/* TODO: requeuing packet charges it to policers again !! */ -static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - int ret; - struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = htb_classify(skb, sch, &ret); - struct sk_buff *tskb; - - if (cl == HTB_DIRECT) { - /* enqueue to helper queue */ - if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_head(&q->direct_queue, skb); - } else { - __skb_queue_head(&q->direct_queue, skb); - tskb = __skb_dequeue_tail(&q->direct_queue); - kfree_skb(tskb); - sch->qstats.drops++; - return NET_XMIT_CN; - } -#ifdef CONFIG_NET_CLS_ACT - } else if (!cl) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; -#endif - } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) != - NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - cl->qstats.drops++; - } - return ret; - } else - htb_activate(q, cl); - - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; -} - /** * htb_charge_class - charges amount "bytes" to leaf and ancestors * @@ -1141,7 +1100,9 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, == NULL) return -ENOBUFS; sch_tree_lock(sch); - if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { + *old = cl->un.leaf.q; + cl->un.leaf.q = new; + if (*old != NULL) { qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); } @@ -1371,9 +1332,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) goto failure; - gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE] ? : &est.nla); + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE] ? : &est.nla); + if (err) { + kfree(cl); + goto failure; + } + cl->refcnt = 1; cl->children = 0; INIT_LIST_HEAD(&cl->un.leaf.drop_list); @@ -1425,10 +1391,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (parent) parent->children++; } else { - if (tca[TCA_RATE]) - gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); + if (tca[TCA_RATE]) { + err = gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + if (err) + return err; + } sch_tree_lock(sch); } @@ -1565,7 +1534,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .priv_size = sizeof(struct htb_sched), .enqueue = htb_enqueue, .dequeue = htb_dequeue, - .requeue = htb_requeue, + .peek = qdisc_peek_dequeued, .drop = htb_drop, .init = htb_init, .reset = htb_reset, diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 915f3149dde..7e151861794 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -92,40 +92,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } - -static int -multiq_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct Qdisc *qdisc; - struct multiq_sched_data *q = qdisc_priv(sch); - int ret; - - qdisc = multiq_classify(skb, sch, &ret); -#ifdef CONFIG_NET_CLS_ACT - if (qdisc == NULL) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } -#endif - - ret = qdisc->ops->requeue(skb, qdisc); - if (ret == NET_XMIT_SUCCESS) { - sch->q.qlen++; - sch->qstats.requeues++; - if (q->curband) - q->curband--; - else - q->curband = q->bands - 1; - return NET_XMIT_SUCCESS; - } - if (net_xmit_drop_count(ret)) - sch->qstats.drops++; - return ret; -} - - static struct sk_buff *multiq_dequeue(struct Qdisc *sch) { struct multiq_sched_data *q = qdisc_priv(sch); @@ -140,7 +106,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch) q->curband = 0; /* Check that target subqueue is available before - * pulling an skb to avoid excessive requeues + * pulling an skb to avoid head-of-line blocking. */ if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) { qdisc = q->queues[q->curband]; @@ -155,6 +121,34 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch) } +static struct sk_buff *multiq_peek(struct Qdisc *sch) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned int curband = q->curband; + struct Qdisc *qdisc; + struct sk_buff *skb; + int band; + + for (band = 0; band < q->bands; band++) { + /* cycle through bands to ensure fairness */ + curband++; + if (curband >= q->bands) + curband = 0; + + /* Check that target subqueue is available before + * pulling an skb to avoid head-of-line blocking. + */ + if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) { + qdisc = q->queues[curband]; + skb = qdisc->ops->peek(qdisc); + if (skb) + return skb; + } + } + return NULL; + +} + static unsigned int multiq_drop(struct Qdisc *sch) { struct multiq_sched_data *q = qdisc_priv(sch); @@ -220,7 +214,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) q->bands = qopt->bands; for (i = q->bands; i < q->max_bands; i++) { if (q->queues[i] != &noop_qdisc) { - struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); + struct Qdisc *child = q->queues[i]; + q->queues[i] = &noop_qdisc; qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); } @@ -230,7 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < q->bands; i++) { if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child; + struct Qdisc *child, *old; child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, @@ -238,12 +233,13 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) i + 1)); if (child) { sch_tree_lock(sch); - child = xchg(&q->queues[i], child); + old = q->queues[i]; + q->queues[i] = child; - if (child != &noop_qdisc) { - qdisc_tree_decrease_qlen(child, - child->q.qlen); - qdisc_destroy(child); + if (old != &noop_qdisc) { + qdisc_tree_decrease_qlen(old, + old->q.qlen); + qdisc_destroy(old); } sch_tree_unlock(sch); } @@ -451,7 +447,7 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = { .priv_size = sizeof(struct multiq_sched_data), .enqueue = multiq_enqueue, .dequeue = multiq_dequeue, - .requeue = multiq_requeue, + .peek = multiq_peek, .drop = multiq_drop, .init = multiq_init, .reset = multiq_reset, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a11959908d9..f840d6b27c6 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -233,7 +233,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) */ cb->time_to_send = psched_get_time(); q->counter = 0; - ret = q->qdisc->ops->requeue(skb, q->qdisc); + + __skb_queue_head(&q->qdisc->q, skb); + q->qdisc->qstats.backlog += qdisc_pkt_len(skb); + q->qdisc->qstats.requeues++; + ret = NET_XMIT_SUCCESS; } if (likely(ret == NET_XMIT_SUCCESS)) { @@ -248,20 +252,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -/* Requeue packets but don't change time stamp */ -static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct netem_sched_data *q = qdisc_priv(sch); - int ret; - - if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - } - - return ret; -} - static unsigned int netem_drop(struct Qdisc* sch) { struct netem_sched_data *q = qdisc_priv(sch); @@ -283,25 +273,22 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (sch->flags & TCQ_F_THROTTLED) return NULL; - skb = q->qdisc->dequeue(q->qdisc); + skb = q->qdisc->ops->peek(q->qdisc); if (skb) { const struct netem_skb_cb *cb = netem_skb_cb(skb); psched_time_t now = psched_get_time(); /* if more time remaining? */ if (cb->time_to_send <= now) { + skb = qdisc_dequeue_peeked(q->qdisc); + if (unlikely(!skb)) + return NULL; + pr_debug("netem_dequeue: return skb=%p\n", skb); sch->q.qlen--; return skb; } - if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) { - qdisc_tree_decrease_qlen(q->qdisc, 1); - sch->qstats.drops++; - printk(KERN_ERR "netem: %s could not requeue\n", - q->qdisc->ops->id); - } - qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); } @@ -344,14 +331,13 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) root_lock = qdisc_root_sleeping_lock(sch); spin_lock_bh(root_lock); - d = xchg(&q->delay_dist, d); + kfree(q->delay_dist); + q->delay_dist = d; spin_unlock_bh(root_lock); - - kfree(d); return 0; } -static int get_correlation(struct Qdisc *sch, const struct nlattr *attr) +static void get_correlation(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); const struct tc_netem_corr *c = nla_data(attr); @@ -359,27 +345,24 @@ static int get_correlation(struct Qdisc *sch, const struct nlattr *attr) init_crandom(&q->delay_cor, c->delay_corr); init_crandom(&q->loss_cor, c->loss_corr); init_crandom(&q->dup_cor, c->dup_corr); - return 0; } -static int get_reorder(struct Qdisc *sch, const struct nlattr *attr) +static void get_reorder(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); const struct tc_netem_reorder *r = nla_data(attr); q->reorder = r->probability; init_crandom(&q->reorder_cor, r->correlation); - return 0; } -static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr) +static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); const struct tc_netem_corrupt *r = nla_data(attr); q->corrupt = r->probability; init_crandom(&q->corrupt_cor, r->correlation); - return 0; } static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { @@ -438,11 +421,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (q->gap) q->reorder = ~0; - if (tb[TCA_NETEM_CORR]) { - ret = get_correlation(sch, tb[TCA_NETEM_CORR]); - if (ret) - return ret; - } + if (tb[TCA_NETEM_CORR]) + get_correlation(sch, tb[TCA_NETEM_CORR]); if (tb[TCA_NETEM_DELAY_DIST]) { ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]); @@ -450,17 +430,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) return ret; } - if (tb[TCA_NETEM_REORDER]) { - ret = get_reorder(sch, tb[TCA_NETEM_REORDER]); - if (ret) - return ret; - } + if (tb[TCA_NETEM_REORDER]) + get_reorder(sch, tb[TCA_NETEM_REORDER]); - if (tb[TCA_NETEM_CORRUPT]) { - ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); - if (ret) - return ret; - } + if (tb[TCA_NETEM_CORRUPT]) + get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); return 0; } @@ -541,7 +515,7 @@ static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = { .priv_size = sizeof(struct fifo_sched_data), .enqueue = tfifo_enqueue, .dequeue = qdisc_dequeue_head, - .requeue = qdisc_requeue, + .peek = qdisc_peek_head, .drop = qdisc_queue_drop, .init = tfifo_init, .reset = qdisc_reset_queue, @@ -624,99 +598,12 @@ nla_put_failure: return -1; } -static int netem_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct netem_sched_data *q = qdisc_priv(sch); - - if (cl != 1) /* only one class */ - return -ENOENT; - - tcm->tcm_handle |= TC_H_MIN(1); - tcm->tcm_info = q->qdisc->handle; - - return 0; -} - -static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) -{ - struct netem_sched_data *q = qdisc_priv(sch); - - if (new == NULL) - new = &noop_qdisc; - - sch_tree_lock(sch); - *old = xchg(&q->qdisc, new); - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - - return 0; -} - -static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) -{ - struct netem_sched_data *q = qdisc_priv(sch); - return q->qdisc; -} - -static unsigned long netem_get(struct Qdisc *sch, u32 classid) -{ - return 1; -} - -static void netem_put(struct Qdisc *sch, unsigned long arg) -{ -} - -static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) -{ - return -ENOSYS; -} - -static int netem_delete(struct Qdisc *sch, unsigned long arg) -{ - return -ENOSYS; -} - -static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) -{ - if (!walker->stop) { - if (walker->count >= walker->skip) - if (walker->fn(sch, 1, walker) < 0) { - walker->stop = 1; - return; - } - walker->count++; - } -} - -static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl) -{ - return NULL; -} - -static const struct Qdisc_class_ops netem_class_ops = { - .graft = netem_graft, - .leaf = netem_leaf, - .get = netem_get, - .put = netem_put, - .change = netem_change_class, - .delete = netem_delete, - .walk = netem_walk, - .tcf_chain = netem_find_tcf, - .dump = netem_dump_class, -}; - static struct Qdisc_ops netem_qdisc_ops __read_mostly = { .id = "netem", - .cl_ops = &netem_class_ops, .priv_size = sizeof(struct netem_sched_data), .enqueue = netem_enqueue, .dequeue = netem_dequeue, - .requeue = netem_requeue, + .peek = qdisc_peek_dequeued, .drop = netem_drop, .init = netem_init, .reset = netem_reset, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 504a78cdb71..94cecef7014 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -93,34 +93,20 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } - -static int -prio_requeue(struct sk_buff *skb, struct Qdisc* sch) +static struct sk_buff *prio_peek(struct Qdisc *sch) { - struct Qdisc *qdisc; - int ret; - - qdisc = prio_classify(skb, sch, &ret); -#ifdef CONFIG_NET_CLS_ACT - if (qdisc == NULL) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } -#endif + struct prio_sched_data *q = qdisc_priv(sch); + int prio; - if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) { - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; + for (prio = 0; prio < q->bands; prio++) { + struct Qdisc *qdisc = q->queues[prio]; + struct sk_buff *skb = qdisc->ops->peek(qdisc); + if (skb) + return skb; } - if (net_xmit_drop_count(ret)) - sch->qstats.drops++; - return ret; + return NULL; } - static struct sk_buff *prio_dequeue(struct Qdisc* sch) { struct prio_sched_data *q = qdisc_priv(sch); @@ -201,7 +187,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { - struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); + struct Qdisc *child = q->queues[i]; + q->queues[i] = &noop_qdisc; if (child != &noop_qdisc) { qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); @@ -211,18 +198,19 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) for (i=0; i<q->bands; i++) { if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child; + struct Qdisc *child, *old; child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); - child = xchg(&q->queues[i], child); + old = q->queues[i]; + q->queues[i] = child; - if (child != &noop_qdisc) { - qdisc_tree_decrease_qlen(child, - child->q.qlen); - qdisc_destroy(child); + if (old != &noop_qdisc) { + qdisc_tree_decrease_qlen(old, + old->q.qlen); + qdisc_destroy(old); } sch_tree_unlock(sch); } @@ -421,7 +409,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = { .priv_size = sizeof(struct prio_sched_data), .enqueue = prio_enqueue, .dequeue = prio_dequeue, - .requeue = prio_requeue, + .peek = prio_peek, .drop = prio_drop, .init = prio_init, .reset = prio_reset, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 5da05839e22..2bdf241f631 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -108,23 +108,6 @@ congestion_drop: return NET_XMIT_CN; } -static int red_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct red_sched_data *q = qdisc_priv(sch); - struct Qdisc *child = q->qdisc; - int ret; - - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); - - ret = child->ops->requeue(skb, child); - if (likely(ret == NET_XMIT_SUCCESS)) { - sch->qstats.requeues++; - sch->q.qlen++; - } - return ret; -} - static struct sk_buff * red_dequeue(struct Qdisc* sch) { struct sk_buff *skb; @@ -140,6 +123,14 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch) return skb; } +static struct sk_buff * red_peek(struct Qdisc* sch) +{ + struct red_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + + return child->ops->peek(child); +} + static unsigned int red_drop(struct Qdisc* sch) { struct red_sched_data *q = qdisc_priv(sch); @@ -211,7 +202,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) q->limit = ctl->limit; if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); - qdisc_destroy(xchg(&q->qdisc, child)); + qdisc_destroy(q->qdisc); + q->qdisc = child; } red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, @@ -292,7 +284,8 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; sch_tree_lock(sch); - *old = xchg(&q->qdisc, new); + *old = q->qdisc; + q->qdisc = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -361,7 +354,7 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = { .cl_ops = &red_class_ops, .enqueue = red_enqueue, .dequeue = red_dequeue, - .requeue = red_requeue, + .peek = red_peek, .drop = red_drop, .init = red_init, .reset = red_reset, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index fe1508ef0d3..ab8cfee3c9c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -329,71 +329,20 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_CN; } -static int -sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) +static struct sk_buff * +sfq_peek(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - unsigned int hash; - sfq_index x; - int ret; - - hash = sfq_classify(skb, sch, &ret); - if (hash == 0) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } - hash--; - - x = q->ht[hash]; - if (x == SFQ_DEPTH) { - q->ht[hash] = x = q->dep[SFQ_DEPTH].next; - q->hash[x] = hash; - } - - sch->qstats.backlog += qdisc_pkt_len(skb); - __skb_queue_head(&q->qs[x], skb); - /* If selected queue has length q->limit+1, this means that - * all another queues are empty and we do simple tail drop. - * This packet is still requeued at head of queue, tail packet - * is dropped. - */ - if (q->qs[x].qlen > q->limit) { - skb = q->qs[x].prev; - __skb_unlink(skb, &q->qs[x]); - sch->qstats.drops++; - sch->qstats.backlog -= qdisc_pkt_len(skb); - kfree_skb(skb); - return NET_XMIT_CN; - } - - sfq_inc(q, x); - if (q->qs[x].qlen == 1) { /* The flow is new */ - if (q->tail == SFQ_DEPTH) { /* It is the first flow */ - q->tail = x; - q->next[x] = x; - q->allot[x] = q->quantum; - } else { - q->next[x] = q->next[q->tail]; - q->next[q->tail] = x; - q->tail = x; - } - } + sfq_index a; - if (++sch->q.qlen <= q->limit) { - sch->qstats.requeues++; - return 0; - } + /* No active slots */ + if (q->tail == SFQ_DEPTH) + return NULL; - sch->qstats.drops++; - sfq_drop(sch); - return NET_XMIT_CN; + a = q->next[q->tail]; + return skb_peek(&q->qs[a]); } - - - static struct sk_buff * sfq_dequeue(struct Qdisc *sch) { @@ -624,7 +573,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = { .priv_size = sizeof(struct sfq_sched_data), .enqueue = sfq_enqueue, .dequeue = sfq_dequeue, - .requeue = sfq_requeue, + .peek = sfq_peek, .drop = sfq_drop, .init = sfq_init, .reset = sfq_reset, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 94c61598b86..a2f93c09f3c 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -139,19 +139,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) return 0; } -static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct tbf_sched_data *q = qdisc_priv(sch); - int ret; - - if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - } - - return ret; -} - static unsigned int tbf_drop(struct Qdisc* sch) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -169,7 +156,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - skb = q->qdisc->dequeue(q->qdisc); + skb = q->qdisc->ops->peek(q->qdisc); if (skb) { psched_time_t now; @@ -192,6 +179,10 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) toks -= L2T(q, len); if ((toks|ptoks) >= 0) { + skb = qdisc_dequeue_peeked(q->qdisc); + if (unlikely(!skb)) + return NULL; + q->t_c = now; q->tokens = toks; q->ptokens = ptoks; @@ -214,12 +205,6 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) (cf. CSZ, HPFQ, HFSC) */ - if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) { - /* When requeue fails skb is dropped */ - qdisc_tree_decrease_qlen(q->qdisc, 1); - sch->qstats.drops++; - } - sch->qstats.overlimits++; } return NULL; @@ -251,6 +236,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) struct tc_tbf_qopt *qopt; struct qdisc_rate_table *rtab = NULL; struct qdisc_rate_table *ptab = NULL; + struct qdisc_rate_table *tmp; struct Qdisc *child = NULL; int max_size,n; @@ -299,7 +285,8 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) sch_tree_lock(sch); if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); - qdisc_destroy(xchg(&q->qdisc, child)); + qdisc_destroy(q->qdisc); + q->qdisc = child; } q->limit = qopt->limit; q->mtu = qopt->mtu; @@ -307,8 +294,14 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) q->buffer = qopt->buffer; q->tokens = q->buffer; q->ptokens = q->mtu; - rtab = xchg(&q->R_tab, rtab); - ptab = xchg(&q->P_tab, ptab); + + tmp = q->R_tab; + q->R_tab = rtab; + rtab = tmp; + + tmp = q->P_tab; + q->P_tab = ptab; + ptab = tmp; sch_tree_unlock(sch); err = 0; done: @@ -398,7 +391,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; sch_tree_lock(sch); - *old = xchg(&q->qdisc, new); + *old = q->qdisc; + q->qdisc = new; qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -469,7 +463,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = { .priv_size = sizeof(struct tbf_sched_data), .enqueue = tbf_enqueue, .dequeue = tbf_dequeue, - .requeue = tbf_requeue, + .peek = qdisc_peek_dequeued, .drop = tbf_drop, .init = tbf_init, .reset = tbf_reset, diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index d35ef059abb..cfc8e7caba6 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -93,16 +93,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) return NET_XMIT_DROP; } -static int -teql_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct teql_sched_data *q = qdisc_priv(sch); - - __skb_queue_head(&q->q, skb); - sch->qstats.requeues++; - return 0; -} - static struct sk_buff * teql_dequeue(struct Qdisc* sch) { @@ -123,6 +113,13 @@ teql_dequeue(struct Qdisc* sch) return skb; } +static struct sk_buff * +teql_peek(struct Qdisc* sch) +{ + /* teql is meant to be used as root qdisc */ + return NULL; +} + static __inline__ void teql_neigh_release(struct neighbour *n) { @@ -433,7 +430,7 @@ static __init void teql_master_setup(struct net_device *dev) ops->enqueue = teql_enqueue; ops->dequeue = teql_dequeue; - ops->requeue = teql_requeue; + ops->peek = teql_peek; ops->init = teql_qdisc_init; ops->reset = teql_reset; ops->destroy = teql_destroy; |