aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/Kconfig10
-rw-r--r--drivers/net/ixgbe/Makefile2
-rw-r--r--drivers/net/ixgbe/ixgbe.h25
-rw-r--r--drivers/net/ixgbe/ixgbe_dcb.c332
-rw-r--r--drivers/net/ixgbe/ixgbe_dcb.h157
-rw-r--r--drivers/net/ixgbe/ixgbe_dcb_82598.c398
-rw-r--r--drivers/net/ixgbe/ixgbe_dcb_82598.h94
-rw-r--r--drivers/net/ixgbe/ixgbe_dcb_nl.c356
-rw-r--r--drivers/net/ixgbe/ixgbe_ethtool.c30
-rw-r--r--drivers/net/ixgbe/ixgbe_main.c200
-rw-r--r--include/linux/dcbnl.h230
-rw-r--r--include/linux/netdevice.h8
-rw-r--r--include/linux/rtnetlink.h5
-rw-r--r--include/net/dcbnl.h44
-rw-r--r--net/Kconfig1
-rw-r--r--net/Makefile3
-rw-r--r--net/dcb/Kconfig12
-rw-r--r--net/dcb/Makefile1
-rw-r--r--net/dcb/dcbnl.c704
19 files changed, 2593 insertions, 19 deletions
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index afa206590ad..efd461d7c2b 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2451,6 +2451,16 @@ config IXGBE_DCA
driver. DCA is a method for warming the CPU cache before data
is used, with the intent of lessening the impact of cache misses.
+config IXGBE_DCBNL
+ bool "Data Center Bridging (DCB) Support"
+ default n
+ depends on IXGBE && DCBNL
+ ---help---
+ Say Y here if you want to use Data Center Bridging (DCB) in the
+ driver.
+
+ If unsure, say N.
+
config IXGB
tristate "Intel(R) PRO/10GbE support"
depends on PCI
diff --git a/drivers/net/ixgbe/Makefile b/drivers/net/ixgbe/Makefile
index ccd83d9f579..3228e508e62 100644
--- a/drivers/net/ixgbe/Makefile
+++ b/drivers/net/ixgbe/Makefile
@@ -34,3 +34,5 @@ obj-$(CONFIG_IXGBE) += ixgbe.o
ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \
ixgbe_82598.o ixgbe_phy.o
+
+ixgbe-$(CONFIG_IXGBE_DCBNL) += ixgbe_dcb.o ixgbe_dcb_82598.o ixgbe_dcb_nl.o
diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index 132854f646b..796f189f387 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -35,7 +35,7 @@
#include "ixgbe_type.h"
#include "ixgbe_common.h"
-
+#include "ixgbe_dcb.h"
#ifdef CONFIG_IXGBE_DCA
#include <linux/dca.h>
#endif
@@ -84,6 +84,7 @@
#define IXGBE_TX_FLAGS_TSO (u32)(1 << 2)
#define IXGBE_TX_FLAGS_IPV4 (u32)(1 << 3)
#define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000
+#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000
#define IXGBE_TX_FLAGS_VLAN_SHIFT 16
#define IXGBE_MAX_LRO_DESCRIPTORS 8
@@ -134,7 +135,7 @@ struct ixgbe_ring {
u16 reg_idx; /* holds the special value that gets the hardware register
* offset associated with this ring, which is different
- * for DCE and RSS modes */
+ * for DCB and RSS modes */
#ifdef CONFIG_IXGBE_DCA
/* cpu for tx queue */
@@ -152,8 +153,10 @@ struct ixgbe_ring {
u16 rx_buf_len;
};
+#define RING_F_DCB 0
#define RING_F_VMDQ 1
#define RING_F_RSS 2
+#define IXGBE_MAX_DCB_INDICES 8
#define IXGBE_MAX_RSS_INDICES 16
#define IXGBE_MAX_VMDQ_INDICES 16
struct ixgbe_ring_feature {
@@ -164,6 +167,10 @@ struct ixgbe_ring_feature {
#define MAX_RX_QUEUES 64
#define MAX_TX_QUEUES 32
+#define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \
+ ? 8 : 1)
+#define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS
+
/* MAX_MSIX_Q_VECTORS of these are allocated,
* but we only use one per queue-specific vector.
*/
@@ -215,6 +222,9 @@ struct ixgbe_adapter {
struct work_struct reset_task;
struct ixgbe_q_vector q_vector[MAX_MSIX_Q_VECTORS];
char name[MAX_MSIX_COUNT][IFNAMSIZ + 5];
+ struct ixgbe_dcb_config dcb_cfg;
+ struct ixgbe_dcb_config temp_dcb_cfg;
+ u8 dcb_set_bitmap;
/* Interrupt Throttle Rate */
u32 itr_setting;
@@ -270,6 +280,7 @@ struct ixgbe_adapter {
#define IXGBE_FLAG_FAN_FAIL_CAPABLE (u32)(1 << 20)
#define IXGBE_FLAG_NEED_LINK_UPDATE (u32)(1 << 22)
#define IXGBE_FLAG_IN_WATCHDOG_TASK (u32)(1 << 23)
+#define IXGBE_FLAG_DCB_ENABLED (u32)(1 << 24)
/* default to trying for four seconds */
#define IXGBE_TRY_LINK_TIMEOUT (4 * HZ)
@@ -313,6 +324,12 @@ enum ixgbe_boards {
};
extern struct ixgbe_info ixgbe_82598_info;
+#ifdef CONFIG_IXGBE_DCBNL
+extern struct dcbnl_rtnl_ops dcbnl_ops;
+extern int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg,
+ struct ixgbe_dcb_config *dst_dcb_cfg,
+ int tc_max);
+#endif
extern char ixgbe_driver_name[];
extern const char ixgbe_driver_version[];
@@ -327,5 +344,9 @@ extern int ixgbe_setup_tx_resources(struct ixgbe_adapter *, struct ixgbe_ring *)
extern void ixgbe_free_rx_resources(struct ixgbe_adapter *, struct ixgbe_ring *);
extern void ixgbe_free_tx_resources(struct ixgbe_adapter *, struct ixgbe_ring *);
extern void ixgbe_update_stats(struct ixgbe_adapter *adapter);
+extern void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter);
+extern int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter);
+void ixgbe_napi_add_all(struct ixgbe_adapter *adapter);
+void ixgbe_napi_del_all(struct ixgbe_adapter *adapter);
#endif /* _IXGBE_H_ */
diff --git a/drivers/net/ixgbe/ixgbe_dcb.c b/drivers/net/ixgbe/ixgbe_dcb.c
new file mode 100644
index 00000000000..e2e28ac63de
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb.c
@@ -0,0 +1,332 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2007 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+#include "ixgbe.h"
+#include "ixgbe_type.h"
+#include "ixgbe_dcb.h"
+#include "ixgbe_dcb_82598.h"
+
+/**
+ * ixgbe_dcb_config - Struct containing DCB settings.
+ * @dcb_config: Pointer to DCB config structure
+ *
+ * This function checks DCB rules for DCB settings.
+ * The following rules are checked:
+ * 1. The sum of bandwidth percentages of all Bandwidth Groups must total 100%.
+ * 2. The sum of bandwidth percentages of all Traffic Classes within a Bandwidth
+ * Group must total 100.
+ * 3. A Traffic Class should not be set to both Link Strict Priority
+ * and Group Strict Priority.
+ * 4. Link strict Bandwidth Groups can only have link strict traffic classes
+ * with zero bandwidth.
+ */
+s32 ixgbe_dcb_check_config(struct ixgbe_dcb_config *dcb_config)
+{
+ struct tc_bw_alloc *p;
+ s32 ret_val = 0;
+ u8 i, j, bw = 0, bw_id;
+ u8 bw_sum[2][MAX_BW_GROUP];
+ bool link_strict[2][MAX_BW_GROUP];
+
+ memset(bw_sum, 0, sizeof(bw_sum));
+ memset(link_strict, 0, sizeof(link_strict));
+
+ /* First Tx, then Rx */
+ for (i = 0; i < 2; i++) {
+ /* Check each traffic class for rule violation */
+ for (j = 0; j < MAX_TRAFFIC_CLASS; j++) {
+ p = &dcb_config->tc_config[j].path[i];
+
+ bw = p->bwg_percent;
+ bw_id = p->bwg_id;
+
+ if (bw_id >= MAX_BW_GROUP) {
+ ret_val = DCB_ERR_CONFIG;
+ goto err_config;
+ }
+ if (p->prio_type == prio_link) {
+ link_strict[i][bw_id] = true;
+ /* Link strict should have zero bandwidth */
+ if (bw) {
+ ret_val = DCB_ERR_LS_BW_NONZERO;
+ goto err_config;
+ }
+ } else if (!bw) {
+ /*
+ * Traffic classes without link strict
+ * should have non-zero bandwidth.
+ */
+ ret_val = DCB_ERR_TC_BW_ZERO;
+ goto err_config;
+ }
+ bw_sum[i][bw_id] += bw;
+ }
+
+ bw = 0;
+
+ /* Check each bandwidth group for rule violation */
+ for (j = 0; j < MAX_BW_GROUP; j++) {
+ bw += dcb_config->bw_percentage[i][j];
+ /*
+ * Sum of bandwidth percentages of all traffic classes
+ * within a Bandwidth Group must total 100 except for
+ * link strict group (zero bandwidth).
+ */
+ if (link_strict[i][j]) {
+ if (bw_sum[i][j]) {
+ /*
+ * Link strict group should have zero
+ * bandwidth.
+ */
+ ret_val = DCB_ERR_LS_BWG_NONZERO;
+ goto err_config;
+ }
+ } else if (bw_sum[i][j] != BW_PERCENT &&
+ bw_sum[i][j] != 0) {
+ ret_val = DCB_ERR_TC_BW;
+ goto err_config;
+ }
+ }
+
+ if (bw != BW_PERCENT) {
+ ret_val = DCB_ERR_BW_GROUP;
+ goto err_config;
+ }
+ }
+
+err_config:
+ return ret_val;
+}
+
+/**
+ * ixgbe_dcb_calculate_tc_credits - Calculates traffic class credits
+ * @ixgbe_dcb_config: Struct containing DCB settings.
+ * @direction: Configuring either Tx or Rx.
+ *
+ * This function calculates the credits allocated to each traffic class.
+ * It should be called only after the rules are checked by
+ * ixgbe_dcb_check_config().
+ */
+s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_dcb_config *dcb_config,
+ u8 direction)
+{
+ struct tc_bw_alloc *p;
+ s32 ret_val = 0;
+ /* Initialization values default for Tx settings */
+ u32 credit_refill = 0;
+ u32 credit_max = 0;
+ u16 link_percentage = 0;
+ u8 bw_percent = 0;
+ u8 i;
+
+ if (dcb_config == NULL) {
+ ret_val = DCB_ERR_CONFIG;
+ goto out;
+ }
+
+ /* Find out the link percentage for each TC first */
+ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+ p = &dcb_config->tc_config[i].path[direction];
+ bw_percent = dcb_config->bw_percentage[direction][p->bwg_id];
+
+ link_percentage = p->bwg_percent;
+ /* Must be careful of integer division for very small nums */
+ link_percentage = (link_percentage * bw_percent) / 100;
+ if (p->bwg_percent > 0 && link_percentage == 0)
+ link_percentage = 1;
+
+ /* Save link_percentage for reference */
+ p->link_percent = (u8)link_percentage;
+
+ /* Calculate credit refill and save it */
+ credit_refill = link_percentage * MINIMUM_CREDIT_REFILL;
+ p->data_credits_refill = (u16)credit_refill;
+
+ /* Calculate maximum credit for the TC */
+ credit_max = (link_percentage * MAX_CREDIT) / 100;
+
+ /*
+ * Adjustment based on rule checking, if the percentage
+ * of a TC is too small, the maximum credit may not be
+ * enough to send out a jumbo frame in data plane arbitration.
+ */
+ if (credit_max && (credit_max < MINIMUM_CREDIT_FOR_JUMBO))
+ credit_max = MINIMUM_CREDIT_FOR_JUMBO;
+
+ if (direction == DCB_TX_CONFIG) {
+ /*
+ * Adjustment based on rule checking, if the
+ * percentage of a TC is too small, the maximum
+ * credit may not be enough to send out a TSO
+ * packet in descriptor plane arbitration.
+ */
+ if (credit_max &&
+ (credit_max < MINIMUM_CREDIT_FOR_TSO))
+ credit_max = MINIMUM_CREDIT_FOR_TSO;
+
+ dcb_config->tc_config[i].desc_credits_max =
+ (u16)credit_max;
+ }
+
+ p->data_credits_max = (u16)credit_max;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_dcb_get_tc_stats - Returns status of each traffic class
+ * @hw: pointer to hardware structure
+ * @stats: pointer to statistics structure
+ * @tc_count: Number of elements in bwg_array.
+ *
+ * This function returns the status data for each of the Traffic Classes in use.
+ */
+s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats,
+ u8 tc_count)
+{
+ s32 ret = 0;
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ ret = ixgbe_dcb_get_tc_stats_82598(hw, stats, tc_count);
+ return ret;
+}
+
+/**
+ * ixgbe_dcb_get_pfc_stats - Returns CBFC status of each traffic class
+ * hw - pointer to hardware structure
+ * stats - pointer to statistics structure
+ * tc_count - Number of elements in bwg_array.
+ *
+ * This function returns the CBFC status data for each of the Traffic Classes.
+ */
+s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats,
+ u8 tc_count)
+{
+ s32 ret = 0;
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ ret = ixgbe_dcb_get_pfc_stats_82598(hw, stats, tc_count);
+ return ret;
+}
+
+/**
+ * ixgbe_dcb_config_rx_arbiter - Config Rx arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Rx Data Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_rx_arbiter(struct ixgbe_hw *hw,
+ struct ixgbe_dcb_config *dcb_config)
+{
+ s32 ret = 0;
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ ret = ixgbe_dcb_config_rx_arbiter_82598(hw, dcb_config);
+ return ret;
+}
+
+/**
+ * ixgbe_dcb_config_tx_desc_arbiter - Config Tx Desc arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Tx Descriptor Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_tx_desc_arbiter(struct ixgbe_hw *hw,
+ struct ixgbe_dcb_config *dcb_config)
+{
+ s32 ret = 0;
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ ret = ixgbe_dcb_config_tx_desc_arbiter_82598(hw, dcb_config);
+ return ret;
+}
+
+/**
+ * ixgbe_dcb_config_tx_data_arbiter - Config Tx data arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Tx Data Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_tx_data_arbiter(struct ixgbe_hw *hw,
+ struct ixgbe_dcb_config *dcb_config)
+{
+ s32 ret = 0;
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ ret = ixgbe_dcb_config_tx_data_arbiter_82598(hw, dcb_config);
+ return ret;
+}
+
+/**
+ * ixgbe_dcb_config_pfc - Config priority flow control
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Priority Flow Control for each traffic class.
+ */
+s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *hw,
+ struct ixgbe_dcb_config *dcb_config)
+{
+ s32 ret = 0;
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ ret = ixgbe_dcb_config_pfc_82598(hw, dcb_config);
+ return ret;
+}
+
+/**
+ * ixgbe_dcb_config_tc_stats - Config traffic class statistics
+ * @hw: pointer to hardware structure
+ *
+ * Configure queue statistics registers, all queues belonging to same traffic
+ * class uses a single set of queue statistics counters.
+ */
+s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *hw)
+{
+ s32 ret = 0;
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ ret = ixgbe_dcb_config_tc_stats_82598(hw);
+ return ret;
+}
+
+/**
+ * ixgbe_dcb_hw_config - Config and enable DCB
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure dcb settings and enable dcb mode.
+ */
+s32 ixgbe_dcb_hw_config(struct ixgbe_hw *hw,
+ struct ixgbe_dcb_config *dcb_config)
+{
+ s32 ret = 0;
+ if (hw->mac.type == ixgbe_mac_82598EB)
+ ret = ixgbe_dcb_hw_config_82598(hw, dcb_config);
+ return ret;
+}
+
diff --git a/drivers/net/ixgbe/ixgbe_dcb.h b/drivers/net/ixgbe/ixgbe_dcb.h
new file mode 100644
index 00000000000..62dfd243bed
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb.h
@@ -0,0 +1,157 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2007 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _DCB_CONFIG_H_
+#define _DCB_CONFIG_H_
+
+#include "ixgbe_type.h"
+
+/* DCB data structures */
+
+#define IXGBE_MAX_PACKET_BUFFERS 8
+#define MAX_USER_PRIORITY 8
+#define MAX_TRAFFIC_CLASS 8
+#define MAX_BW_GROUP 8
+#define BW_PERCENT 100
+
+#define DCB_TX_CONFIG 0
+#define DCB_RX_CONFIG 1
+
+/* DCB error Codes */
+#define DCB_SUCCESS 0
+#define DCB_ERR_CONFIG -1
+#define DCB_ERR_PARAM -2
+
+/* Transmit and receive Errors */
+/* Error in bandwidth group allocation */
+#define DCB_ERR_BW_GROUP -3
+/* Error in traffic class bandwidth allocation */
+#define DCB_ERR_TC_BW -4
+/* Traffic class has both link strict and group strict enabled */
+#define DCB_ERR_LS_GS -5
+/* Link strict traffic class has non zero bandwidth */
+#define DCB_ERR_LS_BW_NONZERO -6
+/* Link strict bandwidth group has non zero bandwidth */
+#define DCB_ERR_LS_BWG_NONZERO -7
+/* Traffic class has zero bandwidth */
+#define DCB_ERR_TC_BW_ZERO -8
+
+#define DCB_NOT_IMPLEMENTED 0x7FFFFFFF
+
+struct dcb_pfc_tc_debug {
+ u8 tc;
+ u8 pause_status;
+ u64 pause_quanta;
+};
+
+enum strict_prio_type {
+ prio_none = 0,
+ prio_group,
+ prio_link
+};
+
+/* Traffic class bandwidth allocation per direction */
+struct tc_bw_alloc {
+ u8 bwg_id; /* Bandwidth Group (BWG) ID */
+ u8 bwg_percent; /* % of BWG's bandwidth */
+ u8 link_percent; /* % of link bandwidth */
+ u8 up_to_tc_bitmap; /* User Priority to Traffic Class mapping */
+ u16 data_credits_refill; /* Credit refill amount in 64B granularity */
+ u16 data_credits_max; /* Max credits for a configured packet buffer
+ * in 64B granularity.*/
+ enum strict_prio_type prio_type; /* Link or Group Strict Priority */
+};
+
+enum dcb_pfc_type {
+ pfc_disabled = 0,
+ pfc_enabled_full,
+ pfc_enabled_tx,
+ pfc_enabled_rx
+};
+
+/* Traffic class configuration */
+struct tc_configuration {
+ struct tc_bw_alloc path[2]; /* One each for Tx/Rx */
+ enum dcb_pfc_type dcb_pfc; /* Class based flow control setting */
+
+ u16 desc_credits_max; /* For Tx Descriptor arbitration */
+ u8 tc; /* Traffic class (TC) */
+};
+
+enum dcb_rx_pba_cfg {
+ pba_equal, /* PBA[0-7] each use 64KB FIFO */
+ pba_80_48 /* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */
+};
+
+struct ixgbe_dcb_config {
+ struct tc_configuration tc_config[MAX_TRAFFIC_CLASS];
+ u8 bw_percentage[2][MAX_BW_GROUP]; /* One each for Tx/Rx */
+
+ bool round_robin_enable;
+
+ enum dcb_rx_pba_cfg rx_pba_cfg;
+
+ u32 dcb_cfg_version; /* Not used...OS-specific? */
+ u32 link_speed; /* For bandwidth allocation validation purpose */
+};
+
+/* DCB driver APIs */
+
+/* DCB rule checking function.*/
+s32 ixgbe_dcb_check_config(struct ixgbe_dcb_config *config);
+
+/* DCB credits calculation */
+s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_dcb_config *, u8);
+
+/* DCB PFC functions */
+s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *, struct ixgbe_dcb_config *g);
+s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8);
+
+/* DCB traffic class stats */
+s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *);
+s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8);
+
+/* DCB config arbiters */
+s32 ixgbe_dcb_config_tx_desc_arbiter(struct ixgbe_hw *,
+ struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_tx_data_arbiter(struct ixgbe_hw *,
+ struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_rx_arbiter(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+
+/* DCB hw initialization */
+s32 ixgbe_dcb_hw_config(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+
+/* DCB definitions for credit calculation */
+#define MAX_CREDIT_REFILL 511 /* 0x1FF * 64B = 32704B */
+#define MINIMUM_CREDIT_REFILL 5 /* 5*64B = 320B */
+#define MINIMUM_CREDIT_FOR_JUMBO 145 /* 145= UpperBound((9*1024+54)/64B) for 9KB jumbo frame */
+#define DCB_MAX_TSO_SIZE (32*1024) /* MAX TSO packet size supported in DCB mode */
+#define MINIMUM_CREDIT_FOR_TSO (DCB_MAX_TSO_SIZE/64 + 1) /* 513 for 32KB TSO packet */
+#define MAX_CREDIT 4095 /* Maximum credit supported: 256KB * 1204 / 64B */
+
+#endif /* _DCB_CONFIG_H */
diff --git a/drivers/net/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ixgbe/ixgbe_dcb_82598.c
new file mode 100644
index 00000000000..fce6867a451
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb_82598.c
@@ -0,0 +1,398 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2007 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe.h"
+#include "ixgbe_type.h"
+#include "ixgbe_dcb.h"
+#include "ixgbe_dcb_82598.h"
+
+/**
+ * ixgbe_dcb_get_tc_stats_82598 - Return status data for each traffic class
+ * @hw: pointer to hardware structure
+ * @stats: pointer to statistics structure
+ * @tc_count: Number of elements in bwg_array.
+ *
+ * This function returns the status data for each of the Traffic Classes in use.
+ */
+s32 ixgbe_dcb_get_tc_stats_82598(struct ixgbe_hw *hw,
+ struct ixgbe_hw_stats *stats,
+ u8 tc_count)
+{
+ int tc;
+
+ if (tc_count > MAX_TRAFFIC_CLASS)
+ return DCB_ERR_PARAM;
+
+ /* Statistics pertaining to each traffic class */
+ for (tc = 0; tc < tc_count; tc++) {
+ /* Transmitted Packets */
+ stats->qptc[tc] += IXGBE_READ_REG(hw, IXGBE_QPTC(tc));
+ /* Transmitted Bytes */
+ stats->qbtc[tc] += IXGBE_READ_REG(hw, IXGBE_QBTC(tc));
+ /* Received Packets */
+ stats->qprc[tc] += IXGBE_READ_REG(hw, IXGBE_QPRC(tc));
+ /* Received Bytes */
+ stats->qbrc[tc] += IXGBE_READ_REG(hw, IXGBE_QBRC(tc));
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_dcb_get_pfc_stats_82598 - Returns CBFC status data
+ * @hw: pointer to hardware structure
+ * @stats: pointer to statistics structure
+ * @tc_count: Number of elements in bwg_array.
+ *
+ * This function returns the CBFC status data for each of the Traffic Classes.
+ */
+s32 ixgbe_dcb_get_pfc_stats_82598(struct ixgbe_hw *hw,
+ struct ixgbe_hw_stats *stats,
+ u8 tc_count)
+{
+ int tc;
+
+ if (tc_count > MAX_TRAFFIC_CLASS)
+ return DCB_ERR_PARAM;
+
+ for (tc = 0; tc < tc_count; tc++) {
+ /* Priority XOFF Transmitted */
+ stats->pxofftxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(tc));
+ /* Priority XOFF Received */
+ stats->pxoffrxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(tc));
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_dcb_config_packet_buffers_82598 - Configure packet buffers
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure packet buffers for DCB mode.
+ */
+s32 ixgbe_dcb_config_packet_buffers_82598(struct ixgbe_hw *hw,
+ struct ixgbe_dcb_config *dcb_config)
+{
+ s32 ret_val = 0;
+ u32 value = IXGBE_RXPBSIZE_64KB;
+ u8 i = 0;
+
+ /* Setup Rx packet buffer sizes */
+ switch (dcb_config->rx_pba_cfg) {
+ case pba_80_48:
+ /* Setup the first four at 80KB */
+ value = IXGBE_RXPBSIZE_80KB;
+ for (; i < 4; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value);
+ /* Setup the last four at 48KB...don't re-init i */
+ value = IXGBE_RXPBSIZE_48KB;
+ /* Fall Through */
+ case pba_equal:
+ default:
+ for (; i < IXGBE_MAX_PACKET_BUFFERS; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value);
+
+ /* Setup Tx packet buffer sizes */
+ for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) {
+ IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i),
+ IXGBE_TXPBSIZE_40KB);
+ }
+ break;
+ }
+
+ return ret_val;
+}
+
+/**
+ * ixgbe_dcb_config_rx_arbiter_82598 - Config Rx data arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Rx Data Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw,
+ struct ixgbe_dcb_config *dcb_config)
+{
+ struct tc_bw_alloc *p;
+ u32 reg = 0;
+ u32 credit_refill = 0;
+ u32 credit_max = 0;
+ u8 i = 0;
+
+ reg = IXGBE_READ_REG(hw, IXGBE_RUPPBMR) | IXGBE_RUPPBMR_MQA;
+ IXGBE_WRITE_REG(hw, IXGBE_RUPPBMR, reg);
+
+ reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
+ /* Enable Arbiter */
+ reg &= ~IXGBE_RMCS_ARBDIS;
+ /* Enable Receive Recycle within the BWG */
+ reg |= IXGBE_RMCS_RRM;
+ /* Enable Deficit Fixed Priority arbitration*/
+ reg |= IXGBE_RMCS_DFP;
+
+ IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg);
+
+ /* Configure traffic class credits and priority */
+ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+ p = &dcb_config->tc_config[i].path[DCB_RX_CONFIG];
+ credit_refill = p->data_credits_refill;
+ credit_max = p->data_credits_max;
+
+ reg = credit_refill | (credit_max << IXGBE_RT2CR_MCL_SHIFT);
+
+ if (p->prio_type == prio_link)
+ reg |= IXGBE_RT2CR_LSP;
+
+ IXGBE_WRITE_REG(hw, IXGBE_RT2CR(i), reg);
+ }
+
+ reg = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+ reg |= IXGBE_RDRXCTL_RDMTS_1_2;
+ reg |= IXGBE_RDRXCTL_MPBEN;
+ reg |= IXGBE_RDRXCTL_MCEN;
+ IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg);
+
+ reg = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+ /* Make sure there is enough descriptors before arbitration */
+ reg &= ~IXGBE_RXCTRL_DMBYPS;
+ IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, reg);
+
+ return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tx_desc_arbiter_82598 - Config Tx Desc. arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Tx Descriptor Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw,
+ struct ixgbe_dcb_config *dcb_config)
+{
+ struct tc_bw_alloc *p;
+ u32 reg, max_credits;
+ u8 i;
+
+ reg = IXGBE_READ_REG(hw, IXGBE_DPMCS);
+
+ /* Enable arbiter */
+ reg &= ~IXGBE_DPMCS_ARBDIS;
+ if (!(dcb_config->round_robin_enable)) {
+ /* Enable DFP and Recycle mode */
+ reg |= (IXGBE_DPMCS_TDPAC | IXGBE_DPMCS_TRM);
+ }
+ reg |= IXGBE_DPMCS_TSOEF;
+ /* Configure Max TSO packet size 34KB including payload and headers */
+ reg |= (0x4 << IXGBE_DPMCS_MTSOS_SHIFT);
+
+ IXGBE_WRITE_REG(hw, IXGBE_DPMCS, reg);
+
+ /* Configure traffic class credits and priority */
+ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+ p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG];
+ max_credits = dcb_config->tc_config[i].desc_credits_max;
+ reg = max_credits << IXGBE_TDTQ2TCCR_MCL_SHIFT;
+ reg |= p->data_credits_refill;
+ reg |= (u32)(p->bwg_id) << IXGBE_TDTQ2TCCR_BWG_SHIFT;
+
+ if (p->prio_type == prio_group)
+ reg |= IXGBE_TDTQ2TCCR_GSP;
+
+ if (p->prio_type == prio_link)
+ reg |= IXGBE_TDTQ2TCCR_LSP;
+
+ IXGBE_WRITE_REG(hw, IXGBE_TDTQ2TCCR(i), reg);
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tx_data_arbiter_82598 - Config Tx data arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Tx Data Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw,
+ struct ixgbe_dcb_config *dcb_config)
+{
+ struct tc_bw_alloc *p;
+ u32 reg;
+ u8 i;
+
+ reg = IXGBE_READ_REG(hw, IXGBE_PDPMCS);
+ /* Enable Data Plane Arbiter */
+ reg &= ~IXGBE_PDPMCS_ARBDIS;
+ /* Enable DFP and Transmit Recycle Mode */
+ reg |= (IXGBE_PDPMCS_TPPAC | IXGBE_PDPMCS_TRM);
+
+ IXGBE_WRITE_REG(hw, IXGBE_PDPMCS, reg);
+
+ /* Configure traffic class credits and priority */
+ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+ p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG];
+ reg = p->data_credits_refill;
+ reg |= (u32)(p->data_credits_max) << IXGBE_TDPT2TCCR_MCL_SHIFT;
+ reg |= (u32)(p->bwg_id) << IXGBE_TDPT2TCCR_BWG_SHIFT;
+
+ if (p->prio_type == prio_group)
+ reg |= IXGBE_TDPT2TCCR_GSP;
+
+ if (p->prio_type == prio_link)
+ reg |= IXGBE_TDPT2TCCR_LSP;
+
+ IXGBE_WRITE_REG(hw, IXGBE_TDPT2TCCR(i), reg);
+ }
+
+ /* Enable Tx packet buffer division */
+ reg = IXGBE_READ_REG(hw, IXGBE_DTXCTL);
+ reg |= IXGBE_DTXCTL_ENDBUBD;
+ IXGBE_WRITE_REG(hw, IXGBE_DTXCTL, reg);
+
+ return 0;
+}
+
+/**
+ * ixgbe_dcb_config_pfc_82598 - Config priority flow control
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Priority Flow Control for each traffic class.
+ */
+s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw,
+ struct ixgbe_dcb_config *dcb_config)
+{
+ u32 reg, rx_pba_size;
+ u8 i;
+
+ /* Enable Transmit Priority Flow Control */
+ reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
+ reg &= ~IXGBE_RMCS_TFCE_802_3X;
+ /* correct the reporting of our flow control status */
+ hw->fc.type = ixgbe_fc_none;
+ reg |= IXGBE_RMCS_TFCE_PRIORITY;
+ IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg);
+
+ /* Enable Receive Priority Flow Control */
+ reg = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+ reg &= ~IXGBE_FCTRL_RFCE;
+ reg |= IXGBE_FCTRL_RPFCE;
+ IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg);
+
+ /*
+ * Configure flow control thresholds and enable priority flow control
+ * for each traffic class.
+ */
+ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+ if (dcb_config->rx_pba_cfg == pba_equal) {
+ rx_pba_size = IXGBE_RXPBSIZE_64KB;
+ } else {
+ rx_pba_size = (i < 4) ? IXGBE_RXPBSIZE_80KB
+ : IXGBE_RXPBSIZE_48KB;
+ }
+
+ reg = ((rx_pba_size >> 5) & 0xFFF0);
+ if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx ||
+ dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full)
+ reg |= IXGBE_FCRTL_XONE;
+
+ IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), reg);
+
+ reg = ((rx_pba_size >> 2) & 0xFFF0);
+ if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx ||
+ dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full)
+ reg |= IXGBE_FCRTH_FCEN;
+
+ IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), reg);
+ }
+
+ /* Configure pause time */
+ for (i = 0; i < (MAX_TRAFFIC_CLASS >> 1); i++)
+ IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), 0x68006800);
+
+ /* Configure flow control refresh threshold value */
+ IXGBE_WRITE_REG(hw, IXGBE_FCRTV, 0x3400);
+
+ return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tc_stats_82598 - Configure traffic class statistics
+ * @hw: pointer to hardware structure
+ *
+ * Configure queue statistics registers, all queues belonging to same traffic
+ * class uses a single set of queue statistics counters.
+ */
+s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *hw)
+{
+ u32 reg = 0;
+ u8 i = 0;
+ u8 j = 0;
+
+ /* Receive Queues stats setting - 8 queues per statistics reg */
+ for (i = 0, j = 0; i < 15 && j < 8; i = i + 2, j++) {
+ reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i));
+ reg |= ((0x1010101) * j);
+ IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i), reg);
+ reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i + 1));
+ reg |= ((0x1010101) * j);
+ IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i + 1), reg);
+ }
+ /* Transmit Queues stats setting - 4 queues per statistics reg */
+ for (i = 0; i < 8; i++) {
+ reg = IXGBE_READ_REG(hw, IXGBE_TQSMR(i));
+ reg |= ((0x1010101) * i);
+ IXGBE_WRITE_REG(hw, IXGBE_TQSMR(i), reg);
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_dcb_hw_config_82598 - Config and enable DCB
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure dcb settings and enable dcb mode.
+ */
+s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *hw,
+ struct ixgbe_dcb_config *dcb_config)
+{
+ ixgbe_dcb_config_packet_buffers_82598(hw, dcb_config);
+ ixgbe_dcb_config_rx_arbiter_82598(hw, dcb_config);
+ ixgbe_dcb_config_tx_desc_arbiter_82598(hw, dcb_config);
+ ixgbe_dcb_config_tx_data_arbiter_82598(hw, dcb_config);
+ ixgbe_dcb_config_pfc_82598(hw, dcb_config);
+ ixgbe_dcb_config_tc_stats_82598(hw);
+
+ return 0;
+}
diff --git a/drivers/net/ixgbe/ixgbe_dcb_82598.h b/drivers/net/ixgbe/ixgbe_dcb_82598.h
new file mode 100644
index 00000000000..1e6a313719d
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb_82598.h
@@ -0,0 +1,94 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2007 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _DCB_82598_CONFIG_H_
+#define _DCB_82598_CONFIG_H_
+
+/* DCB register definitions */
+
+#define IXGBE_DPMCS_MTSOS_SHIFT 16
+#define IXGBE_DPMCS_TDPAC 0x00000001 /* 0 Round Robin, 1 DFP - Deficit Fixed Priority */
+#define IXGBE_DPMCS_TRM 0x00000010 /* Transmit Recycle Mode */
+#define IXGBE_DPMCS_ARBDIS 0x00000040 /* DCB arbiter disable */
+#define IXGBE_DPMCS_TSOEF 0x00080000 /* TSO Expand Factor: 0=x4, 1=x2 */
+
+#define IXGBE_RUPPBMR_MQA 0x80000000 /* Enable UP to queue mapping */
+
+#define IXGBE_RT2CR_MCL_SHIFT 12 /* Offset to Max Credit Limit setting */
+#define IXGBE_RT2CR_LSP 0x80000000 /* LSP enable bit */
+
+#define IXGBE_RDRXCTL_MPBEN 0x00000010 /* DMA config for multiple packet buffers enable */
+#define IXGBE_RDRXCTL_MCEN 0x00000040 /* DMA config for multiple cores (RSS) enable */
+
+#define IXGBE_TDTQ2TCCR_MCL_SHIFT 12
+#define IXGBE_TDTQ2TCCR_BWG_SHIFT 9
+#define IXGBE_TDTQ2TCCR_GSP 0x40000000
+#define IXGBE_TDTQ2TCCR_LSP 0x80000000
+
+#define IXGBE_TDPT2TCCR_MCL_SHIFT 12
+#define IXGBE_TDPT2TCCR_BWG_SHIFT 9
+#define IXGBE_TDPT2TCCR_GSP 0x40000000
+#define IXGBE_TDPT2TCCR_LSP 0x80000000
+
+#define IXGBE_PDPMCS_TPPAC 0x00000020 /* 0 Round Robin, 1 for DFP - Deficit Fixed Priority */
+#define IXGBE_PDPMCS_ARBDIS 0x00000040 /* Arbiter disable */
+#define IXGBE_PDPMCS_TRM 0x00000100 /* Transmit Recycle Mode enable */
+
+#define IXGBE_DTXCTL_ENDBUBD 0x00000004 /* Enable DBU buffer division */
+
+#define IXGBE_TXPBSIZE_40KB 0x0000A000 /* 40KB Packet Buffer */
+#define IXGBE_RXPBSIZE_48KB 0x0000C000 /* 48KB Packet Buffer */
+#define IXGBE_RXPBSIZE_64KB 0x00010000 /* 64KB Packet Buffer */
+#define IXGBE_RXPBSIZE_80KB 0x00014000 /* 80KB Packet Buffer */
+
+#define IXGBE_RDRXCTL_RDMTS_1_2 0x00000000
+
+/* DCB hardware-specific driver APIs */
+
+/* DCB PFC functions */
+s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_get_pfc_stats_82598(struct ixgbe_hw *, struct ixgbe_hw_stats *,
+ u8);
+
+/* DCB traffic class stats */
+s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *);
+s32 ixgbe_dcb_get_tc_stats_82598(struct ixgbe_hw *, struct ixgbe_hw_stats *,
+ u8);
+
+/* DCB config arbiters */
+s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *,
+ struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *,
+ struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *,
+ struct ixgbe_dcb_config *);
+
+/* DCB hw initialization */
+s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+
+#endif /* _DCB_82598_CONFIG_H */
diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
new file mode 100644
index 00000000000..50bff2af6b0
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -0,0 +1,356 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 1999 - 2008 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe.h"
+#include <linux/dcbnl.h>
+
+/* Callbacks for DCB netlink in the kernel */
+#define BIT_DCB_MODE 0x01
+#define BIT_PFC 0x02
+#define BIT_PG_RX 0x04
+#define BIT_PG_TX 0x08
+
+int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg,
+ struct ixgbe_dcb_config *dst_dcb_cfg, int tc_max)
+{
+ struct tc_configuration *src_tc_cfg = NULL;
+ struct tc_configuration *dst_tc_cfg = NULL;
+ int i;
+
+ if (!src_dcb_cfg || !dst_dcb_cfg)
+ return -EINVAL;
+
+ for (i = DCB_PG_ATTR_TC_0; i < tc_max + DCB_PG_ATTR_TC_0; i++) {
+ src_tc_cfg = &src_dcb_cfg->tc_config[i - DCB_PG_ATTR_TC_0];
+ dst_tc_cfg = &dst_dcb_cfg->tc_config[i - DCB_PG_ATTR_TC_0];
+
+ dst_tc_cfg->path[DCB_TX_CONFIG].prio_type =
+ src_tc_cfg->path[DCB_TX_CONFIG].prio_type;
+
+ dst_tc_cfg->path[DCB_TX_CONFIG].bwg_id =
+ src_tc_cfg->path[DCB_TX_CONFIG].bwg_id;
+
+ dst_tc_cfg->path[DCB_TX_CONFIG].bwg_percent =
+ src_tc_cfg->path[DCB_TX_CONFIG].bwg_percent;
+
+ dst_tc_cfg->path[DCB_TX_CONFIG].up_to_tc_bitmap =
+ src_tc_cfg->path[DCB_TX_CONFIG].up_to_tc_bitmap;
+
+ dst_tc_cfg->path[DCB_RX_CONFIG].prio_type =
+ src_tc_cfg->path[DCB_RX_CONFIG].prio_type;
+
+ dst_tc_cfg->path[DCB_RX_CONFIG].bwg_id =
+ src_tc_cfg->path[DCB_RX_CONFIG].bwg_id;
+
+ dst_tc_cfg->path[DCB_RX_CONFIG].bwg_percent =
+ src_tc_cfg->path[DCB_RX_CONFIG].bwg_percent;
+
+ dst_tc_cfg->path[DCB_RX_CONFIG].up_to_tc_bitmap =
+ src_tc_cfg->path[DCB_RX_CONFIG].up_to_tc_bitmap;
+ }
+
+ for (i = DCB_PG_ATTR_BW_ID_0; i < DCB_PG_ATTR_BW_ID_MAX; i++) {
+ dst_dcb_cfg->bw_percentage[DCB_TX_CONFIG]
+ [i-DCB_PG_ATTR_BW_ID_0] = src_dcb_cfg->bw_percentage
+ [DCB_TX_CONFIG][i-DCB_PG_ATTR_BW_ID_0];
+ dst_dcb_cfg->bw_percentage[DCB_RX_CONFIG]
+ [i-DCB_PG_ATTR_BW_ID_0] = src_dcb_cfg->bw_percentage
+ [DCB_RX_CONFIG][i-DCB_PG_ATTR_BW_ID_0];
+ }
+
+ for (i = DCB_PFC_UP_ATTR_0; i < DCB_PFC_UP_ATTR_MAX; i++) {
+ dst_dcb_cfg->tc_config[i - DCB_PFC_UP_ATTR_0].dcb_pfc =
+ src_dcb_cfg->tc_config[i - DCB_PFC_UP_ATTR_0].dcb_pfc;
+ }
+
+ return 0;
+}
+
+static u8 ixgbe_dcbnl_get_state(struct net_device *netdev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ DPRINTK(DRV, INFO, "Get DCB Admin Mode.\n");
+
+ return !!(adapter->flags & IXGBE_FLAG_DCB_ENABLED);
+}
+
+static u16 ixgbe_dcb_select_queue(struct net_device *dev, struct sk_buff *skb)
+{
+ /* All traffic should default to class 0 */
+ return 0;
+}
+
+static void ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ DPRINTK(DRV, INFO, "Set DCB Admin Mode.\n");
+
+ if (state > 0) {
+ /* Turn on DCB */
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+ return;
+ } else {
+ if (netif_running(netdev))
+ netdev->stop(netdev);
+ ixgbe_reset_interrupt_capability(adapter);
+ ixgbe_napi_del_all(adapter);
+ kfree(adapter->tx_ring);
+ kfree(adapter->rx_ring);
+ adapter->tx_ring = NULL;
+ adapter->rx_ring = NULL;
+ netdev->select_queue = &ixgbe_dcb_select_queue;
+
+ adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+ adapter->flags |= IXGBE_FLAG_DCB_ENABLED;
+ ixgbe_init_interrupt_scheme(adapter);
+ ixgbe_napi_add_all(adapter);
+ if (netif_running(netdev))
+ netdev->open(netdev);
+ }
+ } else {
+ /* Turn off DCB */
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+ if (netif_running(netdev))
+ netdev->stop(netdev);
+ ixgbe_reset_interrupt_capability(adapter);
+ ixgbe_napi_del_all(adapter);
+ kfree(adapter->tx_ring);
+ kfree(adapter->rx_ring);
+ adapter->tx_ring = NULL;
+ adapter->rx_ring = NULL;
+ netdev->select_queue = NULL;
+
+ adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
+ adapter->flags |= IXGBE_FLAG_RSS_ENABLED;
+ ixgbe_init_interrupt_scheme(adapter);
+ ixgbe_napi_add_all(adapter);
+ if (netif_running(netdev))
+ netdev->open(netdev);
+ } else {
+ return;
+ }
+ }
+}
+
+static void ixgbe_dcbnl_get_perm_hw_addr(struct net_device *netdev,
+ u8 *perm_addr)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ int i;
+
+ for (i = 0; i < netdev->addr_len; i++)
+ perm_addr[i] = adapter->hw.mac.perm_addr[i];
+}
+
+static void ixgbe_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+ u8 prio, u8 bwg_id, u8 bw_pct,
+ u8 up_map)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ if (prio != DCB_ATTR_VALUE_UNDEFINED)
+ adapter->temp_dcb_cfg.tc_config[tc].path[0].prio_type = prio;
+ if (bwg_id != DCB_ATTR_VALUE_UNDEFINED)
+ adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_id = bwg_id;
+ if (bw_pct != DCB_ATTR_VALUE_UNDEFINED)
+ adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_percent =
+ bw_pct;
+ if (up_map != DCB_ATTR_VALUE_UNDEFINED)
+ adapter->temp_dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap =
+ up_map;
+
+ if ((adapter->temp_dcb_cfg.tc_config[tc].path[0].prio_type !=
+ adapter->dcb_cfg.tc_config[tc].path[0].prio_type) ||
+ (adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_id !=
+ adapter->dcb_cfg.tc_config[tc].path[0].bwg_id) ||
+ (adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_percent !=
+ adapter->dcb_cfg.tc_config[tc].path[0].bwg_percent) ||
+ (adapter->temp_dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap !=
+ adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap))
+ adapter->dcb_set_bitmap |= BIT_PG_TX;
+}
+
+static void ixgbe_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id,
+ u8 bw_pct)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] = bw_pct;
+
+ if (adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] !=
+ adapter->dcb_cfg.bw_percentage[0][bwg_id])
+ adapter->dcb_set_bitmap |= BIT_PG_RX;
+}
+
+static void ixgbe_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, int tc,
+ u8 prio, u8 bwg_id, u8 bw_pct,
+ u8 up_map)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ if (prio != DCB_ATTR_VALUE_UNDEFINED)
+ adapter->temp_dcb_cfg.tc_config[tc].path[1].prio_type = prio;
+ if (bwg_id != DCB_ATTR_VALUE_UNDEFINED)
+ adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_id = bwg_id;
+ if (bw_pct != DCB_ATTR_VALUE_UNDEFINED)
+ adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_percent =
+ bw_pct;
+ if (up_map != DCB_ATTR_VALUE_UNDEFINED)
+ adapter->temp_dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap =
+ up_map;
+
+ if ((adapter->temp_dcb_cfg.tc_config[tc].path[1].prio_type !=
+ adapter->dcb_cfg.tc_config[tc].path[1].prio_type) ||
+ (adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_id !=
+ adapter->dcb_cfg.tc_config[tc].path[1].bwg_id) ||
+ (adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_percent !=
+ adapter->dcb_cfg.tc_config[tc].path[1].bwg_percent) ||
+ (adapter->temp_dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap !=
+ adapter->dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap))
+ adapter->dcb_set_bitmap |= BIT_PG_RX;
+}
+
+static void ixgbe_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id,
+ u8 bw_pct)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ adapter->temp_dcb_cfg.bw_percentage[1][bwg_id] = bw_pct;
+
+ if (adapter->temp_dcb_cfg.bw_percentage[1][bwg_id] !=
+ adapter->dcb_cfg.bw_percentage[1][bwg_id])
+ adapter->dcb_set_bitmap |= BIT_PG_RX;
+}
+
+static void ixgbe_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+ u8 *prio, u8 *bwg_id, u8 *bw_pct,
+ u8 *up_map)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ *prio = adapter->dcb_cfg.tc_config[tc].path[0].prio_type;
+ *bwg_id = adapter->dcb_cfg.tc_config[tc].path[0].bwg_id;
+ *bw_pct = adapter->dcb_cfg.tc_config[tc].path[0].bwg_percent;
+ *up_map = adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap;
+}
+
+static void ixgbe_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id,
+ u8 *bw_pct)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ *bw_pct = adapter->dcb_cfg.bw_percentage[0][bwg_id];
+}
+
+static void ixgbe_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int tc,
+ u8 *prio, u8 *bwg_id, u8 *bw_pct,
+ u8 *up_map)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ *prio = adapter->dcb_cfg.tc_config[tc].path[1].prio_type;
+ *bwg_id = adapter->dcb_cfg.tc_config[tc].path[1].bwg_id;
+ *bw_pct = adapter->dcb_cfg.tc_config[tc].path[1].bwg_percent;
+ *up_map = adapter->dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap;
+}
+
+static void ixgbe_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id,
+ u8 *bw_pct)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ *bw_pct = adapter->dcb_cfg.bw_percentage[1][bwg_id];
+}
+
+static void ixgbe_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority,
+ u8 setting)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc = setting;
+ if (adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc !=
+ adapter->dcb_cfg.tc_config[priority].dcb_pfc)
+ adapter->dcb_set_bitmap |= BIT_PFC;
+}
+
+static void ixgbe_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority,
+ u8 *setting)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+ *setting = adapter->dcb_cfg.tc_config[priority].dcb_pfc;
+}
+
+static u8 ixgbe_dcbnl_set_all(struct net_device *netdev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ int ret;
+
+ if (!adapter->dcb_set_bitmap)
+ return 1;
+
+ while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
+ msleep(1);
+
+ if (netif_running(netdev))
+ ixgbe_down(adapter);
+
+ ret = ixgbe_copy_dcb_cfg(&adapter->temp_dcb_cfg, &adapter->dcb_cfg,
+ adapter->ring_feature[RING_F_DCB].indices);
+ if (ret) {
+ clear_bit(__IXGBE_RESETTING, &adapter->state);
+ return ret;
+ }
+
+ if (netif_running(netdev))
+ ixgbe_up(adapter);
+
+ adapter->dcb_set_bitmap = 0x00;
+ clear_bit(__IXGBE_RESETTING, &adapter->state);
+ return ret;
+}
+
+struct dcbnl_rtnl_ops dcbnl_ops = {
+ .getstate = ixgbe_dcbnl_get_state,
+ .setstate = ixgbe_dcbnl_set_state,
+ .getpermhwaddr = ixgbe_dcbnl_get_perm_hw_addr,
+ .setpgtccfgtx = ixgbe_dcbnl_set_pg_tc_cfg_tx,
+ .setpgbwgcfgtx = ixgbe_dcbnl_set_pg_bwg_cfg_tx,
+ .setpgtccfgrx = ixgbe_dcbnl_set_pg_tc_cfg_rx,
+ .setpgbwgcfgrx = ixgbe_dcbnl_set_pg_bwg_cfg_rx,
+ .getpgtccfgtx = ixgbe_dcbnl_get_pg_tc_cfg_tx,
+ .getpgbwgcfgtx = ixgbe_dcbnl_get_pg_bwg_cfg_tx,
+ .getpgtccfgrx = ixgbe_dcbnl_get_pg_tc_cfg_rx,
+ .getpgbwgcfgrx = ixgbe_dcbnl_get_pg_bwg_cfg_rx,
+ .setpfccfg = ixgbe_dcbnl_set_pfc_cfg,
+ .getpfccfg = ixgbe_dcbnl_get_pfc_cfg,
+ .setall = ixgbe_dcbnl_set_all
+};
+
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index a610016a017..aaa4404e7c5 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -97,9 +97,18 @@ static struct ixgbe_stats ixgbe_gstrings_stats[] = {
((((struct ixgbe_adapter *)netdev_priv(netdev))->num_tx_queues + \
((struct ixgbe_adapter *)netdev_priv(netdev))->num_rx_queues) * \
(sizeof(struct ixgbe_queue_stats) / sizeof(u64)))
-#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + IXGBE_QUEUE_STATS_LEN)
#define IXGBE_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_stats)
-#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + IXGBE_QUEUE_STATS_LEN)
+#define IXGBE_PB_STATS_LEN ( \
+ (((struct ixgbe_adapter *)netdev->priv)->flags & \
+ IXGBE_FLAG_DCB_ENABLED) ? \
+ (sizeof(((struct ixgbe_adapter *)0)->stats.pxonrxc) + \
+ sizeof(((struct ixgbe_adapter *)0)->stats.pxontxc) + \
+ sizeof(((struct ixgbe_adapter *)0)->stats.pxoffrxc) + \
+ sizeof(((struct ixgbe_adapter *)0)->stats.pxofftxc)) \
+ / sizeof(u64) : 0)
+#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + \
+ IXGBE_PB_STATS_LEN + \
+ IXGBE_QUEUE_STATS_LEN)
static int ixgbe_get_settings(struct net_device *netdev,
struct ethtool_cmd *ecmd)
@@ -831,6 +840,16 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev,
data[i + k] = queue_stat[k];
i += k;
}
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+ for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) {
+ data[i++] = adapter->stats.pxontxc[j];
+ data[i++] = adapter->stats.pxofftxc[j];
+ }
+ for (j = 0; j < MAX_RX_PACKET_BUFFERS; j++) {
+ data[i++] = adapter->stats.pxonrxc[j];
+ data[i++] = adapter->stats.pxoffrxc[j];
+ }
+ }
}
static void ixgbe_get_strings(struct net_device *netdev, u32 stringset,
@@ -859,6 +878,13 @@ static void ixgbe_get_strings(struct net_device *netdev, u32 stringset,
sprintf(p, "rx_queue_%u_bytes", i);
p += ETH_GSTRING_LEN;
}
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+ for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) {
+ sprintf(p, "tx_pb_%u_pxon", i);
+ }
+ for (i = 0; i < MAX_RX_PACKET_BUFFERS; i++) {
+ }
+ }
/* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */
break;
}
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 40108523377..91dde9cdab6 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -404,7 +404,7 @@ static void ixgbe_receive_skb(struct ixgbe_adapter *adapter,
if (adapter->netdev->features & NETIF_F_LRO &&
skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (adapter->vlgrp && is_vlan)
+ if (adapter->vlgrp && is_vlan && (tag != 0))
lro_vlan_hwaccel_receive_skb(&ring->lro_mgr, skb,
adapter->vlgrp, tag,
rx_desc);
@@ -413,12 +413,12 @@ static void ixgbe_receive_skb(struct ixgbe_adapter *adapter,
ring->lro_used = true;
} else {
if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL)) {
- if (adapter->vlgrp && is_vlan)
+ if (adapter->vlgrp && is_vlan && (tag != 0))
vlan_hwaccel_receive_skb(skb, adapter->vlgrp, tag);
else
netif_receive_skb(skb);
} else {
- if (adapter->vlgrp && is_vlan)
+ if (adapter->vlgrp && is_vlan && (tag != 0))
vlan_hwaccel_rx(skb, adapter->vlgrp, tag);
else
netif_rx(skb);
@@ -1670,10 +1670,12 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
* effects of setting this bit are only that SRRCTL must be
* fully programmed [0..15]
*/
- rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
- rdrxctl |= IXGBE_RDRXCTL_MVMEN;
- IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
-
+ if (adapter->flags &
+ (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_VMDQ_ENABLED)) {
+ rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+ rdrxctl |= IXGBE_RDRXCTL_MVMEN;
+ IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
+ }
if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
/* Fill out redirection table */
@@ -1732,6 +1734,16 @@ static void ixgbe_vlan_rx_register(struct net_device *netdev,
ixgbe_irq_disable(adapter);
adapter->vlgrp = grp;
+ /*
+ * For a DCB driver, always enable VLAN tag stripping so we can
+ * still receive traffic from a DCB-enabled host even if we're
+ * not in DCB mode.
+ */
+ ctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_VLNCTRL);
+ ctrl |= IXGBE_VLNCTRL_VME;
+ ctrl &= ~IXGBE_VLNCTRL_CFIEN;
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_VLNCTRL, ctrl);
+
if (grp) {
/* enable VLAN tag insert/strip */
ctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_VLNCTRL);
@@ -1896,6 +1908,44 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter)
}
}
+#ifdef CONFIG_IXGBE_DCBNL
+/*
+ * ixgbe_configure_dcb - Configure DCB hardware
+ * @adapter: ixgbe adapter struct
+ *
+ * This is called by the driver on open to configure the DCB hardware.
+ * This is also called by the gennetlink interface when reconfiguring
+ * the DCB state.
+ */
+static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 txdctl, vlnctrl;
+ int i, j;
+
+ ixgbe_dcb_check_config(&adapter->dcb_cfg);
+ ixgbe_dcb_calculate_tc_credits(&adapter->dcb_cfg, DCB_TX_CONFIG);
+ ixgbe_dcb_calculate_tc_credits(&adapter->dcb_cfg, DCB_RX_CONFIG);
+
+ /* reconfigure the hardware */
+ ixgbe_dcb_hw_config(&adapter->hw, &adapter->dcb_cfg);
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ j = adapter->tx_ring[i].reg_idx;
+ txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(j));
+ /* PThresh workaround for Tx hang with DFP enabled. */
+ txdctl |= 32;
+ IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(j), txdctl);
+ }
+ /* Enable VLAN tag insert/strip */
+ vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+ vlnctrl |= IXGBE_VLNCTRL_VME | IXGBE_VLNCTRL_VFE;
+ vlnctrl &= ~IXGBE_VLNCTRL_CFIEN;
+ IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+ hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true);
+}
+
+#endif
static void ixgbe_configure(struct ixgbe_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
@@ -1904,6 +1954,16 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
ixgbe_set_rx_mode(netdev);
ixgbe_restore_vlan(adapter);
+#ifdef CONFIG_IXGBE_DCBNL
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+ netif_set_gso_max_size(netdev, 32768);
+ ixgbe_configure_dcb(adapter);
+ } else {
+ netif_set_gso_max_size(netdev, 65536);
+ }
+#else
+ netif_set_gso_max_size(netdev, 65536);
+#endif
ixgbe_configure_tx(adapter);
ixgbe_configure_rx(adapter);
@@ -1995,9 +2055,6 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter)
ixgbe_irq_enable(adapter);
- /* enable transmits */
- netif_tx_start_all_queues(netdev);
-
/* bring the link up in the watchdog, this could race with our first
* link up interrupt but shouldn't be a problem */
adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -2260,6 +2317,11 @@ static void ixgbe_reset_task(struct work_struct *work)
struct ixgbe_adapter *adapter;
adapter = container_of(work, struct ixgbe_adapter, reset_task);
+ /* If we're already down or resetting, just bail */
+ if (test_bit(__IXGBE_DOWN, &adapter->state) ||
+ test_bit(__IXGBE_RESETTING, &adapter->state))
+ return;
+
adapter->tx_timeout_count++;
ixgbe_reinit_locked(adapter);
@@ -2269,15 +2331,31 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
{
int nrq = 1, ntq = 1;
int feature_mask = 0, rss_i, rss_m;
+ int dcb_i, dcb_m;
/* Number of supported queues */
switch (adapter->hw.mac.type) {
case ixgbe_mac_82598EB:
+ dcb_i = adapter->ring_feature[RING_F_DCB].indices;
+ dcb_m = 0;
rss_i = adapter->ring_feature[RING_F_RSS].indices;
rss_m = 0;
feature_mask |= IXGBE_FLAG_RSS_ENABLED;
+ feature_mask |= IXGBE_FLAG_DCB_ENABLED;
switch (adapter->flags & feature_mask) {
+ case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_DCB_ENABLED):
+ dcb_m = 0x7 << 3;
+ rss_i = min(8, rss_i);
+ rss_m = 0x7;
+ nrq = dcb_i * rss_i;
+ ntq = min(MAX_TX_QUEUES, dcb_i * rss_i);
+ break;
+ case (IXGBE_FLAG_DCB_ENABLED):
+ dcb_m = 0x7 << 3;
+ nrq = dcb_i;
+ ntq = dcb_i;
+ break;
case (IXGBE_FLAG_RSS_ENABLED):
rss_m = 0xF;
nrq = rss_i;
@@ -2285,6 +2363,8 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
break;
case 0:
default:
+ dcb_i = 0;
+ dcb_m = 0;
rss_i = 0;
rss_m = 0;
nrq = 1;
@@ -2292,6 +2372,12 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
break;
}
+ /* Sanity check, we should never have zero queues */
+ nrq = (nrq ?:1);
+ ntq = (ntq ?:1);
+
+ adapter->ring_feature[RING_F_DCB].indices = dcb_i;
+ adapter->ring_feature[RING_F_DCB].mask = dcb_m;
adapter->ring_feature[RING_F_RSS].indices = rss_i;
adapter->ring_feature[RING_F_RSS].mask = rss_m;
break;
@@ -2343,6 +2429,7 @@ static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter,
adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
kfree(adapter->msix_entries);
adapter->msix_entries = NULL;
+ adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
ixgbe_set_num_queues(adapter);
} else {
@@ -2362,15 +2449,42 @@ static void __devinit ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
{
int feature_mask = 0, rss_i;
int i, txr_idx, rxr_idx;
+ int dcb_i;
/* Number of supported queues */
switch (adapter->hw.mac.type) {
case ixgbe_mac_82598EB:
+ dcb_i = adapter->ring_feature[RING_F_DCB].indices;
rss_i = adapter->ring_feature[RING_F_RSS].indices;
txr_idx = 0;
rxr_idx = 0;
+ feature_mask |= IXGBE_FLAG_DCB_ENABLED;
feature_mask |= IXGBE_FLAG_RSS_ENABLED;
switch (adapter->flags & feature_mask) {
+ case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_DCB_ENABLED):
+ for (i = 0; i < dcb_i; i++) {
+ int j;
+ /* Rx first */
+ for (j = 0; j < adapter->num_rx_queues; j++) {
+ adapter->rx_ring[rxr_idx].reg_idx =
+ i << 3 | j;
+ rxr_idx++;
+ }
+ /* Tx now */
+ for (j = 0; j < adapter->num_tx_queues; j++) {
+ adapter->tx_ring[txr_idx].reg_idx =
+ i << 2 | (j >> 1);
+ if (j & 1)
+ txr_idx++;
+ }
+ }
+ case (IXGBE_FLAG_DCB_ENABLED):
+ /* the number of queues is assumed to be symmetric */
+ for (i = 0; i < dcb_i; i++) {
+ adapter->rx_ring[i].reg_idx = i << 3;
+ adapter->tx_ring[i].reg_idx = i << 2;
+ }
+ break;
case (IXGBE_FLAG_RSS_ENABLED):
for (i = 0; i < adapter->num_rx_queues; i++)
adapter->rx_ring[i].reg_idx = i;
@@ -2395,7 +2509,7 @@ static void __devinit ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
* number of queues at compile-time. The polling_netdev array is
* intended for Multiqueue, but should work fine with a single queue.
**/
-static int __devinit ixgbe_alloc_queues(struct ixgbe_adapter *adapter)
+static int ixgbe_alloc_queues(struct ixgbe_adapter *adapter)
{
int i;
@@ -2465,6 +2579,7 @@ static int __devinit ixgbe_set_interrupt_capability(struct ixgbe_adapter
adapter->msix_entries = kcalloc(v_budget,
sizeof(struct msix_entry), GFP_KERNEL);
if (!adapter->msix_entries) {
+ adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
ixgbe_set_num_queues(adapter);
kfree(adapter->tx_ring);
@@ -2505,7 +2620,7 @@ out:
return err;
}
-static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
+void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
{
if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
@@ -2529,7 +2644,7 @@ static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
* - Hardware queue count (num_*_queues)
* - defined by miscellaneous hardware support/features (RSS, etc.)
**/
-static int __devinit ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
+int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
{
int err;
@@ -2577,6 +2692,10 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
struct ixgbe_hw *hw = &adapter->hw;
struct pci_dev *pdev = adapter->pdev;
unsigned int rss;
+#ifdef CONFIG_IXGBE_DCBNL
+ int j;
+ struct tc_configuration *tc;
+#endif
/* PCI config space info */
@@ -2590,6 +2709,27 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
rss = min(IXGBE_MAX_RSS_INDICES, (int)num_online_cpus());
adapter->ring_feature[RING_F_RSS].indices = rss;
adapter->flags |= IXGBE_FLAG_RSS_ENABLED;
+ adapter->ring_feature[RING_F_DCB].indices = IXGBE_MAX_DCB_INDICES;
+
+#ifdef CONFIG_IXGBE_DCBNL
+ /* Configure DCB traffic classes */
+ for (j = 0; j < MAX_TRAFFIC_CLASS; j++) {
+ tc = &adapter->dcb_cfg.tc_config[j];
+ tc->path[DCB_TX_CONFIG].bwg_id = 0;
+ tc->path[DCB_TX_CONFIG].bwg_percent = 12 + (j & 1);
+ tc->path[DCB_RX_CONFIG].bwg_id = 0;
+ tc->path[DCB_RX_CONFIG].bwg_percent = 12 + (j & 1);
+ tc->dcb_pfc = pfc_disabled;
+ }
+ adapter->dcb_cfg.bw_percentage[DCB_TX_CONFIG][0] = 100;
+ adapter->dcb_cfg.bw_percentage[DCB_RX_CONFIG][0] = 100;
+ adapter->dcb_cfg.rx_pba_cfg = pba_equal;
+ adapter->dcb_cfg.round_robin_enable = false;
+ adapter->dcb_set_bitmap = 0x00;
+ ixgbe_copy_dcb_cfg(&adapter->dcb_cfg, &adapter->temp_dcb_cfg,
+ adapter->ring_feature[RING_F_DCB].indices);
+
+#endif
if (hw->mac.ops.get_media_type &&
(hw->mac.ops.get_media_type(hw) == ixgbe_media_type_copper))
adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE;
@@ -2967,7 +3107,7 @@ static int ixgbe_close(struct net_device *netdev)
* @adapter: private struct
* helper function to napi_add each possible q_vector->napi
*/
-static void ixgbe_napi_add_all(struct ixgbe_adapter *adapter)
+void ixgbe_napi_add_all(struct ixgbe_adapter *adapter)
{
int q_idx, q_vectors;
int (*poll)(struct napi_struct *, int);
@@ -2988,7 +3128,7 @@ static void ixgbe_napi_add_all(struct ixgbe_adapter *adapter)
}
}
-static void ixgbe_napi_del_all(struct ixgbe_adapter *adapter)
+void ixgbe_napi_del_all(struct ixgbe_adapter *adapter)
{
int q_idx;
int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
@@ -3109,6 +3249,18 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
adapter->stats.mpc[i] += mpc;
total_mpc += adapter->stats.mpc[i];
adapter->stats.rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
+ adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
+ adapter->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
+ adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
+ adapter->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
+ adapter->stats.pxonrxc[i] += IXGBE_READ_REG(hw,
+ IXGBE_PXONRXC(i));
+ adapter->stats.pxontxc[i] += IXGBE_READ_REG(hw,
+ IXGBE_PXONTXC(i));
+ adapter->stats.pxoffrxc[i] += IXGBE_READ_REG(hw,
+ IXGBE_PXOFFRXC(i));
+ adapter->stats.pxofftxc[i] += IXGBE_READ_REG(hw,
+ IXGBE_PXOFFTXC(i));
}
adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
/* work around hardware counting issue */
@@ -3248,6 +3400,7 @@ static void ixgbe_watchdog_task(struct work_struct *work)
(FLOW_TX ? "TX" : "None"))));
netif_carrier_on(netdev);
+ netif_tx_wake_all_queues(netdev);
} else {
/* Force detection of hung controller */
adapter->detect_tx_hung = true;
@@ -3258,6 +3411,7 @@ static void ixgbe_watchdog_task(struct work_struct *work)
if (netif_carrier_ok(netdev)) {
DPRINTK(LINK, INFO, "NIC Link is Down\n");
netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(netdev);
}
}
@@ -3604,6 +3758,14 @@ static int ixgbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
tx_flags |= vlan_tx_tag_get(skb);
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+ tx_flags &= ~IXGBE_TX_FLAGS_VLAN_PRIO_MASK;
+ tx_flags |= (skb->queue_mapping << 13);
+ }
+ tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT;
+ tx_flags |= IXGBE_TX_FLAGS_VLAN;
+ } else if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+ tx_flags |= (skb->queue_mapping << 13);
tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT;
tx_flags |= IXGBE_TX_FLAGS_VLAN;
}
@@ -3878,6 +4040,13 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
netdev->vlan_features |= NETIF_F_IP_CSUM;
netdev->vlan_features |= NETIF_F_SG;
+ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
+ adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+
+#ifdef CONFIG_IXGBE_DCBNL
+ netdev->dcbnl_ops = &dcbnl_ops;
+#endif
+
if (pci_using_dac)
netdev->features |= NETIF_F_HIGHDMA;
@@ -3946,6 +4115,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
}
netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(netdev);
ixgbe_napi_add_all(adapter);
diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
new file mode 100644
index 00000000000..32d32c1ee41
--- /dev/null
+++ b/include/linux/dcbnl.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Lucy Liu <lucy.liu@intel.com>
+ */
+
+#ifndef __LINUX_DCBNL_H__
+#define __LINUX_DCBNL_H__
+
+#define DCB_PROTO_VERSION 1
+
+struct dcbmsg {
+ unsigned char dcb_family;
+ __u8 cmd;
+ __u16 dcb_pad;
+};
+
+/**
+ * enum dcbnl_commands - supported DCB commands
+ *
+ * @DCB_CMD_UNDEFINED: unspecified command to catch errors
+ * @DCB_CMD_GSTATE: request the state of DCB in the device
+ * @DCB_CMD_SSTATE: set the state of DCB in the device
+ * @DCB_CMD_PGTX_GCFG: request the priority group configuration for Tx
+ * @DCB_CMD_PGTX_SCFG: set the priority group configuration for Tx
+ * @DCB_CMD_PGRX_GCFG: request the priority group configuration for Rx
+ * @DCB_CMD_PGRX_SCFG: set the priority group configuration for Rx
+ * @DCB_CMD_PFC_GCFG: request the priority flow control configuration
+ * @DCB_CMD_PFC_SCFG: set the priority flow control configuration
+ * @DCB_CMD_SET_ALL: apply all changes to the underlying device
+ * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying
+ * device. Only useful when using bonding.
+ */
+enum dcbnl_commands {
+ DCB_CMD_UNDEFINED,
+
+ DCB_CMD_GSTATE,
+ DCB_CMD_SSTATE,
+
+ DCB_CMD_PGTX_GCFG,
+ DCB_CMD_PGTX_SCFG,
+ DCB_CMD_PGRX_GCFG,
+ DCB_CMD_PGRX_SCFG,
+
+ DCB_CMD_PFC_GCFG,
+ DCB_CMD_PFC_SCFG,
+
+ DCB_CMD_SET_ALL,
+ DCB_CMD_GPERM_HWADDR,
+
+ __DCB_CMD_ENUM_MAX,
+ DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
+};
+
+
+/**
+ * enum dcbnl_attrs - DCB top-level netlink attributes
+ *
+ * @DCB_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_ATTR_IFNAME: interface name of the underlying device (NLA_STRING)
+ * @DCB_ATTR_STATE: enable state of DCB in the device (NLA_U8)
+ * @DCB_ATTR_PFC_STATE: enable state of PFC in the device (NLA_U8)
+ * @DCB_ATTR_PFC_CFG: priority flow control configuration (NLA_NESTED)
+ * @DCB_ATTR_NUM_TC: number of traffic classes supported in the device (NLA_U8)
+ * @DCB_ATTR_PG_CFG: priority group configuration (NLA_NESTED)
+ * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8)
+ * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED)
+ */
+enum dcbnl_attrs {
+ DCB_ATTR_UNDEFINED,
+
+ DCB_ATTR_IFNAME,
+ DCB_ATTR_STATE,
+ DCB_ATTR_PFC_STATE,
+ DCB_ATTR_PFC_CFG,
+ DCB_ATTR_NUM_TC,
+ DCB_ATTR_PG_CFG,
+ DCB_ATTR_SET_ALL,
+ DCB_ATTR_PERM_HWADDR,
+
+ __DCB_ATTR_ENUM_MAX,
+ DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcbnl_pfc_attrs - DCB Priority Flow Control user priority nested attrs
+ *
+ * @DCB_PFC_UP_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_PFC_UP_ATTR_0: Priority Flow Control value for User Priority 0 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_1: Priority Flow Control value for User Priority 1 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_2: Priority Flow Control value for User Priority 2 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_3: Priority Flow Control value for User Priority 3 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_4: Priority Flow Control value for User Priority 4 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_5: Priority Flow Control value for User Priority 5 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_6: Priority Flow Control value for User Priority 6 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_7: Priority Flow Control value for User Priority 7 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_MAX: highest attribute number currently defined
+ * @DCB_PFC_UP_ATTR_ALL: apply to all priority flow control attrs (NLA_FLAG)
+ *
+ */
+enum dcbnl_pfc_up_attrs {
+ DCB_PFC_UP_ATTR_UNDEFINED,
+
+ DCB_PFC_UP_ATTR_0,
+ DCB_PFC_UP_ATTR_1,
+ DCB_PFC_UP_ATTR_2,
+ DCB_PFC_UP_ATTR_3,
+ DCB_PFC_UP_ATTR_4,
+ DCB_PFC_UP_ATTR_5,
+ DCB_PFC_UP_ATTR_6,
+ DCB_PFC_UP_ATTR_7,
+ DCB_PFC_UP_ATTR_ALL,
+
+ __DCB_PFC_UP_ATTR_ENUM_MAX,
+ DCB_PFC_UP_ATTR_MAX = __DCB_PFC_UP_ATTR_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcbnl_pg_attrs - DCB Priority Group attributes
+ *
+ * @DCB_PG_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_PG_ATTR_TC_0: Priority Group Traffic Class 0 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_1: Priority Group Traffic Class 1 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_2: Priority Group Traffic Class 2 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_3: Priority Group Traffic Class 3 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_4: Priority Group Traffic Class 4 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_5: Priority Group Traffic Class 5 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_6: Priority Group Traffic Class 6 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_7: Priority Group Traffic Class 7 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_MAX: highest attribute number currently defined
+ * @DCB_PG_ATTR_TC_ALL: apply to all traffic classes (NLA_NESTED)
+ * @DCB_PG_ATTR_BW_ID_0: Percent of link bandwidth for Priority Group 0 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_1: Percent of link bandwidth for Priority Group 1 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_2: Percent of link bandwidth for Priority Group 2 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_3: Percent of link bandwidth for Priority Group 3 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_4: Percent of link bandwidth for Priority Group 4 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_5: Percent of link bandwidth for Priority Group 5 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_6: Percent of link bandwidth for Priority Group 6 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_7: Percent of link bandwidth for Priority Group 7 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_MAX: highest attribute number currently defined
+ * @DCB_PG_ATTR_BW_ID_ALL: apply to all priority groups (NLA_FLAG)
+ *
+ */
+enum dcbnl_pg_attrs {
+ DCB_PG_ATTR_UNDEFINED,
+
+ DCB_PG_ATTR_TC_0,
+ DCB_PG_ATTR_TC_1,
+ DCB_PG_ATTR_TC_2,
+ DCB_PG_ATTR_TC_3,
+ DCB_PG_ATTR_TC_4,
+ DCB_PG_ATTR_TC_5,
+ DCB_PG_ATTR_TC_6,
+ DCB_PG_ATTR_TC_7,
+ DCB_PG_ATTR_TC_MAX,
+ DCB_PG_ATTR_TC_ALL,
+
+ DCB_PG_ATTR_BW_ID_0,
+ DCB_PG_ATTR_BW_ID_1,
+ DCB_PG_ATTR_BW_ID_2,
+ DCB_PG_ATTR_BW_ID_3,
+ DCB_PG_ATTR_BW_ID_4,
+ DCB_PG_ATTR_BW_ID_5,
+ DCB_PG_ATTR_BW_ID_6,
+ DCB_PG_ATTR_BW_ID_7,
+ DCB_PG_ATTR_BW_ID_MAX,
+ DCB_PG_ATTR_BW_ID_ALL,
+
+ __DCB_PG_ATTR_ENUM_MAX,
+ DCB_PG_ATTR_MAX = __DCB_PG_ATTR_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcbnl_tc_attrs - DCB Traffic Class attributes
+ *
+ * @DCB_TC_ATTR_PARAM_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_TC_ATTR_PARAM_PGID: (NLA_U8) Priority group the traffic class belongs to
+ * Valid values are: 0-7
+ * @DCB_TC_ATTR_PARAM_UP_MAPPING: (NLA_U8) Traffic class to user priority map
+ * Some devices may not support changing the
+ * user priority map of a TC.
+ * @DCB_TC_ATTR_PARAM_STRICT_PRIO: (NLA_U8) Strict priority setting
+ * 0 - none
+ * 1 - group strict
+ * 2 - link strict
+ * @DCB_TC_ATTR_PARAM_BW_PCT: optional - (NLA_U8) If supported by the device and
+ * not configured to use link strict priority,
+ * this is the percentage of bandwidth of the
+ * priority group this traffic class belongs to
+ * @DCB_TC_ATTR_PARAM_ALL: (NLA_FLAG) all traffic class parameters
+ *
+ */
+enum dcbnl_tc_attrs {
+ DCB_TC_ATTR_PARAM_UNDEFINED,
+
+ DCB_TC_ATTR_PARAM_PGID,
+ DCB_TC_ATTR_PARAM_UP_MAPPING,
+ DCB_TC_ATTR_PARAM_STRICT_PRIO,
+ DCB_TC_ATTR_PARAM_BW_PCT,
+ DCB_TC_ATTR_PARAM_ALL,
+
+ __DCB_TC_ATTR_PARAM_ENUM_MAX,
+ DCB_TC_ATTR_PARAM_MAX = __DCB_TC_ATTR_PARAM_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcb_general_attr_values - general DCB attribute values
+ *
+ * @DCB_ATTR_UNDEFINED: value used to indicate an attribute is not supported
+ *
+ */
+enum dcb_general_attr_values {
+ DCB_ATTR_VALUE_UNDEFINED = 0xff
+};
+
+
+#endif /* __LINUX_DCBNL_H__ */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d8fb23679ee..6095af572df 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -43,6 +43,9 @@
#include <net/net_namespace.h>
#include <net/dsa.h>
+#ifdef CONFIG_DCBNL
+#include <net/dcbnl.h>
+#endif
struct vlan_group;
struct ethtool_ops;
@@ -843,6 +846,11 @@ struct net_device
#define GSO_MAX_SIZE 65536
unsigned int gso_max_size;
+#ifdef CONFIG_DCBNL
+ /* Data Center Bridging netlink ops */
+ struct dcbnl_rtnl_ops *dcbnl_ops;
+#endif
+
#ifdef CONFIG_COMPAT_NET_DEV_OPS
struct {
int (*init)(struct net_device *dev);
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 2b3d51c6ec9..e88f7058b3a 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -107,6 +107,11 @@ enum {
RTM_GETADDRLABEL,
#define RTM_GETADDRLABEL RTM_GETADDRLABEL
+ RTM_GETDCB = 78,
+#define RTM_GETDCB RTM_GETDCB
+ RTM_SETDCB,
+#define RTM_SETDCB RTM_SETDCB
+
__RTM_MAX,
#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
};
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
new file mode 100644
index 00000000000..0ef0c5a46d8
--- /dev/null
+++ b/include/net/dcbnl.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Lucy Liu <lucy.liu@intel.com>
+ */
+
+#ifndef __NET_DCBNL_H__
+#define __NET_DCBNL_H__
+
+/*
+ * Ops struct for the netlink callbacks. Used by DCB-enabled drivers through
+ * the netdevice struct.
+ */
+struct dcbnl_rtnl_ops {
+ u8 (*getstate)(struct net_device *);
+ void (*setstate)(struct net_device *, u8);
+ void (*getpermhwaddr)(struct net_device *, u8 *);
+ void (*setpgtccfgtx)(struct net_device *, int, u8, u8, u8, u8);
+ void (*setpgbwgcfgtx)(struct net_device *, int, u8);
+ void (*setpgtccfgrx)(struct net_device *, int, u8, u8, u8, u8);
+ void (*setpgbwgcfgrx)(struct net_device *, int, u8);
+ void (*getpgtccfgtx)(struct net_device *, int, u8 *, u8 *, u8 *, u8 *);
+ void (*getpgbwgcfgtx)(struct net_device *, int, u8 *);
+ void (*getpgtccfgrx)(struct net_device *, int, u8 *, u8 *, u8 *, u8 *);
+ void (*getpgbwgcfgrx)(struct net_device *, int, u8 *);
+ void (*setpfccfg)(struct net_device *, int, u8);
+ void (*getpfccfg)(struct net_device *, int, u8 *);
+ u8 (*setall)(struct net_device *);
+};
+
+#endif /* __NET_DCBNL_H__ */
diff --git a/net/Kconfig b/net/Kconfig
index 4e2e40ba8ba..c7d01c3a23c 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -194,6 +194,7 @@ source "net/lapb/Kconfig"
source "net/econet/Kconfig"
source "net/wanrouter/Kconfig"
source "net/sched/Kconfig"
+source "net/dcb/Kconfig"
menu "Network testing"
diff --git a/net/Makefile b/net/Makefile
index 27d1f10dc0e..83b064651f1 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -57,6 +57,9 @@ obj-$(CONFIG_NETLABEL) += netlabel/
obj-$(CONFIG_IUCV) += iucv/
obj-$(CONFIG_RFKILL) += rfkill/
obj-$(CONFIG_NET_9P) += 9p/
+ifeq ($(CONFIG_DCBNL),y)
+obj-$(CONFIG_DCB) += dcb/
+endif
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_SYSCTL) += sysctl_net.o
diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig
new file mode 100644
index 00000000000..bdf38802d33
--- /dev/null
+++ b/net/dcb/Kconfig
@@ -0,0 +1,12 @@
+config DCB
+ tristate "Data Center Bridging support"
+
+config DCBNL
+ bool "Data Center Bridging netlink interface support"
+ depends on DCB
+ default n
+ ---help---
+ This option turns on the netlink interface
+ (dcbnl) for Data Center Bridging capable devices.
+
+ If unsure, say N.
diff --git a/net/dcb/Makefile b/net/dcb/Makefile
new file mode 100644
index 00000000000..9930f4cde81
--- /dev/null
+++ b/net/dcb/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_DCB) += dcbnl.o
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
new file mode 100644
index 00000000000..516e8be83d7
--- /dev/null
+++ b/net/dcb/dcbnl.c
@@ -0,0 +1,704 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Lucy Liu <lucy.liu@intel.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
+#include <net/rtnetlink.h>
+#include <linux/dcbnl.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
+
+/**
+ * Data Center Bridging (DCB) is a collection of Ethernet enhancements
+ * intended to allow network traffic with differing requirements
+ * (highly reliable, no drops vs. best effort vs. low latency) to operate
+ * and co-exist on Ethernet. Current DCB features are:
+ *
+ * Enhanced Transmission Selection (aka Priority Grouping [PG]) - provides a
+ * framework for assigning bandwidth guarantees to traffic classes.
+ *
+ * Priority-based Flow Control (PFC) - provides a flow control mechanism which
+ * can work independently for each 802.1p priority.
+ *
+ * Congestion Notification - provides a mechanism for end-to-end congestion
+ * control for protocols which do not have built-in congestion management.
+ *
+ * More information about the emerging standards for these Ethernet features
+ * can be found at: http://www.ieee802.org/1/pages/dcbridges.html
+ *
+ * This file implements an rtnetlink interface to allow configuration of DCB
+ * features for capable devices.
+ */
+
+MODULE_AUTHOR("Lucy Liu, <lucy.liu@intel.com>");
+MODULE_DESCRIPTION("Data Center Bridging generic netlink interface");
+MODULE_LICENSE("GPL");
+
+/**************** DCB attribute policies *************************************/
+
+/* DCB netlink attributes policy */
+static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
+ [DCB_ATTR_IFNAME] = {.type = NLA_STRING, .len = IFNAMSIZ - 1},
+ [DCB_ATTR_STATE] = {.type = NLA_U8},
+ [DCB_ATTR_PFC_CFG] = {.type = NLA_NESTED},
+ [DCB_ATTR_PG_CFG] = {.type = NLA_NESTED},
+ [DCB_ATTR_SET_ALL] = {.type = NLA_U8},
+ [DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG},
+};
+
+/* DCB priority flow control to User Priority nested attributes */
+static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = {
+ [DCB_PFC_UP_ATTR_0] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_1] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_2] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_3] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_4] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_5] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_6] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_7] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG},
+};
+
+/* DCB priority grouping nested attributes */
+static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = {
+ [DCB_PG_ATTR_TC_0] = {.type = NLA_NESTED},
+ [DCB_PG_ATTR_TC_1] = {.type = NLA_NESTED},
+ [DCB_PG_ATTR_TC_2] = {.type = NLA_NESTED},
+ [DCB_PG_ATTR_TC_3] = {.type = NLA_NESTED},
+ [DCB_PG_ATTR_TC_4] = {.type = NLA_NESTED},
+ [DCB_PG_ATTR_TC_5] = {.type = NLA_NESTED},
+ [DCB_PG_ATTR_TC_6] = {.type = NLA_NESTED},
+ [DCB_PG_ATTR_TC_7] = {.type = NLA_NESTED},
+ [DCB_PG_ATTR_TC_ALL] = {.type = NLA_NESTED},
+ [DCB_PG_ATTR_BW_ID_0] = {.type = NLA_U8},
+ [DCB_PG_ATTR_BW_ID_1] = {.type = NLA_U8},
+ [DCB_PG_ATTR_BW_ID_2] = {.type = NLA_U8},
+ [DCB_PG_ATTR_BW_ID_3] = {.type = NLA_U8},
+ [DCB_PG_ATTR_BW_ID_4] = {.type = NLA_U8},
+ [DCB_PG_ATTR_BW_ID_5] = {.type = NLA_U8},
+ [DCB_PG_ATTR_BW_ID_6] = {.type = NLA_U8},
+ [DCB_PG_ATTR_BW_ID_7] = {.type = NLA_U8},
+ [DCB_PG_ATTR_BW_ID_ALL] = {.type = NLA_FLAG},
+};
+
+/* DCB traffic class nested attributes. */
+static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = {
+ [DCB_TC_ATTR_PARAM_PGID] = {.type = NLA_U8},
+ [DCB_TC_ATTR_PARAM_UP_MAPPING] = {.type = NLA_U8},
+ [DCB_TC_ATTR_PARAM_STRICT_PRIO] = {.type = NLA_U8},
+ [DCB_TC_ATTR_PARAM_BW_PCT] = {.type = NLA_U8},
+ [DCB_TC_ATTR_PARAM_ALL] = {.type = NLA_FLAG},
+};
+
+
+/* standard netlink reply call */
+static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
+ u32 seq, u16 flags)
+{
+ struct sk_buff *dcbnl_skb;
+ struct dcbmsg *dcb;
+ struct nlmsghdr *nlh;
+ int ret = -EINVAL;
+
+ dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!dcbnl_skb)
+ return ret;
+
+ nlh = NLMSG_NEW(dcbnl_skb, pid, seq, event, sizeof(*dcb), flags);
+
+ dcb = NLMSG_DATA(nlh);
+ dcb->dcb_family = AF_UNSPEC;
+ dcb->cmd = cmd;
+ dcb->dcb_pad = 0;
+
+ ret = nla_put_u8(dcbnl_skb, attr, value);
+ if (ret)
+ goto err;
+
+ /* end the message, assign the nlmsg_len. */
+ nlmsg_end(dcbnl_skb, nlh);
+ ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+ if (ret)
+ goto err;
+
+ return 0;
+nlmsg_failure:
+err:
+ kfree(dcbnl_skb);
+ return ret;
+}
+
+static int dcbnl_getstate(struct net_device *netdev, struct nlattr **tb,
+ u32 pid, u32 seq, u16 flags)
+{
+ int ret = -EINVAL;
+
+ /* if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->getstate) */
+ if (!netdev->dcbnl_ops->getstate)
+ return ret;
+
+ ret = dcbnl_reply(netdev->dcbnl_ops->getstate(netdev), RTM_GETDCB,
+ DCB_CMD_GSTATE, DCB_ATTR_STATE, pid, seq, flags);
+
+ return ret;
+}
+
+static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb,
+ u32 pid, u32 seq, u16 flags)
+{
+ struct sk_buff *dcbnl_skb;
+ struct nlmsghdr *nlh;
+ struct dcbmsg *dcb;
+ struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1], *nest;
+ u8 value;
+ int ret = -EINVAL;
+ int i;
+ int getall = 0;
+
+ if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->getpfccfg)
+ return ret;
+
+ ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
+ tb[DCB_ATTR_PFC_CFG],
+ dcbnl_pfc_up_nest);
+ if (ret)
+ goto err_out;
+
+ dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!dcbnl_skb)
+ goto err_out;
+
+ nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+ dcb = NLMSG_DATA(nlh);
+ dcb->dcb_family = AF_UNSPEC;
+ dcb->cmd = DCB_CMD_PFC_GCFG;
+
+ nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PFC_CFG);
+ if (!nest)
+ goto err;
+
+ if (data[DCB_PFC_UP_ATTR_ALL])
+ getall = 1;
+
+ for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
+ if (!getall && !data[i])
+ continue;
+
+ netdev->dcbnl_ops->getpfccfg(netdev, i - DCB_PFC_UP_ATTR_0,
+ &value);
+ ret = nla_put_u8(dcbnl_skb, i, value);
+
+ if (ret) {
+ nla_nest_cancel(dcbnl_skb, nest);
+ goto err;
+ }
+ }
+ nla_nest_end(dcbnl_skb, nest);
+
+ nlmsg_end(dcbnl_skb, nlh);
+
+ ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+ if (ret)
+ goto err;
+
+ return 0;
+nlmsg_failure:
+err:
+ kfree(dcbnl_skb);
+err_out:
+ return -EINVAL;
+}
+
+static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb,
+ u32 pid, u32 seq, u16 flags)
+{
+ struct sk_buff *dcbnl_skb;
+ struct nlmsghdr *nlh;
+ struct dcbmsg *dcb;
+ u8 perm_addr[MAX_ADDR_LEN];
+ int ret = -EINVAL;
+
+ if (!netdev->dcbnl_ops->getpermhwaddr)
+ return ret;
+
+ dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!dcbnl_skb)
+ goto err_out;
+
+ nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+ dcb = NLMSG_DATA(nlh);
+ dcb->dcb_family = AF_UNSPEC;
+ dcb->cmd = DCB_CMD_GPERM_HWADDR;
+
+ netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr);
+
+ ret = nla_put(dcbnl_skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr),
+ perm_addr);
+
+ nlmsg_end(dcbnl_skb, nlh);
+
+ ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+ if (ret)
+ goto err;
+
+ return 0;
+
+nlmsg_failure:
+err:
+ kfree(dcbnl_skb);
+err_out:
+ return -EINVAL;
+}
+
+static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb,
+ u32 pid, u32 seq, u16 flags, int dir)
+{
+ struct sk_buff *dcbnl_skb;
+ struct nlmsghdr *nlh;
+ struct dcbmsg *dcb;
+ struct nlattr *pg_nest, *param_nest, *data;
+ struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1];
+ struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1];
+ u8 prio, pgid, tc_pct, up_map;
+ int ret = -EINVAL;
+ int getall = 0;
+ int i;
+
+ if (!tb[DCB_ATTR_PG_CFG] ||
+ !netdev->dcbnl_ops->getpgtccfgtx ||
+ !netdev->dcbnl_ops->getpgtccfgrx ||
+ !netdev->dcbnl_ops->getpgbwgcfgtx ||
+ !netdev->dcbnl_ops->getpgbwgcfgrx)
+ return ret;
+
+ ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
+ tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
+
+ if (ret)
+ goto err_out;
+
+ dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!dcbnl_skb)
+ goto err_out;
+
+ nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+ dcb = NLMSG_DATA(nlh);
+ dcb->dcb_family = AF_UNSPEC;
+ dcb->cmd = (dir) ? DCB_CMD_PGRX_GCFG : DCB_CMD_PGTX_GCFG;
+
+ pg_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PG_CFG);
+ if (!pg_nest)
+ goto err;
+
+ if (pg_tb[DCB_PG_ATTR_TC_ALL])
+ getall = 1;
+
+ for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) {
+ if (!getall && !pg_tb[i])
+ continue;
+
+ if (pg_tb[DCB_PG_ATTR_TC_ALL])
+ data = pg_tb[DCB_PG_ATTR_TC_ALL];
+ else
+ data = pg_tb[i];
+ ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
+ data, dcbnl_tc_param_nest);
+ if (ret)
+ goto err_pg;
+
+ param_nest = nla_nest_start(dcbnl_skb, i);
+ if (!param_nest)
+ goto err_pg;
+
+ pgid = DCB_ATTR_VALUE_UNDEFINED;
+ prio = DCB_ATTR_VALUE_UNDEFINED;
+ tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+ up_map = DCB_ATTR_VALUE_UNDEFINED;
+
+ if (dir) {
+ /* Rx */
+ netdev->dcbnl_ops->getpgtccfgrx(netdev,
+ i - DCB_PG_ATTR_TC_0, &prio,
+ &pgid, &tc_pct, &up_map);
+ } else {
+ /* Tx */
+ netdev->dcbnl_ops->getpgtccfgtx(netdev,
+ i - DCB_PG_ATTR_TC_0, &prio,
+ &pgid, &tc_pct, &up_map);
+ }
+
+ if (param_tb[DCB_TC_ATTR_PARAM_PGID] ||
+ param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+ ret = nla_put_u8(dcbnl_skb,
+ DCB_TC_ATTR_PARAM_PGID, pgid);
+ if (ret)
+ goto err_param;
+ }
+ if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING] ||
+ param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+ ret = nla_put_u8(dcbnl_skb,
+ DCB_TC_ATTR_PARAM_UP_MAPPING, up_map);
+ if (ret)
+ goto err_param;
+ }
+ if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO] ||
+ param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+ ret = nla_put_u8(dcbnl_skb,
+ DCB_TC_ATTR_PARAM_STRICT_PRIO, prio);
+ if (ret)
+ goto err_param;
+ }
+ if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT] ||
+ param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+ ret = nla_put_u8(dcbnl_skb, DCB_TC_ATTR_PARAM_BW_PCT,
+ tc_pct);
+ if (ret)
+ goto err_param;
+ }
+ nla_nest_end(dcbnl_skb, param_nest);
+ }
+
+ if (pg_tb[DCB_PG_ATTR_BW_ID_ALL])
+ getall = 1;
+ else
+ getall = 0;
+
+ for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) {
+ if (!getall && !pg_tb[i])
+ continue;
+
+ tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+
+ if (dir) {
+ /* Rx */
+ netdev->dcbnl_ops->getpgbwgcfgrx(netdev,
+ i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
+ } else {
+ /* Tx */
+ netdev->dcbnl_ops->getpgbwgcfgtx(netdev,
+ i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
+ }
+ ret = nla_put_u8(dcbnl_skb, i, tc_pct);
+
+ if (ret)
+ goto err_pg;
+ }
+
+ nla_nest_end(dcbnl_skb, pg_nest);
+
+ nlmsg_end(dcbnl_skb, nlh);
+
+ ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+ if (ret)
+ goto err;
+
+ return 0;
+
+err_param:
+ nla_nest_cancel(dcbnl_skb, param_nest);
+err_pg:
+ nla_nest_cancel(dcbnl_skb, pg_nest);
+nlmsg_failure:
+err:
+ kfree(dcbnl_skb);
+err_out:
+ ret = -EINVAL;
+ return ret;
+}
+
+static int dcbnl_pgtx_getcfg(struct net_device *netdev, struct nlattr **tb,
+ u32 pid, u32 seq, u16 flags)
+{
+ return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 0);
+}
+
+static int dcbnl_pgrx_getcfg(struct net_device *netdev, struct nlattr **tb,
+ u32 pid, u32 seq, u16 flags)
+{
+ return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 1);
+}
+
+static int dcbnl_setstate(struct net_device *netdev, struct nlattr **tb,
+ u32 pid, u32 seq, u16 flags)
+{
+ int ret = -EINVAL;
+ u8 value;
+
+ if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->setstate)
+ return ret;
+
+ value = nla_get_u8(tb[DCB_ATTR_STATE]);
+
+ netdev->dcbnl_ops->setstate(netdev, value);
+
+ ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE,
+ pid, seq, flags);
+
+ return ret;
+}
+
+static int dcbnl_setpfccfg(struct net_device *netdev, struct nlattr **tb,
+ u32 pid, u32 seq, u16 flags)
+{
+ struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1];
+ int i;
+ int ret = -EINVAL;
+ u8 value;
+
+ if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->setpfccfg)
+ return ret;
+
+ ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
+ tb[DCB_ATTR_PFC_CFG],
+ dcbnl_pfc_up_nest);
+ if (ret)
+ goto err;
+
+ for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
+ if (data[i] == NULL)
+ continue;
+ value = nla_get_u8(data[i]);
+ netdev->dcbnl_ops->setpfccfg(netdev,
+ data[i]->nla_type - DCB_PFC_UP_ATTR_0, value);
+ }
+
+ ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SCFG, DCB_ATTR_PFC_CFG,
+ pid, seq, flags);
+err:
+ return ret;
+}
+
+static int dcbnl_setall(struct net_device *netdev, struct nlattr **tb,
+ u32 pid, u32 seq, u16 flags)
+{
+ int ret = -EINVAL;
+
+ if (!tb[DCB_ATTR_SET_ALL] || !netdev->dcbnl_ops->setall)
+ return ret;
+
+ ret = dcbnl_reply(netdev->dcbnl_ops->setall(netdev), RTM_SETDCB,
+ DCB_CMD_SET_ALL, DCB_ATTR_SET_ALL, pid, seq, flags);
+
+ return ret;
+}
+
+static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlattr **tb,
+ u32 pid, u32 seq, u16 flags, int dir)
+{
+ struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1];
+ struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1];
+ int ret = -EINVAL;
+ int i;
+ u8 pgid;
+ u8 up_map;
+ u8 prio;
+ u8 tc_pct;
+
+ if (!tb[DCB_ATTR_PG_CFG] ||
+ !netdev->dcbnl_ops->setpgtccfgtx ||
+ !netdev->dcbnl_ops->setpgtccfgrx ||
+ !netdev->dcbnl_ops->setpgbwgcfgtx ||
+ !netdev->dcbnl_ops->setpgbwgcfgrx)
+ return ret;
+
+ ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
+ tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
+ if (ret)
+ goto err;
+
+ for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) {
+ if (!pg_tb[i])
+ continue;
+
+ ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
+ pg_tb[i], dcbnl_tc_param_nest);
+ if (ret)
+ goto err;
+
+ pgid = DCB_ATTR_VALUE_UNDEFINED;
+ prio = DCB_ATTR_VALUE_UNDEFINED;
+ tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+ up_map = DCB_ATTR_VALUE_UNDEFINED;
+
+ if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO])
+ prio =
+ nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO]);
+
+ if (param_tb[DCB_TC_ATTR_PARAM_PGID])
+ pgid = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_PGID]);
+
+ if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT])
+ tc_pct = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_BW_PCT]);
+
+ if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING])
+ up_map =
+ nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING]);
+
+ /* dir: Tx = 0, Rx = 1 */
+ if (dir) {
+ /* Rx */
+ netdev->dcbnl_ops->setpgtccfgrx(netdev,
+ i - DCB_PG_ATTR_TC_0,
+ prio, pgid, tc_pct, up_map);
+ } else {
+ /* Tx */
+ netdev->dcbnl_ops->setpgtccfgtx(netdev,
+ i - DCB_PG_ATTR_TC_0,
+ prio, pgid, tc_pct, up_map);
+ }
+ }
+
+ for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) {
+ if (!pg_tb[i])
+ continue;
+
+ tc_pct = nla_get_u8(pg_tb[i]);
+
+ /* dir: Tx = 0, Rx = 1 */
+ if (dir) {
+ /* Rx */
+ netdev->dcbnl_ops->setpgbwgcfgrx(netdev,
+ i - DCB_PG_ATTR_BW_ID_0, tc_pct);
+ } else {
+ /* Tx */
+ netdev->dcbnl_ops->setpgbwgcfgtx(netdev,
+ i - DCB_PG_ATTR_BW_ID_0, tc_pct);
+ }
+ }
+
+ ret = dcbnl_reply(0, RTM_SETDCB,
+ (dir ? DCB_CMD_PGRX_SCFG : DCB_CMD_PGTX_SCFG),
+ DCB_ATTR_PG_CFG, pid, seq, flags);
+
+err:
+ return ret;
+}
+
+static int dcbnl_pgtx_setcfg(struct net_device *netdev, struct nlattr **tb,
+ u32 pid, u32 seq, u16 flags)
+{
+ return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 0);
+}
+
+static int dcbnl_pgrx_setcfg(struct net_device *netdev, struct nlattr **tb,
+ u32 pid, u32 seq, u16 flags)
+{
+ return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 1);
+}
+
+static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *netdev;
+ struct dcbmsg *dcb = (struct dcbmsg *)NLMSG_DATA(nlh);
+ struct nlattr *tb[DCB_ATTR_MAX + 1];
+ u32 pid = skb ? NETLINK_CB(skb).pid : 0;
+ int ret = -EINVAL;
+
+ if (net != &init_net)
+ return -EINVAL;
+
+ ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX,
+ dcbnl_rtnl_policy);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[DCB_ATTR_IFNAME])
+ return -EINVAL;
+
+ netdev = dev_get_by_name(&init_net, nla_data(tb[DCB_ATTR_IFNAME]));
+ if (!netdev)
+ return -EINVAL;
+
+ if (!netdev->dcbnl_ops)
+ goto errout;
+
+ switch (dcb->cmd) {
+ case DCB_CMD_GSTATE:
+ ret = dcbnl_getstate(netdev, tb, pid, nlh->nlmsg_seq,
+ nlh->nlmsg_flags);
+ goto out;
+ case DCB_CMD_PFC_GCFG:
+ ret = dcbnl_getpfccfg(netdev, tb, pid, nlh->nlmsg_seq,
+ nlh->nlmsg_flags);
+ goto out;
+ case DCB_CMD_GPERM_HWADDR:
+ ret = dcbnl_getperm_hwaddr(netdev, tb, pid, nlh->nlmsg_seq,
+ nlh->nlmsg_flags);
+ goto out;
+ case DCB_CMD_PGTX_GCFG:
+ ret = dcbnl_pgtx_getcfg(netdev, tb, pid, nlh->nlmsg_seq,
+ nlh->nlmsg_flags);
+ goto out;
+ case DCB_CMD_PGRX_GCFG:
+ ret = dcbnl_pgrx_getcfg(netdev, tb, pid, nlh->nlmsg_seq,
+ nlh->nlmsg_flags);
+ goto out;
+ case DCB_CMD_SSTATE:
+ ret = dcbnl_setstate(netdev, tb, pid, nlh->nlmsg_seq,
+ nlh->nlmsg_flags);
+ goto out;
+ case DCB_CMD_PFC_SCFG:
+ ret = dcbnl_setpfccfg(netdev, tb, pid, nlh->nlmsg_seq,
+ nlh->nlmsg_flags);
+ goto out;
+
+ case DCB_CMD_SET_ALL:
+ ret = dcbnl_setall(netdev, tb, pid, nlh->nlmsg_seq,
+ nlh->nlmsg_flags);
+ goto out;
+ case DCB_CMD_PGTX_SCFG:
+ ret = dcbnl_pgtx_setcfg(netdev, tb, pid, nlh->nlmsg_seq,
+ nlh->nlmsg_flags);
+ goto out;
+ case DCB_CMD_PGRX_SCFG:
+ ret = dcbnl_pgrx_setcfg(netdev, tb, pid, nlh->nlmsg_seq,
+ nlh->nlmsg_flags);
+ goto out;
+ default:
+ goto errout;
+ }
+errout:
+ ret = -EINVAL;
+out:
+ dev_put(netdev);
+ return ret;
+}
+
+static int __init dcbnl_init(void)
+{
+ rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL);
+ rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL);
+
+ return 0;
+}
+module_init(dcbnl_init);
+
+static void __exit dcbnl_exit(void)
+{
+ rtnl_unregister(PF_UNSPEC, RTM_GETDCB);
+ rtnl_unregister(PF_UNSPEC, RTM_SETDCB);
+}
+module_exit(dcbnl_exit);
+
+