/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/delay.h>
#include <linux/kmod.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static uint8_t num_vfs[3] = {0, 0, 0};
static int num_vfs_argc;
module_param_array(num_vfs, byte, &num_vfs_argc, 0444);
MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
			  "num_vfs=port1,port2,port1+2");

static uint8_t probe_vf[3] = {0, 0, 0};
static int probe_vfs_argc;
module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
			   "probe_vf=port1,port2,port1+2");

int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
module_param_named(log_num_mgm_entry_size,
			mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
					 " of qp per mcg, for example:"
					 " 10 gives 248. Range: 7 <="
					 " log_num_mgm_entry_size <= 12."
					 " To activate device managed"
					 " flow steering when available, set to -1");

static bool enable_64b_cqe_eqe = true;
module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
		 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");

#define PF_CONTEXT_BEHAVIOUR_MASK	(MLX4_FUNC_CAP_64B_EQE_CQE | \
					 MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
					 MLX4_FUNC_CAP_DMFS_A0_STATIC)

#define RESET_PERSIST_MASK_FLAGS	(MLX4_FLAG_SRIOV)

static char mlx4_version[] =
	DRV_NAME ": Mellanox ConnectX core driver v"
	DRV_VERSION " (" DRV_RELDATE ")\n";

static struct mlx4_profile default_profile = {
	.num_qp		= 1 << 18,
	.num_srq	= 1 << 16,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 16,
	.num_mcg	= 1 << 13,
	.num_mpt	= 1 << 19,
	.num_mtt	= 1 << 20, /* It is really num mtt segments */
};

static struct mlx4_profile low_mem_profile = {
	.num_qp		= 1 << 17,
	.num_srq	= 1 << 6,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 8,
	.num_mcg	= 1 << 8,
	.num_mpt	= 1 << 9,
	.num_mtt	= 1 << 7,
};

static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7
#define MLX4_MIN_LOG_NUM_VLANS 0
#define MLX4_MIN_LOG_NUM_MAC 1

static bool use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");

int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");

static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
static int arr_argc = 2;
module_param_array(port_type_array, int, &arr_argc, 0444);
MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default "
				"1 for IB, 2 for Ethernet");

struct mlx4_port_config {
	struct list_head list;
	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
	struct pci_dev *pdev;
};

static atomic_t pf_loading = ATOMIC_INIT(0);

int mlx4_check_port_params(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_type)
{
	int i;

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
		for (i = 0; i < dev->caps.num_ports - 1; i++) {
			if (port_type[i] != port_type[i + 1]) {
				mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
				return -EINVAL;
			}
		}
	}

	for (i = 0; i < dev->caps.num_ports; i++) {
		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
			mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
				 i + 1);
			return -EINVAL;
		}
	}
	return 0;
}

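/* Copy each port's configured type into the per-port protocol mask so
 * later setup code can test ports by protocol.
 */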
static void mlx4_set_port_mask(struct mlx4_dev *dev)
{
	int i;

	for (i = 1; i <= dev->caps.num_ports; ++i)
		dev->caps.port_mask[i] = dev->caps.port_type[i];
}

enum {
	MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
};

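/* When the device exposes system EQs, refresh the per-function EQ/UAR
 * limits from QUERY_FUNC; the MLX4_QUERY_FUNC_NUM_SYS_EQS bit in the
 * return value tells the caller that these values were updated.
 */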
static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err = 0;
	struct mlx4_func func;

	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
		err = mlx4_QUERY_FUNC(dev, &func, 0);
		if (err) {
			mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
			return err;
		}
		dev_cap->max_eqs = func.max_eq;
		dev_cap->reserved_eqs = func.rsvd_eqs;
		dev_cap->reserved_uars = func.rsvd_uars;
		err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
	}
	return err;
}

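/* Decide whether CQE/EQE stride can be used: it requires both stride
 * capability bits and 64B CQE/EQE support from FW, and only pays off
 * on cache lines larger than 64 bytes.
 */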
static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
{
	struct mlx4_caps *dev_cap = &dev->caps;

	/* FW not supporting or cancelled by user */
	if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
	    !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
		return;

	/* Must have 64B CQE_EQE enabled by FW to use bigger stride
	 * When FW has NCSI it may decide not to report 64B CQE/EQEs
	 */
	if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
	    !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
		return;
	}

	if (cache_line_size() == 128 || cache_line_size() == 256) {
		mlx4_dbg(dev, "Enabling CQE stride cacheLine supported\n");
		/* Changing the real data inside CQE size to 32B */
		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;

		if (mlx4_is_master(dev))
			dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
	} else {
		if (cache_line_size() != 32  && cache_line_size() != 64)
			mlx4_dbg(dev, "Disabling CQE stride, cacheLine size unsupported\n");
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
	}
}

static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
			  struct mlx4_port_cap *port_cap)
{
	dev->caps.vl_cap[port]	    = port_cap->max_vl;
	dev->caps.ib_mtu_cap[port]	    = port_cap->ib_mtu;
	dev->phys_caps.gid_phys_table_len[port]  = port_cap->max_gids;
	dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys;
	/* set gid and pkey table operating lengths by default
	 * to non-sriov values
	 */
	dev->caps.gid_table_len[port]  = port_cap->max_gids;
	dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
	dev->caps.port_width_cap[port] = port_cap->max_port_width;
	dev->caps.eth_mtu_cap[port]    = port_cap->eth_mtu;
	dev->caps.def_mac[port]        = port_cap->def_mac;
	dev->caps.supported_type[port] = port_cap->supported_port_types;
	dev->caps.suggested_type[port] = port_cap->suggested_type;
	dev->caps.default_sense[port] = port_cap->default_sense;
	dev->caps.trans_type[port]	    = port_cap->trans_type;
	dev->caps.vendor_oui[port]     = port_cap->vendor_oui;
	dev->caps.wavelength[port]     = port_cap->wavelength;
	dev->caps.trans_code[port]     = port_cap->trans_code;

	return 0;
}

static int mlx4_dev_port(struct mlx4_dev *dev, int port,
			 struct mlx4_port_cap *port_cap)
{
	int err = 0;

	err = mlx4_QUERY_PORT(dev, port, port_cap);

	if (err)
		mlx4_err(dev, "QUERY_PORT command failed.\n");

	return err;
}

static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev)
{
	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS))
		return;

	if (mlx4_is_mfunc(dev)) {
		mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS");
		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
		return;
	}

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
		mlx4_dbg(dev,
			 "Keep FCS is not supported - Disabling Ignore FCS");
		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
		return;
	}
}

#define MLX4_A0_STEERING_TABLE_SIZE	256
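/* Read device capabilities via QUERY_DEV_CAP and translate them into
 * the driver's dev->caps limits, including per-port type selection and
 * the reserved QP region layout.
 */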
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err;
	int i;

	err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
		return err;
	}
	mlx4_dev_cap_dump(dev, dev_cap);

	if (dev_cap->min_page_sz > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
			 dev_cap->min_page_sz, PAGE_SIZE);
		return -ENODEV;
	}
	if (dev_cap->num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
			 dev_cap->num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

	if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
		mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
			 dev_cap->uar_size,
			 (unsigned long long)
			 pci_resource_len(dev->persist->pdev, 2));
		return -ENODEV;
	}

	dev->caps.num_ports	     = dev_cap->num_ports;
	dev->caps.num_sys_eqs = dev_cap->num_sys_eqs;
	dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
				      dev->caps.num_sys_eqs :
				      MLX4_MAX_EQ_NUM;
	for (i = 1; i <= dev->caps.num_ports; ++i) {
		err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i);
		if (err) {
			mlx4_err(dev, "QUERY_PORT command failed, aborting\n");
			return err;
		}
	}

	dev->caps.uar_page_size	     = PAGE_SIZE;
	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
	dev->caps.bf_reg_size	     = dev_cap->bf_reg_size;
	dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
	dev->caps.max_sq_sg	     = dev_cap->max_sq_sg;
	dev->caps.max_rq_sg	     = dev_cap->max_rq_sg;
	dev->caps.max_wqes	     = dev_cap->max_qp_sz;
	dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
	dev->caps.max_srq_wqes	     = dev_cap->max_srq_sz;
	dev->caps.max_srq_sge	     = dev_cap->max_rq_sg - 1;
	dev->caps.reserved_srqs	     = dev_cap->reserved_srqs;
	dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
	dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
	/*
	 * Subtract 1 from the limit because we need to allocate a
	 * spare CQE so the HCA HW can tell the difference between an
	 * empty CQ and a full CQ.
	 */
	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
	dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;

	/* The first 128 UARs are used for EQ doorbells */
	dev->caps.reserved_uars	     = max_t(int, 128, dev_cap->reserved_uars);
	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
	dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->reserved_xrcds : 0;
	dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->max_xrcds : 0;
	dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;

	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
	dev->caps.flags		     = dev_cap->flags;
	dev->caps.flags2	     = dev_cap->flags2;
	dev->caps.bmme_flags	     = dev_cap->bmme_flags;
	dev->caps.reserved_lkey	     = dev_cap->reserved_lkey;
	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;

	/* Sense port always allowed on supported devices for ConnectX-1 and -2 */
	if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
		dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
	/* Don't do sense port on multifunction devices (for now at least) */
	if (mlx4_is_mfunc(dev))
		dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;

	if (mlx4_low_memory_profile()) {
		dev->caps.log_num_macs  = MLX4_MIN_LOG_NUM_MAC;
		dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS;
	} else {
		dev->caps.log_num_macs  = log_num_mac;
		dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
	}

	for (i = 1; i <= dev->caps.num_ports; ++i) {
		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
		if (dev->caps.supported_type[i]) {
			/* if only ETH is supported - assign ETH */
			if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
			/* if only IB is supported, assign IB */
			else if (dev->caps.supported_type[i] ==
				 MLX4_PORT_TYPE_IB)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
			else {
				/* if IB and ETH are supported, we set the port
				 * type according to user selection of port type;
				 * if user selected none, take the FW hint */
				if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
					dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
						MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
				else
					dev->caps.port_type[i] = port_type_array[i - 1];
			}
		}
		/*
		 * Link sensing is allowed on the port if 3 conditions are true:
		 * 1. Both protocols are supported on the port.
		 * 2. Different types are supported on the port
		 * 3. FW declared that it supports link sensing
		 */
		mlx4_priv(dev)->sense.sense_allowed[i] =
			((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));

		/*
		 * If "default_sense" bit is set, we move the port to "AUTO" mode
		 * and perform sense_port FW command to try and set the correct
		 * port type from beginning
		 */
		if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
			enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
			dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
			mlx4_SENSE_PORT(dev, i, &sensed_port);
			if (sensed_port != MLX4_PORT_TYPE_NONE)
				dev->caps.port_type[i] = sensed_port;
		} else {
			dev->caps.possible_type[i] = dev->caps.port_type[i];
		}

		if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) {
			dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs;
			mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n",
				  i, 1 << dev->caps.log_num_macs);
		}
		if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) {
			dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans;
			mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n",
				  i, 1 << dev->caps.log_num_vlans);
		}
	}

	dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters);

	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
		(1 << dev->caps.log_num_macs) *
		(1 << dev->caps.log_num_vlans) *
		dev->caps.num_ports;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

	if (dev_cap->dmfs_high_rate_qpn_base > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)
		dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base;
	else
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];

	if (dev_cap->dmfs_high_rate_qpn_range > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
		dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range;
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT;
		dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0;
	} else {
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED;
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
		dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
	}

	dev->caps.rl_caps = dev_cap->rl_caps;

	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
		dev->caps.dmfs_high_rate_qpn_range;

	dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;

	if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
		if (dev_cap->flags &
		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
		}

		if (dev_cap->flags2 &
		    (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
		     MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
			mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
		}
	}

	if ((dev->caps.flags &
	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
	    mlx4_is_master(dev))
		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;

	if (!mlx4_is_slave(dev)) {
		mlx4_enable_cqe_eqe_stride(dev);
		dev->caps.alloc_res_qp_mask =
			(dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
			MLX4_RESERVE_A0_QP;

		if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) &&
		    dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) {
			mlx4_warn(dev, "Old device ETS support detected\n");
			mlx4_warn(dev, "Consider upgrading device FW.\n");
			dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
		}

	} else {
		dev->caps.alloc_res_qp_mask = 0;
	}

	mlx4_enable_ignore_fcs(dev);

	return 0;
}

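/* Read the maximum speed and width supported by the device's PCIe port
 * from the Link Capabilities registers (LNKCAP2 for PCIe r3.0 and
 * later, LNKCAP otherwise).
 */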
static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev,
				       enum pci_bus_speed *speed,
				       enum pcie_link_width *width)
{
	u32 lnkcap1, lnkcap2;
	int err1, err2;

#define  PCIE_MLW_CAP_SHIFT 4	/* start of MLW mask in link capabilities */

	*speed = PCI_SPEED_UNKNOWN;
	*width = PCIE_LNK_WIDTH_UNKNOWN;

	err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP,
					  &lnkcap1);
	err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2,
					  &lnkcap2);
	if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */
		if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
			*speed = PCIE_SPEED_8_0GT;
		else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
			*speed = PCIE_SPEED_5_0GT;
		else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
			*speed = PCIE_SPEED_2_5GT;
	}
	if (!err1) {
		*width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT;
		if (!lnkcap2) { /* pre-r3.0 */
			if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB)
				*speed = PCIE_SPEED_5_0GT;
			else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB)
				*speed = PCIE_SPEED_2_5GT;
		}
	}

	if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) {
		return err1 ? err1 :
			err2 ? err2 : -EINVAL;
	}
	return 0;
}

static void mlx4_check_pcie_caps(struct mlx4_dev *dev)
{
	enum pcie_link_width width, width_cap;
	enum pci_bus_speed speed, speed_cap;
	int err;

#define PCIE_SPEED_STR(speed) \
	(speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : \
	 speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \
	 speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : \
	 "Unknown")

	err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap);
	if (err) {
		mlx4_warn(dev,
			  "Unable to determine PCIe device BW capabilities\n");
		return;
	}

	err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width);
	if (err || speed == PCI_SPEED_UNKNOWN ||
	    width == PCIE_LNK_WIDTH_UNKNOWN) {
		mlx4_warn(dev,
			  "Unable to determine PCI device chain minimum BW\n");
		return;
	}

	if (width != width_cap || speed != speed_cap)
		mlx4_warn(dev,
			  "PCIe BW is different than device's capability\n");

	mlx4_info(dev, "PCIe link speed is %s, device supports %s\n",
		  PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap));
	mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n",
		  width, width_cap);
	return;
}

/* The function checks whether there are live VFs and returns how many there are */
static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_state;
	int i;
	int ret = 0;

	for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
		s_state = &priv->mfunc.master.slave_state[i];
		if (s_state->active && s_state->last_cmd !=
		    MLX4_COMM_CMD_RESET) {
			mlx4_warn(dev, "%s: slave: %d is still active\n",
				  __func__, i);
			ret++;
		}
	}
	return ret;
}

int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
{
	u32 qk = MLX4_RESERVED_QKEY_BASE;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
	    qpn < dev->phys_caps.base_proxy_sqpn)
		return -EINVAL;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn)
		/* tunnel qp */
		qk += qpn - dev->phys_caps.base_tunnel_sqpn;
	else
		qk += qpn - dev->phys_caps.base_proxy_sqpn;
	*qkey = qk;
	return 0;
}
EXPORT_SYMBOL(mlx4_get_parav_qkey);

void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return;

	priv->virt2phys_pkey[slave][port - 1][i] = val;
}
EXPORT_SYMBOL(mlx4_sync_pkey_table);

void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return;

	priv->slave_node_guids[slave] = guid;
}
EXPORT_SYMBOL(mlx4_put_slave_node_guid);

__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return 0;

	return priv->slave_node_guids[slave];
}
EXPORT_SYMBOL(mlx4_get_slave_node_guid);

int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_slave;

	if (!mlx4_is_master(dev))
		return 0;

	s_slave = &priv->mfunc.master.slave_state[slave];
	return !!s_slave->active;
}
EXPORT_SYMBOL(mlx4_is_slave_active);

static void slave_adjust_steering_mode(struct mlx4_dev *dev,
				       struct mlx4_dev_cap *dev_cap,
				       struct mlx4_init_hca_param *hca_param)
{
	dev->caps.steering_mode = hca_param->steering_mode;
	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
		dev->caps.fs_log_max_ucast_qp_range_size =
			dev_cap->fs_log_max_ucast_qp_range_size;
	} else
		dev->caps.num_qp_per_mgm =
			4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);

	mlx4_dbg(dev, "Steering mode is: %s\n",
		 mlx4_steering_mode_str(dev->caps.steering_mode));
}

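/* VF/slave capability setup: everything is taken from QUERY_HCA,
 * QUERY_DEV_CAP and QUERY_FUNC_CAP as granted by the PF, including
 * per-port proxy/tunnel QP numbers and resource quotas.
 */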
static int mlx4_slave_cap(struct mlx4_dev *dev)
{
	int			   err;
	u32			   page_size;
	struct mlx4_dev_cap	   dev_cap;
	struct mlx4_func_cap	   func_cap;
	struct mlx4_init_hca_param hca_param;
	u8			   i;

	memset(&hca_param, 0, sizeof(hca_param));
	err = mlx4_QUERY_HCA(dev, &hca_param);
	if (err) {
		mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
		return err;
	}

	/* fail if the hca has an unknown global capability
	 * at this time global_caps should be always zeroed
	 */
	if (hca_param.global_caps) {
		mlx4_err(dev, "Unknown hca global capabilities\n");
		return -ENOSYS;
	}

	mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;

	dev->caps.hca_core_clock = hca_param.hca_core_clock;

	memset(&dev_cap, 0, sizeof(dev_cap));
	dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
	err = mlx4_dev_cap(dev, &dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
		return err;
	}

	err = mlx4_QUERY_FW(dev);
	if (err)
		mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");

	page_size = ~dev->caps.page_size_cap + 1;
	mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
	if (page_size > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
			 page_size, PAGE_SIZE);
		return -ENODEV;
	}

	/* slave gets uar page size from QUERY_HCA fw command */
	dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);

	/* TODO: relax this assumption */
	if (dev->caps.uar_page_size != PAGE_SIZE) {
		mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n",
			 dev->caps.uar_page_size, PAGE_SIZE);
		return -ENODEV;
	}

	memset(&func_cap, 0, sizeof(func_cap));
	err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
	if (err) {
		mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
			 err);
		return err;
	}

	if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
	    PF_CONTEXT_BEHAVIOUR_MASK) {
		mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
			 func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK);
		return -ENOSYS;
	}

	dev->caps.num_ports		= func_cap.num_ports;
	dev->quotas.qp			= func_cap.qp_quota;
	dev->quotas.srq			= func_cap.srq_quota;
	dev->quotas.cq			= func_cap.cq_quota;
	dev->quotas.mpt			= func_cap.mpt_quota;
	dev->quotas.mtt			= func_cap.mtt_quota;
	dev->caps.num_qps		= 1 << hca_param.log_num_qps;
	dev->caps.num_srqs		= 1 << hca_param.log_num_srqs;
	dev->caps.num_cqs		= 1 << hca_param.log_num_cqs;
	dev->caps.num_mpts		= 1 << hca_param.log_mpt_sz;
	dev->caps.num_eqs		= func_cap.max_eq;
	dev->caps.reserved_eqs		= func_cap.reserved_eq;
	dev->caps.reserved_lkey		= func_cap.reserved_lkey;
	dev->caps.num_pds               = MLX4_NUM_PDS;
	dev->caps.num_mgms              = 0;
	dev->caps.num_amgms             = 0;

	if (dev->caps.num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
			 dev->caps.num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

	dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
	dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
	dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
	dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
	dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);

	if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
	    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy ||
	    !dev->caps.qp0_qkey) {
		err = -ENOMEM;
		goto err_mem;
	}

	for (i = 1; i <= dev->caps.num_ports; ++i) {
		err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap);
		if (err) {
			mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
				 i, err);
			goto err_mem;
		}
		dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey;
		dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
		dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
		dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
		dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
		dev->caps.port_mask[i] = dev->caps.port_type[i];
		dev->caps.phys_port_id[i] = func_cap.phys_port_id;
		if (mlx4_get_slave_pkey_gid_tbl_len(dev, i,
						    &dev->caps.gid_table_len[i],
						    &dev->caps.pkey_table_len[i]))
			goto err_mem;
	}

	if (dev->caps.uar_page_size * (dev->caps.num_uars -
				       dev->caps.reserved_uars) >
				       pci_resource_len(dev->persist->pdev,
							2)) {
		mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
			 dev->caps.uar_page_size * dev->caps.num_uars,
			 (unsigned long long)
			 pci_resource_len(dev->persist->pdev, 2));
		goto err_mem;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
		dev->caps.eqe_size   = 64;
		dev->caps.eqe_factor = 1;
	} else {
		dev->caps.eqe_size   = 32;
		dev->caps.eqe_factor = 0;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
		dev->caps.cqe_size   = 64;
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
	} else {
		dev->caps.cqe_size   = 32;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
		dev->caps.eqe_size = hca_param.eqe_size;
		dev->caps.eqe_factor = 0;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
		dev->caps.cqe_size = hca_param.cqe_size;
		/* User still needs to know when CQE > 32B */
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
	}

	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
	mlx4_warn(dev, "Timestamping is not supported in slave mode\n");

	slave_adjust_steering_mode(dev, &dev_cap, &hca_param);
	mlx4_dbg(dev, "RSS support for IP fragments is %s\n",
		 hca_param.rss_ip_frags ? "on" : "off");

	if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
	    dev->caps.bf_reg_size)
		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;

	if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;

	return 0;

err_mem:
	kfree(dev->caps.qp0_qkey);
	kfree(dev->caps.qp0_tunnel);
	kfree(dev->caps.qp0_proxy);
	kfree(dev->caps.qp1_tunnel);
	kfree(dev->caps.qp1_proxy);
	dev->caps.qp0_qkey = NULL;
	dev->caps.qp0_tunnel = NULL;
	dev->caps.qp0_proxy = NULL;
	dev->caps.qp1_tunnel = NULL;
	dev->caps.qp1_proxy = NULL;

	return err;
}

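/* Asynchronously request mlx4_en and/or mlx4_ib according to the
 * configured port types.
 */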
static void mlx4_request_modules(struct mlx4_dev *dev)
{
	int port;
	int has_ib_port = false;
	int has_eth_port = false;
#define EN_DRV_NAME	"mlx4_en"
#define IB_DRV_NAME	"mlx4_ib"

	for (port = 1; port <= dev->caps.num_ports; port++) {
		if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
			has_ib_port = true;
		else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
			has_eth_port = true;
	}

	if (has_eth_port)
		request_module_nowait(EN_DRV_NAME);
	if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
		request_module_nowait(IB_DRV_NAME);
}

/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_types)
{
	int err = 0;
	int change = 0;
	int port;

	for (port = 0; port <  dev->caps.num_ports; port++) {
		/* Change the port type only if the new type is different
		 * from the current, and not set to Auto */
		if (port_types[port] != dev->caps.port_type[port + 1])
			change = 1;
	}
	if (change) {
		mlx4_unregister_device(dev);
		for (port = 1; port <= dev->caps.num_ports; port++) {
			mlx4_CLOSE_PORT(dev, port);
			dev->caps.port_type[port] = port_types[port - 1];
			err = mlx4_SET_PORT(dev, port, -1);
			if (err) {
				mlx4_err(dev, "Failed to set port %d, aborting\n",
					 port);
				goto out;
			}
		}
		mlx4_set_port_mask(dev);
		err = mlx4_register_device(dev);
		if (err) {
			mlx4_err(dev, "Failed to register device\n");
			goto out;
		}
		mlx4_request_modules(dev);
	}

out:
	return err;
}

static ssize_t show_port_type(struct device *dev,
			      struct device_attribute *attr,
			      char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	char type[8];

	sprintf(type, "%s",
		(mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
		"ib" : "eth");
	if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
		sprintf(buf, "auto (%s)\n", type);
	else
		sprintf(buf, "%s\n", type);

	return strlen(buf);
}

static ssize_t set_port_type(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	enum mlx4_port_type types[MLX4_MAX_PORTS];
	enum mlx4_port_type new_types[MLX4_MAX_PORTS];
	static DEFINE_MUTEX(set_port_type_mutex);
	int i;
	int err = 0;

	mutex_lock(&set_port_type_mutex);

	if (!strcmp(buf, "ib\n"))
		info->tmp_type = MLX4_PORT_TYPE_IB;
	else if (!strcmp(buf, "eth\n"))
		info->tmp_type = MLX4_PORT_TYPE_ETH;
	else if (!strcmp(buf, "auto\n"))
		info->tmp_type = MLX4_PORT_TYPE_AUTO;
	else {
		mlx4_err(mdev, "%s is not a supported port type\n", buf);
		err = -EINVAL;
		goto err_out;
	}

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	/* Possible type is always the one that was delivered */
	mdev->caps.possible_type[info->port] = info->tmp_type;

	for (i = 0; i < mdev->caps.num_ports; i++) {
		types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
					mdev->caps.possible_type[i+1];
		if (types[i] == MLX4_PORT_TYPE_AUTO)
			types[i] = mdev->caps.port_type[i+1];
	}

	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
	    !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
		for (i = 1; i <= mdev->caps.num_ports; i++) {
			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
				err = -EINVAL;
			}
		}
	}
	if (err) {
		mlx4_err(mdev, "Auto sensing is not supported on this HCA. Set only 'eth' or 'ib' for both ports (should be the same)\n");
		goto out;
	}

	mlx4_do_sense_ports(mdev, new_types, types);

	err = mlx4_check_port_params(mdev, new_types);
	if (err)
		goto out;

	/* We are about to apply the changes after the configuration
	 * was verified, no need to remember the temporary types
	 * any more */
	for (i = 0; i < mdev->caps.num_ports; i++)
		priv->port[i + 1].tmp_type = 0;

	err = mlx4_change_port_types(mdev, new_types);

out:
	mlx4_start_sense(mdev);
	mutex_unlock(&priv->port_mutex);
err_out:
	mutex_unlock(&set_port_type_mutex);

	return err ? err : count;
}

enum ibta_mtu {
	IB_MTU_256  = 1,
	IB_MTU_512  = 2,
	IB_MTU_1024 = 3,
	IB_MTU_2048 = 4,
	IB_MTU_4096 = 5
};

static inline int int_to_ibta_mtu(int mtu)
{
	switch (mtu) {
	case 256:  return IB_MTU_256;
	case 512:  return IB_MTU_512;
	case 1024: return IB_MTU_1024;
	case 2048: return IB_MTU_2048;
	case 4096: return IB_MTU_4096;
	default: return -1;
	}
}

static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
{
	switch (mtu) {
	case IB_MTU_256:  return  256;
	case IB_MTU_512:  return  512;
	case IB_MTU_1024: return 1024;
	case IB_MTU_2048: return 2048;
	case IB_MTU_4096: return 4096;
	default: return -1;
	}
}

static ssize_t show_port_ib_mtu(struct device *dev,
			     struct device_attribute *attr,
			     char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_mtu_attr);
	struct mlx4_dev *mdev = info->dev;

	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");

	sprintf(buf, "%d\n",
			ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
	return strlen(buf);
}

static ssize_t set_port_ib_mtu(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_mtu_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	int err, port, mtu, ibta_mtu = -1;

	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
		return -EINVAL;
	}

	err = kstrtoint(buf, 0, &mtu);
	if (!err)
		ibta_mtu = int_to_ibta_mtu(mtu);

	if (err || ibta_mtu < 0) {
		mlx4_err(mdev, "%s is an invalid IBTA mtu\n", buf);
		return -EINVAL;
	}

	mdev->caps.port_ib_mtu[info->port] = ibta_mtu;

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	mlx4_unregister_device(mdev);
	for (port = 1; port <= mdev->caps.num_ports; port++) {
		mlx4_CLOSE_PORT(mdev, port);
		err = mlx4_SET_PORT(mdev, port, -1);
		if (err) {
			mlx4_err(mdev, "Failed to set port %d, aborting\n",
				 port);
			goto err_set_port;
		}
	}
	err = mlx4_register_device(mdev);
err_set_port:
	mutex_unlock(&priv->port_mutex);
	mlx4_start_sense(mdev);
	return err ? err : count;
}

int mlx4_bond(struct mlx4_dev *dev)
{
	int ret = 0;
	struct mlx4_priv *priv = mlx4_priv(dev);

	mutex_lock(&priv->bond_mutex);

	if (!mlx4_is_bonded(dev))
		ret = mlx4_do_bond(dev, true);
	else
		ret = 0;

	mutex_unlock(&priv->bond_mutex);
	if (ret)
		mlx4_err(dev, "Failed to bond device: %d\n", ret);
	else
		mlx4_dbg(dev, "Device is bonded\n");
	return ret;
}
EXPORT_SYMBOL_GPL(mlx4_bond);

int mlx4_unbond(struct mlx4_dev *dev)
{
	int ret = 0;
	struct mlx4_priv *priv = mlx4_priv(dev);

	mutex_lock(&priv->bond_mutex);

	if (mlx4_is_bonded(dev))
		ret = mlx4_do_bond(dev, false);

	mutex_unlock(&priv->bond_mutex);
	if (ret)
		mlx4_err(dev, "Failed to unbond device: %d\n", ret);
	else
		mlx4_dbg(dev, "Device is unbonded\n");
	return ret;
}
EXPORT_SYMBOL_GPL(mlx4_unbond);


int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
{
	u8 port1 = v2p->port1;
	u8 port2 = v2p->port2;
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;

	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
		return -ENOTSUPP;

	mutex_lock(&priv->bond_mutex);

	/* zero means keep current mapping for this port */
	if (port1 == 0)
		port1 = priv->v2p.port1;
	if (port2 == 0)
		port2 = priv->v2p.port2;

	if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) ||
	    (port2 < 1) || (port2 > MLX4_MAX_PORTS) ||
	    (port1 == 2 && port2 == 1)) {
		/* besides boundary checks cross mapping makes
		 * no sense and therefore not allowed */
		err = -EINVAL;
	} else if ((port1 == priv->v2p.port1) &&
		 (port2 == priv->v2p.port2)) {
		err = 0;
	} else {
		err = mlx4_virt2phy_port_map(dev, port1, port2);
		if (!err) {
			mlx4_dbg(dev, "port map changed: [%d][%d]\n",
				 port1, port2);
			priv->v2p.port1 = port1;
			priv->v2p.port2 = port2;
		} else {
			mlx4_err(dev, "Failed to change port map: %d\n", err);
		}
	}

	mutex_unlock(&priv->bond_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(mlx4_port_map_set);

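/* Allocate ICM for the firmware area, map it with MAP_FA and start the
 * firmware with RUN_FW.
 */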
static int mlx4_load_fw(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;

	priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
					 GFP_HIGHUSER | __GFP_NOWARN, 0);
	if (!priv->fw.fw_icm) {
		mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
		return -ENOMEM;
	}

	err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
	if (err) {
		mlx4_err(dev, "MAP_FA command failed, aborting\n");
		goto err_free;
	}

	err = mlx4_RUN_FW(dev);
	if (err) {
		mlx4_err(dev, "RUN_FW command failed, aborting\n");
		goto err_unmap_fa;
	}

	return 0;

err_unmap_fa:
	mlx4_UNMAP_FA(dev);

err_free:
	mlx4_free_icm(dev, priv->fw.fw_icm, 0);
	return err;
}

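/* Map the per-type cMPT ICM regions for QPs, SRQs, CQs and EQs at
 * their type-specific offsets from cmpt_base.
 */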
static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
				int cmpt_entry_sz)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;
	int num_eqs;

	err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_QP *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err)
		goto err;

	err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_SRQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_srqs,
				  dev->caps.reserved_srqs, 0, 0);
	if (err)
		goto err_qp;

	err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_CQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_cqs,
				  dev->caps.reserved_cqs, 0, 0);
	if (err)
		goto err_srq;

	num_eqs = dev->phys_caps.num_phys_eqs;
	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_EQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
	if (err)
		goto err_cq;

	return 0;

err_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);

err_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);

err_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err:
	return err;
}

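/* Size and map all ICM tables (the auxiliary area, cMPT, EQ, MTT,
 * dMPT, the QP context tables, CQ, SRQ and MCG) that the HCA needs
 * before INIT_HCA.
 */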
static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
			 struct mlx4_init_hca_param *init_hca, u64 icm_size)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 aux_pages;
	int num_eqs;
	int err;

	err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
	if (err) {
		mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
		return err;
	}

	mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n",
		 (unsigned long long) icm_size >> 10,
		 (unsigned long long) aux_pages << 2);

	priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
					  GFP_HIGHUSER | __GFP_NOWARN, 0);
	if (!priv->fw.aux_icm) {
		mlx4_err(dev, "Couldn't allocate aux memory, aborting\n");
		return -ENOMEM;
	}

	err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
	if (err) {
		mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n");
		goto err_free_aux;
	}

	err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
	if (err) {
		mlx4_err(dev, "Failed to map cMPT context memory, aborting\n");
		goto err_unmap_aux;
	}


	num_eqs = dev->phys_caps.num_phys_eqs;
	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
				  num_eqs, num_eqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map EQ context memory, aborting\n");
		goto err_unmap_cmpt;
	}

	/*
	 * Reserved MTT entries must be aligned up to a cacheline
	 * boundary, since the FW will write to them, while the driver
	 * writes to all other MTT entries. (The variable
	 * dev->caps.mtt_entry_sz below is really the MTT segment
	 * size, not the raw entry size)
	 */
	dev->caps.reserved_mtts =
		ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
		      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;

	err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
				  init_hca->mtt_base,
				  dev->caps.mtt_entry_sz,
				  dev->caps.num_mtts,
				  dev->caps.reserved_mtts, 1, 0);
	if (err) {
		mlx4_err(dev, "Failed to map MTT context memory, aborting\n");
		goto err_unmap_eq;
	}

	err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
				  init_hca->dmpt_base,
				  dev_cap->dmpt_entry_sz,
				  dev->caps.num_mpts,
				  dev->caps.reserved_mrws, 1, 1);
	if (err) {
		mlx4_err(dev, "Failed to map dMPT context memory, aborting\n");
		goto err_unmap_mtt;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
				  init_hca->qpc_base,
				  dev_cap->qpc_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map QP context memory, aborting\n");
		goto err_unmap_dmpt;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
				  init_hca->auxc_base,
				  dev_cap->aux_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map AUXC context memory, aborting\n");
		goto err_unmap_qp;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
				  init_hca->altc_base,
				  dev_cap->altc_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map ALTC context memory, aborting\n");
		goto err_unmap_auxc;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
				  init_hca->rdmarc_base,
				  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
		goto err_unmap_altc;
	}

	err = mlx4_init_icm_table(dev, &priv->cq_table.table,
				  init_hca->cqc_base,
				  dev_cap->cqc_entry_sz,
				  dev->caps.num_cqs,
				  dev->caps.reserved_cqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map CQ context memory, aborting\n");
		goto err_unmap_rdmarc;
	}

	err = mlx4_init_icm_table(dev, &priv->srq_table.table,
				  init_hca->srqc_base,
				  dev_cap->srq_entry_sz,
				  dev->caps.num_srqs,
				  dev->caps.reserved_srqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map SRQ context memory, aborting\n");
		goto err_unmap_cq;
	}

	/*
	 * For flow steering device managed mode it is required to use
	 * mlx4_init_icm_table. For B0 steering mode it's not strictly
	 * required, but for simplicity just map the whole multicast
	 * group table now.  The table isn't very big and it's a lot
	 * easier than trying to track ref counts.
	 */
	err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
				  init_hca->mc_base,
				  mlx4_get_mgm_entry_size(dev),
				  dev->caps.num_mgms + dev->caps.num_amgms,
				  dev->caps.num_mgms + dev->caps.num_amgms,
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map MCG context memory, aborting\n");
		goto err_unmap_srq;
	}

	return 0;

err_unmap_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);

err_unmap_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);

err_unmap_rdmarc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);

err_unmap_altc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);

err_unmap_auxc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);

err_unmap_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);

err_unmap_dmpt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);

err_unmap_mtt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);

err_unmap_eq:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);

err_unmap_cmpt:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err_unmap_aux:
	mlx4_UNMAP_ICM_AUX(dev);

err_free_aux:
	mlx4_free_icm(dev, priv->fw.aux_icm, 0);

	return err;
}

static void mlx4_free_icms(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

	mlx4_UNMAP_ICM_AUX(dev);
	mlx4_free_icm(dev, priv->fw.aux_icm, 0);
}

static void mlx4_slave_exit(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	mutex_lock(&priv->cmd.slave_cmd_mutex);
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP,
			  MLX4_COMM_TIME))
		mlx4_warn(dev, "Failed to close slave function\n");
	mutex_unlock(&priv->cmd.slave_cmd_mutex);
}

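/* Map the BlueFlame region (the part of BAR 2 above the UAR pages) as
 * write-combining.
 */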
static int map_bf_area(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	resource_size_t bf_start;
	resource_size_t bf_len;
	int err = 0;

	if (!dev->caps.bf_reg_size)
		return -ENXIO;

	bf_start = pci_resource_start(dev->persist->pdev, 2) +
			(dev->caps.num_uars << PAGE_SHIFT);
	bf_len = pci_resource_len(dev->persist->pdev, 2) -
			(dev->caps.num_uars << PAGE_SHIFT);
	priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
	if (!priv->bf_mapping)
		err = -ENOMEM;

	return err;
}

static void unmap_bf_area(struct mlx4_dev *dev)
{
	if (mlx4_priv(dev)->bf_mapping)
		io_mapping_free(mlx4_priv(dev)->bf_mapping);
}

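/* Read the 64-bit internal clock; the high word is re-read to detect a
 * wrap of the low word between the two reads.
 */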
cycle_t mlx4_read_clock(struct mlx4_dev *dev)
{
	u32 clockhi, clocklo, clockhi1;
	cycle_t cycles;
	int i;
	struct mlx4_priv *priv = mlx4_priv(dev);

	for (i = 0; i < 10; i++) {
		clockhi = swab32(readl(priv->clock_mapping));
		clocklo = swab32(readl(priv->clock_mapping + 4));
		clockhi1 = swab32(readl(priv->clock_mapping));
		if (clockhi == clockhi1)
			break;
	}

	cycles = (u64) clockhi << 32 | (u64) clocklo;

	return cycles;
}
EXPORT_SYMBOL_GPL(mlx4_read_clock);


static int map_internal_clock(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	priv->clock_mapping =
		ioremap(pci_resource_start(dev->persist->pdev,
					   priv->fw.clock_bar) +
			priv->fw.clock_offset, MLX4_CLOCK_SIZE);

	if (!priv->clock_mapping)
		return -ENOMEM;

	return 0;
}

static void unmap_internal_clock(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	if (priv->clock_mapping)
		iounmap(priv->clock_mapping);
}

static void mlx4_close_hca(struct mlx4_dev *dev)
{
	unmap_internal_clock(dev);
	unmap_bf_area(dev);
	if (mlx4_is_slave(dev))
		mlx4_slave_exit(dev);
	else {
		mlx4_CLOSE_HCA(dev, 0);
		mlx4_free_icms(dev);
	}
}

static void mlx4_close_fw(struct mlx4_dev *dev)
{
	if (!mlx4_is_slave(dev)) {
		mlx4_UNMAP_FA(dev);
		mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
	}
}

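/* Poll the comm-channel flags word until the PF clears the offline bit
 * or the timeout expires.
 */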
static int mlx4_comm_check_offline(struct mlx4_dev *dev)
{
#define COMM_CHAN_OFFLINE_OFFSET 0x09

	u32 comm_flags;
	u32 offline_bit;
	unsigned long end;
	struct mlx4_priv *priv = mlx4_priv(dev);

	end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies;
	while (time_before(jiffies, end)) {
		comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
					  MLX4_COMM_CHAN_FLAGS));
		offline_bit = (comm_flags &
			       (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
		if (!offline_bit)
			return 0;
		/* There are cases as part of AER/Reset flow that PF needs
		 * around 100 msec to load. We therefore sleep for 100 msec
		 * to allow other tasks to make use of that CPU during this
		 * time interval.
		 */
		msleep(100);
	}
	mlx4_err(dev, "Communication channel is offline.\n");
	return -EIO;
}

static void mlx4_reset_vf_support(struct mlx4_dev *dev)
{
#define COMM_CHAN_RST_OFFSET 0x1e

	struct mlx4_priv *priv = mlx4_priv(dev);
	u32 comm_rst;
	u32 comm_caps;

	comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm +
				 MLX4_COMM_CHAN_CAPS));
	comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET));

	if (comm_rst)
		dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET;
}

1749static int mlx4_init_slave(struct mlx4_dev *dev)
1750{
1751	struct mlx4_priv *priv = mlx4_priv(dev);
1752	u64 dma = (u64) priv->mfunc.vhcr_dma;
1753	int ret_from_reset = 0;
1754	u32 slave_read;
1755	u32 cmd_channel_ver;
1756
1757	if (atomic_read(&pf_loading)) {
1758		mlx4_warn(dev, "PF is not ready - Deferring probe\n");
1759		return -EPROBE_DEFER;
1760	}
1761
1762	mutex_lock(&priv->cmd.slave_cmd_mutex);
1763	priv->cmd.max_cmds = 1;
1764	if (mlx4_comm_check_offline(dev)) {
1765		mlx4_err(dev, "PF is not responsive, skipping initialization\n");
1766		goto err_offline;
1767	}
1768
1769	mlx4_reset_vf_support(dev);
1770	mlx4_warn(dev, "Sending reset\n");
1771	ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
1772				       MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME);
	/* If we are in the middle of FLR, the slave will retry
	 * NUM_OF_RESET_RETRIES times before giving up. */
1775	if (ret_from_reset) {
1776		if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
1777			mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n");
1778			mutex_unlock(&priv->cmd.slave_cmd_mutex);
1779			return -EPROBE_DEFER;
1780		} else
1781			goto err;
1782	}
1783
1784	/* check the driver version - the slave I/F revision
1785	 * must match the master's */
1786	slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
1787	cmd_channel_ver = mlx4_comm_get_version();
1788
1789	if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
1790		MLX4_COMM_GET_IF_REV(slave_read)) {
1791		mlx4_err(dev, "slave driver version is not supported by the master\n");
1792		goto err;
1793	}
1794
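	/* Hand the 64-bit VHCR DMA address to the PF in 16-bit chunks over
	 * the comm channel: VHCR0 carries bits 63:48, VHCR1 bits 47:32,
	 * VHCR2 bits 31:16, and VHCR_EN the low 16 bits, which also enables
	 * the channel.
	 */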
1795	mlx4_warn(dev, "Sending vhcr0\n");
1796	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
1797			     MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1798		goto err;
1799	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
1800			     MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1801		goto err;
1802	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
1803			     MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1804		goto err;
1805	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma,
1806			  MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1807		goto err;
1808
1809	mutex_unlock(&priv->cmd.slave_cmd_mutex);
1810	return 0;
1811
1812err:
1813	mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0);
1814err_offline:
1815	mutex_unlock(&priv->cmd.slave_cmd_mutex);
1816	return -EIO;
1817}
1818
1819static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
1820{
1821	int i;
1822
1823	for (i = 1; i <= dev->caps.num_ports; i++) {
1824		if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
1825			dev->caps.gid_table_len[i] =
1826				mlx4_get_slave_num_gids(dev, 0, i);
1827		else
1828			dev->caps.gid_table_len[i] = 1;
1829		dev->caps.pkey_table_len[i] =
1830			dev->phys_caps.pkey_phys_table_len[i] - 1;
1831	}
1832}
1833
1834static int choose_log_fs_mgm_entry_size(int qp_per_entry)
1835{
1836	int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE;
1837
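	/* Pick the smallest entry size in [MIN, MAX] whose capacity covers
	 * qp_per_entry: an entry of 2^i bytes provides 4 * ((2^i)/16 - 2)
	 * QP slots.  Return -1 if no entry size is large enough.
	 */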
1838	for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
1839	      i++) {
1840		if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
1841			break;
1842	}
1843
1844	return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
1845}
1846
1847static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode)
1848{
1849	switch (dmfs_high_steer_mode) {
1850	case MLX4_STEERING_DMFS_A0_DEFAULT:
1851		return "default performance";
1852
1853	case MLX4_STEERING_DMFS_A0_DYNAMIC:
1854		return "dynamic hybrid mode";
1855
1856	case MLX4_STEERING_DMFS_A0_STATIC:
1857		return "performance optimized for limited rule configuration (static)";
1858
1859	case MLX4_STEERING_DMFS_A0_DISABLE:
1860		return "disabled performance optimized steering";
1861
1862	case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED:
1863		return "performance optimized steering not supported";
1864
1865	default:
1866		return "Unrecognized mode";
1867	}
1868}
1869
1870#define MLX4_DMFS_A0_STEERING			(1UL << 2)
1871
1872static void choose_steering_mode(struct mlx4_dev *dev,
1873				 struct mlx4_dev_cap *dev_cap)
1874{
1875	if (mlx4_log_num_mgm_entry_size <= 0) {
1876		if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) {
1877			if (dev->caps.dmfs_high_steer_mode ==
1878			    MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
1879				mlx4_err(dev, "DMFS high rate mode not supported\n");
1880			else
1881				dev->caps.dmfs_high_steer_mode =
1882					MLX4_STEERING_DMFS_A0_STATIC;
1883		}
1884	}
1885
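	/* Use device-managed flow steering only when the module parameter
	 * does not request a fixed entry size (<= 0), the firmware supports
	 * flow steering, every function (PF plus VFs) can fit in one
	 * steering entry when multi-function, and a valid MGM entry size
	 * can be chosen.
	 */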
1886	if (mlx4_log_num_mgm_entry_size <= 0 &&
1887	    dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
1888	    (!mlx4_is_mfunc(dev) ||
1889	     (dev_cap->fs_max_num_qp_per_entry >=
1890	     (dev->persist->num_vfs + 1))) &&
1891	    choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
1892		MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
1893		dev->oper_log_mgm_entry_size =
1894			choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
1895		dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
1896		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
1897		dev->caps.fs_log_max_ucast_qp_range_size =
1898			dev_cap->fs_log_max_ucast_qp_range_size;
1899	} else {
1900		if (dev->caps.dmfs_high_steer_mode !=
1901		    MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
1902			dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE;
1903		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
1904		    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
1905			dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
1906		else {
1907			dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
1908
1909			if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
1910			    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
1911				mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n");
1912		}
1913		dev->oper_log_mgm_entry_size =
1914			mlx4_log_num_mgm_entry_size > 0 ?
1915			mlx4_log_num_mgm_entry_size :
1916			MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
1917		dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
1918	}
1919	mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n",
1920		 mlx4_steering_mode_str(dev->caps.steering_mode),
1921		 dev->oper_log_mgm_entry_size,
1922		 mlx4_log_num_mgm_entry_size);
1923}
1924
1925static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
1926				       struct mlx4_dev_cap *dev_cap)
1927{
1928	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
1929	    dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)
1930		dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
1931	else
1932		dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
1933
1934	mlx4_dbg(dev, "Tunneling offload mode is: %s\n",  (dev->caps.tunnel_offload_mode
1935		 == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
1936}
1937
1938static int mlx4_validate_optimized_steering(struct mlx4_dev *dev)
1939{
1940	int i;
1941	struct mlx4_port_cap port_cap;
1942
1943	if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
1944		return -EINVAL;
1945
1946	for (i = 1; i <= dev->caps.num_ports; i++) {
1947		if (mlx4_dev_port(dev, i, &port_cap)) {
1948			mlx4_err(dev,
				 "QUERY_DEV_CAP command failed, can't verify DMFS high rate steering.\n");
1950		} else if ((dev->caps.dmfs_high_steer_mode !=
1951			    MLX4_STEERING_DMFS_A0_DEFAULT) &&
1952			   (port_cap.dmfs_optimized_state ==
1953			    !!(dev->caps.dmfs_high_steer_mode ==
1954			    MLX4_STEERING_DMFS_A0_DISABLE))) {
1955			mlx4_err(dev,
				 "DMFS high rate steer mode differs: driver requested %s but it is %s in FW\n",
1957				 dmfs_high_rate_steering_mode_str(
1958					dev->caps.dmfs_high_steer_mode),
1959				 (port_cap.dmfs_optimized_state ?
1960					"enabled" : "disabled"));
1961		}
1962	}
1963
1964	return 0;
1965}
1966
1967static int mlx4_init_fw(struct mlx4_dev *dev)
1968{
1969	struct mlx4_mod_stat_cfg   mlx4_cfg;
1970	int err = 0;
1971
1972	if (!mlx4_is_slave(dev)) {
1973		err = mlx4_QUERY_FW(dev);
1974		if (err) {
1975			if (err == -EACCES)
1976				mlx4_info(dev, "non-primary physical function, skipping\n");
1977			else
1978				mlx4_err(dev, "QUERY_FW command failed, aborting\n");
1979			return err;
1980		}
1981
1982		err = mlx4_load_fw(dev);
1983		if (err) {
1984			mlx4_err(dev, "Failed to start FW, aborting\n");
1985			return err;
1986		}
1987
1988		mlx4_cfg.log_pg_sz_m = 1;
1989		mlx4_cfg.log_pg_sz = 0;
1990		err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
1991		if (err)
1992			mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
1993	}
1994
1995	return err;
1996}
1997
1998static int mlx4_init_hca(struct mlx4_dev *dev)
1999{
2000	struct mlx4_priv	  *priv = mlx4_priv(dev);
2001	struct mlx4_adapter	   adapter;
2002	struct mlx4_dev_cap	   dev_cap;
2003	struct mlx4_profile	   profile;
2004	struct mlx4_init_hca_param init_hca;
2005	u64 icm_size;
2006	struct mlx4_config_dev_params params;
2007	int err;
2008
2009	if (!mlx4_is_slave(dev)) {
2010		err = mlx4_dev_cap(dev, &dev_cap);
2011		if (err) {
2012			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
2013			return err;
2014		}
2015
2016		choose_steering_mode(dev, &dev_cap);
2017		choose_tunnel_offload_mode(dev, &dev_cap);
2018
2019		if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
2020		    mlx4_is_master(dev))
2021			dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC;
2022
2023		err = mlx4_get_phys_port_id(dev);
2024		if (err)
			mlx4_err(dev, "Failed to get physical port id\n");
2026
2027		if (mlx4_is_master(dev))
2028			mlx4_parav_master_pf_caps(dev);
2029
2030		if (mlx4_low_memory_profile()) {
2031			mlx4_info(dev, "Running from within kdump kernel. Using low memory profile\n");
2032			profile = low_mem_profile;
2033		} else {
2034			profile = default_profile;
2035		}
2036		if (dev->caps.steering_mode ==
2037		    MLX4_STEERING_MODE_DEVICE_MANAGED)
2038			profile.num_mcg = MLX4_FS_NUM_MCG;
2039
2040		icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
2041					     &init_hca);
2042		if ((long long) icm_size < 0) {
2043			err = icm_size;
2044			return err;
2045		}
2046
2047		dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
2048
2049		init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
2050		init_hca.uar_page_sz = PAGE_SHIFT - 12;
2051		init_hca.mw_enabled = 0;
2052		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2053		    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
2054			init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
2055
2056		err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
2057		if (err)
2058			return err;
2059
2060		err = mlx4_INIT_HCA(dev, &init_hca);
2061		if (err) {
2062			mlx4_err(dev, "INIT_HCA command failed, aborting\n");
2063			goto err_free_icm;
2064		}
2065
2066		if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
2067			err = mlx4_query_func(dev, &dev_cap);
2068			if (err < 0) {
2069				mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
2070				goto err_close;
2071			} else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
2072				dev->caps.num_eqs = dev_cap.max_eqs;
2073				dev->caps.reserved_eqs = dev_cap.reserved_eqs;
2074				dev->caps.reserved_uars = dev_cap.reserved_uars;
2075			}
2076		}
2077
		/*
		 * If timestamping is supported by FW, read the HCA
		 * frequency via the QUERY_HCA command.
		 */
2082		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
2083			memset(&init_hca, 0, sizeof(init_hca));
2084			err = mlx4_QUERY_HCA(dev, &init_hca);
2085			if (err) {
				mlx4_err(dev, "QUERY_HCA command failed, disabling timestamping\n");
2087				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2088			} else {
2089				dev->caps.hca_core_clock =
2090					init_hca.hca_core_clock;
2091			}
2092
2093			/* In case we got HCA frequency 0 - disable timestamping
2094			 * to avoid dividing by zero
2095			 */
2096			if (!dev->caps.hca_core_clock) {
2097				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2098				mlx4_err(dev,
2099					 "HCA frequency is 0 - timestamping is not supported\n");
2100			} else if (map_internal_clock(dev)) {
				/*
				 * Failed to map the internal clock;
				 * disable timestamping.
				 */
2105				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2106				mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n");
2107			}
2108		}
2109
2110		if (dev->caps.dmfs_high_steer_mode !=
2111		    MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) {
2112			if (mlx4_validate_optimized_steering(dev))
2113				mlx4_warn(dev, "Optimized steering validation failed\n");
2114
2115			if (dev->caps.dmfs_high_steer_mode ==
2116			    MLX4_STEERING_DMFS_A0_DISABLE) {
2117				dev->caps.dmfs_high_rate_qpn_base =
2118					dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
2119				dev->caps.dmfs_high_rate_qpn_range =
2120					MLX4_A0_STEERING_TABLE_SIZE;
2121			}
2122
2123			mlx4_dbg(dev, "DMFS high rate steer mode is: %s\n",
2124				 dmfs_high_rate_steering_mode_str(
2125					dev->caps.dmfs_high_steer_mode));
2126		}
2127	} else {
2128		err = mlx4_init_slave(dev);
2129		if (err) {
2130			if (err != -EPROBE_DEFER)
2131				mlx4_err(dev, "Failed to initialize slave\n");
2132			return err;
2133		}
2134
2135		err = mlx4_slave_cap(dev);
2136		if (err) {
2137			mlx4_err(dev, "Failed to obtain slave caps\n");
2138			goto err_close;
2139		}
2140	}
2141
2142	if (map_bf_area(dev))
2143		mlx4_dbg(dev, "Failed to map blue flame area\n");
2144
	/* Only the master sets the ports; all other functions get them from it. */
2146	if (!mlx4_is_slave(dev))
2147		mlx4_set_port_mask(dev);
2148
2149	err = mlx4_QUERY_ADAPTER(dev, &adapter);
2150	if (err) {
2151		mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n");
2152		goto unmap_bf;
2153	}
2154
2155	/* Query CONFIG_DEV parameters */
2156	err = mlx4_config_dev_retrieval(dev, &params);
2157	if (err && err != -ENOTSUPP) {
2158		mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n");
2159	} else if (!err) {
2160		dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1;
2161		dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
2162	}
2163	priv->eq_table.inta_pin = adapter.inta_pin;
2164	memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
2165
2166	return 0;
2167
2168unmap_bf:
2169	unmap_internal_clock(dev);
2170	unmap_bf_area(dev);
2171
2172	if (mlx4_is_slave(dev)) {
2173		kfree(dev->caps.qp0_qkey);
2174		kfree(dev->caps.qp0_tunnel);
2175		kfree(dev->caps.qp0_proxy);
2176		kfree(dev->caps.qp1_tunnel);
2177		kfree(dev->caps.qp1_proxy);
2178	}
2179
2180err_close:
2181	if (mlx4_is_slave(dev))
2182		mlx4_slave_exit(dev);
2183	else
2184		mlx4_CLOSE_HCA(dev, 0);
2185
2186err_free_icm:
2187	if (!mlx4_is_slave(dev))
2188		mlx4_free_icms(dev);
2189
2190	return err;
2191}
2192
2193static int mlx4_init_counters_table(struct mlx4_dev *dev)
2194{
2195	struct mlx4_priv *priv = mlx4_priv(dev);
2196	int nent;
2197
2198	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2199		return -ENOENT;
2200
2201	nent = dev->caps.max_counters;
2202	return mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0);
2203}
2204
2205static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
2206{
2207	mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
2208}
2209
2210int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2211{
2212	struct mlx4_priv *priv = mlx4_priv(dev);
2213
2214	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2215		return -ENOENT;
2216
2217	*idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
2218	if (*idx == -1)
2219		return -ENOMEM;
2220
2221	return 0;
2222}
2223
2224int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2225{
2226	u64 out_param;
2227	int err;
2228
2229	if (mlx4_is_mfunc(dev)) {
2230		err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER,
2231				   RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
2232				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
2233		if (!err)
2234			*idx = get_param_l(&out_param);
2235
2236		return err;
2237	}
2238	return __mlx4_counter_alloc(dev, idx);
2239}
2240EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
2241
2242void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2243{
2244	mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
2245	return;
2246}
2247
2248void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2249{
2250	u64 in_param = 0;
2251
2252	if (mlx4_is_mfunc(dev)) {
2253		set_param_l(&in_param, idx);
2254		mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
2255			 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
2256			 MLX4_CMD_WRAPPED);
2257		return;
2258	}
2259	__mlx4_counter_free(dev, idx);
2260}
2261EXPORT_SYMBOL_GPL(mlx4_counter_free);
2262
2263void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
2264{
2265	struct mlx4_priv *priv = mlx4_priv(dev);
2266
2267	priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2268}
2269EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
2270
2271__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
2272{
2273	struct mlx4_priv *priv = mlx4_priv(dev);
2274
2275	return priv->mfunc.master.vf_admin[entry].vport[port].guid;
2276}
2277EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
2278
2279void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
2280{
2281	struct mlx4_priv *priv = mlx4_priv(dev);
2282	__be64 guid;
2283
2284	/* hw GUID */
2285	if (entry == 0)
2286		return;
2287
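	/* Generate a random GUID for this VF slot; clear bit 56 and set
	 * bit 57 of the value so the resulting GUID is never all zeros.
	 */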
2288	get_random_bytes((char *)&guid, sizeof(guid));
2289	guid &= ~(cpu_to_be64(1ULL << 56));
2290	guid |= cpu_to_be64(1ULL << 57);
2291	priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2292}
2293
2294static int mlx4_setup_hca(struct mlx4_dev *dev)
2295{
2296	struct mlx4_priv *priv = mlx4_priv(dev);
2297	int err;
2298	int port;
2299	__be32 ib_port_default_caps;
2300
2301	err = mlx4_init_uar_table(dev);
2302	if (err) {
2303		mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
		return err;
2305	}
2306
2307	err = mlx4_uar_alloc(dev, &priv->driver_uar);
2308	if (err) {
2309		mlx4_err(dev, "Failed to allocate driver access region, aborting\n");
2310		goto err_uar_table_free;
2311	}
2312
2313	priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
2314	if (!priv->kar) {
2315		mlx4_err(dev, "Couldn't map kernel access region, aborting\n");
2316		err = -ENOMEM;
2317		goto err_uar_free;
2318	}
2319
2320	err = mlx4_init_pd_table(dev);
2321	if (err) {
2322		mlx4_err(dev, "Failed to initialize protection domain table, aborting\n");
2323		goto err_kar_unmap;
2324	}
2325
2326	err = mlx4_init_xrcd_table(dev);
2327	if (err) {
2328		mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n");
2329		goto err_pd_table_free;
2330	}
2331
2332	err = mlx4_init_mr_table(dev);
2333	if (err) {
2334		mlx4_err(dev, "Failed to initialize memory region table, aborting\n");
2335		goto err_xrcd_table_free;
2336	}
2337
2338	if (!mlx4_is_slave(dev)) {
2339		err = mlx4_init_mcg_table(dev);
2340		if (err) {
2341			mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
2342			goto err_mr_table_free;
2343		}
2344		err = mlx4_config_mad_demux(dev);
2345		if (err) {
2346			mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
2347			goto err_mcg_table_free;
2348		}
2349	}
2350
2351	err = mlx4_init_eq_table(dev);
2352	if (err) {
2353		mlx4_err(dev, "Failed to initialize event queue table, aborting\n");
2354		goto err_mcg_table_free;
2355	}
2356
2357	err = mlx4_cmd_use_events(dev);
2358	if (err) {
2359		mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n");
2360		goto err_eq_table_free;
2361	}
2362
2363	err = mlx4_NOP(dev);
2364	if (err) {
2365		if (dev->flags & MLX4_FLAG_MSI_X) {
			mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt (IRQ %d)\n",
2367				  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
2368			mlx4_warn(dev, "Trying again without MSI-X\n");
2369		} else {
2370			mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
2371				 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
2372			mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
2373		}
2374
2375		goto err_cmd_poll;
2376	}
2377
2378	mlx4_dbg(dev, "NOP command IRQ test passed\n");
2379
2380	err = mlx4_init_cq_table(dev);
2381	if (err) {
2382		mlx4_err(dev, "Failed to initialize completion queue table, aborting\n");
2383		goto err_cmd_poll;
2384	}
2385
2386	err = mlx4_init_srq_table(dev);
2387	if (err) {
2388		mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n");
2389		goto err_cq_table_free;
2390	}
2391
2392	err = mlx4_init_qp_table(dev);
2393	if (err) {
2394		mlx4_err(dev, "Failed to initialize queue pair table, aborting\n");
2395		goto err_srq_table_free;
2396	}
2397
2398	err = mlx4_init_counters_table(dev);
2399	if (err && err != -ENOENT) {
2400		mlx4_err(dev, "Failed to initialize counters table, aborting\n");
2401		goto err_qp_table_free;
2402	}
2403
2404	if (!mlx4_is_slave(dev)) {
2405		for (port = 1; port <= dev->caps.num_ports; port++) {
2406			ib_port_default_caps = 0;
2407			err = mlx4_get_port_ib_caps(dev, port,
2408						    &ib_port_default_caps);
2409			if (err)
2410				mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). Continuing with caps = 0\n",
2411					  port, err);
2412			dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
2413
2414			/* initialize per-slave default ib port capabilities */
2415			if (mlx4_is_master(dev)) {
2416				int i;
2417				for (i = 0; i < dev->num_slaves; i++) {
2418					if (i == mlx4_master_func_num(dev))
2419						continue;
2420					priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
2421						ib_port_default_caps;
2422				}
2423			}
2424
2425			if (mlx4_is_mfunc(dev))
2426				dev->caps.port_ib_mtu[port] = IB_MTU_2048;
2427			else
2428				dev->caps.port_ib_mtu[port] = IB_MTU_4096;
2429
2430			err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
2431					    dev->caps.pkey_table_len[port] : -1);
2432			if (err) {
2433				mlx4_err(dev, "Failed to set port %d, aborting\n",
2434					 port);
2435				goto err_counters_table_free;
2436			}
2437		}
2438	}
2439
2440	return 0;
2441
2442err_counters_table_free:
2443	mlx4_cleanup_counters_table(dev);
2444
2445err_qp_table_free:
2446	mlx4_cleanup_qp_table(dev);
2447
2448err_srq_table_free:
2449	mlx4_cleanup_srq_table(dev);
2450
2451err_cq_table_free:
2452	mlx4_cleanup_cq_table(dev);
2453
2454err_cmd_poll:
2455	mlx4_cmd_use_polling(dev);
2456
2457err_eq_table_free:
2458	mlx4_cleanup_eq_table(dev);
2459
2460err_mcg_table_free:
2461	if (!mlx4_is_slave(dev))
2462		mlx4_cleanup_mcg_table(dev);
2463
2464err_mr_table_free:
2465	mlx4_cleanup_mr_table(dev);
2466
2467err_xrcd_table_free:
2468	mlx4_cleanup_xrcd_table(dev);
2469
2470err_pd_table_free:
2471	mlx4_cleanup_pd_table(dev);
2472
2473err_kar_unmap:
2474	iounmap(priv->kar);
2475
2476err_uar_free:
2477	mlx4_uar_free(dev, &priv->driver_uar);
2478
2479err_uar_table_free:
2480	mlx4_cleanup_uar_table(dev);
2481	return err;
2482}
2483
2484static void mlx4_enable_msi_x(struct mlx4_dev *dev)
2485{
2486	struct mlx4_priv *priv = mlx4_priv(dev);
2487	struct msix_entry *entries;
2488	int i;
2489
2490	if (msi_x) {
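		/* Request one completion vector per port per online CPU plus
		 * the legacy set, capped by the number of EQs the device can
		 * actually provide (num_eqs - reserved_eqs).
		 */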
2491		int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ;
2492
2493		nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
2494			     nreq);
2495
2496		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
2497		if (!entries)
2498			goto no_msi;
2499
2500		for (i = 0; i < nreq; ++i)
2501			entries[i].entry = i;
2502
2503		nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
2504					     nreq);
2505
2506		if (nreq < 0) {
2507			kfree(entries);
2508			goto no_msi;
2509		} else if (nreq < MSIX_LEGACY_SZ +
2510			   dev->caps.num_ports * MIN_MSIX_P_PORT) {
			/* Working in legacy mode, all EQs are shared */
2512			dev->caps.comp_pool           = 0;
2513			dev->caps.num_comp_vectors = nreq - 1;
2514		} else {
2515			dev->caps.comp_pool           = nreq - MSIX_LEGACY_SZ;
2516			dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
2517		}
2518		for (i = 0; i < nreq; ++i)
2519			priv->eq_table.eq[i].irq = entries[i].vector;
2520
2521		dev->flags |= MLX4_FLAG_MSI_X;
2522
2523		kfree(entries);
2524		return;
2525	}
2526
2527no_msi:
2528	dev->caps.num_comp_vectors = 1;
2529	dev->caps.comp_pool	   = 0;
2530
2531	for (i = 0; i < 2; ++i)
2532		priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
2533}
2534
2535static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
2536{
2537	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
2538	int err = 0;
2539
2540	info->dev = dev;
2541	info->port = port;
2542	if (!mlx4_is_slave(dev)) {
2543		mlx4_init_mac_table(dev, &info->mac_table);
2544		mlx4_init_vlan_table(dev, &info->vlan_table);
2545		mlx4_init_roce_gid_table(dev, &info->gid_table);
2546		info->base_qpn = mlx4_get_base_qpn(dev, port);
2547	}
2548
2549	sprintf(info->dev_name, "mlx4_port%d", port);
2550	info->port_attr.attr.name = info->dev_name;
2551	if (mlx4_is_mfunc(dev))
2552		info->port_attr.attr.mode = S_IRUGO;
2553	else {
2554		info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
2555		info->port_attr.store     = set_port_type;
2556	}
2557	info->port_attr.show      = show_port_type;
2558	sysfs_attr_init(&info->port_attr.attr);
2559
2560	err = device_create_file(&dev->persist->pdev->dev, &info->port_attr);
2561	if (err) {
2562		mlx4_err(dev, "Failed to create file for port %d\n", port);
2563		info->port = -1;
2564	}
2565
2566	sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
2567	info->port_mtu_attr.attr.name = info->dev_mtu_name;
2568	if (mlx4_is_mfunc(dev))
2569		info->port_mtu_attr.attr.mode = S_IRUGO;
2570	else {
2571		info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
2572		info->port_mtu_attr.store     = set_port_ib_mtu;
2573	}
2574	info->port_mtu_attr.show      = show_port_ib_mtu;
2575	sysfs_attr_init(&info->port_mtu_attr.attr);
2576
2577	err = device_create_file(&dev->persist->pdev->dev,
2578				 &info->port_mtu_attr);
2579	if (err) {
2580		mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
2581		device_remove_file(&info->dev->persist->pdev->dev,
2582				   &info->port_attr);
2583		info->port = -1;
2584	}
2585
2586	return err;
2587}
2588
2589static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
2590{
2591	if (info->port < 0)
2592		return;
2593
2594	device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
2595	device_remove_file(&info->dev->persist->pdev->dev,
2596			   &info->port_mtu_attr);
2597}
2598
2599static int mlx4_init_steering(struct mlx4_dev *dev)
2600{
2601	struct mlx4_priv *priv = mlx4_priv(dev);
2602	int num_entries = dev->caps.num_ports;
2603	int i, j;
2604
2605	priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL);
2606	if (!priv->steer)
2607		return -ENOMEM;
2608
2609	for (i = 0; i < num_entries; i++)
2610		for (j = 0; j < MLX4_NUM_STEERS; j++) {
2611			INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
2612			INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
2613		}
2614	return 0;
2615}
2616
2617static void mlx4_clear_steering(struct mlx4_dev *dev)
2618{
2619	struct mlx4_priv *priv = mlx4_priv(dev);
2620	struct mlx4_steer_index *entry, *tmp_entry;
2621	struct mlx4_promisc_qp *pqp, *tmp_pqp;
2622	int num_entries = dev->caps.num_ports;
2623	int i, j;
2624
2625	for (i = 0; i < num_entries; i++) {
2626		for (j = 0; j < MLX4_NUM_STEERS; j++) {
2627			list_for_each_entry_safe(pqp, tmp_pqp,
2628						 &priv->steer[i].promisc_qps[j],
2629						 list) {
2630				list_del(&pqp->list);
2631				kfree(pqp);
2632			}
2633			list_for_each_entry_safe(entry, tmp_entry,
2634						 &priv->steer[i].steer_entries[j],
2635						 list) {
2636				list_del(&entry->list);
2637				list_for_each_entry_safe(pqp, tmp_pqp,
2638							 &entry->duplicates,
2639							 list) {
2640					list_del(&pqp->list);
2641					kfree(pqp);
2642				}
2643				kfree(entry);
2644			}
2645		}
2646	}
2647	kfree(priv->steer);
2648}
2649
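/* Flatten the PCI slot/function pair into a single function index, used
 * below to decide which VF functions the PF driver actually probes.
 */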
2650static int extended_func_num(struct pci_dev *pdev)
2651{
2652	return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
2653}
2654
2655#define MLX4_OWNER_BASE	0x8069c
2656#define MLX4_OWNER_SIZE	4
2657
2658static int mlx4_get_ownership(struct mlx4_dev *dev)
2659{
2660	void __iomem *owner;
2661	u32 ret;
2662
2663	if (pci_channel_offline(dev->persist->pdev))
2664		return -EIO;
2665
2666	owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
2667			MLX4_OWNER_BASE,
2668			MLX4_OWNER_SIZE);
2669	if (!owner) {
2670		mlx4_err(dev, "Failed to obtain ownership bit\n");
2671		return -ENOMEM;
2672	}
2673
2674	ret = readl(owner);
2675	iounmap(owner);
2676	return (int) !!ret;
2677}
2678
2679static void mlx4_free_ownership(struct mlx4_dev *dev)
2680{
2681	void __iomem *owner;
2682
2683	if (pci_channel_offline(dev->persist->pdev))
2684		return;
2685
2686	owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
2687			MLX4_OWNER_BASE,
2688			MLX4_OWNER_SIZE);
2689	if (!owner) {
2690		mlx4_err(dev, "Failed to obtain ownership bit\n");
2691		return;
2692	}
2693	writel(0, owner);
2694	msleep(1000);
2695	iounmap(owner);
2696}
2697
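/* A function must either be both SR-IOV enabled and master, or neither;
 * any mixed combination of the two flags is an invalid state.
 */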
2698#define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV)	==\
2699				  !!((flags) & MLX4_FLAG_MASTER))
2700
2701static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
2702			     u8 total_vfs, int existing_vfs, int reset_flow)
2703{
2704	u64 dev_flags = dev->flags;
2705	int err = 0;
2706
2707	if (reset_flow) {
2708		dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
2709				       GFP_KERNEL);
2710		if (!dev->dev_vfs)
2711			goto free_mem;
2712		return dev_flags;
2713	}
2714
2715	atomic_inc(&pf_loading);
2716	if (dev->flags &  MLX4_FLAG_SRIOV) {
2717		if (existing_vfs != total_vfs) {
2718			mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
2719				 existing_vfs, total_vfs);
2720			total_vfs = existing_vfs;
2721		}
2722	}
2723
2724	dev->dev_vfs = kzalloc(total_vfs * sizeof(*dev->dev_vfs), GFP_KERNEL);
	if (!dev->dev_vfs) {
2726		mlx4_err(dev, "Failed to allocate memory for VFs\n");
2727		goto disable_sriov;
2728	}
2729
2730	if (!(dev->flags &  MLX4_FLAG_SRIOV)) {
2731		mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
2732		err = pci_enable_sriov(pdev, total_vfs);
2733	}
2734	if (err) {
2735		mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
2736			 err);
2737		goto disable_sriov;
2738	} else {
2739		mlx4_warn(dev, "Running in master mode\n");
2740		dev_flags |= MLX4_FLAG_SRIOV |
2741			MLX4_FLAG_MASTER;
2742		dev_flags &= ~MLX4_FLAG_SLAVE;
2743		dev->persist->num_vfs = total_vfs;
2744	}
2745	return dev_flags;
2746
2747disable_sriov:
2748	atomic_dec(&pf_loading);
2749free_mem:
2750	dev->persist->num_vfs = 0;
2751	kfree(dev->dev_vfs);
2752	return dev_flags & ~MLX4_FLAG_MASTER;
2753}
2754
2755enum {
2756	MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1,
2757};
2758
2759static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
2760			      int *nvfs)
2761{
2762	int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2];
2763	/* Checking for 64 VFs as a limitation of CX2 */
2764	if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) &&
2765	    requested_vfs >= 64) {
2766		mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n",
2767			 requested_vfs);
2768		return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64;
2769	}
2770	return 0;
2771}
2772
2773static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
2774			 int total_vfs, int *nvfs, struct mlx4_priv *priv,
2775			 int reset_flow)
2776{
2777	struct mlx4_dev *dev;
2778	unsigned sum = 0;
2779	int err;
2780	int port;
2781	int i;
2782	struct mlx4_dev_cap *dev_cap = NULL;
2783	int existing_vfs = 0;
2784
2785	dev = &priv->dev;
2786
2787	INIT_LIST_HEAD(&priv->ctx_list);
2788	spin_lock_init(&priv->ctx_lock);
2789
2790	mutex_init(&priv->port_mutex);
2791	mutex_init(&priv->bond_mutex);
2792
2793	INIT_LIST_HEAD(&priv->pgdir_list);
2794	mutex_init(&priv->pgdir_mutex);
2795
2796	INIT_LIST_HEAD(&priv->bf_list);
2797	mutex_init(&priv->bf_mutex);
2798
2799	dev->rev_id = pdev->revision;
2800	dev->numa_node = dev_to_node(&pdev->dev);
2801
2802	/* Detect if this device is a virtual function */
2803	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
2804		mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
2805		dev->flags |= MLX4_FLAG_SLAVE;
2806	} else {
2807		/* We reset the device and enable SRIOV only for physical
2808		 * devices.  Try to claim ownership on the device;
2809		 * if already taken, skip -- do not allow multiple PFs */
2810		err = mlx4_get_ownership(dev);
2811		if (err) {
2812			if (err < 0)
2813				return err;
2814			else {
2815				mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
2816				return -EINVAL;
2817			}
2818		}
2819
2820		atomic_set(&priv->opreq_count, 0);
2821		INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
2822
2823		/*
2824		 * Now reset the HCA before we touch the PCI capabilities or
2825		 * attempt a firmware command, since a boot ROM may have left
2826		 * the HCA in an undefined state.
2827		 */
2828		err = mlx4_reset(dev);
2829		if (err) {
2830			mlx4_err(dev, "Failed to reset HCA, aborting\n");
2831			goto err_sriov;
2832		}
2833
2834		if (total_vfs) {
2835			dev->flags = MLX4_FLAG_MASTER;
2836			existing_vfs = pci_num_vf(pdev);
2837			if (existing_vfs)
2838				dev->flags |= MLX4_FLAG_SRIOV;
2839			dev->persist->num_vfs = total_vfs;
2840		}
2841	}
2842
	/* On load, clear any previous indication of internal error;
	 * the device is up.
	 */
2846	dev->persist->state = MLX4_DEVICE_STATE_UP;
2847
2848slave_start:
2849	err = mlx4_cmd_init(dev);
2850	if (err) {
2851		mlx4_err(dev, "Failed to init command interface, aborting\n");
2852		goto err_sriov;
2853	}
2854
2855	/* In slave functions, the communication channel must be initialized
2856	 * before posting commands. Also, init num_slaves before calling
2857	 * mlx4_init_hca */
2858	if (mlx4_is_mfunc(dev)) {
2859		if (mlx4_is_master(dev)) {
2860			dev->num_slaves = MLX4_MAX_NUM_SLAVES;
2861
2862		} else {
2863			dev->num_slaves = 0;
2864			err = mlx4_multi_func_init(dev);
2865			if (err) {
2866				mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n");
2867				goto err_cmd;
2868			}
2869		}
2870	}
2871
2872	err = mlx4_init_fw(dev);
2873	if (err) {
2874		mlx4_err(dev, "Failed to init fw, aborting.\n");
2875		goto err_mfunc;
2876	}
2877
2878	if (mlx4_is_master(dev)) {
		/* when we hit the goto slave_start below, dev_cap is already initialized */
2880		if (!dev_cap) {
2881			dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
2882
2883			if (!dev_cap) {
2884				err = -ENOMEM;
2885				goto err_fw;
2886			}
2887
2888			err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
2889			if (err) {
2890				mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
2891				goto err_fw;
2892			}
2893
2894			if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
2895				goto err_fw;
2896
2897			if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
2898				u64 dev_flags = mlx4_enable_sriov(dev, pdev,
2899								  total_vfs,
2900								  existing_vfs,
2901								  reset_flow);
2902
2903				mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
2904				dev->flags = dev_flags;
2905				if (!SRIOV_VALID_STATE(dev->flags)) {
2906					mlx4_err(dev, "Invalid SRIOV state\n");
2907					goto err_sriov;
2908				}
2909				err = mlx4_reset(dev);
2910				if (err) {
2911					mlx4_err(dev, "Failed to reset HCA, aborting.\n");
2912					goto err_sriov;
2913				}
2914				goto slave_start;
2915			}
2916		} else {
2917			/* Legacy mode FW requires SRIOV to be enabled before
2918			 * doing QUERY_DEV_CAP, since max_eq's value is different if
2919			 * SRIOV is enabled.
2920			 */
2921			memset(dev_cap, 0, sizeof(*dev_cap));
2922			err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
2923			if (err) {
2924				mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
2925				goto err_fw;
2926			}
2927
2928			if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
2929				goto err_fw;
2930		}
2931	}
2932
2933	err = mlx4_init_hca(dev);
2934	if (err) {
2935		if (err == -EACCES) {
			/* Not the primary physical function -
			 * running in slave mode */
2938			mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
2939			/* We're not a PF */
2940			if (dev->flags & MLX4_FLAG_SRIOV) {
2941				if (!existing_vfs)
2942					pci_disable_sriov(pdev);
2943				if (mlx4_is_master(dev) && !reset_flow)
2944					atomic_dec(&pf_loading);
2945				dev->flags &= ~MLX4_FLAG_SRIOV;
2946			}
2947			if (!mlx4_is_slave(dev))
2948				mlx4_free_ownership(dev);
2949			dev->flags |= MLX4_FLAG_SLAVE;
2950			dev->flags &= ~MLX4_FLAG_MASTER;
2951			goto slave_start;
2952		} else
2953			goto err_fw;
2954	}
2955
2956	if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
2957		u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
2958						  existing_vfs, reset_flow);
2959
2960		if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
2961			mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
2962			dev->flags = dev_flags;
2963			err = mlx4_cmd_init(dev);
2964			if (err) {
2965				/* Only VHCR is cleaned up, so could still
2966				 * send FW commands
2967				 */
2968				mlx4_err(dev, "Failed to init VHCR command interface, aborting\n");
2969				goto err_close;
2970			}
2971		} else {
2972			dev->flags = dev_flags;
2973		}
2974
2975		if (!SRIOV_VALID_STATE(dev->flags)) {
2976			mlx4_err(dev, "Invalid SRIOV state\n");
2977			goto err_close;
2978		}
2979	}
2980
	/* Check whether the device is functioning at its maximum possible
	 * speed.  There is no return code for this call; it just warns the
	 * user if the device's PCI Express capabilities are not fully
	 * satisfied by the bus.
	 */
2985	if (!mlx4_is_slave(dev))
2986		mlx4_check_pcie_caps(dev);
2987
2988	/* In master functions, the communication channel must be initialized
2989	 * after obtaining its address from fw */
2990	if (mlx4_is_master(dev)) {
2991		int ib_ports = 0;
2992
2993		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2994			ib_ports++;
2995
2996		if (ib_ports &&
2997		    (num_vfs_argc > 1 || probe_vfs_argc > 1)) {
2998			mlx4_err(dev,
2999				 "Invalid syntax of num_vfs/probe_vfs with IB port - single port VFs syntax is only supported when all ports are configured as ethernet\n");
3000			err = -EINVAL;
3001			goto err_close;
3002		}
3003		if (dev->caps.num_ports < 2 &&
3004		    num_vfs_argc > 1) {
3005			err = -EINVAL;
3006			mlx4_err(dev,
3007				 "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
3008				 dev->caps.num_ports);
3009			goto err_close;
3010		}
3011		memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs));
3012
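		/* Record which port(s) each VF serves: nvfs[0] and nvfs[1]
		 * are single-port VFs on ports 1 and 2 respectively, while
		 * nvfs[2] VFs span all ports.
		 */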
3013		for (i = 0;
3014		     i < sizeof(dev->persist->nvfs)/
3015		     sizeof(dev->persist->nvfs[0]); i++) {
3016			unsigned j;
3017
3018			for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) {
3019				dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
3020				dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
3021					dev->caps.num_ports;
3022			}
3023		}
3024
3025		/* In master functions, the communication channel
3026		 * must be initialized after obtaining its address from fw
3027		 */
3028		err = mlx4_multi_func_init(dev);
3029		if (err) {
3030			mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
3031			goto err_close;
3032		}
3033	}
3034
3035	err = mlx4_alloc_eq_table(dev);
3036	if (err)
3037		goto err_master_mfunc;
3038
3039	priv->msix_ctl.pool_bm = 0;
3040	mutex_init(&priv->msix_ctl.pool_lock);
3041
3042	mlx4_enable_msi_x(dev);
3043	if ((mlx4_is_mfunc(dev)) &&
3044	    !(dev->flags & MLX4_FLAG_MSI_X)) {
3045		err = -ENOSYS;
3046		mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n");
3047		goto err_free_eq;
3048	}
3049
3050	if (!mlx4_is_slave(dev)) {
3051		err = mlx4_init_steering(dev);
3052		if (err)
3053			goto err_disable_msix;
3054	}
3055
3056	err = mlx4_setup_hca(dev);
3057	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
3058	    !mlx4_is_mfunc(dev)) {
3059		dev->flags &= ~MLX4_FLAG_MSI_X;
3060		dev->caps.num_comp_vectors = 1;
3061		dev->caps.comp_pool	   = 0;
3062		pci_disable_msix(pdev);
3063		err = mlx4_setup_hca(dev);
3064	}
3065
3066	if (err)
3067		goto err_steer;
3068
3069	mlx4_init_quotas(dev);
	/* When PF resources are ready, arm its comm channel so it can
	 * start receiving commands.
	 */
3073	if (mlx4_is_master(dev)) {
3074		err = mlx4_ARM_COMM_CHANNEL(dev);
3075		if (err) {
			mlx4_err(dev, "Failed to arm comm channel eq: %x\n",
3077				 err);
3078			goto err_steer;
3079		}
3080	}
3081
3082	for (port = 1; port <= dev->caps.num_ports; port++) {
3083		err = mlx4_init_port_info(dev, port);
3084		if (err)
3085			goto err_port;
3086	}
3087
3088	priv->v2p.port1 = 1;
3089	priv->v2p.port2 = 2;
3090
3091	err = mlx4_register_device(dev);
3092	if (err)
3093		goto err_port;
3094
3095	mlx4_request_modules(dev);
3096
3097	mlx4_sense_init(dev);
3098	mlx4_start_sense(dev);
3099
3100	priv->removed = 0;
3101
3102	if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3103		atomic_dec(&pf_loading);
3104
3105	kfree(dev_cap);
3106	return 0;
3107
3108err_port:
3109	for (--port; port >= 1; --port)
3110		mlx4_cleanup_port_info(&priv->port[port]);
3111
3112	mlx4_cleanup_counters_table(dev);
3113	mlx4_cleanup_qp_table(dev);
3114	mlx4_cleanup_srq_table(dev);
3115	mlx4_cleanup_cq_table(dev);
3116	mlx4_cmd_use_polling(dev);
3117	mlx4_cleanup_eq_table(dev);
3118	mlx4_cleanup_mcg_table(dev);
3119	mlx4_cleanup_mr_table(dev);
3120	mlx4_cleanup_xrcd_table(dev);
3121	mlx4_cleanup_pd_table(dev);
3122	mlx4_cleanup_uar_table(dev);
3123
3124err_steer:
3125	if (!mlx4_is_slave(dev))
3126		mlx4_clear_steering(dev);
3127
3128err_disable_msix:
3129	if (dev->flags & MLX4_FLAG_MSI_X)
3130		pci_disable_msix(pdev);
3131
3132err_free_eq:
3133	mlx4_free_eq_table(dev);
3134
3135err_master_mfunc:
3136	if (mlx4_is_master(dev)) {
3137		mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
3138		mlx4_multi_func_cleanup(dev);
3139	}
3140
3141	if (mlx4_is_slave(dev)) {
3142		kfree(dev->caps.qp0_qkey);
3143		kfree(dev->caps.qp0_tunnel);
3144		kfree(dev->caps.qp0_proxy);
3145		kfree(dev->caps.qp1_tunnel);
3146		kfree(dev->caps.qp1_proxy);
3147	}
3148
3149err_close:
3150	mlx4_close_hca(dev);
3151
3152err_fw:
3153	mlx4_close_fw(dev);
3154
3155err_mfunc:
3156	if (mlx4_is_slave(dev))
3157		mlx4_multi_func_cleanup(dev);
3158
3159err_cmd:
3160	mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3161
3162err_sriov:
3163	if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) {
3164		pci_disable_sriov(pdev);
3165		dev->flags &= ~MLX4_FLAG_SRIOV;
3166	}
3167
3168	if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3169		atomic_dec(&pf_loading);
3170
3171	kfree(priv->dev.dev_vfs);
3172
3173	if (!mlx4_is_slave(dev))
3174		mlx4_free_ownership(dev);
3175
3176	kfree(dev_cap);
3177	return err;
3178}
3179
3180static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
3181			   struct mlx4_priv *priv)
3182{
3183	int err;
3184	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3185	int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3186	const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
3187		{2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
3188	unsigned total_vfs = 0;
3189	unsigned int i;
3190
3191	pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
3192
3193	err = pci_enable_device(pdev);
3194	if (err) {
3195		dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
3196		return err;
3197	}
3198
	/* Due to the requirement that all VFs and the PF are *guaranteed* 2 MACs
	 * per port, we must limit the number of VFs to 63 (since there are
	 * 128 MACs)
	 */
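	/* param_map translates the number of module parameter values given
	 * (1, 2 or 3) into the nvfs[]/prb_vf[] slot each value fills: a
	 * single value counts as dual-port VFs (slot 2), otherwise the
	 * values are per-port with an optional dual-port third entry.
	 */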
3203	for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc;
3204	     total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
3205		nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
3206		if (nvfs[i] < 0) {
3207			dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
3208			err = -EINVAL;
3209			goto err_disable_pdev;
3210		}
3211	}
3212	for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc;
3213	     i++) {
3214		prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
3215		if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
3216			dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
3217			err = -EINVAL;
3218			goto err_disable_pdev;
3219		}
3220	}
3221	if (total_vfs >= MLX4_MAX_NUM_VF) {
3222		dev_err(&pdev->dev,
			"Requested more VFs (%d) than allowed (%d)\n",
3224			total_vfs, MLX4_MAX_NUM_VF - 1);
3225		err = -EINVAL;
3226		goto err_disable_pdev;
3227	}
3228
3229	for (i = 0; i < MLX4_MAX_PORTS; i++) {
3230		if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) {
3231			dev_err(&pdev->dev,
				"Requested more VFs (%d) for port (%d) than allowed (%d)\n",
3233				nvfs[i] + nvfs[2], i + 1,
3234				MLX4_MAX_NUM_VF_P_PORT - 1);
3235			err = -EINVAL;
3236			goto err_disable_pdev;
3237		}
3238	}
3239
3240	/* Check for BARs. */
3241	if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
3242	    !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
3243		dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
3244			pci_dev_data, pci_resource_flags(pdev, 0));
3245		err = -ENODEV;
3246		goto err_disable_pdev;
3247	}
3248	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
3249		dev_err(&pdev->dev, "Missing UAR, aborting\n");
3250		err = -ENODEV;
3251		goto err_disable_pdev;
3252	}
3253
3254	err = pci_request_regions(pdev, DRV_NAME);
3255	if (err) {
3256		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
3257		goto err_disable_pdev;
3258	}
3259
3260	pci_set_master(pdev);
3261
3262	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
3263	if (err) {
3264		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
3265		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3266		if (err) {
3267			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
3268			goto err_release_regions;
3269		}
3270	}
3271	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3272	if (err) {
3273		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
3274		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3275		if (err) {
3276			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
3277			goto err_release_regions;
3278		}
3279	}
3280
3281	/* Allow large DMA segments, up to the firmware limit of 1 GB */
3282	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
3283	/* Detect if this device is a virtual function */
3284	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
		/* When acting as PF, we normally skip VFs unless explicitly
		 * requested to probe them.
		 */
3288		if (total_vfs) {
3289			unsigned vfs_offset = 0;
3290
3291			for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
3292			     vfs_offset + nvfs[i] < extended_func_num(pdev);
3293			     vfs_offset += nvfs[i], i++)
3294				;
3295			if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
3296				err = -ENODEV;
3297				goto err_release_regions;
3298			}
3299			if ((extended_func_num(pdev) - vfs_offset)
3300			    > prb_vf[i]) {
3301				dev_warn(&pdev->dev, "Skipping virtual function:%d\n",
3302					 extended_func_num(pdev));
3303				err = -ENODEV;
3304				goto err_release_regions;
3305			}
3306		}
3307	}
3308
3309	err = mlx4_catas_init(&priv->dev);
3310	if (err)
3311		goto err_release_regions;
3312
3313	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
3314	if (err)
3315		goto err_catas;
3316
3317	return 0;
3318
3319err_catas:
3320	mlx4_catas_end(&priv->dev);
3321
3322err_release_regions:
3323	pci_release_regions(pdev);
3324
3325err_disable_pdev:
3326	pci_disable_device(pdev);
3327	pci_set_drvdata(pdev, NULL);
3328	return err;
3329}
3330
3331static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
3332{
3333	struct mlx4_priv *priv;
3334	struct mlx4_dev *dev;
3335	int ret;
3336
3337	printk_once(KERN_INFO "%s", mlx4_version);
3338
3339	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
3340	if (!priv)
3341		return -ENOMEM;
3342
3343	dev       = &priv->dev;
3344	dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL);
3345	if (!dev->persist) {
3346		kfree(priv);
3347		return -ENOMEM;
3348	}
3349	dev->persist->pdev = pdev;
3350	dev->persist->dev = dev;
3351	pci_set_drvdata(pdev, dev->persist);
3352	priv->pci_dev_data = id->driver_data;
3353	mutex_init(&dev->persist->device_state_mutex);
3354	mutex_init(&dev->persist->interface_state_mutex);
3355
3356	ret =  __mlx4_init_one(pdev, id->driver_data, priv);
3357	if (ret) {
3358		kfree(dev->persist);
3359		kfree(priv);
3360	} else {
3361		pci_save_state(pdev);
3362	}
3363
3364	return ret;
3365}
3366
3367static void mlx4_clean_dev(struct mlx4_dev *dev)
3368{
3369	struct mlx4_dev_persistent *persist = dev->persist;
3370	struct mlx4_priv *priv = mlx4_priv(dev);
3371	unsigned long	flags = (dev->flags & RESET_PERSIST_MASK_FLAGS);
3372
3373	memset(priv, 0, sizeof(*priv));
3374	priv->dev.persist = persist;
3375	priv->dev.flags = flags;
3376}
3377
3378static void mlx4_unload_one(struct pci_dev *pdev)
3379{
3380	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3381	struct mlx4_dev  *dev  = persist->dev;
3382	struct mlx4_priv *priv = mlx4_priv(dev);
3383	int               pci_dev_data;
3384	int p, i;
3385
3386	if (priv->removed)
3387		return;
3388
3389	/* saving current ports type for further use */
3390	for (i = 0; i < dev->caps.num_ports; i++) {
3391		dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1];
3392		dev->persist->curr_port_poss_type[i] = dev->caps.
3393						       possible_type[i + 1];
3394	}
3395
3396	pci_dev_data = priv->pci_dev_data;
3397
3398	mlx4_stop_sense(dev);
3399	mlx4_unregister_device(dev);
3400
3401	for (p = 1; p <= dev->caps.num_ports; p++) {
3402		mlx4_cleanup_port_info(&priv->port[p]);
3403		mlx4_CLOSE_PORT(dev, p);
3404	}
3405
3406	if (mlx4_is_master(dev))
3407		mlx4_free_resource_tracker(dev,
3408					   RES_TR_FREE_SLAVES_ONLY);
3409
3410	mlx4_cleanup_counters_table(dev);
3411	mlx4_cleanup_qp_table(dev);
3412	mlx4_cleanup_srq_table(dev);
3413	mlx4_cleanup_cq_table(dev);
3414	mlx4_cmd_use_polling(dev);
3415	mlx4_cleanup_eq_table(dev);
3416	mlx4_cleanup_mcg_table(dev);
3417	mlx4_cleanup_mr_table(dev);
3418	mlx4_cleanup_xrcd_table(dev);
3419	mlx4_cleanup_pd_table(dev);
3420
3421	if (mlx4_is_master(dev))
3422		mlx4_free_resource_tracker(dev,
3423					   RES_TR_FREE_STRUCTS_ONLY);
3424
3425	iounmap(priv->kar);
3426	mlx4_uar_free(dev, &priv->driver_uar);
3427	mlx4_cleanup_uar_table(dev);
3428	if (!mlx4_is_slave(dev))
3429		mlx4_clear_steering(dev);
3430	mlx4_free_eq_table(dev);
3431	if (mlx4_is_master(dev))
3432		mlx4_multi_func_cleanup(dev);
3433	mlx4_close_hca(dev);
3434	mlx4_close_fw(dev);
3435	if (mlx4_is_slave(dev))
3436		mlx4_multi_func_cleanup(dev);
3437	mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3438
3439	if (dev->flags & MLX4_FLAG_MSI_X)
3440		pci_disable_msix(pdev);
3441
3442	if (!mlx4_is_slave(dev))
3443		mlx4_free_ownership(dev);
3444
3445	kfree(dev->caps.qp0_qkey);
3446	kfree(dev->caps.qp0_tunnel);
3447	kfree(dev->caps.qp0_proxy);
3448	kfree(dev->caps.qp1_tunnel);
3449	kfree(dev->caps.qp1_proxy);
3450	kfree(dev->dev_vfs);
3451
3452	mlx4_clean_dev(dev);
3453	priv->pci_dev_data = pci_dev_data;
3454	priv->removed = 1;
3455}
3456
3457static void mlx4_remove_one(struct pci_dev *pdev)
3458{
3459	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3460	struct mlx4_dev  *dev  = persist->dev;
3461	struct mlx4_priv *priv = mlx4_priv(dev);
3462	int active_vfs = 0;
3463
3464	mutex_lock(&persist->interface_state_mutex);
3465	persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
3466	mutex_unlock(&persist->interface_state_mutex);
3467
	/* Disabling SR-IOV is not allowed while there are active VFs */
3469	if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) {
3470		active_vfs = mlx4_how_many_lives_vf(dev);
3471		if (active_vfs) {
			pr_warn("Removing PF while there are active VFs!\n");
3473			pr_warn("Will not disable SR-IOV.\n");
3474		}
3475	}
3476
	/* The device is marked for deletion; continue now without the lock,
	 * letting other tasks terminate.
	 */
3480	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
3481		mlx4_unload_one(pdev);
3482	else
3483		mlx4_info(dev, "%s: interface is down\n", __func__);
3484	mlx4_catas_end(dev);
3485	if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
3486		mlx4_warn(dev, "Disabling SR-IOV\n");
3487		pci_disable_sriov(pdev);
3488	}
3489
3490	pci_release_regions(pdev);
3491	pci_disable_device(pdev);
3492	kfree(dev->persist);
3493	kfree(priv);
3494	pci_set_drvdata(pdev, NULL);
3495}
3496
3497static int restore_current_port_types(struct mlx4_dev *dev,
3498				      enum mlx4_port_type *types,
3499				      enum mlx4_port_type *poss_types)
3500{
3501	struct mlx4_priv *priv = mlx4_priv(dev);
3502	int err, i;
3503
3504	mlx4_stop_sense(dev);
3505
3506	mutex_lock(&priv->port_mutex);
3507	for (i = 0; i < dev->caps.num_ports; i++)
3508		dev->caps.possible_type[i + 1] = poss_types[i];
3509	err = mlx4_change_port_types(dev, types);
3510	mlx4_start_sense(dev);
3511	mutex_unlock(&priv->port_mutex);
3512
3513	return err;
3514}
3515
3516int mlx4_restart_one(struct pci_dev *pdev)
3517{
3518	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3519	struct mlx4_dev	 *dev  = persist->dev;
3520	struct mlx4_priv *priv = mlx4_priv(dev);
3521	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3522	int pci_dev_data, err, total_vfs;
3523
3524	pci_dev_data = priv->pci_dev_data;
3525	total_vfs = dev->persist->num_vfs;
3526	memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
3527
3528	mlx4_unload_one(pdev);
3529	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
3530	if (err) {
3531		mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
3532			 __func__, pci_name(pdev), err);
3533		return err;
3534	}
3535
3536	err = restore_current_port_types(dev, dev->persist->curr_port_type,
3537					 dev->persist->curr_port_poss_type);
3538	if (err)
3539		mlx4_err(dev, "could not restore original port types (%d)\n",
3540			 err);
3541
3542	return err;
3543}
3544
3545static const struct pci_device_id mlx4_pci_table[] = {
3546	/* MT25408 "Hermon" SDR */
3547	{ PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3548	/* MT25408 "Hermon" DDR */
3549	{ PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3550	/* MT25408 "Hermon" QDR */
3551	{ PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3552	/* MT25408 "Hermon" DDR PCIe gen2 */
3553	{ PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3554	/* MT25408 "Hermon" QDR PCIe gen2 */
3555	{ PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3556	/* MT25408 "Hermon" EN 10GigE */
3557	{ PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3558	/* MT25408 "Hermon" EN 10GigE PCIe gen2 */
3559	{ PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3560	/* MT25458 ConnectX EN 10GBASE-T 10GigE */
3561	{ PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3562	/* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
3563	{ PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3564	/* MT26468 ConnectX EN 10GigE PCIe gen2*/
3565	{ PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3566	/* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
3567	{ PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3568	/* MT26478 ConnectX2 40GigE PCIe gen2 */
3569	{ PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3570	/* MT25400 Family [ConnectX-2 Virtual Function] */
3571	{ PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
3572	/* MT27500 Family [ConnectX-3] */
3573	{ PCI_VDEVICE(MELLANOX, 0x1003), 0 },
3574	/* MT27500 Family [ConnectX-3 Virtual Function] */
3575	{ PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
3576	{ PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
3577	{ PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
3578	{ PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
3579	{ PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
3580	{ PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
3581	{ PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
3582	{ PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
3583	{ PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
3584	{ PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
3585	{ PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
3586	{ PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
3587	{ PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
3588	{ 0, }
3589};
3590
3591MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
3592
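/*
 * PCI AER "error detected" callback: move the device into its internal
 * error state and unload the instance if one is currently up.  A permanent
 * channel failure results in PCI_ERS_RESULT_DISCONNECT; otherwise the PCI
 * core is asked to perform a slot reset.
 */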
3593static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
3594					      pci_channel_state_t state)
3595{
3596	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3597
3598	mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n");
3599	mlx4_enter_error_state(persist);
3600
3601	mutex_lock(&persist->interface_state_mutex);
3602	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
3603		mlx4_unload_one(pdev);
3604
3605	mutex_unlock(&persist->interface_state_mutex);
3606	if (state == pci_channel_io_perm_failure)
3607		return PCI_ERS_RESULT_DISCONNECT;
3608
3609	pci_disable_device(pdev);
3610	return PCI_ERS_RESULT_NEED_RESET;
3611}
3612
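/*
 * PCI AER slot-reset callback: re-enable the device, restore its config
 * space and, if no instance is currently up, reload the driver with the
 * pre-error VF setup and port types.
 */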
3613static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
3614{
3615	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3616	struct mlx4_dev	 *dev  = persist->dev;
3617	struct mlx4_priv *priv = mlx4_priv(dev);
3618	int               ret;
3619	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3620	int total_vfs;
3621
3622	mlx4_err(dev, "mlx4_pci_slot_reset was called\n");
3623	ret = pci_enable_device(pdev);
3624	if (ret) {
		mlx4_err(dev, "Cannot re-enable device, ret=%d\n", ret);
3626		return PCI_ERS_RESULT_DISCONNECT;
3627	}
3628
3629	pci_set_master(pdev);
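	/*
	 * Re-program the config space from the copy saved at probe time and
	 * save it again so a valid saved copy remains for later restores.
	 */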
3630	pci_restore_state(pdev);
3631	pci_save_state(pdev);
3632
3633	total_vfs = dev->persist->num_vfs;
3634	memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
3635
3636	mutex_lock(&persist->interface_state_mutex);
3637	if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
3638		ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs,
3639				    priv, 1);
3640		if (ret) {
			mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n",
				 __func__, ret);
3643			goto end;
3644		}
3645
		ret = restore_current_port_types(dev,
						 dev->persist->curr_port_type,
						 dev->persist->curr_port_poss_type);
3649		if (ret)
3650			mlx4_err(dev, "could not restore original port types (%d)\n", ret);
3651	}
3652end:
3653	mutex_unlock(&persist->interface_state_mutex);
3654
3655	return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
3656}
3657
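/*
 * Shutdown hook: quiesce the device by unloading the driver instance (if
 * it is still up) before the system powers off or reboots.
 */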
3658static void mlx4_shutdown(struct pci_dev *pdev)
3659{
3660	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3661
3662	mlx4_info(persist->dev, "mlx4_shutdown was called\n");
3663	mutex_lock(&persist->interface_state_mutex);
3664	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
3665		mlx4_unload_one(pdev);
3666	mutex_unlock(&persist->interface_state_mutex);
3667}
3668
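/*
 * Only error_detected and slot_reset are implemented; recovery is
 * completed from within the slot-reset handler itself rather than in a
 * separate resume callback.
 */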
3669static const struct pci_error_handlers mlx4_err_handler = {
3670	.error_detected = mlx4_pci_err_detected,
3671	.slot_reset     = mlx4_pci_slot_reset,
3672};
3673
3674static struct pci_driver mlx4_driver = {
3675	.name		= DRV_NAME,
3676	.id_table	= mlx4_pci_table,
3677	.probe		= mlx4_init_one,
3678	.shutdown	= mlx4_shutdown,
3679	.remove		= mlx4_remove_one,
3680	.err_handler    = &mlx4_err_handler,
3681};
3682
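/*
 * Sanity-check the module parameters before the PCI driver is registered.
 * Obsolete parameters only trigger a warning; out-of-range values abort
 * the module load.
 */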
3683static int __init mlx4_verify_params(void)
3684{
3685	if ((log_num_mac < 0) || (log_num_mac > 7)) {
		pr_warn("mlx4_core: bad log_num_mac: %d\n", log_num_mac);
3687		return -1;
3688	}
3689
3690	if (log_num_vlan != 0)
3691		pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
3692			MLX4_LOG_NUM_VLANS);
3693
3694	if (use_prio != 0)
3695		pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");
3696
3697	if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
3698		pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n",
3699			log_mtts_per_seg);
3700		return -1;
3701	}
3702
	/* Check that the port type module parameters form a legal combination */
	if (!port_type_array[0] && port_type_array[1]) {
		pr_warn("Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
		port_type_array[0] = true;
	}
3708
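	/*
	 * Values in the range -7..0 are accepted as-is and act as special
	 * selectors rather than literal sizes; a positive value is a literal
	 * log entry size and must lie within the device limits.
	 */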
3709	if (mlx4_log_num_mgm_entry_size < -7 ||
3710	    (mlx4_log_num_mgm_entry_size > 0 &&
3711	     (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
3712	      mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE))) {
3713		pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n",
3714			mlx4_log_num_mgm_entry_size,
3715			MLX4_MIN_MGM_LOG_ENTRY_SIZE,
3716			MLX4_MAX_MGM_LOG_ENTRY_SIZE);
3717		return -1;
3718	}
3719
3720	return 0;
3721}
3722
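/*
 * Module init: validate the module parameters, create the driver-wide
 * singlethreaded workqueue (mlx4_wq) used for deferred work, and register
 * the PCI driver.  The workqueue is destroyed again if registration fails.
 */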
3723static int __init mlx4_init(void)
3724{
3725	int ret;
3726
3727	if (mlx4_verify_params())
3728		return -EINVAL;
3729
	mlx4_wq = create_singlethread_workqueue("mlx4");
3732	if (!mlx4_wq)
3733		return -ENOMEM;
3734
3735	ret = pci_register_driver(&mlx4_driver);
3736	if (ret < 0)
3737		destroy_workqueue(mlx4_wq);
3738	return ret < 0 ? ret : 0;
3739}
3740
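/*
 * Module exit: unregister the PCI driver (unloading any remaining
 * instances) and destroy the workqueue.
 */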
3741static void __exit mlx4_cleanup(void)
3742{
3743	pci_unregister_driver(&mlx4_driver);
3744	destroy_workqueue(mlx4_wq);
3745}
3746
3747module_init(mlx4_init);
3748module_exit(mlx4_cleanup);
3749