/*
 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <net/busy_poll.h>
#include <linux/mlx4/cq.h>
#include <linux/slab.h>
#include <linux/mlx4/qp.h>
#include <linux/skbuff.h>
#include <linux/rculist.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
#include <linux/irq.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_checksum.h>
#endif

#include "mlx4_en.h"

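/* Allocate a (possibly multi-order) page for the RX fragment allocator and
 * DMA-map it.  Allocation starts at MLX4_EN_ALLOC_PREFER_ORDER and falls back
 * to lower orders on failure, as long as the page still fits one fragment.
 */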
static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
			    struct mlx4_en_rx_alloc *page_alloc,
			    const struct mlx4_en_frag_info *frag_info,
			    gfp_t _gfp)
{
	int order;
	struct page *page;
	dma_addr_t dma;

	for (order = MLX4_EN_ALLOC_PREFER_ORDER; ;) {
		gfp_t gfp = _gfp;

		if (order)
			gfp |= __GFP_COMP | __GFP_NOWARN;
		page = alloc_pages(gfp, order);
		if (likely(page))
			break;
		if (--order < 0 ||
		    ((PAGE_SIZE << order) < frag_info->frag_size))
			return -ENOMEM;
	}
	dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
			   PCI_DMA_FROMDEVICE);
	if (dma_mapping_error(priv->ddev, dma)) {
		put_page(page);
		return -ENOMEM;
	}
	page_alloc->page_size = PAGE_SIZE << order;
	page_alloc->page = page;
	page_alloc->dma = dma;
	page_alloc->page_offset = 0;
	/* Not doing get_page() for each frag is a big win
	 * on asymmetric workloads. Note we cannot use atomic_set().
	 */
	atomic_add(page_alloc->page_size / frag_info->frag_stride - 1,
		   &page->_count);
	return 0;
}

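/* Populate one RX descriptor: hand the current per-fragment offsets to the
 * hardware descriptor and advance each fragment allocator by one stride,
 * replenishing its page when the current one is exhausted.
 */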
static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
			       struct mlx4_en_rx_desc *rx_desc,
			       struct mlx4_en_rx_alloc *frags,
			       struct mlx4_en_rx_alloc *ring_alloc,
			       gfp_t gfp)
{
	struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
	const struct mlx4_en_frag_info *frag_info;
	struct page *page;
	dma_addr_t dma;
	int i;

	for (i = 0; i < priv->num_frags; i++) {
		frag_info = &priv->frag_info[i];
		page_alloc[i] = ring_alloc[i];
		page_alloc[i].page_offset += frag_info->frag_stride;

		if (page_alloc[i].page_offset + frag_info->frag_stride <=
		    ring_alloc[i].page_size)
			continue;

		if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp))
			goto out;
	}

	for (i = 0; i < priv->num_frags; i++) {
		frags[i] = ring_alloc[i];
		dma = ring_alloc[i].dma + ring_alloc[i].page_offset;
		ring_alloc[i] = page_alloc[i];
		rx_desc->data[i].addr = cpu_to_be64(dma);
	}

	return 0;

out:
	while (i--) {
		if (page_alloc[i].page != ring_alloc[i].page) {
			dma_unmap_page(priv->ddev, page_alloc[i].dma,
				page_alloc[i].page_size, PCI_DMA_FROMDEVICE);
			page = page_alloc[i].page;
			atomic_set(&page->_count, 1);
			put_page(page);
		}
	}
	return -ENOMEM;
}

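/* Release one fragment: unmap the page when this was its last fragment,
 * then drop the page reference taken at allocation time.
 */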
static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
			      struct mlx4_en_rx_alloc *frags,
			      int i)
{
	const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
	u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride;

	if (next_frag_end > frags[i].page_size)
		dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size,
			       PCI_DMA_FROMDEVICE);

	if (frags[i].page)
		put_page(frags[i].page);
}

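/* Set up the per-fragment page allocators of an RX ring. */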
static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
				  struct mlx4_en_rx_ring *ring)
{
	int i;
	struct mlx4_en_rx_alloc *page_alloc;

	for (i = 0; i < priv->num_frags; i++) {
		const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];

		if (mlx4_alloc_pages(priv, &ring->page_alloc[i],
				     frag_info, GFP_KERNEL | __GFP_COLD))
			goto out;

		en_dbg(DRV, priv, "  frag %d allocator: - size:%d frags:%d\n",
		       i, ring->page_alloc[i].page_size,
		       atomic_read(&ring->page_alloc[i].page->_count));
	}
	return 0;

out:
	while (i--) {
		struct page *page;

		page_alloc = &ring->page_alloc[i];
		dma_unmap_page(priv->ddev, page_alloc->dma,
			       page_alloc->page_size, PCI_DMA_FROMDEVICE);
		page = page_alloc->page;
		atomic_set(&page->_count, 1);
		put_page(page);
		page_alloc->page = NULL;
	}
	return -ENOMEM;
}

static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
				      struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_rx_alloc *page_alloc;
	int i;

	for (i = 0; i < priv->num_frags; i++) {
		const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];

		page_alloc = &ring->page_alloc[i];
		en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
		       i, page_count(page_alloc->page));

		dma_unmap_page(priv->ddev, page_alloc->dma,
				page_alloc->page_size, PCI_DMA_FROMDEVICE);
		while (page_alloc->page_offset + frag_info->frag_stride <
		       page_alloc->page_size) {
			put_page(page_alloc->page);
			page_alloc->page_offset += frag_info->frag_stride;
		}
		page_alloc->page = NULL;
	}
}

static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
				 struct mlx4_en_rx_ring *ring, int index)
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index;
	int possible_frags;
	int i;

	/* Set size and memtype fields */
	for (i = 0; i < priv->num_frags; i++) {
		rx_desc->data[i].byte_count =
			cpu_to_be32(priv->frag_info[i].frag_size);
		rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key);
	}

	/* If the number of used fragments does not fill up the ring stride,
	 * remaining (unused) fragments must be padded with null address/size
	 * and a special memory key */
	possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
	for (i = priv->num_frags; i < possible_frags; i++) {
		rx_desc->data[i].byte_count = 0;
		rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
		rx_desc->data[i].addr = 0;
	}
}

static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
				   struct mlx4_en_rx_ring *ring, int index,
				   gfp_t gfp)
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
	struct mlx4_en_rx_alloc *frags = ring->rx_info +
					(index << priv->log_rx_info);

	return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp);
}

static inline bool mlx4_en_is_ring_empty(struct mlx4_en_rx_ring *ring)
{
	BUG_ON((u32)(ring->prod - ring->cons) > ring->actual_size);
	return ring->prod == ring->cons;
}

static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
{
	*ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
}

static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
				 struct mlx4_en_rx_ring *ring,
				 int index)
{
	struct mlx4_en_rx_alloc *frags;
	int nr;

	frags = ring->rx_info + (index << priv->log_rx_info);
	for (nr = 0; nr < priv->num_frags; nr++) {
		en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
		mlx4_en_free_frag(priv, frags, nr);
	}
}

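/* Fill all RX rings with buffers.  If allocation fails before reaching
 * MLX4_EN_MIN_RX_SIZE buffers we give up; otherwise all rings are shrunk
 * to the largest power of two that could be filled.
 */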
static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
{
	struct mlx4_en_rx_ring *ring;
	int ring_ind;
	int buf_ind;
	int new_size;

	for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
		for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
			ring = priv->rx_ring[ring_ind];

			if (mlx4_en_prepare_rx_desc(priv, ring,
						    ring->actual_size,
						    GFP_KERNEL | __GFP_COLD)) {
				if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
					en_err(priv, "Failed to allocate enough rx buffers\n");
					return -ENOMEM;
				} else {
					new_size = rounddown_pow_of_two(ring->actual_size);
					en_warn(priv, "Only %d buffers allocated, reducing ring size to %d\n",
						ring->actual_size, new_size);
					goto reduce_rings;
				}
			}
			ring->actual_size++;
			ring->prod++;
		}
	}
	return 0;

reduce_rings:
	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = priv->rx_ring[ring_ind];
		while (ring->actual_size > new_size) {
			ring->actual_size--;
			ring->prod--;
			mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
		}
	}

	return 0;
}

static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
				struct mlx4_en_rx_ring *ring)
{
	int index;

	en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
	       ring->cons, ring->prod);

	/* Unmap and free Rx buffers */
	while (!mlx4_en_is_ring_empty(ring)) {
		index = ring->cons & ring->size_mask;
		en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
		mlx4_en_free_rx_desc(priv, ring, index);
		++ring->cons;
	}
}

void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev)
{
	int i;
	int num_of_eqs;
	int num_rx_rings;
	struct mlx4_dev *dev = mdev->dev;

	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
		if (!dev->caps.comp_pool)
			num_of_eqs = max_t(int, MIN_RX_RINGS,
					   min_t(int,
						 dev->caps.num_comp_vectors,
						 DEF_RX_RINGS));
		else
			num_of_eqs = min_t(int, MAX_MSIX_P_PORT,
					   dev->caps.comp_pool/
					   dev->caps.num_ports) - 1;

		num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS :
			min_t(int, num_of_eqs,
			      netif_get_num_default_rss_queues());
		mdev->profile.prof[i].rx_ring_num =
			rounddown_pow_of_two(num_rx_rings);
	}
}

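/* Allocate the software and hardware resources for one RX ring,
 * preferring memory on the given NUMA node.
 */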
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
			   struct mlx4_en_rx_ring **pring,
			   u32 size, u16 stride, int node)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rx_ring *ring;
	int err = -ENOMEM;
	int tmp;

	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
	if (!ring) {
		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
		if (!ring) {
			en_err(priv, "Failed to allocate RX ring structure\n");
			return -ENOMEM;
		}
	}

	ring->prod = 0;
	ring->cons = 0;
	ring->size = size;
	ring->size_mask = size - 1;
	ring->stride = stride;
	ring->log_stride = ffs(ring->stride) - 1;
	ring->buf_size = ring->size * ring->stride + TXBB_SIZE;

	tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
					sizeof(struct mlx4_en_rx_alloc));
	ring->rx_info = vmalloc_node(tmp, node);
	if (!ring->rx_info) {
		ring->rx_info = vmalloc(tmp);
		if (!ring->rx_info) {
			err = -ENOMEM;
			goto err_ring;
		}
	}

	en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
		 ring->rx_info, tmp);

	/* Allocate HW buffers on provided NUMA node */
	set_dev_node(&mdev->dev->persist->pdev->dev, node);
	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
				 ring->buf_size, 2 * PAGE_SIZE);
	set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
	if (err)
		goto err_info;

	err = mlx4_en_map_buffer(&ring->wqres.buf);
	if (err) {
		en_err(priv, "Failed to map RX buffer\n");
		goto err_hwq;
	}
	ring->buf = ring->wqres.buf.direct.buf;

	ring->hwtstamp_rx_filter = priv->hwtstamp_config.rx_filter;

	*pring = ring;
	return 0;

err_hwq:
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
err_info:
	vfree(ring->rx_info);
	ring->rx_info = NULL;
err_ring:
	kfree(ring);
	*pring = NULL;

	return err;
}

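/* Initialize all RX rings: write the descriptors, set up the page
 * allocators and pre-fill the rings with buffers.
 */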
int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
{
	struct mlx4_en_rx_ring *ring;
	int i;
	int ring_ind;
	int err;
	int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
					DS_SIZE * priv->num_frags);

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = priv->rx_ring[ring_ind];

		ring->prod = 0;
		ring->cons = 0;
		ring->actual_size = 0;
		ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn;

		ring->stride = stride;
		if (ring->stride <= TXBB_SIZE)
			ring->buf += TXBB_SIZE;

		ring->log_stride = ffs(ring->stride) - 1;
		ring->buf_size = ring->size * ring->stride;

		memset(ring->buf, 0, ring->buf_size);
		mlx4_en_update_rx_prod_db(ring);

		/* Initialize all descriptors */
		for (i = 0; i < ring->size; i++)
			mlx4_en_init_rx_desc(priv, ring, i);

		/* Initialize page allocators */
		err = mlx4_en_init_allocator(priv, ring);
		if (err) {
			en_err(priv, "Failed initializing ring allocator\n");
			if (ring->stride <= TXBB_SIZE)
				ring->buf -= TXBB_SIZE;
			ring_ind--;
			goto err_allocator;
		}
	}
	err = mlx4_en_fill_rx_buffers(priv);
	if (err)
		goto err_buffers;

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = priv->rx_ring[ring_ind];

		ring->size_mask = ring->actual_size - 1;
		mlx4_en_update_rx_prod_db(ring);
	}

	return 0;

err_buffers:
	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
		mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]);

	ring_ind = priv->rx_ring_num - 1;
err_allocator:
	while (ring_ind >= 0) {
		if (priv->rx_ring[ring_ind]->stride <= TXBB_SIZE)
			priv->rx_ring[ring_ind]->buf -= TXBB_SIZE;
		mlx4_en_destroy_allocator(priv, priv->rx_ring[ring_ind]);
		ring_ind--;
	}
	return err;
}

/* We recover from out of memory by scheduling our napi poll
 * function (mlx4_en_process_rx_cq), which tries to allocate
 * all missing RX buffers (call to mlx4_en_refill_rx_buffers).
 */
void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv)
{
	int ring;

	if (!priv->port_up)
		return;

	for (ring = 0; ring < priv->rx_ring_num; ring++) {
		if (mlx4_en_is_ring_empty(priv->rx_ring[ring]))
			napi_reschedule(&priv->rx_cq[ring]->napi);
	}
}

void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_rx_ring **pring,
			     u32 size, u16 stride)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rx_ring *ring = *pring;

	mlx4_en_unmap_buffer(&ring->wqres.buf);
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
	vfree(ring->rx_info);
	ring->rx_info = NULL;
	kfree(ring);
	*pring = NULL;
#ifdef CONFIG_RFS_ACCEL
	mlx4_en_cleanup_filters(priv);
#endif
}

void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
				struct mlx4_en_rx_ring *ring)
{
	mlx4_en_free_rx_buf(priv, ring);
	if (ring->stride <= TXBB_SIZE)
		ring->buf -= TXBB_SIZE;
	mlx4_en_destroy_allocator(priv, ring);
}

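/* Move the used fragments of an RX descriptor into the skb's frag list,
 * leaving the descriptor ready to be refilled.  Returns the number of
 * fragments used, or 0 on failure.
 */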
static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
				    struct mlx4_en_rx_desc *rx_desc,
				    struct mlx4_en_rx_alloc *frags,
				    struct sk_buff *skb,
				    int length)
{
	struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
	struct mlx4_en_frag_info *frag_info;
	int nr;
	dma_addr_t dma;

	/* Collect used fragments while replacing them in the HW descriptors */
	for (nr = 0; nr < priv->num_frags; nr++) {
		frag_info = &priv->frag_info[nr];
		if (length <= frag_info->frag_prefix_size)
			break;
		if (!frags[nr].page)
			goto fail;

		dma = be64_to_cpu(rx_desc->data[nr].addr);
		dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size,
					DMA_FROM_DEVICE);

		/* Save page reference in skb */
		__skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page);
		skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size);
		skb_frags_rx[nr].page_offset = frags[nr].page_offset;
		skb->truesize += frag_info->frag_stride;
		frags[nr].page = NULL;
	}
	/* Adjust size of last fragment to match actual length */
	if (nr > 0)
		skb_frag_size_set(&skb_frags_rx[nr - 1],
			length - priv->frag_info[nr - 1].frag_prefix_size);
	return nr;

fail:
	while (nr > 0) {
		nr--;
		__skb_frag_unref(&skb_frags_rx[nr]);
	}
	return 0;
}

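/* Build an skb for a received packet: small packets are copied entirely into
 * the linear part; larger ones keep their data in page fragments and only the
 * headers are pulled into the linear part.
 */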
static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
				      struct mlx4_en_rx_desc *rx_desc,
				      struct mlx4_en_rx_alloc *frags,
				      unsigned int length)
{
	struct sk_buff *skb;
	void *va;
	int used_frags;
	dma_addr_t dma;

	skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN);
	if (!skb) {
		en_dbg(RX_ERR, priv, "Failed allocating skb\n");
		return NULL;
	}
	skb_reserve(skb, NET_IP_ALIGN);
	skb->len = length;

	/* Get a pointer to the first fragment so we can copy the headers into
	 * the (linear part of the) skb */
	va = page_address(frags[0].page) + frags[0].page_offset;

	if (length <= SMALL_PACKET_SIZE) {
		/* We are copying all relevant data to the skb - temporarily
		 * sync buffers for the copy */
		dma = be64_to_cpu(rx_desc->data[0].addr);
		dma_sync_single_for_cpu(priv->ddev, dma, length,
					DMA_FROM_DEVICE);
		skb_copy_to_linear_data(skb, va, length);
		skb->tail += length;
	} else {
		unsigned int pull_len;

		/* Move relevant fragments to skb */
		used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, frags,
							skb, length);
		if (unlikely(!used_frags)) {
			kfree_skb(skb);
			return NULL;
		}
		skb_shinfo(skb)->nr_frags = used_frags;

		pull_len = eth_get_headlen(va, SMALL_PACKET_SIZE);
		/* Copy headers into the skb linear buffer */
		memcpy(skb->data, va, pull_len);
		skb->tail += pull_len;

		/* Skip headers in first fragment */
		skb_shinfo(skb)->frags[0].page_offset += pull_len;

		/* Adjust size of first fragment */
		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], pull_len);
		skb->data_len = length - pull_len;
	}
	return skb;
}

static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb)
{
	int i;
	int offset = ETH_HLEN;

	for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) {
		if (*(skb->data + offset) != (unsigned char) (i & 0xff))
			goto out_loopback;
	}
	/* Loopback found */
	priv->loopback_ok = 1;

out_loopback:
	dev_kfree_skb_any(skb);
}

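/* Re-post RX buffers for the descriptors consumed by the last poll,
 * until the ring is full again or an allocation fails.
 */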
static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
				     struct mlx4_en_rx_ring *ring)
{
	int index = ring->prod & ring->size_mask;

	while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
		if (mlx4_en_prepare_rx_desc(priv, ring, index,
					    GFP_ATOMIC | __GFP_COLD))
			break;
		ring->prod++;
		index = ring->prod & ring->size_mask;
	}
}

/* When hardware doesn't strip the VLAN header, we need to calculate the
 * checksum over it and add it to the hardware's checksum calculation
 */
static inline __wsum get_fixed_vlan_csum(__wsum hw_checksum,
					 struct vlan_hdr *vlanh)
{
	return csum_add(hw_checksum, *(__wsum *)vlanh);
}

/* Although the stack expects a checksum which doesn't include the pseudo
 * header, the HW adds it. To address that, we are subtracting the pseudo
 * header checksum from the checksum value provided by the HW.
 */
static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb,
				struct iphdr *iph)
{
	__u16 length_for_csum = 0;
	__wsum csum_pseudo_header = 0;

	length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2));
	csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr,
						length_for_csum, iph->protocol, 0);
	skb->csum = csum_sub(hw_checksum, csum_pseudo_header);
}

#if IS_ENABLED(CONFIG_IPV6)
/* In IPv6 packets, besides subtracting the pseudo header checksum,
 * we also compute/add the IP header checksum which
 * is not added by the HW.
 */
static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb,
			       struct ipv6hdr *ipv6h)
{
	__wsum csum_pseudo_hdr = 0;

	if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS)
		return -1;
	hw_checksum = csum_add(hw_checksum, (__force __wsum)(ipv6h->nexthdr << 8));

	csum_pseudo_hdr = csum_partial(&ipv6h->saddr,
				       sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0);
	csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ipv6h->payload_len);
	csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ntohs(ipv6h->nexthdr));

	skb->csum = csum_sub(hw_checksum, csum_pseudo_hdr);
	skb->csum = csum_add(skb->csum, csum_partial(ipv6h, sizeof(struct ipv6hdr), 0));
	return 0;
}
#endif
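
/* Fix up the CHECKSUM_COMPLETE value reported by the HW: fold in a
 * non-stripped VLAN header if present and remove the pseudo-header
 * contribution for IPv4/IPv6.
 */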
static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
		      netdev_features_t dev_features)
{
	__wsum hw_checksum = 0;
	void *hdr = (u8 *)va + sizeof(struct ethhdr);

	hw_checksum = csum_unfold((__force __sum16)cqe->checksum);

	if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK) &&
	    !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) {
		hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr);
		hdr += sizeof(struct vlan_hdr);
	}

	if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4))
		get_fixed_ipv4_csum(hw_checksum, skb, hdr);
#if IS_ENABLED(CONFIG_IPV6)
	else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6))
		if (get_fixed_ipv6_csum(hw_checksum, skb, hdr))
			return -1;
#endif
	return 0;
}

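/* RX NAPI handler: process up to @budget completions from the RX CQ,
 * build skbs (using GRO when possible) and refill the ring.
 */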
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_cqe *cqe;
	struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
	struct mlx4_en_rx_alloc *frags;
	struct mlx4_en_rx_desc *rx_desc;
	struct sk_buff *skb;
	int index;
	int nr;
	unsigned int length;
	int polled = 0;
	int ip_summed;
	int factor = priv->cqe_factor;
	u64 timestamp;
	bool l2_tunnel;

	if (!priv->port_up)
		return 0;

	if (budget <= 0)
		return polled;

	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
	 * descriptor offset can be deduced from the CQE index instead of
	 * reading 'cqe->index' */
	index = cq->mcq.cons_index & ring->size_mask;
	cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
		    cq->mcq.cons_index & cq->size)) {

		frags = ring->rx_info + (index << priv->log_rx_info);
		rx_desc = ring->buf + (index << ring->log_stride);

		/*
		 * Make sure we read the CQE after we read the ownership bit
		 */
		dma_rmb();

		/* Drop packet on bad receive or bad checksum */
		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
						MLX4_CQE_OPCODE_ERROR)) {
			en_err(priv, "CQE completed in error - vendor syndrome:%d syndrome:%d\n",
			       ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome,
			       ((struct mlx4_err_cqe *)cqe)->syndrome);
			goto next;
		}
		if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
			en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
			goto next;
		}

		/* Check if we need to drop the packet: SW filtering is needed
		 * if SRIOV is not enabled and we are not performing the
		 * selftest, or if flb is disabled.
		 */
		if (priv->flags & MLX4_EN_FLAG_RX_FILTER_NEEDED) {
			struct ethhdr *ethh;
			dma_addr_t dma;
			/* Get a pointer to the first fragment, since we don't
			 * have an skb yet, and cast it to an ethhdr struct
			 */
			dma = be64_to_cpu(rx_desc->data[0].addr);
			dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh),
						DMA_FROM_DEVICE);
			ethh = (struct ethhdr *)(page_address(frags[0].page) +
						 frags[0].page_offset);

			if (is_multicast_ether_addr(ethh->h_dest)) {
				struct mlx4_mac_entry *entry;
				struct hlist_head *bucket;
				unsigned int mac_hash;

				/* Drop the packet, since HW looped it back */
				mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX];
				bucket = &priv->mac_hash[mac_hash];
				rcu_read_lock();
				hlist_for_each_entry_rcu(entry, bucket, hlist) {
					if (ether_addr_equal_64bits(entry->mac,
								    ethh->h_source)) {
						rcu_read_unlock();
						goto next;
					}
				}
				rcu_read_unlock();
			}
		}

		/*
		 * Packet is OK - process it.
		 */
		length = be32_to_cpu(cqe->byte_cnt);
		length -= ring->fcs_del;
		ring->bytes += length;
		ring->packets++;
		l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
			(cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));

		if (likely(dev->features & NETIF_F_RXCSUM)) {
			if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
						      MLX4_CQE_STATUS_UDP)) {
				if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
				    cqe->checksum == cpu_to_be16(0xffff)) {
					ip_summed = CHECKSUM_UNNECESSARY;
					ring->csum_ok++;
				} else {
					ip_summed = CHECKSUM_NONE;
					ring->csum_none++;
				}
			} else {
				if (priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
				    (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
							       MLX4_CQE_STATUS_IPV6))) {
					ip_summed = CHECKSUM_COMPLETE;
					ring->csum_complete++;
				} else {
					ip_summed = CHECKSUM_NONE;
					ring->csum_none++;
				}
			}
		} else {
			ip_summed = CHECKSUM_NONE;
			ring->csum_none++;
		}

		/* This packet is eligible for GRO if it is:
		 * - DIX Ethernet (type interpretation)
		 * - TCP/IP (v4)
		 * - without IP options
		 * - not an IP fragment
		 * - no LLS polling in progress
		 */
		if (!mlx4_en_cq_busy_polling(cq) &&
		    (dev->features & NETIF_F_GRO)) {
			struct sk_buff *gro_skb = napi_get_frags(&cq->napi);
			if (!gro_skb)
				goto next;

			nr = mlx4_en_complete_rx_desc(priv,
				rx_desc, frags, gro_skb,
				length);
			if (!nr)
				goto next;

			if (ip_summed == CHECKSUM_COMPLETE) {
				void *va = skb_frag_address(skb_shinfo(gro_skb)->frags);
				if (check_csum(cqe, gro_skb, va,
					       dev->features)) {
					ip_summed = CHECKSUM_NONE;
					ring->csum_none++;
					ring->csum_complete--;
				}
			}

			skb_shinfo(gro_skb)->nr_frags = nr;
			gro_skb->len = length;
			gro_skb->data_len = length;
			gro_skb->ip_summed = ip_summed;

			if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY)
				gro_skb->csum_level = 1;

			if ((cqe->vlan_my_qpn &
			    cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) &&
			    (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
				u16 vid = be16_to_cpu(cqe->sl_vid);

				__vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid);
			}

			if (dev->features & NETIF_F_RXHASH)
				skb_set_hash(gro_skb,
					     be32_to_cpu(cqe->immed_rss_invalid),
					     PKT_HASH_TYPE_L3);

			skb_record_rx_queue(gro_skb, cq->ring);
			skb_mark_napi_id(gro_skb, &cq->napi);

			if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) {
				timestamp = mlx4_en_get_cqe_ts(cqe);
				mlx4_en_fill_hwtstamps(mdev,
						       skb_hwtstamps(gro_skb),
						       timestamp);
			}

			napi_gro_frags(&cq->napi);
			goto next;
		}

		/* GRO not possible, complete processing here */
		skb = mlx4_en_rx_skb(priv, rx_desc, frags, length);
		if (!skb) {
			priv->stats.rx_dropped++;
			goto next;
		}

		if (unlikely(priv->validate_loopback)) {
			validate_loopback(priv, skb);
			goto next;
		}

		if (ip_summed == CHECKSUM_COMPLETE) {
			if (check_csum(cqe, skb, skb->data, dev->features)) {
				ip_summed = CHECKSUM_NONE;
				ring->csum_complete--;
				ring->csum_none++;
			}
		}

		skb->ip_summed = ip_summed;
		skb->protocol = eth_type_trans(skb, dev);
		skb_record_rx_queue(skb, cq->ring);

		if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY)
			skb->csum_level = 1;

		if (dev->features & NETIF_F_RXHASH)
			skb_set_hash(skb,
				     be32_to_cpu(cqe->immed_rss_invalid),
				     PKT_HASH_TYPE_L3);

		if ((be32_to_cpu(cqe->vlan_my_qpn) &
		    MLX4_CQE_VLAN_PRESENT_MASK) &&
		    (dev->features & NETIF_F_HW_VLAN_CTAG_RX))
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->sl_vid));

		if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) {
			timestamp = mlx4_en_get_cqe_ts(cqe);
			mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb),
					       timestamp);
		}

		skb_mark_napi_id(skb, &cq->napi);

		if (!mlx4_en_cq_busy_polling(cq))
			napi_gro_receive(&cq->napi, skb);
		else
			netif_receive_skb(skb);

next:
		for (nr = 0; nr < priv->num_frags; nr++)
			mlx4_en_free_frag(priv, frags, nr);

		++cq->mcq.cons_index;
		index = (cq->mcq.cons_index) & ring->size_mask;
		cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
		if (++polled == budget)
			goto out;
	}

out:
	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
	mlx4_cq_set_ci(&cq->mcq);
	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
	ring->cons = cq->mcq.cons_index;
	mlx4_en_refill_rx_buffers(priv, ring);
	mlx4_en_update_rx_prod_db(ring);
	return polled;
}

void mlx4_en_rx_irq(struct mlx4_cq *mcq)
{
	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
	struct mlx4_en_priv *priv = netdev_priv(cq->dev);

	if (likely(priv->port_up))
		napi_schedule_irqoff(&cq->napi);
	else
		mlx4_en_arm_cq(priv, cq);
}

/* Rx CQ polling - called by NAPI */
int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
{
	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
	struct net_device *dev = cq->dev;
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int done;

	if (!mlx4_en_cq_lock_napi(cq))
		return budget;

	done = mlx4_en_process_rx_cq(dev, cq, budget);

	mlx4_en_cq_unlock_napi(cq);

	/* If we used up all the quota - we're probably not done yet... */
	if (done == budget) {
		int cpu_curr;
		const struct cpumask *aff;

		INC_PERF_COUNTER(priv->pstats.napi_quota);

		cpu_curr = smp_processor_id();
		aff = irq_desc_get_irq_data(cq->irq_desc)->affinity;

		if (likely(cpumask_test_cpu(cpu_curr, aff)))
			return budget;

		/* The current CPU is not in the IRQ affinity mask - the
		 * affinity has probably changed. Stop this NAPI poll and
		 * let it be rescheduled on the right CPU.
		 */
		done = 0;
	}
	/* Done for now */
	napi_complete_done(napi, done);
	mlx4_en_arm_cq(priv, cq);
	return done;
}

static const int frag_sizes[] = {
	FRAG_SZ0,
	FRAG_SZ1,
	FRAG_SZ2,
	FRAG_SZ3
};

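/* Split the effective MTU (MTU + Ethernet + VLAN headers) into receive
 * fragments of predefined sizes and record the resulting scatter-list.
 */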
void mlx4_en_calc_rx_buf(struct net_device *dev)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int eff_mtu = dev->mtu + ETH_HLEN + VLAN_HLEN;
	int buf_size = 0;
	int i = 0;

	while (buf_size < eff_mtu) {
		priv->frag_info[i].frag_size =
			(eff_mtu > buf_size + frag_sizes[i]) ?
				frag_sizes[i] : eff_mtu - buf_size;
		priv->frag_info[i].frag_prefix_size = buf_size;
		priv->frag_info[i].frag_stride =
				ALIGN(priv->frag_info[i].frag_size,
				      SMP_CACHE_BYTES);
		buf_size += priv->frag_info[i].frag_size;
		i++;
	}

	priv->num_frags = i;
	priv->rx_skb_size = eff_mtu;
	priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct mlx4_en_rx_alloc));

	en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d num_frags:%d):\n",
	       eff_mtu, priv->num_frags);
	for (i = 0; i < priv->num_frags; i++) {
		en_dbg(DRV, priv,
		       "  frag:%d - size:%d prefix:%d stride:%d\n",
		       i,
		       priv->frag_info[i].frag_size,
		       priv->frag_info[i].frag_prefix_size,
		       priv->frag_info[i].frag_stride);
	}
}

/* RSS related functions */

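/* Create one RX QP attached to the given ring/CQ and bring it to the
 * ready state.
 */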
static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
				 struct mlx4_en_rx_ring *ring,
				 enum mlx4_qp_state *state,
				 struct mlx4_qp *qp)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_qp_context *context;
	int err = 0;

	context = kmalloc(sizeof(*context), GFP_KERNEL);
	if (!context)
		return -ENOMEM;

	err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL);
	if (err) {
		en_err(priv, "Failed to allocate qp #%x\n", qpn);
		goto out;
	}
	qp->event = mlx4_en_sqp_event;

	memset(context, 0, sizeof(*context));
	mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0,
				qpn, ring->cqn, -1, context);
	context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);

	/* Cancel FCS removal if FW allows */
	if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) {
		context->param3 |= cpu_to_be32(1 << 29);
		if (priv->dev->features & NETIF_F_RXFCS)
			ring->fcs_del = 0;
		else
			ring->fcs_del = ETH_FCS_LEN;
	} else
		ring->fcs_del = 0;

	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state);
	if (err) {
		mlx4_qp_remove(mdev->dev, qp);
		mlx4_qp_free(mdev->dev, qp);
	}
	mlx4_en_update_rx_prod_db(ring);
out:
	kfree(context);
	return err;
}

int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
{
	int err;
	u32 qpn;

	err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn,
				    MLX4_RESERVE_A0_QP);
	if (err) {
		en_err(priv, "Failed reserving drop qpn\n");
		return err;
	}
	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL);
	if (err) {
		en_err(priv, "Failed allocating drop qp\n");
		mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
		return err;
	}

	return 0;
}

void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv)
{
	u32 qpn;

	qpn = priv->drop_qp.qpn;
	mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp);
	mlx4_qp_free(priv->mdev->dev, &priv->drop_qp);
	mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
}

/* Allocate RX QPs and configure them according to the RSS map */
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	struct mlx4_qp_context context;
	struct mlx4_rss_context *rss_context;
	int rss_rings;
	void *ptr;
	u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 |
			MLX4_RSS_TCP_IPV6);
	int i, qpn;
	int err = 0;
	int good_qps = 0;

	en_dbg(DRV, priv, "Configuring rss steering\n");
	err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
				    priv->rx_ring_num,
				    &rss_map->base_qpn, 0);
	if (err) {
		en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
		return err;
	}

	for (i = 0; i < priv->rx_ring_num; i++) {
		qpn = rss_map->base_qpn + i;
		err = mlx4_en_config_rss_qp(priv, qpn, priv->rx_ring[i],
					    &rss_map->state[i],
					    &rss_map->qps[i]);
		if (err)
			goto rss_err;

		++good_qps;
	}

	/* Configure RSS indirection qp */
	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL);
	if (err) {
		en_err(priv, "Failed to allocate RSS indirection QP\n");
		goto rss_err;
	}
	rss_map->indir_qp.event = mlx4_en_sqp_event;
	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
				priv->rx_ring[0]->cqn, -1, &context);

	if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num)
		rss_rings = priv->rx_ring_num;
	else
		rss_rings = priv->prof->rss_rings;

	ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path)
					+ MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
	rss_context = ptr;
	rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 |
					    (rss_map->base_qpn));
	rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
	if (priv->mdev->profile.udp_rss) {
		rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6;
		rss_context->base_qpn_udp = rss_context->default_qpn;
	}

	if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
		en_info(priv, "Setting RSS context tunnel type to RSS on inner headers\n");
		rss_mask |= MLX4_RSS_BY_INNER_HEADERS;
	}

	rss_context->flags = rss_mask;
	rss_context->hash_fn = MLX4_RSS_HASH_TOP;
	if (priv->rss_hash_fn == ETH_RSS_HASH_XOR) {
		rss_context->hash_fn = MLX4_RSS_HASH_XOR;
	} else if (priv->rss_hash_fn == ETH_RSS_HASH_TOP) {
		rss_context->hash_fn = MLX4_RSS_HASH_TOP;
		memcpy(rss_context->rss_key, priv->rss_key,
		       MLX4_EN_RSS_KEY_SIZE);
	} else {
		en_err(priv, "Unknown RSS hash function requested\n");
		err = -EINVAL;
		goto indir_err;
	}
	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
			       &rss_map->indir_qp, &rss_map->indir_state);
	if (err)
		goto indir_err;

	return 0;

indir_err:
	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
rss_err:
	for (i = 0; i < good_qps; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
	return err;
}

void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	int i;

	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);

	for (i = 0; i < priv->rx_ring_num; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
}