/******************************************************************************

  Copyright (c) 2013-2019, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

/*
**	IXL driver TX/RX Routines:
**	    This was separated to allow usage by
** 	    both the PF and VF drivers.
*/

#ifndef IXL_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixl.h"

#ifdef RSS
#include <net/rss_config.h>
#endif

#ifdef CSUM_ENCAP_VXLAN
#include <net/if_vxlan.h>
#endif

/* Local Prototypes */
static void	ixl_rx_checksum(struct ixl_queue *, struct mbuf *, u32, u32, u8);
static void	ixl_refresh_mbufs(struct ixl_queue *, int);
static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
static void	ixl_tx_setup_offload(struct ixl_tx_offload_info *, u32 *, u32 *);
static void	ixl_ctxd_setup(struct ixl_queue *, struct i40e_tx_context_desc *,
				struct ixl_tx_offload_info *, u32);
static int	ixl_parse_packet_header(struct mbuf **,
					struct ixl_tx_offload_info *);
static void	ixl_queue_sw_irq(struct ixl_vsi *, int);

static inline void ixl_rx_discard(struct rx_ring *, int);
static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u8);

static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
static inline u32 ixl_get_tx_head(struct ixl_queue *que);

#ifdef DEV_NETMAP
#include <dev/netmap/if_ixl_netmap.h>
#if __FreeBSD_version >= 1200000
int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1;
#endif
#endif /* DEV_NETMAP */

#ifdef IXL_DEBUG
static int	ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS);
static int	ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS);
#endif

/*
 * Copy the driver's built-in default RSS hash key into the caller's buffer.
 *
 * @key destination buffer; IXL_RSS_KEY_SIZE bytes are written to it
 */
void
ixl_get_default_rss_key(u32 *key)
{
	static const u32 default_seed[IXL_RSS_KEY_SIZE_REG] = {
		0x41b01687, 0x183cfd8c, 0xce880440, 0x580cbc3c,
		0x35897377, 0x328b25e1, 0x4fa98922, 0xb7d90c14,
		0xd5bad70d, 0xcd15a2c1, 0x0, 0x0, 0x0
	};

	MPASS(key != NULL);

	bcopy(default_seed, key, IXL_RSS_KEY_SIZE);
}

/**
 * i40e_vc_stat_str - convert virtchnl status err code to a string
 * @hw: pointer to the HW structure
 * @stat_err: the status error code to convert
 **/
const char *
i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err)
{
	switch (stat_err) {
	case VIRTCHNL_STATUS_SUCCESS:
		return "OK";
	case VIRTCHNL_ERR_PARAM:
		return "VIRTCHNL_ERR_PARAM";
	case VIRTCHNL_STATUS_ERR_NO_MEMORY:
		return "VIRTCHNL_STATUS_ERR_NO_MEMORY";
	case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH:
		return "VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH";
	case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR:
		return "VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR";
	case VIRTCHNL_STATUS_ERR_INVALID_VF_ID:
		return "VIRTCHNL_STATUS_ERR_INVALID_VF_ID";
	case VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR:
		return "VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR";
	case VIRTCHNL_STATUS_ERR_NOT_SUPPORTED:
		return "VIRTCHNL_STATUS_ERR_NOT_SUPPORTED";
	}

	snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
	return hw->err_str;
}

/*
 * Rewrite the ENABLE bit in the MSI-X control register.
 *
 * Looks up the device's MSI-X capability in PCI config space and sets
 * PCIM_MSIXCTRL_MSIX_ENABLE in its 16-bit control word. If the device does
 * not expose an MSI-X capability, config space is left untouched.
 */
void
ixl_set_msix_enable(device_t dev)
{
	int msix_ctrl, rid;

	/*
	 * pci_find_cap() only fills in the capability offset on success;
	 * bail out rather than read-modify-write config space at an
	 * uninitialized offset.
	 */
	if (pci_find_cap(dev, PCIY_MSIX, &rid) != 0)
		return;

	rid += PCIR_MSIX_CTRL;
	msix_ctrl = pci_read_config(dev, rid, 2);
	msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(dev, rid, msix_ctrl, 2);
}


/*
** Multiqueue Transmit driver
*/
int
ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct ixl_vsi		*vsi = ifp->if_softc;
	struct ixl_queue	*que;
	struct tx_ring		*txr;
	int 			err, i;
#ifdef RSS
	u32			bucket_id;
#endif

	/*
	 * Which queue to use:
	 *
	 * When doing RSS, map it to the same outbound
	 * queue as the incoming flow would be mapped to.
	 * If everything is setup correctly, it should be
	 * the same bucket that the current CPU we're on is.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef  RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			i = bucket_id % vsi->num_queues;
                } else
#endif
                        i = m->m_pkthdr.flowid % vsi->num_queues;
        } else
		i = curcpu % vsi->num_queues;

	que = &vsi->queues[i];
	txr = &que->txr;

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXL_TX_TRYLOCK(txr)) {
		ixl_mq_start_locked(ifp, txr);
		IXL_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &que->tx_task);

	return (0);
}

/*
 * Drain a queue's buf_ring onto its TX descriptor ring.
 *
 * The callers in this file invoke this with the TX ring lock held.
 * Packets are peeked one at a time and handed to ixl_xmit(); the loop
 * stops on the first transmit failure or when the interface stops running.
 */
void
ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct ixl_queue	*que = txr->que;
	struct ixl_vsi		*vsi = que->vsi;
	struct mbuf		*pkt;

	/* Nothing to do unless the interface is running and link is active */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;
	if (vsi->link_active == 0)
		return;

	/* Process the transmit queue */
	for (;;) {
		pkt = drbr_peek(ifp, txr->br);
		if (pkt == NULL)
			break;

		if (ixl_xmit(que, &pkt) != 0) {
			/*
			 * A NULL pointer means ixl_xmit() consumed (freed)
			 * the packet, so drop its ring slot; otherwise put
			 * the (possibly replaced) chain back.
			 */
			if (pkt != NULL)
				drbr_putback(ifp, txr->br, pkt);
			else
				drbr_advance(ifp, txr->br);
			break;
		}

		drbr_advance(ifp, txr->br);
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, pkt);

		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	/* Reclaim completed descriptors once the ring is running low */
	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
		ixl_txeof(que);
}

/*
 * Taskqueue handler that drains packets left queued on a TX buf_ring.
 *
 * @arg: the struct ixl_queue whose ring should be drained
 * @pending: taskqueue pending count (not used here)
 */
void
ixl_deferred_mq_start(void *arg, int pending)
{
	struct ixl_queue	*que = arg;
	struct tx_ring		*txr = &que->txr;
	struct ifnet		*ifp = que->vsi->ifp;

	IXL_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br)) {
		ixl_mq_start_locked(ifp, txr);
	}
	IXL_TX_UNLOCK(txr);
}

/*
** Flush all queue ring buffers: every mbuf still waiting on any TX
** buf_ring is dequeued and freed (under that ring's TX lock), then the
** generic interface queue is flushed.
*/
void
ixl_qflush(struct ifnet *ifp)
{
	struct ixl_vsi	*vsi = ifp->if_softc;
	struct tx_ring	*txr;
	struct mbuf	*pkt;
	int		q;

	for (q = 0; q < vsi->num_queues; q++) {
		txr = &vsi->queues[q].txr;

		IXL_TX_LOCK(txr);
		for (;;) {
			pkt = buf_ring_dequeue_sc(txr->br);
			if (pkt == NULL)
				break;
			m_freem(pkt);
		}
		IXL_TX_UNLOCK(txr);
	}

	if_qflush(ifp);
}

/*
 * Decide whether a TSO mbuf chain is too fragmented to map as-is.
 *
 * Walks the chain after the first mbuf (which is assumed to hold all
 * headers), counting how many mbufs have been visited since the running
 * byte budget (seeded with tso_segsz) last ran out.  Returns true as soon
 * as that count exceeds IXL_SPARSE_CHAIN, false if the end of the chain is
 * reached first.
 */
static inline bool
ixl_tso_detect_sparse(struct mbuf *mp)
{
	struct mbuf	*cur = mp->m_next;
	int		nbufs = 0;
	int		budget = mp->m_pkthdr.tso_segsz;

	while (cur != NULL) {
		nbufs++;
		budget -= cur->m_len % mp->m_pkthdr.tso_segsz;

		if (nbufs > IXL_SPARSE_CHAIN)
			return (true);

		if (budget < 1) {
			/*
			 * Budget used up: restart the count.  When this mbuf
			 * overshot the budget it also counts toward the next
			 * window.
			 */
			if (budget == 0)
				nbufs = 0;
			else
				nbufs = 1;
			budget += mp->m_pkthdr.tso_segsz;
		}

		cur = cur->m_next;
	}

	return (false);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets. 
 *  	- return 0 on success, positive on failure
 *
 **********************************************************************/
#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)

/*
 * ixl_xmit - place one mbuf chain on the queue's TX descriptor ring
 * @que: queue whose TX ring receives the packet
 * @m_headp: in/out pointer to the chain; may be replaced by a defragmented
 *           or pulled-up chain, and is set to NULL on the paths below that
 *           free the chain
 *
 * Returns 0 once the descriptors are written and the tail register has been
 * advanced.  On the defrag/DMA-load failure paths the chain is freed and
 * *m_headp is cleared.  On the xmit_fail paths (header parse error, not
 * enough descriptors) the DMA map is unloaded and *m_headp is left as is
 * for the caller to deal with.
 */
static int
ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct i40e_hw		*hw = vsi->hw;
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*txd = NULL;
	struct mbuf		*m_head, *m;
	int             	i, j, error, nsegs;
	int			first, last = 0;
	u16			vtag = 0;
	u32			cmd, off, csum_flags;
	bus_dmamap_t		map;
	bus_dma_tag_t		tag;
	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];
	struct ixl_tx_offload_info ti;

	cmd = off = 0;
	m_head = *m_headp;

        /*
         * Important to capture the first descriptor
         * used because it will contain the index of
         * the one we tell the hardware to report back
         */
        first = txr->next_avail;
	buf = &txr->buffers[first];
	map = buf->map;
	tag = txr->tx_tag;

	if (m_head->m_pkthdr.csum_flags & IXL_CSUM_TSO) {
		/* Use larger mapping for TSO */
		tag = txr->tso_tag;
		/* Collapse chains that ixl_tso_detect_sparse() flags as sparse */
		if (ixl_tso_detect_sparse(m_head)) {
			m = m_defrag(m_head, M_NOWAIT);
			if (m == NULL) {
				m_freem(*m_headp);
				*m_headp = NULL;
				return (ENOBUFS);
			}
			/* m_head is stale from here until it is reloaded below */
			*m_headp = m;
		}
	}

	/* Fills in ti; may replace *m_headp if headers had to be pulled up */
	error = ixl_parse_packet_header(m_headp, &ti);
	if (error)
		goto xmit_fail;

	/*
	 * Map the packet for DMA.
	 */
	error = bus_dmamap_load_mbuf_sg(tag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		/* Too many segments for the tag: defragment and retry once */
		struct mbuf *m;

		m = m_defrag(*m_headp, M_NOWAIT);
		if (m == NULL) {
			que->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(tag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error != 0) {
			que->tx_dmamap_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error != 0) {
		que->tx_dmamap_failed++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->avail - 2) {
		txr->no_desc++;
		error = ENOBUFS;
		goto xmit_fail;
	}
	/* Reload: the chain may have been replaced since the top of the function */
	m_head = *m_headp;

	csum_flags = m_head->m_pkthdr.csum_flags;
	i = txr->next_avail;
	/* Set up the TSO/CSUM offload */
	if (csum_flags & CSUM_OFFLOAD) {
		/* Set up the context descriptor if required */
		if (IXL_NEEDS_CTXD(csum_flags)) {
			ixl_ctxd_setup(que,
			    (struct i40e_tx_context_desc *)&txr->base[i],
			    &ti, m_head->m_pkthdr.tso_segsz);

			/* The context descriptor's slot carries no mbuf */
			buf->m_head = NULL;
			buf->eop_index = -1;

			if (++i == que->num_tx_desc)
				i = 0;
			txr->avail--;
		}
		ixl_tx_setup_offload(&ti, &cmd, &off);
	}
	/* Grab the VLAN tag */
	if (m_head->m_flags & M_VLANTAG) {
		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		vtag = htole16(m_head->m_pkthdr.ether_vtag);
	}


	cmd |= I40E_TX_DESC_CMD_ICRC;
	/* Write one data descriptor per DMA segment */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;

		buf = &txr->buffers[i];
		buf->tag = tag; /* Keep track of the type tag */
		txd = &txr->base[i];
		seglen = segs[j].ds_len;

		txd->buffer_addr = htole64(segs[j].ds_addr);
		txd->cmd_type_offset_bsz =
		    htole64(I40E_TX_DESC_DTYPE_DATA
		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
		    | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));

		last = i; /* descriptor that will get completion IRQ */

		if (++i == que->num_tx_desc)
			i = 0;

		buf->m_head = NULL;
		buf->eop_index = -1;
	}
	/* Set the last descriptor for report */
	txd->cmd_type_offset_bsz |=
	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
	txr->avail -= nsegs;
	txr->next_avail = i;

	/* buf is the last data descriptor's buffer here; it owns the mbuf */
	buf->m_head = m_head;
	/* Swap the dma map between the first and last descriptor.
	 * The descriptor that gets checked on completion will now
	 * have the real map from the first descriptor.
	 */
	txr->buffers[first].map = buf->map;
	buf->map = map;
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);

        /* Set the index of the descriptor that will be marked done */
        buf = &txr->buffers[first];
	buf->eop_index = last;

        bus_dmamap_sync(txr->dma.tag, txr->dma.map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	wr32(hw, txr->tail, i);

	/* Mark outstanding work */
	atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
	return (0);

xmit_fail:
	/*
	 * buf still refers to buffers[first] on both paths that jump here.
	 * NOTE(review): the header-parse path arrives before any map load,
	 * so this unload acts on a map that was never loaded - confirm that
	 * is harmless on all supported bus_dma implementations.
	 */
	bus_dmamap_unload(tag, buf->map);
	return (error);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
int
ixl_allocate_tx_data(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t		dev = vsi->dev;
	struct ixl_tx_buf	*buf;
	int			i, error = 0;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IXL_TSO_SIZE,		/* maxsize */
			       IXL_MAX_TX_SEGS,		/* nsegments */
			       IXL_MAX_DMA_SEG_SIZE,	/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &txr->tx_tag))) {
		device_printf(dev,"Unable to allocate TX DMA tag\n");
		return (error);
	}

	/* Make a special tag for TSO */
	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IXL_TSO_SIZE,		/* maxsize */
			       IXL_MAX_TSO_SEGS,	/* nsegments */
			       IXL_MAX_DMA_SEG_SIZE,	/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &txr->tso_tag))) {
		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
		goto free_tx_dma;
	}

	if (!(txr->buffers =
	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
	    que->num_tx_desc, M_IXL, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto free_tx_tso_dma;
	}

        /* Create the descriptor buffer default dma maps */
	buf = txr->buffers;
	for (i = 0; i < que->num_tx_desc; i++, buf++) {
		buf->tag = txr->tx_tag;
		error = bus_dmamap_create(buf->tag, 0, &buf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto free_buffers;
		}
	}

	return 0;

free_buffers:
	while (i--) {
		buf--;
		bus_dmamap_destroy(buf->tag, buf->map);
	}

	free(txr->buffers, M_IXL);
	txr->buffers = NULL;
free_tx_tso_dma:
	bus_dma_tag_destroy(txr->tso_tag);
	txr->tso_tag = NULL;
free_tx_dma:
	bus_dma_tag_destroy(txr->tx_tag);
	txr->tx_tag = NULL;

	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize a queue transmit ring.
 *	- zeroes the descriptor ring and resets the ring indices and
 *	  the watchdog timer
 *	- unloads and frees any mbufs still attached to tx buffers
 *	- marks every descriptor as available
 *  All of this happens with the TX ring lock held.
 *
 **********************************************************************/
void
ixl_init_tx_ring(struct ixl_queue *que)
{
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*txbuf;
	int			idx;

	IXL_TX_LOCK(txr);

#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, que->me, 0);
#endif /* DEV_NETMAP */

	/* Wipe the old descriptor ring contents */
	bzero((void *)txr->base,
	    sizeof(struct i40e_tx_desc) * que->num_tx_desc);

	/* Start over at the beginning of the ring */
	txr->next_avail = 0;
	txr->next_to_clean = 0;

	/* No work outstanding, so the watchdog is idle */
	txr->watchdog_timer = 0;

	for (idx = 0; idx < que->num_tx_desc; idx++) {
		txbuf = &txr->buffers[idx];

		/* Release a stale mbuf left over from before the reset */
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txbuf->tag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txbuf->tag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * netmap_idx_n2k() maps a nic index, idx, into the
		 * corresponding netmap slot index, si
		 */
		if (slot) {
#if ((__FreeBSD_version >= 1102505 && __FreeBSD_version < 1200000) || \
    __FreeBSD_version >= 1200062)
			int si = netmap_idx_n2k(na->tx_rings[que->me], idx);
#else
			int si = netmap_idx_n2k(&na->tx_rings[que->me], idx);
#endif
			netmap_load_map(na, txbuf->tag, txbuf->map,
			    NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP index */
		txbuf->eop_index = -1;
	}

	/* Every descriptor is free again */
	txr->avail = que->num_tx_desc;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXL_TX_UNLOCK(txr);
}


/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 *  Releases any mbufs still attached to tx buffers, destroys the
 *  per-buffer DMA maps, frees the tx_buffer array and destroys both
 *  TX DMA tags.  Each resource is checked for NULL first and cleared
 *  afterwards, so this is safe to call when ixl_allocate_tx_data()
 *  failed or when the queue has already been freed.
 *
 **********************************************************************/
void
ixl_free_que_tx(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	/*
	 * The buffer array may be NULL (allocation failed or already
	 * freed); only walk it when it actually exists.
	 */
	if (txr->buffers != NULL) {
		for (int i = 0; i < que->num_tx_desc; i++) {
			buf = &txr->buffers[i];
			if (buf->m_head != NULL) {
				bus_dmamap_sync(buf->tag, buf->map,
				    BUS_DMASYNC_POSTWRITE);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			}
			bus_dmamap_unload(buf->tag, buf->map);
			bus_dmamap_destroy(buf->tag, buf->map);
		}
		free(txr->buffers, M_IXL);
		txr->buffers = NULL;
	}
	if (txr->tx_tag != NULL) {
		bus_dma_tag_destroy(txr->tx_tag);
		txr->tx_tag = NULL;
	}
	if (txr->tso_tag != NULL) {
		bus_dma_tag_destroy(txr->tso_tag);
		txr->tso_tag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

/**
 * ixl_parse_packet_header - Parse header info and store it in ixl_tx_offload_info
 * @mp: in/out pointer to the mbuf chain to parse
 * @ti: TX offload info structure
 *
 * Extract relevant packet information from the given mbuf, and fill in the
 * ixl_tx_offload_info. The structure is cleared and overwritten. Other code
 * will then use this info for handling hardware offloads.
 *
 * The pointer to the mbuf chain may be modified if not all packet headers
 * are located in the first mbuf and m_pullup has to be used. If m_pullup
 * fails, the chain has been freed by m_pullup, so *mp is set to NULL and
 * ENOMEM is returned; the caller must not touch the old chain again.
 *
 * For TSO requests the TCP pseudo-header checksum is written into th_sum and
 * the IPv4 header checksum(s) are zeroed in the packet itself.
 *
 * On success, returns zero. On error, a non-zero error code is returned
 * (ENOMEM when headers are not contiguous and cannot be made so, ENXIO when
 * the requested offload does not match the protocol found in the packet).
 */
static int
ixl_parse_packet_header(struct mbuf **mp, struct ixl_tx_offload_info *ti)
{
	struct ether_vlan_header *eh;
	struct mbuf *m = *mp;
	caddr_t l3h, l4h;
#ifdef CSUM_ENCAP_VXLAN
	caddr_t outer_l3h;
	u8 ehdrlen;
#endif
	memset(ti, 0, sizeof(*ti));

	/* We only need to parse the remaining bits if we have a VLAN tag, or
	 * if an offload was enabled.
	 */
	if (__predict_false(!m->m_pkthdr.csum_flags && !(m->m_flags & M_VLANTAG)))
		return (0);

#ifdef CSUM_ENCAP_VXLAN
	if (m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) {
		/*
		 * Assume smallest possible frame. May need to do another
		 * pullup after parsing headers.
		 */
		size_t minlen = 2 * ETHER_HDR_LEN + sizeof(struct ip) +
		    sizeof(struct udphdr) + sizeof(struct vxlan_header);

		if (m->m_pkthdr.csum_flags & CSUM_INNER_IP_TCP)
			minlen += sizeof(struct ip) + sizeof(struct tcphdr);
		else if (m->m_pkthdr.csum_flags & CSUM_INNER_IP_UDP)
			minlen += sizeof(struct ip) + sizeof(struct udphdr);
		else if (m->m_pkthdr.csum_flags & CSUM_INNER_IP)
			minlen += sizeof(struct ip);
		else if (m->m_pkthdr.csum_flags & CSUM_INNER_IP6_TCP)
			minlen += sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
		else if (m->m_pkthdr.csum_flags & CSUM_INNER_IP6_UDP)
			minlen += sizeof(struct ip6_hdr) + sizeof(struct udphdr);

		if (m->m_len < minlen) {
			/*
			 * If MTU on VxLAN interface is misconfigured and does
			 * not leave room for tunneling headers then frames may
			 * be fragmented in unexpected way. Make sure to not
			 * try to pull up too much.
			 */
			if (minlen > m->m_pkthdr.len)
				minlen = m->m_pkthdr.len;
			m = m_pullup(m, minlen);
			/*
			 * Publish the result unconditionally: on failure
			 * m_pullup() has freed the chain, so the caller
			 * must see NULL instead of a dangling pointer.
			 */
			*mp = m;
			if (m == NULL)
				return (ENOMEM);
		}
	}
#endif

	ti->csum_flags = m->m_pkthdr.csum_flags;
	ti->plen = m->m_pkthdr.len;
	/*
	 * Determine where frame payload starts.
	 * Jump over VLAN headers if already present.
	 */
	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ti->etype = ntohs(eh->evl_proto);
		ti->ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		ti->etype = ntohs(eh->evl_encap_proto);
		ti->ehdrlen = ETHER_HDR_LEN;
	}
	l3h = (caddr_t)eh + ti->ehdrlen;

	switch (ti->etype) {
#ifdef INET
	case ETHERTYPE_IP:
	{
		struct ip *ip;
		struct tcphdr *th;

		if (__predict_false((size_t)m->m_len < ti->ehdrlen + sizeof(*ip))) {
			m = m_pullup(m, ti->ehdrlen + sizeof(*ip));
			/* See above: NULL must reach the caller on failure */
			*mp = m;
			if (m == NULL)
				return (ENOMEM);
			/* Update pointers as first mbuf may be re-allocated */
			l3h = m->m_data + ti->ehdrlen;
		}

		ip = (struct ip *)(l3h);
		ti->ip_hlen = ip->ip_hl << 2;
		ti->ipproto = ip->ip_p;
		l4h = l3h + ti->ip_hlen;

		/* TCP checksum offload requires the TCP header length */
		if (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP_TSO)) {
			if (__predict_false(ti->ipproto != IPPROTO_TCP))
				return (ENXIO);

			/* Make the TCP header contiguous if it is not already */
			if (__predict_false((size_t)m->m_len < ti->ehdrlen + ti->ip_hlen + sizeof(*th))) {
				m = m_pullup(m, ti->ehdrlen + ti->ip_hlen + sizeof(*th));
				/* See above: NULL must reach the caller on failure */
				*mp = m;
				if (m == NULL)
					return (ENOMEM);
				/* Update pointers as first mbuf may be re-allocated */
				l3h = m->m_data + ti->ehdrlen;
				l4h = l3h + ti->ip_hlen;
				ip = (struct ip *)(l3h);
			}
			th = (struct tcphdr *)(l4h);
			ti->l4_hlen = th->th_off << 2;

			if (m->m_pkthdr.csum_flags & CSUM_IP_TSO) {
				/* Always enable hardware checksum offload for TSO */
				ti->csum_flags |= (CSUM_IP_TCP | CSUM_IP);
				th->th_sum = in_pseudo(ip->ip_src.s_addr,
						       ip->ip_dst.s_addr, htons(IPPROTO_TCP));

				/* TSO offload expects the IP sum to be zero */
				ip->ip_sum = 0;
			}
			/*
			 * Encapsulated packets have CSUM_IP_* flags cleared.
			 * If we got here then there is no need look for inner frame.
			 */
			return (0);
		}
		break;
	}
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
	{
		struct ip6_hdr *ip6;
		struct tcphdr *th;

		if (__predict_false((size_t)m->m_len < ti->ehdrlen + sizeof(*ip6))) {
			m = m_pullup(m, ti->ehdrlen + sizeof(*ip6));
			/* See above: NULL must reach the caller on failure */
			*mp = m;
			if (m == NULL)
				return (ENOMEM);
			/* Update pointer as first mbuf may be re-allocated */
			l3h = m->m_data + ti->ehdrlen;
		}

		/* XXX: this won't support extended headers */
		ip6 = (struct ip6_hdr *)l3h;
		ti->ip_hlen = sizeof(*ip6);
		ti->ipproto = ip6->ip6_nxt;
		l4h = l3h + ti->ip_hlen;

		/* TCP checksum offload requires the TCP header length */
		if (m->m_pkthdr.csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO)) {
			if (__predict_false(ti->ipproto != IPPROTO_TCP))
				return (ENXIO);

			if (__predict_false((size_t)m->m_len < ti->ehdrlen + sizeof(*ip6) + sizeof(*th))) {
				m = m_pullup(m, ti->ehdrlen + sizeof(*ip6) + sizeof(*th));
				/* See above: NULL must reach the caller on failure */
				*mp = m;
				if (m == NULL)
					return (ENOMEM);
				/* Update pointers as first mbuf may be re-allocated */
				l3h = m->m_data + ti->ehdrlen;
				l4h = l3h + ti->ip_hlen;
				ip6 = (struct ip6_hdr *)l3h;
			}
			th = (struct tcphdr *)(l4h);
			ti->l4_hlen = th->th_off << 2;

			if (m->m_pkthdr.csum_flags & CSUM_IP6_TSO) {
				/* Always enable hardware checksum offload for TSO */
				ti->csum_flags |= CSUM_IP6_TCP;
				th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
			}

			/*
			 * Encapsulated packets have CSUM_IP6_* flags cleared.
			 * If we got here then there is no need look for inner frame.
			 */
			return (0);
		}
		break;
	}
#endif
	default:
		/* Not IP: no checksum/TSO offload can be applied */
		ti->csum_flags &= ~CSUM_OFFLOAD;
		ti->ip_hlen = 0;
		return (0);
	}
#ifdef CSUM_ENCAP_VXLAN
	/*
	 * VXLAN HW offloads require support from if_vxlan pseudo device,
	 * which is available only if CSUM_ENCAP_VXLAN flag is defined.
	 */
	if ((m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) == 0)
		return (0);

	if (ti->ipproto != IPPROTO_UDP)
		return (ENXIO);

	outer_l3h = l3h;
	/*
	 * Instead of adding conditions to ixl_tx_setup_offload and ixl_tso_setup
	 * save outer frame info and reuse etype and ip_hlen for inner frame.
	 */
	ti->outer_etype = ti->etype;
	ti->outer_ip_hlen = ti->ip_hlen;
	ti->l4tunlen = sizeof(struct udphdr) + sizeof(struct vxlan_header);


	eh = (struct ether_vlan_header *)(l4h + ti->l4tunlen);
	/* We could attempt to m_pullup here */
	if (__predict_false((size_t)m->m_len < ((caddr_t)eh - m->m_data) + sizeof(*eh)))
		return (ENOMEM);

	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ti->etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		ti->etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	l3h = ((caddr_t)eh + ehdrlen);
	/* The inner Ethernet header counts as part of the tunnel header */
	ti->l4tunlen += ehdrlen;

	switch (ti->etype) {
#ifdef INET
	case ETHERTYPE_IP:
	{
		struct ip *ip;
		struct tcphdr *th;

		/* We could attempt to m_pullup here */
		if (__predict_false((size_t)m->m_len < (l3h - m->m_data) + sizeof(*ip)))
			return (ENOMEM);

		ip = (struct ip *)(l3h);
		ti->ip_hlen = ip->ip_hl << 2;
		ti->ipproto = ip->ip_p;
		l4h = l3h + ti->ip_hlen;

		/* TCP checksum offload requires the TCP header length */
		if (m->m_pkthdr.csum_flags & (CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO)) {
			if (__predict_false(ti->ipproto != IPPROTO_TCP))
				return (ENXIO);

			/* We could attempt to m_pullup here */
			if (__predict_false((size_t)m->m_len < ((l4h - m->m_data) + sizeof(*th))))
				return (ENOMEM);

			th = (struct tcphdr *)l4h;
			ti->l4_hlen = th->th_off << 2;

			if (m->m_pkthdr.csum_flags & CSUM_INNER_IP_TSO) {
				/* Always enable hardware checksum offload for TSO */
				ti->csum_flags |= (CSUM_INNER_IP_TCP | CSUM_INNER_IP);
				th->th_sum = in_pseudo(ip->ip_src.s_addr,
						       ip->ip_dst.s_addr, htons(IPPROTO_TCP));

				/* TSO offload expects the IP sum to be zero */
				ip->ip_sum = 0;
			}
		}
		break;
	}
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
	{
		struct ip6_hdr *ip6;
		struct tcphdr *th;

		/* We could attempt to m_pullup here */
		if (__predict_false((size_t)m->m_len < ((l3h - m->m_data) + sizeof(*ip6))))
			return (ENOMEM);

		/* XXX: this won't support extended headers */
		ip6 = (struct ip6_hdr *)l3h;
		ti->ip_hlen = sizeof(*ip6);
		ti->ipproto = ip6->ip6_nxt;
		l4h = l3h + ti->ip_hlen;

		/* TCP checksum offload requires the TCP header length */
		if (m->m_pkthdr.csum_flags & (CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO)) {
			if (__predict_false(ti->ipproto != IPPROTO_TCP))
				return (ENXIO);

			/* We could attempt to m_pullup here */
			if (__predict_false((size_t)m->m_len < (l4h - m->m_data) + sizeof(*th)))
				return (ENOMEM);

			th = (struct tcphdr *)(l4h);
			ti->l4_hlen = th->th_off << 2;

			if (m->m_pkthdr.csum_flags & CSUM_INNER_IP6_TSO) {
				/* Always enable hardware checksum offload for TSO */
				ti->csum_flags |= CSUM_INNER_IP6_TCP;
				th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
			}
		}
		break;
	}
#endif
	default:
		break;
	}

	/* In case of TSO outer IP checksum also needs to be set to zero */
	if (ti->csum_flags & (CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO) &&
	    ti->outer_etype == ETHERTYPE_IP)
		((struct ip *)outer_l3h)->ip_sum = 0;

#endif /* CSUM_ENCAP_VXLAN */

	return (0);
}


/**
 * Translate parsed header info into TX data descriptor offload fields
 * @ti: information extracted from packet headers
 * @cmd: command part of TX descriptor (bits are OR-ed in)
 * @off: offset part of TX descriptor (bits are OR-ed in)
 *
 * Selects the IP type command bits from the ethertype, encodes the MAC and
 * IP header lengths into the offset field, and - when the matching checksum
 * flag is set - adds the L4 type and L4 header length for TCP, UDP or SCTP.
 */
static inline void
ixl_tx_setup_offload(struct ixl_tx_offload_info *ti, u32 *cmd, u32 *off)
{
	/* L3: tell the hardware which IP version this is */
	switch (ti->etype) {
#ifdef INET
	case ETHERTYPE_IP:
		*cmd |= (ti->csum_flags & IXL_CSUM_IPV4) ?
		    I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
		    I40E_TX_DESC_CMD_IIPT_IPV4;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
		break;
#endif
	default:
		break;
	}

	/* Header lengths: MAC length is shifted by 1, IP length by 2 */
	*off |= (ti->ehdrlen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
	*off |= (ti->ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

	/* L4: only programmed when the corresponding checksum was requested */
	if (ti->ipproto == IPPROTO_TCP) {
		if (ti->csum_flags & IXL_CSUM_TCP) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
			*off |= (ti->l4_hlen >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
			/* Check for NO_HEAD MDD event */
			MPASS(ti->l4_hlen != 0);
		}
	} else if (ti->ipproto == IPPROTO_UDP) {
		if (ti->csum_flags & IXL_CSUM_UDP) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
			*off |= (sizeof(struct udphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
	} else if (ti->ipproto == IPPROTO_SCTP) {
		if (ti->csum_flags & IXL_CSUM_SCTP) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
			*off |= (sizeof(struct sctphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
	}
}


#if defined(INET6) || defined(INET)
/**
 * Fill in a TX context descriptor for TSO and/or VXLAN offload
 * @que: TX queue which handles transmission (its statistics are updated)
 * @TXD: context descriptor to be written
 * @ti: information extracted from packet headers
 * @mss: maximum segment size requested for TSO
 *
 * Always writes both type_cmd_tso_mss and tunneling_params. The TSO fields
 * are only populated when a TSO checksum flag is set in @ti; the tunneling
 * parameters are only non-zero for VXLAN-encapsulated packets.
 */
static inline void
ixl_ctxd_setup(struct ixl_queue *que, struct i40e_tx_context_desc *TXD,
	struct ixl_tx_offload_info *ti, u32 mss)
{
	u64 qw1;
	u32 tunnel = 0;

	qw1 = (u64)I40E_TX_DESC_DTYPE_CONTEXT << I40E_TXD_CTX_QW1_DTYPE_SHIFT;

	if (ti->csum_flags & IXL_CSUM_TSO) {
		u32 tso_cmd, hdr_len, payload_len;

		/*
		 * TSO MSS must not be less than 64; this prevents a
		 * BAD_LSO_MSS MDD event when the MSS is too small.
		 */
		if (mss < IXL_MIN_TSO_MSS) {
			que->mss_too_small++;
			mss = IXL_MIN_TSO_MSS;
		}

		tso_cmd = I40E_TX_CTX_DESC_TSO;
#ifdef CSUM_ENCAP_VXLAN
		hdr_len = ti->ehdrlen + ti->ip_hlen + ti->l4_hlen +
		    ti->outer_ip_hlen + ti->l4tunlen;
#else
		hdr_len = ti->ehdrlen + ti->ip_hlen + ti->l4_hlen;
#endif
		/* TSO length covers everything after the headers */
		payload_len = ti->plen - hdr_len;

		/* Check for BAD_LS0_MSS MDD event (mss too large) */
		MPASS(mss <= IXL_MAX_TSO_MSS);
		/* Check for NO_HEAD MDD event (header lengths are 0) */
		MPASS(ti->ehdrlen != 0);
		MPASS(ti->ip_hlen != 0);
		/* Partial check for BAD_LSO_LEN MDD event */
		MPASS(payload_len != 0);
		/* Partial check for WRONG_SIZE MDD event (during TSO) */
		MPASS(hdr_len + mss <= IXL_MAX_FRAME);

		qw1 |= ((u64)tso_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
		    ((u64)payload_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
		    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
		que->tso++;
	}
	TXD->type_cmd_tso_mss = htole64(qw1);

#ifdef CSUM_ENCAP_VXLAN
	if (ti->csum_flags & CSUM_ENCAP_VXLAN) {
		tunnel = I40E_TXD_CTX_UDP_TUNNELING;

		/* Describe the outer IP header to the hardware */
		if (ti->outer_etype == ETHERTYPE_IP) {
			if (ti->csum_flags & CSUM_INNER_IP_TSO)
				tunnel |= I40E_TX_CTX_EXT_IP_IPV4;
			else
				tunnel |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
		} else if (ti->outer_etype == ETHERTYPE_IPV6) {
			tunnel |= I40E_TX_CTX_EXT_IP_IPV6;
		}

		tunnel |=
		    ((ti->outer_ip_hlen >> 2) << I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT) |
		    ((ti->l4tunlen >> 1) << I40E_TXD_CTX_QW0_NATLEN_SHIFT);
		que->tx_vxlan++;
	}
#endif
	/* Zero unless the VXLAN branch above filled it in */
	TXD->tunneling_params = htole32(tunnel);
}
#endif

/*
 * ixl_get_tx_head - fetch the TX HEAD index reported by the hardware
 *
 * The value is read from the slot that sits directly behind the last
 * descriptor of the ring (index num_tx_desc) as a little-endian 32-bit
 * word and is returned in host byte order.
 */
static inline u32
ixl_get_tx_head(struct ixl_queue *que)
{
	volatile __le32 *wb_slot;

	/* The write-back word lives one element past the descriptor array */
	wb_slot = (volatile __le32 *)(void *)&que->txr.base[que->num_tx_desc];

	return (LE32_TO_CPU(*wb_slot));
}

/**********************************************************************
 *
 * ixl_txeof_hwb - TX completion processing using head write-back.
 *
 * Get index of last used descriptor/buffer from hardware, and clean
 * the descriptors/buffers up to that index.
 *
 * The TX ring mutex must be held by the caller (asserted below).
 *
 * Returns TRUE if descriptors are still outstanding after cleaning.
 * Returns FALSE when the whole ring is available, when netmap handled
 * the interrupt, or when the buffer at next_to_clean has no recorded
 * end-of-packet index.
 *
 **********************************************************************/
static bool
ixl_txeof_hwb(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	u32			first, last, head, done;
	struct ixl_tx_buf	*buf;

	mtx_assert(&txr->mtx, MA_OWNED);

#ifdef DEV_NETMAP
	// XXX todo: implement moderation
	if (netmap_tx_irq(que->vsi->ifp, que->me))
		return FALSE;
#endif /* DEV_NETMAP */

	/* These are not the descriptors you seek, move along :) */
	if (txr->avail == que->num_tx_desc) {
		/* Ring is completely free: disarm the watchdog */
		atomic_store_rel_32(&txr->watchdog_timer, 0);
		return FALSE;
	}

	first = txr->next_to_clean;
	buf = &txr->buffers[first];
	last = buf->eop_index;
	/*
	 * 'last' is unsigned, so the -1 here is compared as the all-ones
	 * value; that is the marker stored in eop_index when a buffer is
	 * cleaned (see the inner loop below).
	 */
	if (last == -1)
		return FALSE;

	/* Sync DMA before reading head index from ring */
        bus_dmamap_sync(txr->dma.tag, txr->dma.map,
            BUS_DMASYNC_POSTREAD);

	/* Get the Head WB value */
	head = ixl_get_tx_head(que);

	/*
	** Get the index of the first descriptor
	** BEYOND the EOP and call that 'done'.
	** I do this so the comparison in the
	** inner while loop below can be simple
	*/
	if (++last == que->num_tx_desc) last = 0;
	done = last;

	/*
	** The HEAD index of the ring is written in a
	** defined location, this rather than a done bit
	** is what is used to keep track of what must be
	** 'cleaned'.
	*/
	while (first != head) {
		/* We clean the range of the packet */
		while (first != done) {
			++txr->avail;

			/* Only buffers that own an mbuf need unmapping/freeing */
			if (buf->m_head) {
				txr->bytes += /* for ITR adjustment */
				    buf->m_head->m_pkthdr.len;
				txr->tx_bytes += /* for TX stats */
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(buf->tag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(buf->tag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			}
			/* Mark the slot as no longer starting a packet */
			buf->eop_index = -1;

			/* Advance with wrap-around */
			if (++first == que->num_tx_desc)
				first = 0;

			buf = &txr->buffers[first];
		}
		++txr->packets;
		/* If a packet was successfully cleaned, reset the watchdog timer */
		atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
		/* See if there is more work now */
		last = buf->eop_index;
		if (last != -1) {
			/* Get next done point */
			if (++last == que->num_tx_desc) last = 0;
			done = last;
		} else
			break;
	}
	/* Hand the descriptor ring back to the device */
	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/* Remember where the next cleaning pass has to start */
	txr->next_to_clean = first;

	/*
	 * If there are no pending descriptors, clear the timeout.
	 */
	if (txr->avail == que->num_tx_desc) {
		atomic_store_rel_32(&txr->watchdog_timer, 0);
		return FALSE;
	}

	return TRUE;
}

/**********************************************************************
 *
 * ixl_txeof_dwb - TX completion processing using descriptor write-back.
 *
 * Use index kept by driver and the flag on each descriptor to find used
 * descriptor/buffers and clean them up for re-use.
 *
 * This method of reclaiming descriptors is currently incompatible with
 * DEV_NETMAP.
 *
 * At most 256 packets are reclaimed per call. The TX ring mutex must be
 * held by the caller (asserted below).
 *
 * Returns TRUE if there are more descriptors to be cleaned after this
 * function exits, FALSE otherwise.
 *
 **********************************************************************/
static bool
ixl_txeof_dwb(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	u32			first, last, done;
	u32			limit = 256;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*eop_desc;

	mtx_assert(&txr->mtx, MA_OWNED);

	/* There are no descriptors to clean */
	if (txr->avail == que->num_tx_desc) {
		atomic_store_rel_32(&txr->watchdog_timer, 0);
		return FALSE;
	}

	/* Set starting index/descriptor/buffer */
	first = txr->next_to_clean;
	buf = &txr->buffers[first];

	/*
	 * This function operates per-packet -- identifies the start of the
	 * packet and gets the index of the last descriptor of the packet from
	 * it, from eop_index.
	 *
	 * If the last descriptor is marked "done" by the hardware, then all
	 * of the descriptors for the packet are cleaned.
	 */
	last = buf->eop_index;
	/* 'last' is unsigned: -1 is compared as the all-ones marker value */
	if (last == -1)
		return FALSE;
	eop_desc = &txr->base[last];

	/* Sync DMA before reading from ring */
	bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_POSTREAD);

	/*
	 * Get the index of the first descriptor beyond the EOP and call that
	 * 'done'. Simplifies the comparison for the inner loop below.
	 */
	if (++last == que->num_tx_desc)
		last = 0;
	done = last;

	/*
	 * We find the last completed descriptor by examining each
	 * descriptor's status bits to see if it's done.
	 */
	do {
		/*
		 * Break if last descriptor in packet isn't marked done.
		 * The descriptor is stored little-endian, so convert it to
		 * host order before testing the DTYPE field; a raw compare
		 * only works on little-endian hosts.
		 */
		if ((le64toh(eop_desc->cmd_type_offset_bsz) &
		    I40E_TXD_QW1_DTYPE_MASK) != I40E_TX_DESC_DTYPE_DESC_DONE)
			break;

		/* Clean the descriptors that make up the processed packet */
		while (first != done) {
			/*
			 * If there was a buffer attached to this descriptor,
			 * prevent the adapter from accessing it, and add its
			 * length to the queue's TX stats.
			 */
			if (buf->m_head) {
				txr->bytes += buf->m_head->m_pkthdr.len;
				txr->tx_bytes += buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(buf->tag, buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(buf->tag, buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			}
			/* Slot no longer starts a packet */
			buf->eop_index = -1;
			++txr->avail;

			/* Advance with wrap-around */
			if (++first == que->num_tx_desc)
				first = 0;
			buf = &txr->buffers[first];
		}
		++txr->packets;
		/* If a packet was successfully cleaned, reset the watchdog timer */
		atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);

		/*
		 * Since buf is the first buffer after the one that was just
		 * cleaned, check if the packet it starts is done, too.
		 */
		last = buf->eop_index;
		if (last != -1) {
			eop_desc = &txr->base[last];
			/* Get next done point */
			if (++last == que->num_tx_desc)
				last = 0;
			done = last;
		} else
			break;
	} while (--limit);

	/* Hand the descriptor ring back to the device */
	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	 * If there are no pending descriptors, clear the watchdog timer.
	 */
	if (txr->avail == que->num_tx_desc) {
		atomic_store_rel_32(&txr->watchdog_timer, 0);
		return FALSE;
	}

	return TRUE;
}

/*
 * ixl_txeof - reclaim completed TX descriptors of a queue
 *
 * Picks the cleaning routine according to the VSI setting: the head
 * write-back variant when enable_head_writeback is set, the descriptor
 * write-back variant otherwise. The chosen routine's result is returned.
 */
bool
ixl_txeof(struct ixl_queue *que)
{
	if (que->vsi->enable_head_writeback)
		return (ixl_txeof_hwb(que));

	return (ixl_txeof_dwb(que));
}


/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary, if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, thus it can simply
 *     be recalled to try again.
 *
 *  que:   queue whose RX ring is refreshed
 *  limit: descriptor index at which refreshing stops; the loop runs
 *         while the index one beyond the current slot differs from it
 *
 *  Slots are processed starting at rxr->next_refresh. If at least one
 *  slot was refreshed, the hardware tail register is updated with the
 *  new rxr->next_refresh value.
 *
 **********************************************************************/
static void
ixl_refresh_mbufs(struct ixl_queue *que, int limit)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct rx_ring		*rxr = &que->rxr;
	bus_dma_segment_t	hseg[1];
	bus_dma_segment_t	pseg[1];
	struct ixl_rx_buf	*buf;
	struct mbuf		*mh, *mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	/* i is the slot being refreshed, j always runs one slot ahead of it */
	i = j = rxr->next_refresh;
	/* Control the loop with one beyond */
	if (++j == que->num_rx_desc)
		j = 0;

	while (j != limit) {
		buf = &rxr->buffers[i];
		/* Without header split only the payload buffer is needed */
		if (rxr->hdr_split == FALSE)
			goto no_split;

		/* Reuse the header mbuf still attached to the slot, if any */
		if (buf->m_head == NULL) {
			mh = m_gethdr(M_NOWAIT, MT_DATA);
			if (mh == NULL)
				goto update;
		} else
			mh = buf->m_head;

		mh->m_pkthdr.len = mh->m_len = MHLEN;
		mh->m_len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			/* Drop the mbuf; the slot is retried on a later call */
			m_free(mh);
			buf->m_head = NULL;
			goto update;
		}
		buf->m_head = mh;
		bus_dmamap_sync(rxr->htag, buf->hmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.hdr_addr =
		   htole64(hseg[0].ds_addr);

no_split:
		/* Reuse the payload mbuf still attached to the slot, if any */
		if (buf->m_pack == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
		} else
			mp = buf->m_pack;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: payload dmamap load"
			    " failure - %d\n", error);
			/* Drop the mbuf; the slot is retried on a later call */
			m_free(mp);
			buf->m_pack = NULL;
			goto update;
		}
		buf->m_pack = mp;
		bus_dmamap_sync(rxr->ptag, buf->pmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.pkt_addr =
		   htole64(pseg[0].ds_addr);
		/*
		 * Used only when doing header split.
		 * NOTE(review): this store also runs on the header-split path
		 * and overwrites the hdr_addr written above -- confirm intent.
		 */
		rxr->base[i].read.hdr_addr = 0;

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		/* Record progress so a later call resumes at this slot */
		rxr->next_refresh = i;
		if (++j == que->num_rx_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
	return;
}


/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per descriptor, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've defined.
 *
 **********************************************************************/
int
ixl_allocate_rx_data(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t 		dev = vsi->dev;
	struct ixl_rx_buf 	*buf;
	int             	i, bsize, error;

	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
				   1, 0,		/* alignment, bounds */
				   BUS_SPACE_MAXADDR,	/* lowaddr */
				   BUS_SPACE_MAXADDR,	/* highaddr */
				   NULL, NULL,		/* filter, filterarg */
				   MSIZE,		/* maxsize */
				   1,			/* nsegments */
				   MSIZE,		/* maxsegsize */
				   0,			/* flags */
				   NULL,		/* lockfunc */
				   NULL,		/* lockfuncarg */
				   &rxr->htag))) {
		device_printf(dev, "Unable to create RX DMA htag\n");
		return (error);
	}

	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
				   1, 0,		/* alignment, bounds */
				   BUS_SPACE_MAXADDR,	/* lowaddr */
				   BUS_SPACE_MAXADDR,	/* highaddr */
				   NULL, NULL,		/* filter, filterarg */
				   MJUM16BYTES,		/* maxsize */
				   1,			/* nsegments */
				   MJUM16BYTES,		/* maxsegsize */
				   0,			/* flags */
				   NULL,		/* lockfunc */
				   NULL,		/* lockfuncarg */
				   &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA ptag\n");
		goto free_rx_htag;
	}

	bsize = sizeof(struct ixl_rx_buf) * que->num_rx_desc;
	if (!(rxr->buffers =
	    (struct ixl_rx_buf *) malloc(bsize,
	    M_IXL, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		goto free_rx_ptag;
	}

	for (i = 0; i < que->num_rx_desc; i++) {
		buf = &rxr->buffers[i];
		error = bus_dmamap_create(rxr->htag,
		    BUS_DMA_NOWAIT, &buf->hmap);
		if (error) {
			device_printf(dev, "Unable to create RX head map\n");
			goto free_buffers;
		}
		error = bus_dmamap_create(rxr->ptag,
		    BUS_DMA_NOWAIT, &buf->pmap);
		if (error) {
			bus_dmamap_destroy(rxr->htag, buf->hmap);
			device_printf(dev, "Unable to create RX pkt map\n");
			goto free_buffers;
		}
	}

	return 0;
free_buffers:
	while (i--) {
		buf = &rxr->buffers[i];
		bus_dmamap_destroy(rxr->ptag, buf->pmap);
		bus_dmamap_destroy(rxr->htag, buf->hmap);
	}
	free(rxr->buffers, M_IXL);
	rxr->buffers = NULL;
free_rx_ptag:
	bus_dma_tag_destroy(rxr->ptag);
	rxr->ptag = NULL;
free_rx_htag:
	bus_dma_tag_destroy(rxr->htag);
	rxr->htag = NULL;
	return (error);
}


/*********************************************************************
 *
 *  ixl_init_rx_ring - (re)initialize the queue receive ring and its
 *  buffers.
 *
 *  Zeroes the descriptor ring, releases any mbufs still attached to the
 *  ring's buffers, attaches a fresh payload cluster to every descriptor
 *  (or netmap buffers when netmap_reset() returns a slot array), resets
 *  the ring indices, writes the tail register and (re)configures soft LRO
 *  according to IFCAP_LRO.
 *
 *  Returns 0 on success or an errno value on failure: ENOBUFS when an
 *  mbuf cannot be allocated, the bus_dmamap_load_mbuf_sg() error when a
 *  buffer cannot be mapped, or the tcp_lro_init() error when LRO setup
 *  fails. On failure, buffers that were already attached stay attached;
 *  they are released by the cleanup loop of the next call or by
 *  ixl_free_que_rx().
 *
 **********************************************************************/
int
ixl_init_rx_ring(struct ixl_queue *que)
{
	struct	rx_ring 	*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
#if defined(INET6) || defined(INET)
	struct ifnet		*ifp = vsi->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
#endif
	struct ixl_rx_buf	*buf;
	bus_dma_segment_t	pseg[1], hseg[1];
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	IXL_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixl_init_tx_ring() */
	slot = netmap_reset(na, NR_RX, que->me, 0);
#endif /* DEV_NETMAP */
	/* Clear the ring contents */
	rsize = roundup2(que->num_rx_desc *
	    sizeof(union i40e_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->base, rsize);
	/* Cleanup any existing buffers */
	for (int i = 0; i < que->num_rx_desc; i++) {
		buf = &rxr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(rxr->htag, buf->hmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->htag, buf->hmap);
			buf->m_head->m_flags |= M_PKTHDR;
			m_freem(buf->m_head);
		}
		if (buf->m_pack != NULL) {
			bus_dmamap_sync(rxr->ptag, buf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, buf->pmap);
			buf->m_pack->m_flags |= M_PKTHDR;
			m_freem(buf->m_pack);
		}
		buf->m_head = NULL;
		buf->m_pack = NULL;
	}

	/* header split is off */
	rxr->hdr_split = FALSE;

	/* Now replenish the mbufs */
	for (int j = 0; j != que->num_rx_desc; ++j) {
		struct mbuf	*mh, *mp;

		buf = &rxr->buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ). No need to allocate
		 * an mbuf, so end the block with a continue;
		 */
		if (slot) {
#if ((__FreeBSD_version >= 1102505 && __FreeBSD_version < 1200000) || \
    __FreeBSD_version >= 1200062)
			int sj = netmap_idx_n2k(na->rx_rings[que->me], j);
#else
			int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
#endif
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->base[j].read.pkt_addr = htole64(paddr);
			rxr->base[j].read.hdr_addr = 0;
			continue;
		}
#endif /* DEV_NETMAP */
		/*
		** Don't allocate mbufs if not
		** doing header split, its wasteful
		*/
		if (rxr->hdr_split == FALSE)
			goto skip_head;

		/* First the header */
		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
		if (buf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		m_adj(buf->m_head, ETHER_ALIGN);
		mh = buf->m_head;
		mh->m_len = mh->m_pkthdr.len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, buf->m_head, hseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) /* Nothing elegant to do here */
			goto fail;
		bus_dmamap_sync(rxr->htag,
		    buf->hmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);

skip_head:
		/* Now the payload cluster */
		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, rxr->mbuf_sz);
		if (buf->m_pack == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = buf->m_pack;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    buf->pmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
		rxr->base[j].read.hdr_addr = 0;
	}


	/* Setup our descriptor indices */
	rxr->next_check = 0;
	rxr->next_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->split = 0;
	rxr->bytes = 0;

	/* Publish the ring to the hardware: tail is the last descriptor */
	wr32(vsi->hw, rxr->tail, que->num_rx_desc - 1);
	ixl_flush(vsi->hw);

#if defined(INET6) || defined(INET)
	/*
	** Now set up the LRO interface:
	*/
	if (lro->ifp != NULL) {
		/* Release the LRO state left over from a previous init */
		tcp_lro_free(lro);
		lro->ifp = NULL;
	}
	if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
			/*
			 * Report the failure to the caller; without this
			 * the function would return 0 from the fail path.
			 */
			error = err;
			goto fail;
		}
		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
		rxr->lro_enabled = TRUE;
		lro->ifp = vsi->ifp;
	}
#endif

	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:
	/* Common exit: reached on success as well, with error == 0 */
	IXL_RX_UNLOCK(rxr);
	return (error);
}


/*********************************************************************
 *
 *  ixl_free_que_rx - release the receive-side resources of a queue
 *
 *  Discards the mbufs of every RX buffer, destroys the per-buffer DMA
 *  maps, frees the buffer array, destroys both DMA tags and frees the
 *  LRO state. Each resource is only touched when it is present, and
 *  every pointer is cleared after release.
 *
 **********************************************************************/
void
ixl_free_que_rx(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_rx_buf	*rbuf;
	int			idx;

	if (rxr->buffers != NULL) {
		idx = 0;
		while (idx < que->num_rx_desc) {
			rbuf = &rxr->buffers[idx];

			/* Drop the mbufs and unload their maps first ... */
			ixl_rx_discard(rxr, idx);

			/* ... then the maps themselves can go away */
			bus_dmamap_destroy(rxr->htag, rbuf->hmap);
			bus_dmamap_destroy(rxr->ptag, rbuf->pmap);

			idx++;
		}
		free(rxr->buffers, M_IXL);
		rxr->buffers = NULL;
	}

	/* Header buffer DMA tag */
	if (rxr->htag != NULL) {
		bus_dma_tag_destroy(rxr->htag);
		rxr->htag = NULL;
	}

	/* Payload buffer DMA tag */
	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	/* Soft LRO state; a non-NULL lro.ifp is the "in use" marker here */
	if (rxr->lro.ifp != NULL) {
		tcp_lro_free(&rxr->lro);
		rxr->lro.ifp = NULL;
	}
}

/*
 * ixl_rx_input - deliver a received packet to LRO or to the stack
 *
 * The packet is offered to soft LRO only when LRO is enabled on the
 * ring, IFCAP_VLAN_HWTAGGING is enabled on the interface, the mbuf
 * carries both CSUM_DATA_VALID and CSUM_PSEUDO_HDR, and LRO resources
 * exist (lro_cnt != 0). If tcp_lro_rx() accepts it, nothing else is
 * done. In every other case the packet is passed to if_input with the
 * RX lock dropped for the duration of the call.
 */
static inline void
ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
{

#if defined(INET6) || defined(INET)
	/* All preconditions met and LRO took the packet: we are done */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
	    rxr->lro.lro_cnt != 0 &&
	    tcp_lro_rx(&rxr->lro, m, 0) == 0)
		return;
#endif

	/* Hand the packet to the stack without holding the ring lock */
	IXL_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXL_RX_LOCK(rxr);
}


static inline void
ixl_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixl_rx_buf	*rbuf;

	KASSERT(rxr != NULL, ("Receive ring pointer cannot be null"));
	KASSERT(i < que->num_desc, ("Descriptor index must be less than que->num_desc"));

	rbuf = &rxr->buffers[i];

	/* Free the mbufs in the current chain for the packet */
        if (rbuf->fmp != NULL) {
		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
                m_freem(rbuf->fmp);
                rbuf->fmp = NULL;
	}

	/*
	 * Free the mbufs for the current descriptor; and let ixl_refresh_mbufs()
	 * assign new mbufs to these.
	 */
	if (rbuf->m_head) {
		bus_dmamap_sync(rxr->htag, rbuf->hmap, BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rxr->htag, rbuf->hmap);
		m_free(rbuf->m_head);
		rbuf->m_head = NULL;
	}
 
	if (rbuf->m_pack) {
		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rxr->ptag, rbuf->pmap);
		m_free(rbuf->m_pack);
		rbuf->m_pack = NULL;
	}
}

#ifdef RSS
/*
** ixl_ptype_to_hash: decode the hardware packet type and map it to
** the matching M_HASHTYPE_* value.
**
** Unknown packet types and plain L2 packets yield M_HASHTYPE_OPAQUE.
** For IP packets the result depends on the outer IP version, the
** inner protocol (TCP, UDP or anything else) and on the decoded
** outer_frag flag, which selects the "_EX" variants where one is used.
*/
static inline int
ixl_ptype_to_hash(u8 ptype)
{
	struct i40e_rx_ptype_decoded	info = decode_rx_desc_ptype(ptype);
	u8				ex = info.outer_frag;

	/* Nothing useful to derive from unknown or non-IP packets */
	if (!info.known || info.outer_ip == I40E_RX_PTYPE_OUTER_L2)
		return (M_HASHTYPE_OPAQUE);

	/* Note: anything that gets to this point is IP */
	if (info.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
		if (info.inner_prot == I40E_RX_PTYPE_INNER_PROT_TCP)
			return (ex ? M_HASHTYPE_RSS_TCP_IPV6_EX :
			    M_HASHTYPE_RSS_TCP_IPV6);
		if (info.inner_prot == I40E_RX_PTYPE_INNER_PROT_UDP)
			return (ex ? M_HASHTYPE_RSS_UDP_IPV6_EX :
			    M_HASHTYPE_RSS_UDP_IPV6);
		return (ex ? M_HASHTYPE_RSS_IPV6_EX : M_HASHTYPE_RSS_IPV6);
	}

	if (info.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
		if (info.inner_prot == I40E_RX_PTYPE_INNER_PROT_TCP)
			return (M_HASHTYPE_RSS_TCP_IPV4);
		if (info.inner_prot == I40E_RX_PTYPE_INNER_PROT_UDP)
			return (ex ? M_HASHTYPE_RSS_UDP_IPV4_EX :
			    M_HASHTYPE_RSS_UDP_IPV4);
		return (M_HASHTYPE_RSS_IPV4);
	}

	/* We should never get here!! */
	return (M_HASHTYPE_OPAQUE);
}
#endif /* RSS */

/*********************************************************************
 *
 *  ixl_rxeof - process completed RX descriptors of a queue.
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  The RX lock is taken on entry and released on exit; it is dropped
 *  temporarily inside ixl_rx_input() while a packet is handed to the
 *  stack.
 *
 *  NOTE: as written, every exit path returns FALSE; the function never
 *  reports "more work" to its caller.
 *********************************************************************/
bool
ixl_rxeof(struct ixl_queue *que, int count)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct rx_ring		*rxr = &que->rxr;
	struct ifnet		*ifp = vsi->ifp;
#if defined(INET6) || defined(INET)
	struct lro_ctrl		*lro = &rxr->lro;
#endif
	int			i, nextp, processed = 0;
	union i40e_rx_desc	*cur;
	struct ixl_rx_buf	*rbuf, *nbuf;

	IXL_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	/* Netmap took care of the interrupt: nothing to do here */
	if (netmap_rx_irq(ifp, que->me, &count)) {
		IXL_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_check; count != 0;) {
		struct mbuf	*sendmp, *mh, *mp;
		u32		status, error;
		u16		hlen, plen, vtag;
		u64		qword;
		u8		ptype;
		bool		eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		/* Unpack the write-back word of the current descriptor */
		cur = &rxr->base[i];
		qword = le64toh(cur->wb.qword1.status_error_len);
		status = (qword & I40E_RXD_QW1_STATUS_MASK)
		    >> I40E_RXD_QW1_STATUS_SHIFT;
		error = (qword & I40E_RXD_QW1_ERROR_MASK)
		    >> I40E_RXD_QW1_ERROR_SHIFT;
		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
		    >> I40E_RXD_QW1_PTYPE_SHIFT;

		/* Stop at the first descriptor the hardware has not completed */
		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
			++rxr->not_done;
			break;
		}
		/* Stop as well once the interface is no longer running */
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		/* Clear the status word that was just consumed */
		cur->wb.qword1.status_error_len = 0;
		rbuf = &rxr->buffers[i];
		mh = rbuf->m_head;
		mp = rbuf->m_pack;
		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
		/* The VLAN tag is only taken when the L2TAG1P status bit is set */
		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
		else
			vtag = 0;

		/* Remove device access to the rx buffers. */
		if (rbuf->m_head != NULL) {
			bus_dmamap_sync(rxr->htag, rbuf->hmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->htag, rbuf->hmap);
		}
		if (rbuf->m_pack != NULL) {
			bus_dmamap_sync(rxr->ptag, rbuf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, rbuf->pmap);
		}

		/*
		** Make sure bad packets are discarded,
		** note that only EOP descriptor has valid
		** error results.
		*/
                if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			rxr->desc_errs++;
			ixl_rx_discard(rxr, i);
			/* sendmp is still NULL here, so nothing is delivered */
			goto next_desc;
		}

		/* Prefetch the next buffer */
		if (!eop) {
			nextp = i + 1;
			if (nextp == que->num_rx_desc)
				nextp = 0;
			nbuf = &rxr->buffers[nextp];
			prefetch(nbuf);
		}

		/*
		** The header mbuf is ONLY used when header
		** split is enabled, otherwise we get normal
		** behavior, ie, both header and payload
		** are DMA'd into the payload buffer.
		**
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
			/* Clamp the reported header length to IXL_RX_HDR */
			if (hlen > IXL_RX_HDR)
				hlen = IXL_RX_HDR;
			mh->m_len = hlen;
			mh->m_flags |= M_PKTHDR;
			mh->m_next = NULL;
			mh->m_pkthdr.len = mh->m_len;
			/* Null buf pointer so it is refreshed */
			rbuf->m_head = NULL;
			/*
			** Check the payload length, this
			** could be zero if its a small
			** packet.
			*/
			if (plen > 0) {
				mp->m_len = plen;
				mp->m_next = NULL;
				mp->m_flags &= ~M_PKTHDR;
				mh->m_next = mp;
				mh->m_pkthdr.len += mp->m_len;
				/* Null buf pointer so it is refreshed */
				rbuf->m_pack = NULL;
				rxr->split++;
			}
			/*
			** Now create the forward
			** chain so when complete
			** we wont have to.
			*/
                        if (eop == 0) {
				/* stash the chain head */
                                nbuf->fmp = mh;
				/* Make forward chain */
                                if (plen)
                                        mp->m_next = nbuf->m_pack;
                                else
                                        mh->m_next = nbuf->m_pack;
                        } else {
				/* Singlet, prepare to send */
                                sendmp = mh;
                                if (vtag) {
                                        sendmp->m_pkthdr.ether_vtag = vtag;
                                        sendmp->m_flags |= M_VLANTAG;
                                }
                        }
		} else {
			/*
			** Either no header split, or a
			** secondary piece of a fragmented
			** split packet.
			*/
			mp->m_len = plen;
			/*
			** See if there is a stored head
			** that determines what we are
			*/
			sendmp = rbuf->fmp;
			/* Both pointers are handed off; the slot gets refreshed */
			rbuf->m_pack = rbuf->fmp = NULL;

			if (sendmp != NULL) /* secondary frag */
				sendmp->m_pkthdr.len += mp->m_len;
			else {
				/* first desc of a non-ps chain */
				sendmp = mp;
				sendmp->m_flags |= M_PKTHDR;
				sendmp->m_pkthdr.len = mp->m_len;
                        }
			/* Pass the head pointer on */
			if (eop == 0) {
				nbuf->fmp = sendmp;
				sendmp = NULL;
				/* Pre-link the next slot's payload mbuf */
				mp->m_next = nbuf->m_pack;
			}
		}
		++processed;
		/* Sending this frame? */
		if (eop) {
			sendmp->m_pkthdr.rcvif = ifp;
			/* gather stats */
			rxr->rx_packets++;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* capture data for dynamic ITR adjustment */
			rxr->packets++;
			rxr->bytes += sendmp->m_pkthdr.len;
			/* Set VLAN tag (field only valid in eop desc) */
			if (vtag) {
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			}
			/* Checksum status is only evaluated with RXCSUM enabled */
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixl_rx_checksum(que, sendmp,
				    status, error, ptype);
#ifdef RSS
			sendmp->m_pkthdr.flowid =
			    le32toh(cur->wb.qword0.hi_dword.rss);
			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
#else
			sendmp->m_pkthdr.flowid = que->msix;
			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#endif
		}
next_desc:
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == que->num_rx_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			/* Publish progress before the lock may be dropped */
			rxr->next_check = i;
			ixl_rx_input(rxr, ifp, sendmp, ptype);
			/*
			 * Update index used in loop in case another
			 * ixl_rxeof() call executes when lock is released
			 */
			i = rxr->next_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixl_refresh_mbufs(que, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixl_rx_unrefreshed(que))
		ixl_refresh_mbufs(que, i);

	rxr->next_check = i;

#if defined(INET6) || defined(INET)
#if __FreeBSD_version >= 1100105
	tcp_lro_flush_all(lro);
#else
	/*
	 * Flush any outstanding LRO work
	 */
	struct lro_entry *queued;
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#endif
#endif /* defined(INET6) || defined(INET) */

	IXL_RX_UNLOCK(rxr);
	return (FALSE);
}

/**
 * ixl_rx_checksum - Verify that the hardware indicated that the checksum is valid or not
 * @que: the queue structure
 * @mp: mbuf structure to set CFLAGS
 * @status: RX descriptor status data
 * @error: RX descriptor error data
 * @ptype: packet type
 *
 * Determine whether the hardware indicated that RX checksums were verified
 * and are valid. Inform the stack about the status of checksum so that stack
 * doesn't spend time verifying them.
 *
 * Nothing is reported when the hardware did not process the checksums
 * (L3L4P status bit clear), when the packet type is unknown or not IP, or
 * for tunnel types that are not handled below. Whenever the hardware
 * reports a checksum error, que->rx_csum_errs is incremented and the
 * corresponding *_VALID flag is left unset.
 */
static inline void
ixl_rx_checksum(struct ixl_queue *que, struct mbuf *mp, u32 status, u32 error, u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded;

	/* HW did not calculate checksums */
	if ((status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) == 0)
		return;

	decoded = decode_rx_desc_ptype(ptype);

	/* Cannot proceed if packet type is unknown or not an IP packet */
	if (decoded.known == 0 || decoded.outer_ip != I40E_RX_PTYPE_OUTER_IP)
		return;

	/* IPv6 with extension headers likely have bad csum */
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6 &&
	    status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
		mp->m_pkthdr.csum_flags = 0;
		que->rx_csum_errs++;
		return;
	}

	switch (decoded.tunnel_type) {
	case I40E_RX_PTYPE_TUNNEL_NONE:
		/* L3 checksum is calculated only for IPv4 packets */
		if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
			mp->m_pkthdr.csum_flags = CSUM_L3_CALC;
			/* IP checksum error */
			if (error & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT)) {
				que->rx_csum_errs++;
				return;
			}
			mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
		}

		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_UDP:
		case I40E_RX_PTYPE_INNER_PROT_TCP:
		case I40E_RX_PTYPE_INNER_PROT_SCTP:
			mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
			if (error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)) {
				que->rx_csum_errs++;
				return;
			}
			mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
			mp->m_pkthdr.csum_data |= htons(0xffff);
			break;
		default:
			break;
		}
		break;
#ifdef CSUM_ENCAP_VXLAN
	case I40E_RX_PTYPE_TUNNEL_IP_GRENAT:
	case I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC:
	case I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN:
		/* L3 checksum of outer IPv4 packets */
		if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
			mp->m_pkthdr.csum_flags = CSUM_L3_CALC;
			/* IP checksum error */
			if (error & (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)) {
				que->rx_csum_errs++;
				return;
			}
			mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
		}
		/* L3 checksum of most inner IPv4 packets */
		if (decoded.tunnel_end_prot == I40E_RX_PTYPE_TUNNEL_END_IPV4) {
			/*
			 * OR the inner flag in: a plain assignment here
			 * would wipe the outer L3 result recorded above.
			 */
			mp->m_pkthdr.csum_flags |= CSUM_INNER_L3_CALC;
			/* IP checksum error */
			if (error & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT)) {
				que->rx_csum_errs++;
#ifdef IXL_RXCSUM_DEBUG_COUNTERS
				que->rx_csum_vxlan_l3_errs++;
#endif
				return;
			}
			mp->m_pkthdr.csum_flags |= CSUM_INNER_L3_VALID;
#ifdef IXL_RXCSUM_DEBUG_COUNTERS
			que->rx_csum_vxlan_l3_good++;
#endif
		}
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_UDP:
		case I40E_RX_PTYPE_INNER_PROT_TCP:
		case I40E_RX_PTYPE_INNER_PROT_SCTP:
			mp->m_pkthdr.csum_flags |= CSUM_INNER_L4_CALC;
			if (error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)) {
				que->rx_csum_errs++;
#ifdef IXL_RXCSUM_DEBUG_COUNTERS
				que->rx_csum_vxlan_l4_errs++;
#endif
				return;
			}
			mp->m_pkthdr.csum_flags |= CSUM_INNER_L4_VALID;
			mp->m_pkthdr.csum_data |= htons(0xffff);
#ifdef IXL_RXCSUM_DEBUG_COUNTERS
			que->rx_csum_vxlan_l4_good++;
#endif
			break;
		default:
			break;
		}
		break;
#endif
	default:
		/* Other tunnel types: leave the checksum flags untouched */
		return;
	}

}

#if __FreeBSD_version >= 1100000
/*
 * Return the value of interface counter 'cnt' for the VSI behind 'ifp'.
 *
 * Counters kept in the VSI softc are read from there; any counter not
 * listed below is delegated to if_get_counter_default().
 */
uint64_t
ixl_get_counter(if_t ifp, ift_counter cnt)
{
	struct ixl_vsi *sc = if_getsoftc(ifp);
	uint64_t value;

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		value = sc->ipackets;
		break;
	case IFCOUNTER_IERRORS:
		value = sc->ierrors;
		break;
	case IFCOUNTER_OPACKETS:
		value = sc->opackets;
		break;
	case IFCOUNTER_OERRORS:
		value = sc->oerrors;
		break;
	case IFCOUNTER_COLLISIONS:
		/* 40G/10G Ethernet cannot have collisions, per the standard */
		value = 0;
		break;
	case IFCOUNTER_IBYTES:
		value = sc->ibytes;
		break;
	case IFCOUNTER_OBYTES:
		value = sc->obytes;
		break;
	case IFCOUNTER_IMCASTS:
		value = sc->imcasts;
		break;
	case IFCOUNTER_OMCASTS:
		value = sc->omcasts;
		break;
	case IFCOUNTER_IQDROPS:
		value = sc->iqdrops;
		break;
	case IFCOUNTER_OQDROPS:
		value = sc->oqdrops;
		break;
	case IFCOUNTER_NOPROTO:
		value = sc->noproto;
		break;
	default:
		value = if_get_counter_default(ifp, cnt);
		break;
	}

	return (value);
}
#endif

/*
 * Validate a requested descriptor ring size.
 *
 * Returns 'ring_size' unchanged when it lies within
 * [IXL_MIN_RING, IXL_MAX_RING] and is a multiple of IXL_RING_INCREMENT.
 * Otherwise the rejected value and the accepted range are reported with
 * device_printf(), using 'name' to identify the offending setting, and
 * IXL_DEFAULT_RING is returned instead.
 */
static int
ixl_sanitize_ring_size(device_t dev, const char *name, int ring_size)
{
	if (ring_size >= IXL_MIN_RING &&
	    ring_size <= IXL_MAX_RING &&
	    ring_size % IXL_RING_INCREMENT == 0)
		return (ring_size);

	device_printf(dev, "Invalid %s value of %d set!\n", name, ring_size);
	device_printf(dev, "%s must be between %d and %d, "
	    "inclusive, and must be a multiple of %d\n",
	    name, IXL_MIN_RING, IXL_MAX_RING, IXL_RING_INCREMENT);
	device_printf(dev, "Using default value of %d instead\n",
	    IXL_DEFAULT_RING);

	return (IXL_DEFAULT_RING);
}

/*
 * Set the TX and RX ring sizes of a VSI, adjusting each value to the
 * supported range.
 *
 * Each requested size is stored as given when valid; an invalid size is
 * reported on the console and replaced by IXL_DEFAULT_RING.  The sizes
 * finally chosen are always printed.
 */
void
ixl_vsi_setup_rings_size(struct ixl_vsi * vsi, int tx_ring_size, int rx_ring_size)
{
	device_t dev = vsi->dev;

	/* TX is checked first so its diagnostics precede the RX ones */
	vsi->num_tx_desc = ixl_sanitize_ring_size(dev, "tx_ring_size",
	    tx_ring_size);
	vsi->num_rx_desc = ixl_sanitize_ring_size(dev, "rx_ring_size",
	    rx_ring_size);

	device_printf(dev, "using %d tx descriptors and %d rx descriptors\n",
		vsi->num_tx_desc, vsi->num_rx_desc);
}

/*
 * Trigger a software interrupt for queue 'qidx'.
 *
 * When the VSI uses MSI-X the per-queue register I40E_PFINT_DYN_CTLN(qidx)
 * is written; otherwise the write goes to I40E_PFINT_DYN_CTL0.  The value
 * written combines the INTENA, SWINT_TRIG and ITR_INDX masks.
 */
static void
ixl_queue_sw_irq(struct ixl_vsi *vsi, int qidx)
{
	struct i40e_hw *hw = vsi->hw;
	u32 ctl_reg;
	u32 ctl_val;

	if ((vsi->flags & IXL_FLAGS_USES_MSIX) != 0)
		ctl_reg = I40E_PFINT_DYN_CTLN(qidx);
	else
		ctl_reg = I40E_PFINT_DYN_CTL0;

	ctl_val = I40E_PFINT_DYN_CTLN_INTENA_MASK;
	ctl_val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK;
	ctl_val |= I40E_PFINT_DYN_CTLN_ITR_INDX_MASK;

	wr32(hw, ctl_reg, ctl_val);
}

/*
 * Advance the TX watchdog of every queue in the VSI by one period (hz
 * ticks) and report queues whose watchdog has run out.
 *
 * A queue whose watchdog expires is logged, its timer is set to -1 and a
 * software interrupt is raised for it in an attempt to unblock it.
 *
 * Returns the number of queues found hung during this pass.
 */
int
ixl_queue_hang_check(struct ixl_vsi *vsi)
{
	device_t dev = vsi->dev;
	struct ixl_queue *que;
	struct tx_ring *txr;
	s32 remaining, next;
	int hung = 0;

	for (int i = 0; i < vsi->num_queues; i++) {
		que = &vsi->queues[i];
		txr = &que->txr;

		/*
		 * A timer still holding the default value set by ixl_txeof
		 * means the queue is most probably running: just subtract
		 * hz and go on to the next queue.
		 */
		if (atomic_cmpset_rel_32(&txr->watchdog_timer,
		    IXL_WATCHDOG, (IXL_WATCHDOG) - hz) != 0)
			continue;

		remaining = atomic_load_acq_32(&txr->watchdog_timer);

		/*
		 * If the timer was reset to the default value in the
		 * meantime, the queue is running.  Non-positive values are
		 * left untouched as well.
		 */
		if (remaining <= 0 || remaining == IXL_WATCHDOG)
			continue;

		next = remaining - hz;
		if (next > 0) {
			/*
			 * Count down.  Should this compare-and-set fail, the
			 * TX path has updated the watchdog itself, which
			 * means it is still working and no countdown is
			 * needed.
			 */
			atomic_cmpset_rel_32(&txr->watchdog_timer,
			    remaining, next);
			continue;
		}

		/* The watchdog expired: flag the queue as hung */
		atomic_store_rel_32(&txr->watchdog_timer, -1);
		device_printf(dev, "WARNING: queue %d "
		    "appears to be hung!\n", que->me);
		++hung;

		/* Try to unblock the queue with SW IRQ */
		ixl_queue_sw_irq(vsi, i);
	}

	return (hung);
}

/*
 * Register the per-queue statistics sysctls of a VSI.
 *
 * For every queue a "que<N>" node is created below vsi->vsi_node, and the
 * queue's driver, TX ring and RX ring counters are attached to it as
 * read-only entries.  Additional entries are compiled in when
 * IXL_RXCSUM_DEBUG_COUNTERS or IXL_DEBUG is defined.
 *
 * All OIDs are added to vsi->sysctl_ctx.
 */
void
ixl_vsi_add_queues_stats(struct ixl_vsi * vsi)
{
	char queue_namebuf[IXL_QUEUE_NAME_LEN];
	struct sysctl_oid_list	*vsi_list, *queue_list;
	struct ixl_queue	*queues = vsi->queues;
	struct ixl_queue	*que;
	struct sysctl_oid 	*queue_node;
	struct sysctl_ctx_list	*ctx;
	struct tx_ring		*txr;
	struct rx_ring		*rxr;

	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
	ctx = &vsi->sysctl_ctx;

	/* Queue statistics */
	for (int q = 0; q < vsi->num_queues; q++) {
		/* Size taken from the buffer itself so the two cannot diverge */
		snprintf(queue_namebuf, sizeof(queue_namebuf), "que%d", q);
		queue_node = SYSCTL_ADD_NODE(ctx, vsi_list,
		    OID_AUTO, queue_namebuf, CTLFLAG_RD, NULL, "Queue #");
		queue_list = SYSCTL_CHILDREN(queue_node);

		que = &queues[q];
		txr = &que->txr;
		rxr = &que->rxr;

		/* Counters kept in the queue structure itself */
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mbuf_defrag_failed",
				CTLFLAG_RD, &(que->mbuf_defrag_failed),
				"m_defrag() failed");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
				CTLFLAG_RD, &(que->irqs),
				"irqs on this queue");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx",
				CTLFLAG_RD, &(que->tso),
				"TSO");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_dmamap_failed",
				CTLFLAG_RD, &(que->tx_dmamap_failed),
				"Driver tx dma failure in xmit");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mss_too_small",
				CTLFLAG_RD, &(que->mss_too_small),
				"TSO sends with an MSS less than 64");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
				CTLFLAG_RD, &(txr->no_desc),
				"Queue No Descriptor Available");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_vxlan",
				CTLFLAG_RD, &(que->tx_vxlan),
				"VXLAN TX offload requests");
#ifdef IXL_RXCSUM_DEBUG_COUNTERS
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_csum_vxlan_l3_good",
				CTLFLAG_RD, &(que->rx_csum_vxlan_l3_good),
				"VXLAN RX packets with correct IPv4 checksum");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_csum_vxlan_l3_errs",
				CTLFLAG_RD, &(que->rx_csum_vxlan_l3_errs),
				"VXLAN RX packets with IPv4 checksum errors");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_csum_vxlan_l4_good",
				CTLFLAG_RD, &(que->rx_csum_vxlan_l4_good),
				"VXLAN RX packets with correct L4 checksum");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_csum_vxlan_l4_errs",
				CTLFLAG_RD, &(que->rx_csum_vxlan_l4_errs),
				"VXLAN RX packets with L4 checksum errors");
#endif

		/* TX and RX ring counters */
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
				CTLFLAG_RD, &(txr->total_packets),
				"Queue Packets Transmitted");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_bytes",
				CTLFLAG_RD, &(txr->tx_bytes),
				"Queue Bytes Transmitted");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
				CTLFLAG_RD, &(rxr->rx_packets),
				"Queue Packets Received");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
				CTLFLAG_RD, &(rxr->rx_bytes),
				"Queue Bytes Received");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_desc_err",
				CTLFLAG_RD, &(rxr->desc_errs),
				"Queue Rx Descriptor Errors");
		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_itr",
				CTLFLAG_RD, &(rxr->itr), 0,
				"Queue Rx ITR Interval");
		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "tx_itr",
				CTLFLAG_RD, &(txr->itr), 0,
				"Queue Tx ITR Interval");
#ifdef IXL_DEBUG
		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "txr_watchdog",
				CTLFLAG_RD, &(txr->watchdog_timer), 0,
				"Ticks before watchdog timer causes interface reinit");
		SYSCTL_ADD_U16(ctx, queue_list, OID_AUTO, "tx_next_avail",
				CTLFLAG_RD, &(txr->next_avail), 0,
				"Next TX descriptor to be used");
		SYSCTL_ADD_U16(ctx, queue_list, OID_AUTO, "tx_next_to_clean",
				CTLFLAG_RD, &(txr->next_to_clean), 0,
				"Next TX descriptor to be cleaned");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_not_done",
				CTLFLAG_RD, &(rxr->not_done),
				"Queue Rx Descriptors not Done");
		/*
		 * These two entries expose ring indices, not the "not done"
		 * counter, so they get descriptions of their own.
		 */
		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_next_refresh",
				CTLFLAG_RD, &(rxr->next_refresh), 0,
				"Next RX descriptor to be refreshed");
		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_next_check",
				CTLFLAG_RD, &(rxr->next_check), 0,
				"Next RX descriptor to be checked");
		/* Tail registers are read through handlers on each access */
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qrx_tail",
				CTLTYPE_UINT | CTLFLAG_RD, que,
				sizeof(struct ixl_queue),
				ixl_sysctl_qrx_tail_handler, "IU",
				"Queue Receive Descriptor Tail");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qtx_tail",
				CTLTYPE_UINT | CTLFLAG_RD, que,
				sizeof(struct ixl_queue),
				ixl_sysctl_qtx_tail_handler, "IU",
				"Queue Transmit Descriptor Tail");
#endif
	}
}

/*
 * Attach the counters of an i40e_eth_stats structure to the sysctl tree.
 *
 * One read-only 64-bit entry is added under 'child' for each counter in
 * the table below, using context 'ctx'.
 */
void
ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx,
	struct sysctl_oid_list *child,
	struct i40e_eth_stats *eth_stats)
{
	struct ixl_sysctl_info *info;
	struct ixl_sysctl_info ctls[] =
	{
		/* Receive side */
		{&eth_stats->rx_bytes,
		    "good_octets_rcvd", "Good Octets Received"},
		{&eth_stats->rx_unicast,
		    "ucast_pkts_rcvd", "Unicast Packets Received"},
		{&eth_stats->rx_multicast,
		    "mcast_pkts_rcvd", "Multicast Packets Received"},
		{&eth_stats->rx_broadcast,
		    "bcast_pkts_rcvd", "Broadcast Packets Received"},
		{&eth_stats->rx_discards,
		    "rx_discards", "Discarded RX packets"},
		{&eth_stats->rx_unknown_protocol,
		    "rx_unknown_proto", "RX unknown protocol packets"},
		/* Transmit side */
		{&eth_stats->tx_bytes,
		    "good_octets_txd", "Good Octets Transmitted"},
		{&eth_stats->tx_unicast,
		    "ucast_pkts_txd", "Unicast Packets Transmitted"},
		{&eth_stats->tx_multicast,
		    "mcast_pkts_txd", "Multicast Packets Transmitted"},
		{&eth_stats->tx_broadcast,
		    "bcast_pkts_txd", "Broadcast Packets Transmitted"},
		{&eth_stats->tx_errors,
		    "tx_errors", "TX packet errors"},
		/* Terminator: an entry with a zero 'stat' ends the table */
		{0,0,0}
	};

	for (info = ctls; info->stat != 0; info++) {
		SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, info->name,
				CTLFLAG_RD, info->stat,
				info->description);
	}
}

#ifdef IXL_DEBUG
/**
 * ixl_sysctl_qtx_tail_handler
 * Sysctl handler that reads the queue's TX tail register (the register
 * offset stored in txr.tail) from hardware and hands the value to
 * sysctl_handle_int().
 *
 * Returns 0 when no queue is attached to the OID, otherwise the result
 * of sysctl_handle_int().
 */
static int
ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS)
{
	struct ixl_queue *que = (struct ixl_queue *)oidp->oid_arg1;
	u32 tail;

	if (que == NULL)
		return (0);

	tail = rd32(que->vsi->hw, que->txr.tail);

	/*
	 * Any value supplied by the caller only lands in the local copy
	 * and is discarded, so the handler's result is simply that of
	 * sysctl_handle_int().
	 */
	return (sysctl_handle_int(oidp, &tail, 0, req));
}

/**
 * ixl_sysctl_qrx_tail_handler
 * Sysctl handler that reads the queue's RX tail register (the register
 * offset stored in rxr.tail) from hardware and hands the value to
 * sysctl_handle_int().
 *
 * Returns 0 when no queue is attached to the OID, otherwise the result
 * of sysctl_handle_int().
 */
static int
ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS)
{
	struct ixl_queue *que = (struct ixl_queue *)oidp->oid_arg1;
	u32 tail;

	if (que == NULL)
		return (0);

	tail = rd32(que->vsi->hw, que->rxr.tail);

	/*
	 * Any value supplied by the caller only lands in the local copy
	 * and is discarded, so the handler's result is simply that of
	 * sysctl_handle_int().
	 */
	return (sysctl_handle_int(oidp, &tail, 0, req));
}
#endif
