1/**************************************************************************
2
3Copyright (c) 2001-2003, Intel Corporation
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13    notice, this list of conditions and the following disclaimer in the
14    documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17    contributors may be used to endorse or promote products derived from
18    this software without specific prior written permission.
19
20THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30POSSIBILITY OF SUCH DAMAGE.
31
32***************************************************************************/
33
34/* $OpenBSD: if_em.c,v 1.379 2025/07/14 11:52:43 jmatthew Exp $ */
35/* $FreeBSD: if_em.c,v 1.46 2004/09/29 18:28:28 mlaier Exp $ */
36
37#include <dev/pci/if_em.h>
38#include <dev/pci/if_em_soc.h>
39
40/*********************************************************************
41 *  Driver version
42 *********************************************************************/
43
44#define EM_DRIVER_VERSION   "6.2.9"
45
46/*********************************************************************
47 *  PCI Device ID Table
48 *********************************************************************/
49const struct pci_matchid em_devices[] = {
50    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_DPT },
51    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_DPT },
52    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_SPT },
53    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_SPT },
54    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM },
55    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM_LOM },
56    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP },
57    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LOM },
58    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LP },
59    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI },
60    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI_MOBILE },
61    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER },
62    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER_LOM },
63    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI },
64    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_LF },
65    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_MOBILE },
66    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82542 },
67    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_COPPER },
68    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_FIBER },
69    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_COPPER },
70    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_FIBER },
71    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_COPPER },
72    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_LOM },
73    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_COPPER },
74    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_FIBER },
75    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_COPPER },
76    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_FIBER },
77    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_SERDES },
78    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_COPPER },
79    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_FIBER },
80    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_QUAD_CPR },
81    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_COPPER },
82    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_FIBER },
83    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_PCIE },
84    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR },
85    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR_K },
86    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_SERDES },
87    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_2 },
88    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI },
89    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI_MOBILE },
90    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547GI },
91    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AF },
92    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AT },
93    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_COPPER },
94    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_FIBER },
95    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR },
96    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR_LP },
97    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_FBR },
98    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SERDES },
99    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_DUAL },
100    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_QUAD },
101    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571PT_QUAD_CPR },
102    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_COPPER },
103    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_FIBER },
104    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_SERDES },
105    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI },
106    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E },
107    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_IAMT },
108    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_PM },
109    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L },
110    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_1 },
111    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_2 },
112    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573V_PM },
113    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574L },
114    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574LA },
115    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_COPPER },
116    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_SERDES },
117    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QUAD_CPR },
118    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QP_PM },
119    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576 },
120    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_FIBER },
121    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES },
122    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_COPPER },
123    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_CU_ET2 },
124    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS },
125    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS_SERDES },
126    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES_QUAD },
127    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LC },
128    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LM },
129    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DC },
130    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DM },
131    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579LM },
132    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579V },
133    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER },
134    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_OEM1 },
135    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_IT },
136    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_FIBER },
137    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES },
138    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SGMII },
139    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_NF },
140    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES_NF },
141    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I211_COPPER },
142    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_LM },
143    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_V },
144    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM },
145    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_2 },
146    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_3 },
147    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V },
148    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_2 },
149    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_3 },
150    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM },
151    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM2 },
152    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM3 },
153    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM4 },
154    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM5 },
155    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM6 },
156    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM7 },
157    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM8 },
158    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM9 },
159    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM10 },
160    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM11 },
161    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM12 },
162    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM13 },
163    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM14 },
164    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM15 },
165    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM16 },
166    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM17 },
167    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM18 },
168    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM19 },
169    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM20 },
170    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM21 },
171    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM22 },
172    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM23 },
173    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM24 },
174    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V },
175    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V2 },
176    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V4 },
177    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V5 },
178    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V6 },
179    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V7 },
180    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V8 },
181    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V9 },
182    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V10 },
183    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V11 },
184    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V12 },
185    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V13 },
186    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V14 },
187    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V15 },
188    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V16 },
189    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V17 },
190    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V18 },
191    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V19 },
192    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V20 },
193    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V21 },
194    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V22 },
195    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V23 },
196    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V24 },
197    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER },
198    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_FIBER },
199    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SERDES },
200    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SGMII },
201    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER_DUAL },
202    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_QUAD_FIBER },
203    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SGMII },
204    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SERDES },
205    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_BPLANE },
206    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SFP },
207    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82583V },
208    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_COPPER },
209    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_FIBER },
210    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SERDES },
211    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SGMII },
212    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_1GBPS },
213    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_2_5GBPS },
214    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_SGMII },
215    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_82567V_3 },
216    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE },
217    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_G },
218    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_GT },
219    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_AMT },
220    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_C },
221    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M },
222    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M_AMT },
223    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_BM },
224    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE },
225    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_G },
226    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_GT },
227    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_AMT },
228    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_C },
229    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M },
230    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_AMT },
231    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_V },
232    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LF },
233    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LM },
234    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_V },
235    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LF },
236    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LM },
237    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_V },
238    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_1 },
239    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_2 },
240    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_3 },
241    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_4 },
242    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_5 },
243    { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_6 }
244};
245
246/*********************************************************************
247 *  Function prototypes
248 *********************************************************************/
249int  em_probe(struct device *, void *, void *);
250void em_attach(struct device *, struct device *, void *);
251void em_defer_attach(struct device*);
252int  em_detach(struct device *, int);
253int  em_activate(struct device *, int);
254int  em_intr(void *);
255int  em_allocate_legacy(struct em_softc *);
256void em_start(struct ifqueue *);
257int  em_ioctl(struct ifnet *, u_long, caddr_t);
258void em_watchdog(struct ifnet *);
259void em_init(void *);
260void em_stop(void *, int);
261void em_media_status(struct ifnet *, struct ifmediareq *);
262int  em_media_change(struct ifnet *);
263uint64_t  em_flowstatus(struct em_softc *);
264void em_identify_hardware(struct em_softc *);
265int  em_allocate_pci_resources(struct em_softc *);
266void em_free_pci_resources(struct em_softc *);
267void em_local_timer(void *);
268int  em_hardware_init(struct em_softc *);
269void em_setup_interface(struct em_softc *);
270int  em_setup_transmit_structures(struct em_softc *);
271void em_initialize_transmit_unit(struct em_softc *);
272int  em_setup_receive_structures(struct em_softc *);
273void em_initialize_receive_unit(struct em_softc *);
274void em_enable_intr(struct em_softc *);
275void em_disable_intr(struct em_softc *);
276void em_free_transmit_structures(struct em_softc *);
277void em_free_receive_structures(struct em_softc *);
278void em_disable_aspm(struct em_softc *);
279void em_txeof(struct em_queue *);
280int  em_allocate_receive_structures(struct em_softc *);
281int  em_allocate_transmit_structures(struct em_softc *);
282int  em_allocate_desc_rings(struct em_softc *);
283int  em_rxfill(struct em_queue *);
284void em_rxrefill(void *);
285void em_rxrefill_locked(struct em_queue *);
286int  em_rxeof(struct em_queue *);
287void em_receive_checksum(struct em_softc *, struct em_rx_desc *,
288             struct mbuf *);
289u_int   em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
290        u_int32_t *, u_int32_t *);
291u_int   em_tso_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
292        u_int32_t *);
293u_int   em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
294        u_int32_t *);
295void em_iff(struct em_softc *);
296void em_update_link_status(struct em_softc *);
297int  em_get_buf(struct em_queue *, int);
298void em_enable_hw_vlans(struct em_softc *);
299u_int em_encap(struct em_queue *, struct mbuf *);
300void em_smartspeed(struct em_softc *);
301int  em_82547_fifo_workaround(struct em_softc *, int);
302void em_82547_update_fifo_head(struct em_softc *, int);
303int  em_82547_tx_fifo_reset(struct em_softc *);
304void em_82547_move_tail(void *arg);
305void em_82547_move_tail_locked(struct em_softc *);
306int  em_dma_malloc(struct em_softc *, bus_size_t, struct em_dma_alloc *);
307void em_dma_free(struct em_softc *, struct em_dma_alloc *);
308u_int32_t em_fill_descriptors(u_int64_t address, u_int32_t length,
309                  PDESC_ARRAY desc_array);
310void em_flush_tx_ring(struct em_queue *);
311void em_flush_rx_ring(struct em_queue *);
312void em_flush_desc_rings(struct em_softc *);
313int em_get_sffpage(struct em_softc *, struct if_sffpage *);
314
315#ifndef SMALL_KERNEL
316/* MSIX/Multiqueue functions */
317int  em_allocate_msix(struct em_softc *);
318int  em_setup_queues_msix(struct em_softc *);
319int  em_queue_intr_msix(void *);
320int  em_link_intr_msix(void *);
321void em_enable_queue_intr_msix(struct em_queue *);
322#else
323#define em_allocate_msix(_sc)   (-1)
324#endif
325
326#if NKSTAT > 0
327void    em_kstat_attach(struct em_softc *);
328int em_kstat_read(struct kstat *);
329void    em_tbi_adjust_stats(struct em_softc *, uint32_t, uint8_t *);
330#endif
331
332/*********************************************************************
333 *  OpenBSD Device Interface Entry Points
334 *********************************************************************/
335
336const struct cfattach em_ca = {
337    sizeof(struct em_softc), em_probe, em_attach, em_detach,
338    em_activate
339};
340
341struct cfdriver em_cd = {
342    NULL, "em", DV_IFNET
343};
344
345static int em_smart_pwr_down = FALSE;
346int em_enable_msix = 0;
347
348/*********************************************************************
349 *  Device identification routine
350 *
351 *  em_probe determines if the driver should be loaded on
352 *  adapter based on PCI vendor/device id of the adapter.
353 *
354 *  return 0 on no match, positive on match
355 *********************************************************************/
356
357int
358em_probe(struct device *parent, void *match, void *aux)
359{
360    INIT_DEBUGOUT("em_probe: begin");
361
362    return (pci_matchbyid((struct pci_attach_args *)aux, em_devices,
363        nitems(em_devices)));
364}
365
366void
367em_defer_attach(struct device *self)
368{
369    struct em_softc *sc = (struct em_softc *)self;
370    struct pci_attach_args *pa = &sc->osdep.em_pa;
371    pci_chipset_tag_t   pc = pa->pa_pc;
372    void *gcu;
373
374    INIT_DEBUGOUT("em_defer_attach: begin");
375
376    if ((gcu = em_lookup_gcu(self)) == 0) {
377        printf("%s: No GCU found, deferred attachment failed\n",
378            DEVNAME(sc));
379
380        if (sc->sc_intrhand)
381            pci_intr_disestablish(pc, sc->sc_intrhand);
382        sc->sc_intrhand = 0;
383
384        em_stop(sc, 1);
385
386        em_free_pci_resources(sc);
387
388        return;
389    }
390
391    sc->hw.gcu = gcu;
392
393    em_attach_miibus(self);
394
395    em_setup_interface(sc);
396
397    em_setup_link(&sc->hw);
398
399    em_update_link_status(sc);
400}
401
402/*********************************************************************
403 *  Device initialization routine
404 *
405 *  The attach entry point is called when the driver is being loaded.
406 *  This routine identifies the type of hardware, allocates all resources
407 *  and initializes the hardware.
408 *
409 *********************************************************************/
410
411void
412em_attach(struct device *parent, struct device *self, void *aux)
413{
414    struct pci_attach_args *pa = aux;
415    struct em_softc *sc;
416    int defer = 0;
417
418    INIT_DEBUGOUT("em_attach: begin");
419
420    sc = (struct em_softc *)self;
421    sc->sc_dmat = pa->pa_dmat;
422    sc->osdep.em_pa = *pa;
423
424    timeout_set(&sc->timer_handle, em_local_timer, sc);
425    timeout_set(&sc->tx_fifo_timer_handle, em_82547_move_tail, sc);
426
427    rw_init(&sc->sfflock, "emsff");
428
429    /* Determine hardware revision */
430    em_identify_hardware(sc);
431
432    /*
433     * Only use MSI on the newer PCIe parts, with the exception
434     * of 82571/82572 due to "Byte Enables 2 and 3 Are Not Set" errata
435     */
436    if (sc->hw.mac_type <= em_82572)
437        sc->osdep.em_pa.pa_flags &= ~PCI_FLAGS_MSI_ENABLED;
438
439    /* Parameters (to be read from user) */
440    if (sc->hw.mac_type >= em_82544) {
441        sc->sc_tx_slots = EM_MAX_TXD;
442        sc->sc_rx_slots = EM_MAX_RXD;
443    } else {
444        sc->sc_tx_slots = EM_MAX_TXD_82543;
445        sc->sc_rx_slots = EM_MAX_RXD_82543;
446    }
447    sc->tx_int_delay = EM_TIDV;
448    sc->tx_abs_int_delay = EM_TADV;
449    sc->rx_int_delay = EM_RDTR;
450    sc->rx_abs_int_delay = EM_RADV;
451    sc->hw.autoneg = DO_AUTO_NEG;
452    sc->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
453    sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
454    sc->hw.tbi_compatibility_en = TRUE;
455    sc->sc_rx_buffer_len = EM_RXBUFFER_2048;
456
457    sc->hw.phy_init_script = 1;
458    sc->hw.phy_reset_disable = FALSE;
459
460#ifndef EM_MASTER_SLAVE
461    sc->hw.master_slave = em_ms_hw_default;
462#else
463    sc->hw.master_slave = EM_MASTER_SLAVE;
464#endif
465
466    /*
467     * This controls when hardware reports transmit completion
468     * status.
469     */
470    sc->hw.report_tx_early = 1;
471
472    if (em_allocate_pci_resources(sc))
473        goto err_pci;
474
475    /* Initialize eeprom parameters */
476    em_init_eeprom_params(&sc->hw);
477
478    /*
479     * Set the max frame size assuming standard Ethernet
480     * sized frames.
481     */
482    switch (sc->hw.mac_type) {
483        case em_82573:
484        {
485            uint16_t    eeprom_data = 0;
486
487            /*
488             * 82573 only supports Jumbo frames
489             * if ASPM is disabled.
490             */
491            em_read_eeprom(&sc->hw, EEPROM_INIT_3GIO_3,
492                1, &eeprom_data);
493            if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
494                sc->hw.max_frame_size = ETHER_MAX_LEN;
495                break;
496            }
497            /* Allow Jumbo frames */
498            /* FALLTHROUGH */
499        }
500        case em_82571:
501        case em_82572:
502        case em_82574:
503        case em_82575:
504        case em_82576:
505        case em_82580:
506        case em_i210:
507        case em_i350:
508        case em_ich9lan:
509        case em_ich10lan:
510        case em_pch2lan:
511        case em_pch_lpt:
512        case em_pch_spt:
513        case em_pch_cnp:
514        case em_pch_tgp:
515        case em_pch_adp:
516        case em_80003es2lan:
517            /* 9K Jumbo Frame size */
518            sc->hw.max_frame_size = 9234;
519            break;
520        case em_pchlan:
521            sc->hw.max_frame_size = 4096;
522            break;
523        case em_82542_rev2_0:
524        case em_82542_rev2_1:
525        case em_ich8lan:
526            /* Adapters that do not support Jumbo frames */
527            sc->hw.max_frame_size = ETHER_MAX_LEN;
528            break;
529        default:
530            sc->hw.max_frame_size =
531                MAX_JUMBO_FRAME_SIZE;
532    }
533
534    sc->hw.min_frame_size =
535        ETHER_MIN_LEN + ETHER_CRC_LEN;
536
537    if (em_allocate_desc_rings(sc) != 0) {
538        printf("%s: Unable to allocate descriptor ring memory\n",
539            DEVNAME(sc));
540        goto err_pci;
541    }
542
543    /* Initialize the hardware */
544    if ((defer = em_hardware_init(sc))) {
545        if (defer == EAGAIN)
546            config_defer(self, em_defer_attach);
547        else {
548            printf("%s: Unable to initialize the hardware\n",
549                DEVNAME(sc));
550            goto err_pci;
551        }
552    }
553
554    if (sc->hw.mac_type == em_80003es2lan || sc->hw.mac_type == em_82575 ||
555        sc->hw.mac_type == em_82576 ||
556        sc->hw.mac_type == em_82580 || sc->hw.mac_type == em_i210 ||
557        sc->hw.mac_type == em_i350) {
558        uint32_t reg = EM_READ_REG(&sc->hw, E1000_STATUS);
559        sc->hw.bus_func = (reg & E1000_STATUS_FUNC_MASK) >>
560            E1000_STATUS_FUNC_SHIFT;
561
562        switch (sc->hw.bus_func) {
563        case 0:
564            sc->hw.swfw = E1000_SWFW_PHY0_SM;
565            break;
566        case 1:
567            sc->hw.swfw = E1000_SWFW_PHY1_SM;
568            break;
569        case 2:
570            sc->hw.swfw = E1000_SWFW_PHY2_SM;
571            break;
572        case 3:
573            sc->hw.swfw = E1000_SWFW_PHY3_SM;
574            break;
575        }
576    } else {
577        sc->hw.bus_func = 0;
578    }
579
580    /* Copy the permanent MAC address out of the EEPROM */
581    if (em_read_mac_addr(&sc->hw) < 0) {
582        printf("%s: EEPROM read error while reading mac address\n",
583               DEVNAME(sc));
584        goto err_pci;
585    }
586
587    bcopy(sc->hw.mac_addr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
588
589    /* Setup OS specific network interface */
590    if (!defer)
591        em_setup_interface(sc);
592
593    /* Initialize statistics */
594    em_clear_hw_cntrs(&sc->hw);
595#if NKSTAT > 0
596    em_kstat_attach(sc);
597#endif
598    sc->hw.get_link_status = 1;
599    if (!defer)
600        em_update_link_status(sc);
601
602#ifdef EM_DEBUG
603    printf(", mac %#x phy %#x", sc->hw.mac_type, sc->hw.phy_type);
604#endif
605    printf(", address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
606
607    /* Indicate SOL/IDER usage */
608    if (em_check_phy_reset_block(&sc->hw))
609        printf("%s: PHY reset is blocked due to SOL/IDER session.\n",
610            DEVNAME(sc));
611
612    /* Identify 82544 on PCI-X */
613    em_get_bus_info(&sc->hw);
614    if (sc->hw.bus_type == em_bus_type_pcix &&
615        sc->hw.mac_type == em_82544)
616        sc->pcix_82544 = TRUE;
617        else
618        sc->pcix_82544 = FALSE;
619
620    sc->hw.icp_xxxx_is_link_up = FALSE;
621
622    INIT_DEBUGOUT("em_attach: end");
623    return;
624
625err_pci:
626    em_free_pci_resources(sc);
627}
628
629/*********************************************************************
630 *  Transmit entry point
631 *
632 *  em_start is called by the stack to initiate a transmit.
633 *  The driver will remain in this routine as long as there are
634 *  packets to transmit and transmit resources are available.
635 *  In case resources are not available stack is notified and
636 *  the packet is requeued.
637 **********************************************************************/
638
639void
640em_start(struct ifqueue *ifq)
641{
642    struct ifnet *ifp = ifq->ifq_if;
643    struct em_softc *sc = ifp->if_softc;
644    u_int head, free, used;
645    struct mbuf *m;
646    int post = 0;
647    struct em_queue *que = sc->queues; /* Use only first queue. */
648
649    if (!sc->link_active) {
650        ifq_purge(ifq);
651        return;
652    }
653
654    /* calculate free space */
655    head = que->tx.sc_tx_desc_head;
656    free = que->tx.sc_tx_desc_tail;
657    if (free <= head)
658        free += sc->sc_tx_slots;
659    free -= head;
660
661    if (sc->hw.mac_type != em_82547) {
662        bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
663            0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
664            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
665    }
666
667    for (;;) {
668        /* use 2 because cksum setup can use an extra slot */
669        if (EM_MAX_SCATTER + 2 > free) {
670            ifq_set_oactive(ifq);
671            break;
672        }
673
674        m = ifq_dequeue(ifq);
675        if (m == NULL)
676            break;
677
678        used = em_encap(que, m);
679        if (used == 0) {
680            m_freem(m);
681            continue;
682        }
683
684        KASSERT(used <= free);
685
686        free -= used;
687
688#if NBPFILTER > 0
689        /* Send a copy of the frame to the BPF listener */
690        if (ifp->if_bpf)
691            bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
692#endif
693
694        /* Set timeout in case hardware has problems transmitting */
695        ifp->if_timer = EM_TX_TIMEOUT;
696
697        if (sc->hw.mac_type == em_82547) {
698            int len = m->m_pkthdr.len;
699
700            if (sc->link_duplex == HALF_DUPLEX)
701                em_82547_move_tail_locked(sc);
702            else {
703                E1000_WRITE_REG(&sc->hw, TDT(que->me),
704                    que->tx.sc_tx_desc_head);
705                em_82547_update_fifo_head(sc, len);
706            }
707        }
708
709        post = 1;
710    }
711
712    if (sc->hw.mac_type != em_82547) {
713        bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
714            0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
715            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
716        /*
717         * Advance the Transmit Descriptor Tail (Tdt),
718         * this tells the E1000 that this frame is
719         * available to transmit.
720         */
721        if (post)
722            E1000_WRITE_REG(&sc->hw, TDT(que->me),
723                que->tx.sc_tx_desc_head);
724    }
725}
726
727/*********************************************************************
728 *  Ioctl entry point
729 *
730 *  em_ioctl is called when the user wants to configure the
731 *  interface.
732 *
733 *  return 0 on success, positive on failure
734 **********************************************************************/
735
736int
737em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
738{
739    int     error = 0;
740    struct ifreq   *ifr = (struct ifreq *) data;
741    struct em_softc *sc = ifp->if_softc;
742    int s;
743
744    s = splnet();
745
746    switch (command) {
747    case SIOCSIFADDR:
748        IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFADDR (Set Interface "
749                   "Addr)");
750        if (!(ifp->if_flags & IFF_UP)) {
751            ifp->if_flags |= IFF_UP;
752            em_init(sc);
753        }
754        break;
755
756    case SIOCSIFFLAGS:
757        IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
758        if (ifp->if_flags & IFF_UP) {
759            if (ifp->if_flags & IFF_RUNNING)
760                error = ENETRESET;
761            else
762                em_init(sc);
763        } else {
764            if (ifp->if_flags & IFF_RUNNING)
765                em_stop(sc, 0);
766        }
767        break;
768
769    case SIOCSIFMEDIA:
770        /* Check SOL/IDER usage */
771        if (em_check_phy_reset_block(&sc->hw)) {
772            printf("%s: Media change is blocked due to SOL/IDER session.\n",
773                DEVNAME(sc));
774            break;
775        }
776    case SIOCGIFMEDIA:
777        IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
778        error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
779        break;
780
781    case SIOCGIFRXR:
782        error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
783            NULL, EM_MCLBYTES, &sc->queues->rx.sc_rx_ring);
784        break;
785
786    case SIOCGIFSFFPAGE:
787        error = rw_enter(&sc->sfflock, RW_WRITE|RW_INTR);
788        if (error != 0)
789            break;
790
791        error = em_get_sffpage(sc, (struct if_sffpage *)data);
792        rw_exit(&sc->sfflock);
793        break;
794
795    default:
796        error = ether_ioctl(ifp, &sc->sc_ac, command, data);
797    }
798
799    if (error == ENETRESET) {
800        if (ifp->if_flags & IFF_RUNNING) {
801            em_disable_intr(sc);
802            em_iff(sc);
803            if (sc->hw.mac_type == em_82542_rev2_0)
804                em_initialize_receive_unit(sc);
805            em_enable_intr(sc);
806        }
807        error = 0;
808    }
809
810    splx(s);
811    return (error);
812}
813
814/*********************************************************************
815 *  Watchdog entry point
816 *
817 *  This routine is called whenever hardware quits transmitting.
818 *
819 **********************************************************************/
820
821void
822em_watchdog(struct ifnet *ifp)
823{
824    struct em_softc *sc = ifp->if_softc;
825    struct em_queue *que = sc->queues; /* Use only first queue. */
826
827
828    /* If we are in this routine because of pause frames, then
829     * don't reset the hardware.
830     */
831    if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_TXOFF) {
832        ifp->if_timer = EM_TX_TIMEOUT;
833        return;
834    }
835    printf("%s: watchdog: head %u tail %u TDH %u TDT %u\n",
836        DEVNAME(sc),
837        que->tx.sc_tx_desc_head, que->tx.sc_tx_desc_tail,
838        E1000_READ_REG(&sc->hw, TDH(que->me)),
839        E1000_READ_REG(&sc->hw, TDT(que->me)));
840
841    em_init(sc);
842
843    sc->watchdog_events++;
844}
845
846/*********************************************************************
847 *  Init entry point
848 *
849 *  This routine is used in two ways. It is used by the stack as
850 *  init entry point in network interface structure. It is also used
851 *  by the driver as a hw/sw initialization routine to get to a
852 *  consistent state.
853 *
854 **********************************************************************/
855
856void
857em_init(void *arg)
858{
859    struct em_softc *sc = arg;
860    struct ifnet   *ifp = &sc->sc_ac.ac_if;
861    uint32_t    pba;
862    int s;
863
864    s = splnet();
865
866    INIT_DEBUGOUT("em_init: begin");
867
868    em_stop(sc, 0);
869
870    /*
871     * Packet Buffer Allocation (PBA)
872     * Writing PBA sets the receive portion of the buffer
873     * the remainder is used for the transmit buffer.
874     *
875     * Devices before the 82547 had a Packet Buffer of 64K.
876     *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
877     * After the 82547 the buffer was reduced to 40K.
878     *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
879     *   Note: default does not leave enough room for Jumbo Frame >10k.
880     */
881    switch (sc->hw.mac_type) {
882    case em_82547:
883    case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
884        if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
885            pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
886        else
887            pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
888        sc->tx_fifo_head = 0;
889        sc->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
890        sc->tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
891        break;
892    case em_82571:
893    case em_82572: /* Total Packet Buffer on these is 48k */
894    case em_82575:
895    case em_82576:
896    case em_82580:
897    case em_80003es2lan:
898    case em_i350:
899        pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
900        break;
901    case em_i210:
902        pba = E1000_PBA_34K;
903        break;
904    case em_82573: /* 82573: Total Packet Buffer is 32K */
905        /* Jumbo frames not supported */
906        pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
907        break;
908    case em_82574: /* Total Packet Buffer is 40k */
909        pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
910        break;
911    case em_ich8lan:
912        pba = E1000_PBA_8K;
913        break;
914    case em_ich9lan:
915    case em_ich10lan:
916        /* Boost Receive side for jumbo frames */
917        if (sc->hw.max_frame_size > EM_RXBUFFER_4096)
918            pba = E1000_PBA_14K;
919        else
920            pba = E1000_PBA_10K;
921        break;
922    case em_pchlan:
923    case em_pch2lan:
924    case em_pch_lpt:
925    case em_pch_spt:
926    case em_pch_cnp:
927    case em_pch_tgp:
928    case em_pch_adp:
929        pba = E1000_PBA_26K;
930        break;
931    default:
932        /* Devices before 82547 had a Packet Buffer of 64K.   */
933        if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
934            pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
935        else
936            pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
937    }
938    INIT_DEBUGOUT1("em_init: pba=%dK",pba);
939    E1000_WRITE_REG(&sc->hw, PBA, pba);
940
941    /* Get the latest mac address, User can use a LAA */
942    bcopy(sc->sc_ac.ac_enaddr, sc->hw.mac_addr, ETHER_ADDR_LEN);
943
944    /* Initialize the hardware */
945    if (em_hardware_init(sc)) {
946        printf("%s: Unable to initialize the hardware\n",
947               DEVNAME(sc));
948        splx(s);
949        return;
950    }
951    em_update_link_status(sc);
952
953    E1000_WRITE_REG(&sc->hw, VET, ETHERTYPE_VLAN);
954    if (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING)
955        em_enable_hw_vlans(sc);
956
957    /* Prepare transmit descriptors and buffers */
958    if (em_setup_transmit_structures(sc)) {
959        printf("%s: Could not setup transmit structures\n",
960               DEVNAME(sc));
961        em_stop(sc, 0);
962        splx(s);
963        return;
964    }
965    em_initialize_transmit_unit(sc);
966
967    /* Prepare receive descriptors and buffers */
968    if (em_setup_receive_structures(sc)) {
969        printf("%s: Could not setup receive structures\n",
970               DEVNAME(sc));
971        em_stop(sc, 0);
972        splx(s);
973        return;
974    }
975    em_initialize_receive_unit(sc);
976
977#ifndef SMALL_KERNEL
978    if (sc->msix) {
979        if (em_setup_queues_msix(sc)) {
980            printf("%s: Can't setup msix queues\n", DEVNAME(sc));
981            splx(s);
982            return;
983        }
984    }
985#endif
986
987    /* Program promiscuous mode and multicast filters. */
988    em_iff(sc);
989
990    ifp->if_flags |= IFF_RUNNING;
991    ifq_clr_oactive(&ifp->if_snd);
992
993    timeout_add_sec(&sc->timer_handle, 1);
994    em_clear_hw_cntrs(&sc->hw);
995    em_enable_intr(sc);
996
997    /* Don't reset the phy next time init gets called */
998    sc->hw.phy_reset_disable = TRUE;
999
1000    splx(s);
1001}
1002
1003/*********************************************************************
1004 *
1005 *  Interrupt Service routine
1006 *
1007 **********************************************************************/
1008int
1009em_intr(void *arg)
1010{
1011    struct em_softc *sc = arg;
1012    struct em_queue *que = sc->queues; /* single queue */
1013    struct ifnet    *ifp = &sc->sc_ac.ac_if;
1014    u_int32_t   reg_icr, test_icr;
1015
1016    test_icr = reg_icr = E1000_READ_REG(&sc->hw, ICR);
1017    if (sc->hw.mac_type >= em_82571)
1018        test_icr = (reg_icr & E1000_ICR_INT_ASSERTED);
1019    if (!test_icr)
1020        return (0);
1021
1022    if (ifp->if_flags & IFF_RUNNING) {
1023        em_txeof(que);
1024        if (em_rxeof(que))
1025            em_rxrefill_locked(que);
1026    }
1027
1028    /* Link status change */
1029    if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1030        KERNEL_LOCK();
1031        sc->hw.get_link_status = 1;
1032        em_check_for_link(&sc->hw);
1033        em_update_link_status(sc);
1034        KERNEL_UNLOCK();
1035    }
1036
1037    return (1);
1038}
1039
1040/*********************************************************************
1041 *
1042 *  Media Ioctl callback
1043 *
1044 *  This routine is called whenever the user queries the status of
1045 *  the interface using ifconfig.
1046 *
1047 **********************************************************************/
1048void
1049em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1050{
1051    struct em_softc *sc = ifp->if_softc;
1052    uint64_t fiber_type = IFM_1000_SX;
1053    u_int16_t gsr;
1054
1055    INIT_DEBUGOUT("em_media_status: begin");
1056
1057    em_check_for_link(&sc->hw);
1058    em_update_link_status(sc);
1059
1060    ifmr->ifm_status = IFM_AVALID;
1061    ifmr->ifm_active = IFM_ETHER;
1062
1063    if (!sc->link_active) {
1064        ifmr->ifm_active |= IFM_NONE;
1065        return;
1066    }
1067
1068    ifmr->ifm_status |= IFM_ACTIVE;
1069
1070    if (sc->hw.media_type == em_media_type_fiber ||
1071        sc->hw.media_type == em_media_type_internal_serdes) {
1072        if (sc->hw.mac_type == em_82545)
1073            fiber_type = IFM_1000_LX;
1074        ifmr->ifm_active |= fiber_type | IFM_FDX;
1075    } else {
1076        switch (sc->link_speed) {
1077        case 10:
1078            ifmr->ifm_active |= IFM_10_T;
1079            break;
1080        case 100:
1081            ifmr->ifm_active |= IFM_100_TX;
1082            break;
1083        case 1000:
1084            ifmr->ifm_active |= IFM_1000_T;
1085            break;
1086        }
1087
1088        if (sc->link_duplex == FULL_DUPLEX)
1089            ifmr->ifm_active |= em_flowstatus(sc) | IFM_FDX;
1090        else
1091            ifmr->ifm_active |= IFM_HDX;
1092
1093        if (IFM_SUBTYPE(ifmr->ifm_active) == IFM_1000_T) {
1094            em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &gsr);
1095            if (gsr & SR_1000T_MS_CONFIG_RES)
1096                ifmr->ifm_active |= IFM_ETH_MASTER;
1097        }
1098    }
1099}
1100
1101/*********************************************************************
1102 *
1103 *  Media Ioctl callback
1104 *
1105 *  This routine is called when the user changes speed/duplex using
1106 *  media/mediopt option with ifconfig.
1107 *
1108 **********************************************************************/
1109int
1110em_media_change(struct ifnet *ifp)
1111{
1112    struct em_softc *sc = ifp->if_softc;
1113    struct ifmedia  *ifm = &sc->media;
1114
1115    INIT_DEBUGOUT("em_media_change: begin");
1116
1117    if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1118        return (EINVAL);
1119
1120    switch (IFM_SUBTYPE(ifm->ifm_media)) {
1121    case IFM_AUTO:
1122        sc->hw.autoneg = DO_AUTO_NEG;
1123        sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1124        break;
1125    case IFM_1000_LX:
1126    case IFM_1000_SX:
1127    case IFM_1000_T:
1128        sc->hw.autoneg = DO_AUTO_NEG;
1129        sc->hw.autoneg_advertised = ADVERTISE_1000_FULL;
1130        break;
1131    case IFM_100_TX:
1132        sc->hw.autoneg = FALSE;
1133        sc->hw.autoneg_advertised = 0;
1134        if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1135            sc->hw.forced_speed_duplex = em_100_full;
1136        else
1137            sc->hw.forced_speed_duplex = em_100_half;
1138        break;
1139    case IFM_10_T:
1140        sc->hw.autoneg = FALSE;
1141        sc->hw.autoneg_advertised = 0;
1142        if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1143            sc->hw.forced_speed_duplex = em_10_full;
1144        else
1145            sc->hw.forced_speed_duplex = em_10_half;
1146        break;
1147    default:
1148        printf("%s: Unsupported media type\n", DEVNAME(sc));
1149    }
1150
1151    /*
1152     * As the speed/duplex settings may have changed we need to
1153     * reset the PHY.
1154     */
1155    sc->hw.phy_reset_disable = FALSE;
1156
1157    em_init(sc);
1158
1159    return (0);
1160}
1161
1162uint64_t
1163em_flowstatus(struct em_softc *sc)
1164{
1165    u_int16_t ar, lpar;
1166
1167    if (sc->hw.media_type == em_media_type_fiber ||
1168        sc->hw.media_type == em_media_type_internal_serdes)
1169        return (0);
1170
1171    em_read_phy_reg(&sc->hw, PHY_AUTONEG_ADV, &ar);
1172    em_read_phy_reg(&sc->hw, PHY_LP_ABILITY, &lpar);
1173
1174    if ((ar & NWAY_AR_PAUSE) && (lpar & NWAY_LPAR_PAUSE))
1175        return (IFM_FLOW|IFM_ETH_TXPAUSE|IFM_ETH_RXPAUSE);
1176    else if (!(ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1177        (lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1178        return (IFM_FLOW|IFM_ETH_TXPAUSE);
1179    else if ((ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1180        !(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1181        return (IFM_FLOW|IFM_ETH_RXPAUSE);
1182
1183    return (0);
1184}
1185
1186/*********************************************************************
1187 *
1188 *  This routine maps the mbufs to tx descriptors.
1189 *
1190 *  return 0 on failure, positive on success
1191 **********************************************************************/
1192u_int
1193em_encap(struct em_queue *que, struct mbuf *m)
1194{
1195    struct em_softc *sc = que->sc;
1196    struct em_packet *pkt;
1197    struct em_tx_desc *desc;
1198    bus_dmamap_t map;
1199    u_int32_t txd_upper, txd_lower;
1200    u_int head, last, used = 0;
1201    int i, j;
1202
1203    /* For 82544 Workaround */
1204    DESC_ARRAY      desc_array;
1205    u_int32_t       array_elements;
1206
1207    /* get a dmamap for this packet from the next free slot */
1208    head = que->tx.sc_tx_desc_head;
1209    pkt = &que->tx.sc_tx_pkts_ring[head];
1210    map = pkt->pkt_map;
1211
1212    switch (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
1213    case 0:
1214        break;
1215    case EFBIG:
1216        if (m_defrag(m, M_DONTWAIT) == 0 &&
1217            bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
1218             BUS_DMA_NOWAIT) == 0)
1219            break;
1220
1221        /* FALLTHROUGH */
1222    default:
1223        sc->no_tx_dma_setup++;
1224        return (0);
1225    }
1226
1227    bus_dmamap_sync(sc->sc_dmat, map,
1228        0, map->dm_mapsize,
1229        BUS_DMASYNC_PREWRITE);
1230
1231    if (sc->hw.mac_type == em_82547) {
1232        bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1233            0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1234            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1235    }
1236
1237    if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
1238        if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
1239            used += em_tso_setup(que, m, head, &txd_upper,
1240                &txd_lower);
1241            if (!used)
1242                return (used);
1243        } else {
1244            used += em_tx_ctx_setup(que, m, head, &txd_upper,
1245                &txd_lower);
1246        }
1247    } else if (sc->hw.mac_type >= em_82543) {
1248        used += em_transmit_checksum_setup(que, m, head,
1249            &txd_upper, &txd_lower);
1250    } else {
1251        txd_upper = txd_lower = 0;
1252    }
1253
1254    head += used;
1255    if (head >= sc->sc_tx_slots)
1256        head -= sc->sc_tx_slots;
1257
1258    for (i = 0; i < map->dm_nsegs; i++) {
1259        /* If sc is 82544 and on PCI-X bus */
1260        if (sc->pcix_82544) {
1261            /*
1262             * Check the Address and Length combination and
1263             * split the data accordingly
1264             */
1265            array_elements = em_fill_descriptors(
1266                map->dm_segs[i].ds_addr, map->dm_segs[i].ds_len,
1267                &desc_array);
1268            for (j = 0; j < array_elements; j++) {
1269                desc = &que->tx.sc_tx_desc_ring[head];
1270
1271                desc->buffer_addr = htole64(
1272                    desc_array.descriptor[j].address);
1273                desc->lower.data = htole32(
1274                    (que->tx.sc_txd_cmd | txd_lower |
1275                     (u_int16_t)desc_array.descriptor[j].length));
1276                desc->upper.data = htole32(txd_upper);
1277
1278                last = head;
1279                if (++head == sc->sc_tx_slots)
1280                    head = 0;
1281
1282                used++;
1283            }
1284        } else {
1285            desc = &que->tx.sc_tx_desc_ring[head];
1286
1287            desc->buffer_addr = htole64(map->dm_segs[i].ds_addr);
1288            desc->lower.data = htole32(que->tx.sc_txd_cmd |
1289                txd_lower | map->dm_segs[i].ds_len);
1290            desc->upper.data = htole32(txd_upper);
1291
1292            last = head;
1293            if (++head == sc->sc_tx_slots)
1294                    head = 0;
1295
1296            used++;
1297        }
1298    }
1299
1300#if NVLAN > 0
1301    /* Find out if we are in VLAN mode */
1302    if (m->m_flags & M_VLANTAG && (sc->hw.mac_type < em_82575 ||
1303        sc->hw.mac_type > em_i210)) {
1304        /* Set the VLAN id */
1305        desc->upper.fields.special = htole16(m->m_pkthdr.ether_vtag);
1306
1307        /* Tell hardware to add tag */
1308        desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
1309    }
1310#endif
1311
1312    /* mark the packet with the mbuf and last desc slot */
1313    pkt->pkt_m = m;
1314    pkt->pkt_eop = last;
1315
1316    que->tx.sc_tx_desc_head = head;
1317
1318    /*
1319     * Last Descriptor of Packet
1320     * needs End Of Packet (EOP)
1321     * and Report Status (RS)
1322     */
1323    desc->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1324
1325    if (sc->hw.mac_type == em_82547) {
1326        bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1327            0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1328            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1329    }
1330
1331    return (used);
1332}
1333
1334/*********************************************************************
1335 *
1336 * 82547 workaround to avoid controller hang in half-duplex environment.
1337 * The workaround is to avoid queuing a large packet that would span
1338 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1339 * in this case. We do that only when FIFO is quiescent.
1340 *
1341 **********************************************************************/
1342void
1343em_82547_move_tail_locked(struct em_softc *sc)
1344{
1345    uint16_t hw_tdt;
1346    uint16_t sw_tdt;
1347    struct em_tx_desc *tx_desc;
1348    uint16_t length = 0;
1349    boolean_t eop = 0;
1350    struct em_queue *que = sc->queues; /* single queue chip */
1351
1352    hw_tdt = E1000_READ_REG(&sc->hw, TDT(que->me));
1353    sw_tdt = que->tx.sc_tx_desc_head;
1354
1355    while (hw_tdt != sw_tdt) {
1356        tx_desc = &que->tx.sc_tx_desc_ring[hw_tdt];
1357        length += tx_desc->lower.flags.length;
1358        eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1359        if (++hw_tdt == sc->sc_tx_slots)
1360            hw_tdt = 0;
1361
1362        if (eop) {
1363            if (em_82547_fifo_workaround(sc, length)) {
1364                sc->tx_fifo_wrk_cnt++;
1365                timeout_add(&sc->tx_fifo_timer_handle, 1);
1366                break;
1367            }
1368            E1000_WRITE_REG(&sc->hw, TDT(que->me), hw_tdt);
1369            em_82547_update_fifo_head(sc, length);
1370            length = 0;
1371        }
1372    }
1373}
1374
1375void
1376em_82547_move_tail(void *arg)
1377{
1378    struct em_softc *sc = arg;
1379    int s;
1380
1381    s = splnet();
1382    em_82547_move_tail_locked(sc);
1383    splx(s);
1384}
1385
1386int
1387em_82547_fifo_workaround(struct em_softc *sc, int len)
1388{
1389    int fifo_space, fifo_pkt_len;
1390
1391    fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1392
1393    if (sc->link_duplex == HALF_DUPLEX) {
1394        fifo_space = sc->tx_fifo_size - sc->tx_fifo_head;
1395
1396        if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1397            if (em_82547_tx_fifo_reset(sc))
1398                return (0);
1399            else
1400                return (1);
1401        }
1402    }
1403
1404    return (0);
1405}
1406
1407void
1408em_82547_update_fifo_head(struct em_softc *sc, int len)
1409{
1410    int fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1411
1412    /* tx_fifo_head is always 16 byte aligned */
1413    sc->tx_fifo_head += fifo_pkt_len;
1414    if (sc->tx_fifo_head >= sc->tx_fifo_size)
1415        sc->tx_fifo_head -= sc->tx_fifo_size;
1416}
1417
1418int
1419em_82547_tx_fifo_reset(struct em_softc *sc)
1420{
1421    uint32_t tctl;
1422    struct em_queue *que = sc->queues; /* single queue chip */
1423
1424    if ((E1000_READ_REG(&sc->hw, TDT(que->me)) ==
1425         E1000_READ_REG(&sc->hw, TDH(que->me))) &&
1426        (E1000_READ_REG(&sc->hw, TDFT) ==
1427         E1000_READ_REG(&sc->hw, TDFH)) &&
1428        (E1000_READ_REG(&sc->hw, TDFTS) ==
1429         E1000_READ_REG(&sc->hw, TDFHS)) &&
1430        (E1000_READ_REG(&sc->hw, TDFPC) == 0)) {
1431
1432        /* Disable TX unit */
1433        tctl = E1000_READ_REG(&sc->hw, TCTL);
1434        E1000_WRITE_REG(&sc->hw, TCTL, tctl & ~E1000_TCTL_EN);
1435
1436        /* Reset FIFO pointers */
1437        E1000_WRITE_REG(&sc->hw, TDFT, sc->tx_head_addr);
1438        E1000_WRITE_REG(&sc->hw, TDFH, sc->tx_head_addr);
1439        E1000_WRITE_REG(&sc->hw, TDFTS, sc->tx_head_addr);
1440        E1000_WRITE_REG(&sc->hw, TDFHS, sc->tx_head_addr);
1441
1442        /* Re-enable TX unit */
1443        E1000_WRITE_REG(&sc->hw, TCTL, tctl);
1444        E1000_WRITE_FLUSH(&sc->hw);
1445
1446        sc->tx_fifo_head = 0;
1447        sc->tx_fifo_reset_cnt++;
1448
1449        return (TRUE);
1450    } else
1451        return (FALSE);
1452}
1453
1454void
1455em_iff(struct em_softc *sc)
1456{
1457    struct ifnet *ifp = &sc->sc_ac.ac_if;
1458    struct arpcom *ac = &sc->sc_ac;
1459    u_int32_t reg_rctl = 0;
1460    u_int8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1461    struct ether_multi *enm;
1462    struct ether_multistep step;
1463    int i = 0;
1464
1465    IOCTL_DEBUGOUT("em_iff: begin");
1466
1467    if (sc->hw.mac_type == em_82542_rev2_0) {
1468        reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1469        if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1470            em_pci_clear_mwi(&sc->hw);
1471        reg_rctl |= E1000_RCTL_RST;
1472        E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1473        msec_delay(5);
1474    }
1475
1476    reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1477    reg_rctl &= ~(E1000_RCTL_MPE | E1000_RCTL_UPE);
1478    ifp->if_flags &= ~IFF_ALLMULTI;
1479
1480    if (ifp->if_flags & IFF_PROMISC || ac->ac_multirangecnt > 0 ||
1481        ac->ac_multicnt > MAX_NUM_MULTICAST_ADDRESSES) {
1482        ifp->if_flags |= IFF_ALLMULTI;
1483        reg_rctl |= E1000_RCTL_MPE;
1484        if (ifp->if_flags & IFF_PROMISC)
1485            reg_rctl |= E1000_RCTL_UPE;
1486    } else {
1487        ETHER_FIRST_MULTI(step, ac, enm);
1488        while (enm != NULL) {
1489            bcopy(enm->enm_addrlo, mta + i, ETH_LENGTH_OF_ADDRESS);
1490            i += ETH_LENGTH_OF_ADDRESS;
1491
1492            ETHER_NEXT_MULTI(step, enm);
1493        }
1494
1495        em_mc_addr_list_update(&sc->hw, mta, ac->ac_multicnt, 0);
1496    }
1497
1498    E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1499
1500    if (sc->hw.mac_type == em_82542_rev2_0) {
1501        reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1502        reg_rctl &= ~E1000_RCTL_RST;
1503        E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1504        msec_delay(5);
1505        if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1506            em_pci_set_mwi(&sc->hw);
1507    }
1508}
1509
1510/*********************************************************************
1511 *  Timer routine
1512 *
1513 *  This routine checks for link status and updates statistics.
1514 *
1515 **********************************************************************/
1516
1517void
1518em_local_timer(void *arg)
1519{
1520    struct em_softc *sc = arg;
1521    int s;
1522
1523    timeout_add_sec(&sc->timer_handle, 1);
1524
1525    s = splnet();
1526    em_smartspeed(sc);
1527    splx(s);
1528
1529#if NKSTAT > 0
1530    if (sc->kstat != NULL && mtx_enter_try(&sc->kstat_mtx)) {
1531        em_kstat_read(sc->kstat);
1532        mtx_leave(&sc->kstat_mtx);
1533    }
1534#endif
1535}
1536
1537void
1538em_update_link_status(struct em_softc *sc)
1539{
1540    struct ifnet *ifp = &sc->sc_ac.ac_if;
1541    u_char link_state;
1542
1543    if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_LU) {
1544        if (sc->link_active == 0) {
1545            em_get_speed_and_duplex(&sc->hw,
1546                        &sc->link_speed,
1547                        &sc->link_duplex);
1548            /* Check if we may set SPEED_MODE bit on PCI-E */
1549            if ((sc->link_speed == SPEED_1000) &&
1550                ((sc->hw.mac_type == em_82571) ||
1551                (sc->hw.mac_type == em_82572) ||
1552                (sc->hw.mac_type == em_82575) ||
1553                (sc->hw.mac_type == em_82576) ||
1554                (sc->hw.mac_type == em_82580))) {
1555                int tarc0;
1556
1557                tarc0 = E1000_READ_REG(&sc->hw, TARC0);
1558                tarc0 |= SPEED_MODE_BIT;
1559                E1000_WRITE_REG(&sc->hw, TARC0, tarc0);
1560            }
1561            sc->link_active = 1;
1562            sc->smartspeed = 0;
1563            ifp->if_baudrate = IF_Mbps(sc->link_speed);
1564        }
1565        link_state = (sc->link_duplex == FULL_DUPLEX) ?
1566            LINK_STATE_FULL_DUPLEX : LINK_STATE_HALF_DUPLEX;
1567    } else {
1568        if (sc->link_active == 1) {
1569            ifp->if_baudrate = sc->link_speed = 0;
1570            sc->link_duplex = 0;
1571            sc->link_active = 0;
1572        }
1573        link_state = LINK_STATE_DOWN;
1574    }
1575    if (ifp->if_link_state != link_state) {
1576        ifp->if_link_state = link_state;
1577        if_link_state_change(ifp);
1578    }
1579
1580    /* Disable TSO for 10/100 speeds to avoid some hardware issues */
1581    switch (sc->link_speed) {
1582    case SPEED_10:
1583    case SPEED_100:
1584        if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
1585            ifp->if_capabilities &= ~IFCAP_TSOv4;
1586            ifp->if_capabilities &= ~IFCAP_TSOv6;
1587        }
1588        break;
1589    case SPEED_1000:
1590        if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210)
1591            ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
1592        break;
1593    }
1594}
1595
1596/*********************************************************************
1597 *
1598 *  This routine disables all traffic on the adapter by issuing a
1599 *  global reset on the MAC and deallocates TX/RX buffers.
1600 *
1601 **********************************************************************/
1602
1603void
1604em_stop(void *arg, int softonly)
1605{
1606    struct em_softc *sc = arg;
1607    struct em_queue *que = sc->queues; /* Use only first queue. */
1608    struct ifnet   *ifp = &sc->sc_ac.ac_if;
1609
1610    /* Tell the stack that the interface is no longer active */
1611    ifp->if_flags &= ~IFF_RUNNING;
1612
1613    INIT_DEBUGOUT("em_stop: begin");
1614
1615    timeout_del(&que->rx_refill);
1616    timeout_del(&sc->timer_handle);
1617    timeout_del(&sc->tx_fifo_timer_handle);
1618
1619    if (!softonly)
1620        em_disable_intr(sc);
1621    if (sc->hw.mac_type >= em_pch_spt)
1622        em_flush_desc_rings(sc);
1623    if (!softonly)
1624        em_reset_hw(&sc->hw);
1625
1626    intr_barrier(sc->sc_intrhand);
1627    ifq_barrier(&ifp->if_snd);
1628
1629    KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
1630
1631    ifq_clr_oactive(&ifp->if_snd);
1632    ifp->if_timer = 0;
1633
1634    em_free_transmit_structures(sc);
1635    em_free_receive_structures(sc);
1636}
1637
1638/*********************************************************************
1639 *
1640 *  Determine hardware revision.
1641 *
1642 **********************************************************************/
1643void
1644em_identify_hardware(struct em_softc *sc)
1645{
1646    u_int32_t reg;
1647    struct pci_attach_args *pa = &sc->osdep.em_pa;
1648
1649    /* Make sure our PCI config space has the necessary stuff set */
1650    sc->hw.pci_cmd_word = pci_conf_read(pa->pa_pc, pa->pa_tag,
1651                        PCI_COMMAND_STATUS_REG);
1652
1653    /* Save off the information about this board */
1654    sc->hw.vendor_id = PCI_VENDOR(pa->pa_id);
1655    sc->hw.device_id = PCI_PRODUCT(pa->pa_id);
1656
1657    reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_CLASS_REG);
1658    sc->hw.revision_id = PCI_REVISION(reg);
1659
1660    reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
1661    sc->hw.subsystem_vendor_id = PCI_VENDOR(reg);
1662    sc->hw.subsystem_id = PCI_PRODUCT(reg);
1663
1664    /* Identify the MAC */
1665    if (em_set_mac_type(&sc->hw))
1666        printf("%s: Unknown MAC Type\n", DEVNAME(sc));
1667
1668    if (sc->hw.mac_type == em_pchlan)
1669        sc->hw.revision_id = PCI_PRODUCT(pa->pa_id) & 0x0f;
1670
1671    if (sc->hw.mac_type == em_82541 ||
1672        sc->hw.mac_type == em_82541_rev_2 ||
1673        sc->hw.mac_type == em_82547 ||
1674        sc->hw.mac_type == em_82547_rev_2)
1675        sc->hw.phy_init_script = TRUE;
1676}
1677
1678void
1679em_legacy_irq_quirk_spt(struct em_softc *sc)
1680{
1681    uint32_t    reg;
1682
1683    /* Legacy interrupt: SPT needs a quirk. */
1684    if (sc->hw.mac_type != em_pch_spt && sc->hw.mac_type != em_pch_cnp &&
1685        sc->hw.mac_type != em_pch_tgp && sc->hw.mac_type != em_pch_adp)
1686        return;
1687    if (sc->legacy_irq == 0)
1688        return;
1689
1690    reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM7);
1691    reg |= E1000_FEXTNVM7_SIDE_CLK_UNGATE;
1692    EM_WRITE_REG(&sc->hw, E1000_FEXTNVM7, reg);
1693
1694    reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM9);
1695    reg |= E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS |
1696        E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS;
1697    EM_WRITE_REG(&sc->hw, E1000_FEXTNVM9, reg);
1698}
1699
1700int
1701em_allocate_pci_resources(struct em_softc *sc)
1702{
1703    int     val, rid;
1704    struct pci_attach_args *pa = &sc->osdep.em_pa;
1705    struct em_queue        *que = NULL;
1706
1707    val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_MMBA);
1708    if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1709        printf(": mmba is not mem space\n");
1710        return (ENXIO);
1711    }
1712    if (pci_mapreg_map(pa, EM_MMBA, PCI_MAPREG_MEM_TYPE(val), 0,
1713        &sc->osdep.mem_bus_space_tag, &sc->osdep.mem_bus_space_handle,
1714        &sc->osdep.em_membase, &sc->osdep.em_memsize, 0)) {
1715        printf(": cannot find mem space\n");
1716        return (ENXIO);
1717    }
1718
1719    switch (sc->hw.mac_type) {
1720    case em_82544:
1721    case em_82540:
1722    case em_82545:
1723    case em_82546:
1724    case em_82541:
1725    case em_82541_rev_2:
1726        /* Figure out where our I/O BAR is ? */
1727        for (rid = PCI_MAPREG_START; rid < PCI_MAPREG_END;) {
1728            val = pci_conf_read(pa->pa_pc, pa->pa_tag, rid);
1729            if (PCI_MAPREG_TYPE(val) == PCI_MAPREG_TYPE_IO) {
1730                sc->io_rid = rid;
1731                break;
1732            }
1733            rid += 4;
1734            if (PCI_MAPREG_MEM_TYPE(val) ==
1735                PCI_MAPREG_MEM_TYPE_64BIT)
1736                rid += 4;   /* skip high bits, too */
1737        }
1738
1739        if (pci_mapreg_map(pa, rid, PCI_MAPREG_TYPE_IO, 0,
1740            &sc->osdep.io_bus_space_tag, &sc->osdep.io_bus_space_handle,
1741            &sc->osdep.em_iobase, &sc->osdep.em_iosize, 0)) {
1742            printf(": cannot find i/o space\n");
1743            return (ENXIO);
1744        }
1745
1746        sc->hw.io_base = 0;
1747        break;
1748    default:
1749        break;
1750    }
1751
1752    sc->osdep.em_flashoffset = 0;
1753    /* for ICH8 and family we need to find the flash memory */
1754    if (sc->hw.mac_type >= em_pch_spt) {
1755        sc->osdep.flash_bus_space_tag = sc->osdep.mem_bus_space_tag;
1756        sc->osdep.flash_bus_space_handle = sc->osdep.mem_bus_space_handle;
1757        sc->osdep.em_flashbase = 0;
1758        sc->osdep.em_flashsize = 0;
1759        sc->osdep.em_flashoffset = 0xe000;
1760    } else if (IS_ICH8(sc->hw.mac_type)) {
1761        val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_FLASH);
1762        if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1763            printf(": flash is not mem space\n");
1764            return (ENXIO);
1765        }
1766
1767        if (pci_mapreg_map(pa, EM_FLASH, PCI_MAPREG_MEM_TYPE(val), 0,
1768            &sc->osdep.flash_bus_space_tag, &sc->osdep.flash_bus_space_handle,
1769            &sc->osdep.em_flashbase, &sc->osdep.em_flashsize, 0)) {
1770            printf(": cannot find mem space\n");
1771            return (ENXIO);
1772        }
1773        }
1774
1775    sc->osdep.dev = (struct device *)sc;
1776    sc->hw.back = &sc->osdep;
1777
1778    /* Only one queue for the moment. */
1779    que = malloc(sizeof(struct em_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
1780    if (que == NULL) {
1781        printf(": unable to allocate queue memory\n");
1782        return (ENOMEM);
1783    }
1784    que->me = 0;
1785    que->sc = sc;
1786    timeout_set(&que->rx_refill, em_rxrefill, que);
1787
1788    sc->queues = que;
1789    sc->num_queues = 1;
1790    sc->msix = 0;
1791    sc->legacy_irq = 0;
1792    if (em_allocate_msix(sc) && em_allocate_legacy(sc))
1793        return (ENXIO);
1794
1795    /*
1796     * the ICP_xxxx device has multiple, duplicate register sets for
1797     * use when it is being used as a network processor. Disable those
1798     * registers here, as they are not necessary in this context and
1799     * can confuse the system
1800     */
1801    if(sc->hw.mac_type == em_icp_xxxx) {
1802        int offset;
1803        pcireg_t val;
1804
1805        if (!pci_get_capability(sc->osdep.em_pa.pa_pc,
1806            sc->osdep.em_pa.pa_tag, PCI_CAP_ID_ST, &offset, &val)) {
1807            return (0);
1808        }
1809        offset += PCI_ST_SMIA_OFFSET;
1810        pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
1811            offset, 0x06);
1812        E1000_WRITE_REG(&sc->hw, IMC1, ~0x0);
1813        E1000_WRITE_REG(&sc->hw, IMC2, ~0x0);
1814    }
1815    return (0);
1816}
1817
1818void
1819em_free_pci_resources(struct em_softc *sc)
1820{
1821    struct pci_attach_args *pa = &sc->osdep.em_pa;
1822    pci_chipset_tag_t   pc = pa->pa_pc;
1823    struct em_queue        *que = NULL;
1824    if (sc->sc_intrhand)
1825        pci_intr_disestablish(pc, sc->sc_intrhand);
1826    sc->sc_intrhand = 0;
1827
1828    if (sc->osdep.em_flashbase)
1829        bus_space_unmap(sc->osdep.flash_bus_space_tag, sc->osdep.flash_bus_space_handle,
1830                sc->osdep.em_flashsize);
1831    sc->osdep.em_flashbase = 0;
1832
1833    if (sc->osdep.em_iobase)
1834        bus_space_unmap(sc->osdep.io_bus_space_tag, sc->osdep.io_bus_space_handle,
1835                sc->osdep.em_iosize);
1836    sc->osdep.em_iobase = 0;
1837
1838    if (sc->osdep.em_membase)
1839        bus_space_unmap(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
1840                sc->osdep.em_memsize);
1841    sc->osdep.em_membase = 0;
1842
1843    FOREACH_QUEUE(sc, que) {
1844        if (que->rx.sc_rx_desc_ring != NULL) {
1845            que->rx.sc_rx_desc_ring = NULL;
1846            em_dma_free(sc, &que->rx.sc_rx_dma);
1847        }
1848        if (que->tx.sc_tx_desc_ring != NULL) {
1849            que->tx.sc_tx_desc_ring = NULL;
1850            em_dma_free(sc, &que->tx.sc_tx_dma);
1851        }
1852        if (que->tag)
1853            pci_intr_disestablish(pc, que->tag);
1854        que->tag = NULL;
1855        que->eims = 0;
1856        que->me = 0;
1857        que->sc = NULL;
1858    }
1859    sc->legacy_irq = 0;
1860    sc->msix_linkvec = 0;
1861    sc->msix_queuesmask = 0;
1862    if (sc->queues)
1863        free(sc->queues, M_DEVBUF,
1864            sc->num_queues * sizeof(struct em_queue));
1865    sc->num_queues = 0;
1866    sc->queues = NULL;
1867}
1868
1869/*********************************************************************
1870 *
1871 *  Initialize the hardware to a configuration as specified by the
1872 *  em_softc structure. The controller is reset, the EEPROM is
1873 *  verified, the MAC address is set, then the shared initialization
1874 *  routines are called.
1875 *
1876 **********************************************************************/
1877int
1878em_hardware_init(struct em_softc *sc)
1879{
1880    uint32_t ret_val;
1881    u_int16_t rx_buffer_size;
1882
1883    INIT_DEBUGOUT("em_hardware_init: begin");
1884    if (sc->hw.mac_type >= em_pch_spt)
1885        em_flush_desc_rings(sc);
1886    /* Issue a global reset */
1887    em_reset_hw(&sc->hw);
1888
1889    /* When hardware is reset, fifo_head is also reset */
1890    sc->tx_fifo_head = 0;
1891
1892    /* Make sure we have a good EEPROM before we read from it */
1893    if (em_get_flash_presence_i210(&sc->hw) &&
1894        em_validate_eeprom_checksum(&sc->hw) < 0) {
1895        /*
1896         * Some PCIe parts fail the first check due to
1897         * the link being in sleep state, call it again,
1898         * if it fails a second time its a real issue.
1899         */
1900        if (em_validate_eeprom_checksum(&sc->hw) < 0) {
1901            printf("%s: The EEPROM Checksum Is Not Valid\n",
1902                   DEVNAME(sc));
1903            return (EIO);
1904        }
1905    }
1906
1907    if (em_get_flash_presence_i210(&sc->hw) &&
1908        em_read_part_num(&sc->hw, &(sc->part_num)) < 0) {
1909        printf("%s: EEPROM read error while reading part number\n",
1910               DEVNAME(sc));
1911        return (EIO);
1912    }
1913
1914    /* Set up smart power down as default off on newer adapters */
1915    if (!em_smart_pwr_down &&
1916         (sc->hw.mac_type == em_82571 ||
1917          sc->hw.mac_type == em_82572 ||
1918          sc->hw.mac_type == em_82575 ||
1919          sc->hw.mac_type == em_82576 ||
1920          sc->hw.mac_type == em_82580 ||
1921          sc->hw.mac_type == em_i210 ||
1922          sc->hw.mac_type == em_i350 )) {
1923        uint16_t phy_tmp = 0;
1924
1925        /* Speed up time to link by disabling smart power down */
1926        em_read_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
1927        phy_tmp &= ~IGP02E1000_PM_SPD;
1928        em_write_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
1929    }
1930
1931    em_legacy_irq_quirk_spt(sc);
1932
1933    /*
1934     * These parameters control the automatic generation (Tx) and
1935     * response (Rx) to Ethernet PAUSE frames.
1936     * - High water mark should allow for at least two frames to be
1937     *   received after sending an XOFF.
1938     * - Low water mark works best when it is very near the high water mark.
1939     *   This allows the receiver to restart by sending XON when it has
1940     *   drained a bit.  Here we use an arbitrary value of 1500 which will
1941     *   restart after one full frame is pulled from the buffer.  There
1942     *   could be several smaller frames in the buffer and if so they will
1943     *   not trigger the XON until their total number reduces the buffer
1944     *   by 1500.
1945     * - The pause time is fairly large at 1000 x 512ns = 512 usec.
1946     */
1947    rx_buffer_size = ((E1000_READ_REG(&sc->hw, PBA) & 0xffff) << 10 );
1948
1949    sc->hw.fc_high_water = rx_buffer_size -
1950        EM_ROUNDUP(sc->hw.max_frame_size, 1024);
1951    sc->hw.fc_low_water = sc->hw.fc_high_water - 1500;
1952    if (sc->hw.mac_type == em_80003es2lan)
1953        sc->hw.fc_pause_time = 0xFFFF;
1954    else
1955        sc->hw.fc_pause_time = 1000;
1956    sc->hw.fc_send_xon = TRUE;
1957    sc->hw.fc = E1000_FC_FULL;
1958
1959    em_disable_aspm(sc);
1960
1961    if ((ret_val = em_init_hw(sc)) != 0) {
1962        if (ret_val == E1000_DEFER_INIT) {
1963            INIT_DEBUGOUT("\nHardware Initialization Deferred ");
1964            return (EAGAIN);
1965        }
1966        printf("\n%s: Hardware Initialization Failed: %d\n",
1967               DEVNAME(sc), ret_val);
1968        return (EIO);
1969    }
1970
1971    em_check_for_link(&sc->hw);
1972
1973    return (0);
1974}
1975
1976/*********************************************************************
1977 *
1978 *  Setup networking device structure and register an interface.
1979 *
1980 **********************************************************************/
1981void
1982em_setup_interface(struct em_softc *sc)
1983{
1984    struct ifnet   *ifp;
1985    uint64_t fiber_type = IFM_1000_SX;
1986
1987    INIT_DEBUGOUT("em_setup_interface: begin");
1988
1989    ifp = &sc->sc_ac.ac_if;
1990    strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
1991    ifp->if_softc = sc;
1992    ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1993    ifp->if_xflags = IFXF_MPSAFE;
1994    ifp->if_ioctl = em_ioctl;
1995    ifp->if_qstart = em_start;
1996    ifp->if_watchdog = em_watchdog;
1997    ifp->if_hardmtu =
1998        sc->hw.max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN;
1999    ifq_init_maxlen(&ifp->if_snd, sc->sc_tx_slots - 1);
2000
2001    ifp->if_capabilities = IFCAP_VLAN_MTU;
2002
2003#if NVLAN > 0
2004    ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
2005#endif
2006
2007    if (sc->hw.mac_type >= em_82543) {
2008        ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
2009    }
2010    if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
2011        ifp->if_capabilities |= IFCAP_CSUM_IPv4;
2012        ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
2013        ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
2014    }
2015
2016    /*
2017     * Specify the media types supported by this adapter and register
2018     * callbacks to update media and link information
2019     */
2020    ifmedia_init(&sc->media, IFM_IMASK, em_media_change,
2021             em_media_status);
2022    if (sc->hw.media_type == em_media_type_fiber ||
2023        sc->hw.media_type == em_media_type_internal_serdes) {
2024        if (sc->hw.mac_type == em_82545)
2025            fiber_type = IFM_1000_LX;
2026        ifmedia_add(&sc->media, IFM_ETHER | fiber_type | IFM_FDX,
2027                0, NULL);
2028        ifmedia_add(&sc->media, IFM_ETHER | fiber_type,
2029                0, NULL);
2030    } else {
2031        ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
2032        ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2033                0, NULL);
2034        ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX,
2035                0, NULL);
2036        ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2037                0, NULL);
2038        if (sc->hw.phy_type != em_phy_ife) {
2039            ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
2040                    0, NULL);
2041            ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
2042        }
2043    }
2044    ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2045    ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
2046
2047    if_attach(ifp);
2048    ether_ifattach(ifp);
2049    em_enable_intr(sc);
2050}
2051
2052int
2053em_detach(struct device *self, int flags)
2054{
2055    struct em_softc *sc = (struct em_softc *)self;
2056    struct ifnet *ifp = &sc->sc_ac.ac_if;
2057    struct pci_attach_args *pa = &sc->osdep.em_pa;
2058    pci_chipset_tag_t   pc = pa->pa_pc;
2059
2060    if (sc->sc_intrhand)
2061        pci_intr_disestablish(pc, sc->sc_intrhand);
2062    sc->sc_intrhand = 0;
2063
2064    em_stop(sc, 1);
2065
2066    em_free_pci_resources(sc);
2067
2068    ether_ifdetach(ifp);
2069    if_detach(ifp);
2070
2071    return (0);
2072}
2073
2074int
2075em_activate(struct device *self, int act)
2076{
2077    struct em_softc *sc = (struct em_softc *)self;
2078    struct ifnet *ifp = &sc->sc_ac.ac_if;
2079
2080    switch (act) {
2081    case DVACT_SUSPEND:
2082        if (ifp->if_flags & IFF_RUNNING)
2083            em_stop(sc, 0);
2084        break;
2085    case DVACT_RESUME:
2086        if (ifp->if_flags & IFF_UP)
2087            em_init(sc);
2088        break;
2089    }
2090    return (0);
2091}
2092
2093/*********************************************************************
2094 *
2095 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2096 *
2097 **********************************************************************/
2098void
2099em_smartspeed(struct em_softc *sc)
2100{
2101    uint16_t phy_tmp;
2102
2103    if (sc->link_active || (sc->hw.phy_type != em_phy_igp) ||
2104        !sc->hw.autoneg || !(sc->hw.autoneg_advertised & ADVERTISE_1000_FULL))
2105        return;
2106
2107    if (sc->smartspeed == 0) {
2108        /* If Master/Slave config fault is asserted twice,
2109         * we assume back-to-back */
2110        em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2111        if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2112            return;
2113        em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2114        if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2115            em_read_phy_reg(&sc->hw, PHY_1000T_CTRL,
2116                    &phy_tmp);
2117            if (phy_tmp & CR_1000T_MS_ENABLE) {
2118                phy_tmp &= ~CR_1000T_MS_ENABLE;
2119                em_write_phy_reg(&sc->hw,
2120                            PHY_1000T_CTRL, phy_tmp);
2121                sc->smartspeed++;
2122                if (sc->hw.autoneg &&
2123                    !em_phy_setup_autoneg(&sc->hw) &&
2124                    !em_read_phy_reg(&sc->hw, PHY_CTRL,
2125                               &phy_tmp)) {
2126                    phy_tmp |= (MII_CR_AUTO_NEG_EN |
2127                            MII_CR_RESTART_AUTO_NEG);
2128                    em_write_phy_reg(&sc->hw,
2129                             PHY_CTRL, phy_tmp);
2130                }
2131            }
2132        }
2133        return;
2134    } else if (sc->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2135        /* If still no link, perhaps using 2/3 pair cable */
2136        em_read_phy_reg(&sc->hw, PHY_1000T_CTRL, &phy_tmp);
2137        phy_tmp |= CR_1000T_MS_ENABLE;
2138        em_write_phy_reg(&sc->hw, PHY_1000T_CTRL, phy_tmp);
2139        if (sc->hw.autoneg &&
2140            !em_phy_setup_autoneg(&sc->hw) &&
2141            !em_read_phy_reg(&sc->hw, PHY_CTRL, &phy_tmp)) {
2142            phy_tmp |= (MII_CR_AUTO_NEG_EN |
2143                    MII_CR_RESTART_AUTO_NEG);
2144            em_write_phy_reg(&sc->hw, PHY_CTRL, phy_tmp);
2145        }
2146    }
2147    /* Restart process after EM_SMARTSPEED_MAX iterations */
2148    if (sc->smartspeed++ == EM_SMARTSPEED_MAX)
2149        sc->smartspeed = 0;
2150}
2151
2152/*
2153 * Manage DMA'able memory.
2154 */
2155int
2156em_dma_malloc(struct em_softc *sc, bus_size_t size, struct em_dma_alloc *dma)
2157{
2158    int r;
2159
2160    r = bus_dmamap_create(sc->sc_dmat, size, 1,
2161        size, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map);
2162    if (r != 0)
2163        return (r);
2164
2165    r = bus_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE, 0, &dma->dma_seg,
2166        1, &dma->dma_nseg, BUS_DMA_WAITOK | BUS_DMA_ZERO);
2167    if (r != 0)
2168        goto destroy;
2169
2170    r = bus_dmamem_map(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg, size,
2171        &dma->dma_vaddr, BUS_DMA_WAITOK | BUS_DMA_COHERENT);
2172    if (r != 0)
2173        goto free;
2174
2175    r = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_vaddr, size,
2176        NULL, BUS_DMA_WAITOK);
2177    if (r != 0)
2178        goto unmap;
2179
2180    dma->dma_size = size;
2181    return (0);
2182
2183unmap:
2184    bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, size);
2185free:
2186    bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2187destroy:
2188    bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2189
2190    return (r);
2191}
2192
2193void
2194em_dma_free(struct em_softc *sc, struct em_dma_alloc *dma)
2195{
2196    bus_dmamap_unload(sc->sc_dmat, dma->dma_map);
2197    bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, dma->dma_size);
2198    bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2199    bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2200}
2201
2202/*********************************************************************
2203 *
2204 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2205 *  the information needed to transmit a packet on the wire.
2206 *
2207 **********************************************************************/
2208int
2209em_allocate_transmit_structures(struct em_softc *sc)
2210{
2211    struct em_queue *que;
2212
2213    FOREACH_QUEUE(sc, que) {
2214        bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2215            0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2216            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2217
2218        que->tx.sc_tx_pkts_ring = mallocarray(sc->sc_tx_slots,
2219            sizeof(*que->tx.sc_tx_pkts_ring), M_DEVBUF, M_NOWAIT | M_ZERO);
2220        if (que->tx.sc_tx_pkts_ring == NULL) {
2221            printf("%s: Unable to allocate tx_buffer memory\n",
2222                DEVNAME(sc));
2223            return (ENOMEM);
2224        }
2225    }
2226
2227    return (0);
2228}
2229
2230/*********************************************************************
2231 *
2232 *  Allocate and initialize transmit structures.
2233 *
2234 **********************************************************************/
2235int
2236em_setup_transmit_structures(struct em_softc *sc)
2237{
2238    struct em_queue *que;
2239    struct em_packet *pkt;
2240    int error, i;
2241
2242    if ((error = em_allocate_transmit_structures(sc)) != 0)
2243        goto fail;
2244
2245    FOREACH_QUEUE(sc, que) {
2246        bzero((void *) que->tx.sc_tx_desc_ring,
2247            (sizeof(struct em_tx_desc)) * sc->sc_tx_slots);
2248
2249        for (i = 0; i < sc->sc_tx_slots; i++) {
2250            pkt = &que->tx.sc_tx_pkts_ring[i];
2251            error = bus_dmamap_create(sc->sc_dmat, EM_TSO_SIZE,
2252                EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
2253                EM_TSO_SEG_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2254            if (error != 0) {
2255                printf("%s: Unable to create TX DMA map\n",
2256                    DEVNAME(sc));
2257                goto fail;
2258            }
2259        }
2260
2261        que->tx.sc_tx_desc_head = 0;
2262        que->tx.sc_tx_desc_tail = 0;
2263
2264        /* Set checksum context */
2265        que->tx.active_checksum_context = OFFLOAD_NONE;
2266    }
2267
2268    return (0);
2269
2270fail:
2271    em_free_transmit_structures(sc);
2272    return (error);
2273}
2274
2275/*********************************************************************
2276 *
2277 *  Enable transmit unit.
2278 *
2279 **********************************************************************/
2280void
2281em_initialize_transmit_unit(struct em_softc *sc)
2282{
2283    u_int32_t   reg_tctl, reg_tipg = 0;
2284    u_int64_t   bus_addr;
2285    struct em_queue *que;
2286
2287    INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2288
2289    FOREACH_QUEUE(sc, que) {
2290        /* Setup the Base and Length of the Tx Descriptor Ring */
2291        bus_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
2292        E1000_WRITE_REG(&sc->hw, TDLEN(que->me),
2293            sc->sc_tx_slots *
2294            sizeof(struct em_tx_desc));
2295        E1000_WRITE_REG(&sc->hw, TDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2296        E1000_WRITE_REG(&sc->hw, TDBAL(que->me), (u_int32_t)bus_addr);
2297
2298        /* Setup the HW Tx Head and Tail descriptor pointers */
2299        E1000_WRITE_REG(&sc->hw, TDT(que->me), 0);
2300        E1000_WRITE_REG(&sc->hw, TDH(que->me), 0);
2301
2302        HW_DEBUGOUT2("Base = %x, Length = %x\n",
2303            E1000_READ_REG(&sc->hw, TDBAL(que->me)),
2304            E1000_READ_REG(&sc->hw, TDLEN(que->me)));
2305
2306        /* Set the default values for the Tx Inter Packet Gap timer */
2307        switch (sc->hw.mac_type) {
2308        case em_82542_rev2_0:
2309        case em_82542_rev2_1:
2310            reg_tipg = DEFAULT_82542_TIPG_IPGT;
2311            reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2312            reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2313            break;
2314        case em_80003es2lan:
2315            reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2316            reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2317            break;
2318        default:
2319            if (sc->hw.media_type == em_media_type_fiber ||
2320                sc->hw.media_type == em_media_type_internal_serdes)
2321                reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2322            else
2323                reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2324            reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2325            reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2326        }
2327
2328
2329        E1000_WRITE_REG(&sc->hw, TIPG, reg_tipg);
2330        E1000_WRITE_REG(&sc->hw, TIDV, sc->tx_int_delay);
2331        if (sc->hw.mac_type >= em_82540)
2332            E1000_WRITE_REG(&sc->hw, TADV, sc->tx_abs_int_delay);
2333
2334        /* Setup Transmit Descriptor Base Settings */
2335        que->tx.sc_txd_cmd = E1000_TXD_CMD_IFCS;
2336
2337        if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2338            sc->hw.mac_type == em_82576 ||
2339            sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2340            /* 82575/6 need to enable the TX queue and lack the IDE bit */
2341            reg_tctl = E1000_READ_REG(&sc->hw, TXDCTL(que->me));
2342            reg_tctl |= E1000_TXDCTL_QUEUE_ENABLE;
2343            E1000_WRITE_REG(&sc->hw, TXDCTL(que->me), reg_tctl);
2344        } else if (sc->tx_int_delay > 0)
2345            que->tx.sc_txd_cmd |= E1000_TXD_CMD_IDE;
2346    }
2347
2348    /* Program the Transmit Control Register */
2349    reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2350           (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2351    if (sc->hw.mac_type >= em_82571)
2352        reg_tctl |= E1000_TCTL_MULR;
2353    if (sc->link_duplex == FULL_DUPLEX)
2354        reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2355    else
2356        reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2357    /* This write will effectively turn on the transmit unit */
2358    E1000_WRITE_REG(&sc->hw, TCTL, reg_tctl);
2359
2360    /* SPT Si errata workaround to avoid data corruption */
2361
2362    if (sc->hw.mac_type == em_pch_spt) {
2363        uint32_t    reg_val;
2364
2365        reg_val = EM_READ_REG(&sc->hw, E1000_IOSFPC);
2366        reg_val |= E1000_RCTL_RDMTS_HEX;
2367        EM_WRITE_REG(&sc->hw, E1000_IOSFPC, reg_val);
2368
2369        reg_val = E1000_READ_REG(&sc->hw, TARC0);
2370        /* i218-i219 Specification Update 1.5.4.5 */
2371        reg_val &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
2372        reg_val |= E1000_TARC0_CB_MULTIQ_2_REQ;
2373        E1000_WRITE_REG(&sc->hw, TARC0, reg_val);
2374    }
2375}
2376
2377/*********************************************************************
2378 *
2379 *  Free all transmit related data structures.
2380 *
2381 **********************************************************************/
2382void
2383em_free_transmit_structures(struct em_softc *sc)
2384{
2385    struct em_queue *que;
2386    struct em_packet *pkt;
2387    int i;
2388
2389    INIT_DEBUGOUT("free_transmit_structures: begin");
2390
2391    FOREACH_QUEUE(sc, que) {
2392        if (que->tx.sc_tx_pkts_ring != NULL) {
2393            for (i = 0; i < sc->sc_tx_slots; i++) {
2394                pkt = &que->tx.sc_tx_pkts_ring[i];
2395
2396                if (pkt->pkt_m != NULL) {
2397                    bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2398                        0, pkt->pkt_map->dm_mapsize,
2399                        BUS_DMASYNC_POSTWRITE);
2400                    bus_dmamap_unload(sc->sc_dmat,
2401                        pkt->pkt_map);
2402
2403                    m_freem(pkt->pkt_m);
2404                    pkt->pkt_m = NULL;
2405                }
2406
2407                if (pkt->pkt_map != NULL) {
2408                    bus_dmamap_destroy(sc->sc_dmat,
2409                        pkt->pkt_map);
2410                    pkt->pkt_map = NULL;
2411                }
2412            }
2413
2414            free(que->tx.sc_tx_pkts_ring, M_DEVBUF,
2415                sc->sc_tx_slots * sizeof(*que->tx.sc_tx_pkts_ring));
2416            que->tx.sc_tx_pkts_ring = NULL;
2417        }
2418
2419        bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2420            0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2421            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2422    }
2423}
2424
2425u_int
2426em_tso_setup(struct em_queue *que, struct mbuf *mp, u_int head,
2427    u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
2428{
2429    struct ether_extracted ext;
2430    struct e1000_adv_tx_context_desc *TD;
2431    uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
2432
2433    *olinfo_status = 0;
2434    *cmd_type_len = 0;
2435    TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
2436
2437#if NVLAN > 0
2438    if (ISSET(mp->m_flags, M_VLANTAG)) {
2439        uint32_t vtag = mp->m_pkthdr.ether_vtag;
2440        vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT;
2441        *cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
2442    }
2443#endif
2444
2445    ether_extract_headers(mp, &ext);
2446    if (ext.tcp == NULL || mp->m_pkthdr.ph_mss == 0)
2447        goto out;
2448
2449    vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
2450
2451    if (ext.ip4) {
2452        type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
2453        *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2454#ifdef INET6
2455    } else if (ext.ip6) {
2456        type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
2457#endif
2458    } else {
2459        goto out;
2460    }
2461
2462    *cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
2463    *cmd_type_len |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_TSE;
2464    *olinfo_status |= ext.paylen << E1000_ADVTXD_PAYLEN_SHIFT;
2465    vlan_macip_lens |= ext.iphlen;
2466    type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
2467
2468    type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
2469    *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2470
2471    mss_l4len_idx |= mp->m_pkthdr.ph_mss << E1000_ADVTXD_MSS_SHIFT;
2472    mss_l4len_idx |= ext.tcphlen << E1000_ADVTXD_L4LEN_SHIFT;
2473    /* 82575 needs the queue index added */
2474    if (que->sc->hw.mac_type == em_82575)
2475        mss_l4len_idx |= (que->me & 0xff) << 4;
2476
2477    htolem32(&TD->vlan_macip_lens, vlan_macip_lens);
2478    htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl);
2479    htolem32(&TD->u.seqnum_seed, 0);
2480    htolem32(&TD->mss_l4len_idx, mss_l4len_idx);
2481
2482    tcpstat_add(tcps_outpkttso, (ext.paylen + mp->m_pkthdr.ph_mss - 1) /
2483        mp->m_pkthdr.ph_mss);
2484
2485    return 1;
2486
2487out:
2488    tcpstat_inc(tcps_outbadtso);
2489    return 0;
2490}
2491
2492u_int
2493em_tx_ctx_setup(struct em_queue *que, struct mbuf *mp, u_int head,
2494    u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
2495{
2496    struct ether_extracted ext;
2497    struct e1000_adv_tx_context_desc *TD;
2498    uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
2499    int off = 0;
2500
2501    *olinfo_status = 0;
2502    *cmd_type_len = 0;
2503    TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
2504
2505#if NVLAN > 0
2506    if (ISSET(mp->m_flags, M_VLANTAG)) {
2507        uint32_t vtag = mp->m_pkthdr.ether_vtag;
2508        vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT;
2509        *cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
2510        off = 1;
2511    }
2512#endif
2513
2514    ether_extract_headers(mp, &ext);
2515
2516    vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
2517
2518    if (ext.ip4) {
2519        type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
2520        if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
2521            *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2522            off = 1;
2523        }
2524#ifdef INET6
2525    } else if (ext.ip6) {
2526        type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
2527#endif
2528    }
2529
2530    *cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
2531    *cmd_type_len |= E1000_ADVTXD_DCMD_DEXT;
2532    *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
2533    vlan_macip_lens |= ext.iphlen;
2534    type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
2535
2536    if (ext.tcp) {
2537        type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
2538        if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
2539            *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2540            off = 1;
2541        }
2542    } else if (ext.udp) {
2543        type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
2544        if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
2545            *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2546            off = 1;
2547        }
2548    }
2549
2550    if (!off)
2551        return (0);
2552
2553    /* 82575 needs the queue index added */
2554    if (que->sc->hw.mac_type == em_82575)
2555        mss_l4len_idx |= (que->me & 0xff) << 4;
2556
2557    htolem32(&TD->vlan_macip_lens, vlan_macip_lens);
2558    htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl);
2559    htolem32(&TD->u.seqnum_seed, 0);
2560    htolem32(&TD->mss_l4len_idx, mss_l4len_idx);
2561
2562    return (1);
2563}
2564
2565/*********************************************************************
2566 *
2567 *  The offload context needs to be set when we transfer the first
2568 *  packet of a particular protocol (TCP/UDP). We change the
2569 *  context only if the protocol type changes.
2570 *
2571 **********************************************************************/
2572u_int
2573em_transmit_checksum_setup(struct em_queue *que, struct mbuf *mp, u_int head,
2574    u_int32_t *txd_upper, u_int32_t *txd_lower)
2575{
2576    struct em_context_desc *TXD;
2577
2578    if (mp->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
2579        *txd_upper = E1000_TXD_POPTS_TXSM << 8;
2580        *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2581        if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
2582            return (0);
2583        else
2584            que->tx.active_checksum_context = OFFLOAD_TCP_IP;
2585    } else if (mp->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
2586        *txd_upper = E1000_TXD_POPTS_TXSM << 8;
2587        *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2588        if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
2589            return (0);
2590        else
2591            que->tx.active_checksum_context = OFFLOAD_UDP_IP;
2592    } else {
2593        *txd_upper = 0;
2594        *txd_lower = 0;
2595        return (0);
2596    }
2597
2598    /* If we reach this point, the checksum offload context
2599     * needs to be reset.
2600     */
2601    TXD = (struct em_context_desc *)&que->tx.sc_tx_desc_ring[head];
2602
2603    TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2604    TXD->lower_setup.ip_fields.ipcso =
2605        ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2606    TXD->lower_setup.ip_fields.ipcse =
2607        htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2608
2609    TXD->upper_setup.tcp_fields.tucss =
2610        ETHER_HDR_LEN + sizeof(struct ip);
2611    TXD->upper_setup.tcp_fields.tucse = htole16(0);
2612
2613    if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) {
2614        TXD->upper_setup.tcp_fields.tucso =
2615            ETHER_HDR_LEN + sizeof(struct ip) +
2616            offsetof(struct tcphdr, th_sum);
2617    } else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) {
2618        TXD->upper_setup.tcp_fields.tucso =
2619            ETHER_HDR_LEN + sizeof(struct ip) +
2620            offsetof(struct udphdr, uh_sum);
2621    }
2622
2623    TXD->tcp_seg_setup.data = htole32(0);
2624    TXD->cmd_and_length = htole32(que->tx.sc_txd_cmd | E1000_TXD_CMD_DEXT);
2625
2626    return (1);
2627}
2628
2629/**********************************************************************
2630 *
2631 *  Examine each tx_buffer in the used queue. If the hardware is done
2632 *  processing the packet then free associated resources. The
2633 *  tx_buffer is put back on the free queue.
2634 *
2635 **********************************************************************/
2636void
2637em_txeof(struct em_queue *que)
2638{
2639    struct em_softc *sc = que->sc;
2640    struct ifnet *ifp = &sc->sc_ac.ac_if;
2641    struct em_packet *pkt;
2642    struct em_tx_desc *desc;
2643    u_int head, tail;
2644    u_int free = 0;
2645
2646    head = que->tx.sc_tx_desc_head;
2647    tail = que->tx.sc_tx_desc_tail;
2648
2649    if (head == tail)
2650        return;
2651
2652    bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2653        0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2654        BUS_DMASYNC_POSTREAD);
2655
2656    do {
2657        pkt = &que->tx.sc_tx_pkts_ring[tail];
2658        desc = &que->tx.sc_tx_desc_ring[pkt->pkt_eop];
2659
2660        if (!ISSET(desc->upper.fields.status, E1000_TXD_STAT_DD))
2661            break;
2662
2663        bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2664            0, pkt->pkt_map->dm_mapsize,
2665            BUS_DMASYNC_POSTWRITE);
2666        bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
2667
2668        KASSERT(pkt->pkt_m != NULL);
2669
2670        m_freem(pkt->pkt_m);
2671        pkt->pkt_m = NULL;
2672
2673        tail = pkt->pkt_eop;
2674
2675        if (++tail == sc->sc_tx_slots)
2676            tail = 0;
2677
2678        free++;
2679    } while (tail != head);
2680
2681    bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2682        0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2683        BUS_DMASYNC_PREREAD);
2684
2685    if (free == 0)
2686        return;
2687
2688    que->tx.sc_tx_desc_tail = tail;
2689
2690    if (ifq_is_oactive(&ifp->if_snd))
2691        ifq_restart(&ifp->if_snd);
2692    else if (tail == head)
2693        ifp->if_timer = 0;
2694}
2695
2696/*********************************************************************
2697 *
2698 *  Get a buffer from system mbuf buffer pool.
2699 *
2700 **********************************************************************/
2701int
2702em_get_buf(struct em_queue *que, int i)
2703{
2704    struct em_softc *sc = que->sc;
2705    struct mbuf    *m;
2706    struct em_packet *pkt;
2707    struct em_rx_desc *desc;
2708    int error;
2709
2710    pkt = &que->rx.sc_rx_pkts_ring[i];
2711    desc = &que->rx.sc_rx_desc_ring[i];
2712
2713    KASSERT(pkt->pkt_m == NULL);
2714
2715    m = MCLGETL(NULL, M_DONTWAIT, EM_MCLBYTES);
2716    if (m == NULL) {
2717        sc->mbuf_cluster_failed++;
2718        return (ENOBUFS);
2719    }
2720    m->m_len = m->m_pkthdr.len = EM_MCLBYTES;
2721    m_adj(m, ETHER_ALIGN);
2722
2723    error = bus_dmamap_load_mbuf(sc->sc_dmat, pkt->pkt_map,
2724        m, BUS_DMA_NOWAIT);
2725    if (error) {
2726        m_freem(m);
2727        return (error);
2728    }
2729
2730    bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2731        0, pkt->pkt_map->dm_mapsize,
2732        BUS_DMASYNC_PREREAD);
2733    pkt->pkt_m = m;
2734
2735    memset(desc, 0, sizeof(*desc));
2736    htolem64(&desc->buffer_addr, pkt->pkt_map->dm_segs[0].ds_addr);
2737
2738    return (0);
2739}
2740
2741/*********************************************************************
2742 *
2743 *  Allocate memory for rx_buffer structures. Since we use one
2744 *  rx_buffer per received packet, the maximum number of rx_buffer's
2745 *  that we'll need is equal to the number of receive descriptors
2746 *  that we've allocated.
2747 *
2748 **********************************************************************/
2749int
2750em_allocate_receive_structures(struct em_softc *sc)
2751{
2752    struct em_queue *que;
2753    struct em_packet *pkt;
2754    int i;
2755    int error;
2756
2757    FOREACH_QUEUE(sc, que) {
2758        que->rx.sc_rx_pkts_ring = mallocarray(sc->sc_rx_slots,
2759            sizeof(*que->rx.sc_rx_pkts_ring),
2760            M_DEVBUF, M_NOWAIT | M_ZERO);
2761        if (que->rx.sc_rx_pkts_ring == NULL) {
2762            printf("%s: Unable to allocate rx_buffer memory\n",
2763                DEVNAME(sc));
2764            return (ENOMEM);
2765        }
2766
2767        bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2768            0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2769            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2770
2771        for (i = 0; i < sc->sc_rx_slots; i++) {
2772            pkt = &que->rx.sc_rx_pkts_ring[i];
2773
2774            error = bus_dmamap_create(sc->sc_dmat, EM_MCLBYTES, 1,
2775                EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2776            if (error != 0) {
2777                printf("%s: em_allocate_receive_structures: "
2778                    "bus_dmamap_create failed; error %u\n",
2779                    DEVNAME(sc), error);
2780                goto fail;
2781            }
2782
2783            pkt->pkt_m = NULL;
2784        }
2785    }
2786
2787        return (0);
2788
2789fail:
2790    em_free_receive_structures(sc);
2791    return (error);
2792}
2793
2794/*********************************************************************
2795 *
2796 *  Allocate and initialize receive structures.
2797 *
2798 **********************************************************************/
2799int
2800em_setup_receive_structures(struct em_softc *sc)
2801{
2802    struct ifnet *ifp = &sc->sc_ac.ac_if;
2803    struct em_queue *que;
2804    u_int lwm;
2805
2806    if (em_allocate_receive_structures(sc))
2807        return (ENOMEM);
2808
2809    FOREACH_QUEUE(sc, que) {
2810        memset(que->rx.sc_rx_desc_ring, 0,
2811            sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2812
2813        /* Setup our descriptor pointers */
2814        que->rx.sc_rx_desc_tail = 0;
2815        que->rx.sc_rx_desc_head = sc->sc_rx_slots - 1;
2816
2817        lwm = max(4, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1));
2818        if_rxr_init(&que->rx.sc_rx_ring, lwm, sc->sc_rx_slots);
2819
2820        if (em_rxfill(que) == 0) {
2821            printf("%s: unable to fill any rx descriptors\n",
2822                DEVNAME(sc));
2823            return (ENOMEM);
2824        }
2825    }
2826
2827    return (0);
2828}
2829
2830/*********************************************************************
2831 *
2832 *  Enable receive unit.
2833 *
2834 **********************************************************************/
2835void
2836em_initialize_receive_unit(struct em_softc *sc)
2837{
2838    struct em_queue *que;
2839    u_int32_t   reg_rctl;
2840    u_int32_t   reg_rxcsum;
2841    u_int32_t   reg_srrctl;
2842    u_int64_t   bus_addr;
2843
2844    INIT_DEBUGOUT("em_initialize_receive_unit: begin");
2845
2846    /* Make sure receives are disabled while setting up the descriptor ring */
2847    E1000_WRITE_REG(&sc->hw, RCTL, 0);
2848
2849    /* Set the Receive Delay Timer Register */
2850    E1000_WRITE_REG(&sc->hw, RDTR,
2851            sc->rx_int_delay | E1000_RDT_FPDB);
2852
2853    if (sc->hw.mac_type >= em_82540) {
2854        if (sc->rx_int_delay)
2855            E1000_WRITE_REG(&sc->hw, RADV, sc->rx_abs_int_delay);
2856
2857        /* Set the interrupt throttling rate.  Value is calculated
2858         * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */
2859        E1000_WRITE_REG(&sc->hw, ITR, DEFAULT_ITR);
2860    }
2861
2862    /* Setup the Receive Control Register */
2863    reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2864        E1000_RCTL_RDMTS_HALF |
2865        (sc->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
2866
2867    if (sc->hw.tbi_compatibility_on == TRUE)
2868        reg_rctl |= E1000_RCTL_SBP;
2869
2870    /*
2871     * The i350 has a bug where it always strips the CRC whether
2872     * asked to or not.  So ask for stripped CRC here and
2873     * cope in rxeof
2874     */
2875    if (sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350)
2876        reg_rctl |= E1000_RCTL_SECRC;
2877
2878    switch (sc->sc_rx_buffer_len) {
2879    default:
2880    case EM_RXBUFFER_2048:
2881        reg_rctl |= E1000_RCTL_SZ_2048;
2882        break;
2883    case EM_RXBUFFER_4096:
2884        reg_rctl |= E1000_RCTL_SZ_4096|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2885        break;
2886    case EM_RXBUFFER_8192:
2887        reg_rctl |= E1000_RCTL_SZ_8192|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2888        break;
2889    case EM_RXBUFFER_16384:
2890        reg_rctl |= E1000_RCTL_SZ_16384|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2891        break;
2892    }
2893
2894    if (sc->hw.max_frame_size != ETHER_MAX_LEN)
2895        reg_rctl |= E1000_RCTL_LPE;
2896
2897    /* Enable 82543 Receive Checksum Offload for TCP and UDP */
2898    if (sc->hw.mac_type >= em_82543) {
2899        reg_rxcsum = E1000_READ_REG(&sc->hw, RXCSUM);
2900        reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
2901        E1000_WRITE_REG(&sc->hw, RXCSUM, reg_rxcsum);
2902    }
2903
2904    /*
2905     * XXX TEMPORARY WORKAROUND: on some systems with 82573
2906     * long latencies are observed, like Lenovo X60.
2907     */
2908    if (sc->hw.mac_type == em_82573)
2909        E1000_WRITE_REG(&sc->hw, RDTR, 0x20);
2910
2911    FOREACH_QUEUE(sc, que) {
2912        if (sc->num_queues > 1) {
2913            /*
2914             * Disable Drop Enable for every queue, default has
2915             * it enabled for queues > 0
2916             */
2917            reg_srrctl = E1000_READ_REG(&sc->hw, SRRCTL(que->me));
2918            reg_srrctl &= ~E1000_SRRCTL_DROP_EN;
2919            E1000_WRITE_REG(&sc->hw, SRRCTL(que->me), reg_srrctl);
2920        }
2921
2922        /* Setup the Base and Length of the Rx Descriptor Ring */
2923        bus_addr = que->rx.sc_rx_dma.dma_map->dm_segs[0].ds_addr;
2924        E1000_WRITE_REG(&sc->hw, RDLEN(que->me),
2925            sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2926        E1000_WRITE_REG(&sc->hw, RDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2927        E1000_WRITE_REG(&sc->hw, RDBAL(que->me), (u_int32_t)bus_addr);
2928
2929        if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2930            sc->hw.mac_type == em_82576 ||
2931            sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2932            /* 82575/6 need to enable the RX queue */
2933            uint32_t reg;
2934            reg = E1000_READ_REG(&sc->hw, RXDCTL(que->me));
2935            reg |= E1000_RXDCTL_QUEUE_ENABLE;
2936            E1000_WRITE_REG(&sc->hw, RXDCTL(que->me), reg);
2937        }
2938    }
2939
2940    /* Enable Receives */
2941    E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
2942
2943    /* Setup the HW Rx Head and Tail Descriptor Pointers */
2944    FOREACH_QUEUE(sc, que) {
2945        E1000_WRITE_REG(&sc->hw, RDH(que->me), 0);
2946        E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2947    }
2948}
2949
2950/*********************************************************************
2951 *
2952 *  Free receive related data structures.
2953 *
2954 **********************************************************************/
2955void
2956em_free_receive_structures(struct em_softc *sc)
2957{
2958    struct em_queue *que;
2959    struct em_packet *pkt;
2960    int i;
2961
2962    INIT_DEBUGOUT("free_receive_structures: begin");
2963
2964    FOREACH_QUEUE(sc, que) {
2965        if_rxr_init(&que->rx.sc_rx_ring, 0, 0);
2966
2967        bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2968            0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2969            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2970
2971        if (que->rx.sc_rx_pkts_ring != NULL) {
2972            for (i = 0; i < sc->sc_rx_slots; i++) {
2973                pkt = &que->rx.sc_rx_pkts_ring[i];
2974                if (pkt->pkt_m != NULL) {
2975                    bus_dmamap_sync(sc->sc_dmat,
2976                        pkt->pkt_map,
2977                        0, pkt->pkt_map->dm_mapsize,
2978                        BUS_DMASYNC_POSTREAD);
2979                    bus_dmamap_unload(sc->sc_dmat,
2980                        pkt->pkt_map);
2981                    m_freem(pkt->pkt_m);
2982                    pkt->pkt_m = NULL;
2983                }
2984                bus_dmamap_destroy(sc->sc_dmat, pkt->pkt_map);
2985            }
2986
2987            free(que->rx.sc_rx_pkts_ring, M_DEVBUF,
2988                sc->sc_rx_slots * sizeof(*que->rx.sc_rx_pkts_ring));
2989            que->rx.sc_rx_pkts_ring = NULL;
2990        }
2991
2992        if (que->rx.fmp != NULL) {
2993            m_freem(que->rx.fmp);
2994            que->rx.fmp = NULL;
2995            que->rx.lmp = NULL;
2996        }
2997    }
2998}
2999
3000int
3001em_rxfill(struct em_queue *que)
3002{
3003    struct em_softc *sc = que->sc;
3004    u_int slots;
3005    int post = 0;
3006    int i;
3007
3008    i = que->rx.sc_rx_desc_head;
3009
3010    bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
3011        0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
3012        BUS_DMASYNC_POSTWRITE);
3013
3014    for (slots = if_rxr_get(&que->rx.sc_rx_ring, sc->sc_rx_slots);
3015        slots > 0; slots--) {
3016        if (++i == sc->sc_rx_slots)
3017            i = 0;
3018
3019        if (em_get_buf(que, i) != 0)
3020            break;
3021
3022        que->rx.sc_rx_desc_head = i;
3023        post = 1;
3024    }
3025
3026    if_rxr_put(&que->rx.sc_rx_ring, slots);
3027
3028    bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
3029        0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
3030        BUS_DMASYNC_PREWRITE);
3031
3032    return (post);
3033}
3034
3035void
3036em_rxrefill(void *arg)
3037{
3038    struct em_queue *que = arg;
3039    int s;
3040
3041    s = splnet();
3042    em_rxrefill_locked(que);
3043    splx(s);
3044}
3045
3046void
3047em_rxrefill_locked(struct em_queue *que)
3048{
3049    struct em_softc *sc = que->sc;
3050
3051    if (em_rxfill(que))
3052        E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
3053    else if (if_rxr_needrefill(&que->rx.sc_rx_ring))
3054        timeout_add(&que->rx_refill, 1);
3055}
3056
3057/*********************************************************************
3058 *
3059 *  This routine executes in interrupt context. It replenishes
3060 *  the mbufs in the descriptor and sends data which has been
3061 *  dma'ed into host memory to upper layer.
3062 *
3063 *********************************************************************/
3064int
3065em_rxeof(struct em_queue *que)
3066{
3067    struct em_softc     *sc = que->sc;
3068    struct ifnet        *ifp = &sc->sc_ac.ac_if;
3069    struct mbuf_list    ml = MBUF_LIST_INITIALIZER();
3070    struct mbuf     *m;
3071    u_int8_t        accept_frame = 0;
3072    u_int8_t        eop = 0;
3073    u_int16_t       len, desc_len, prev_len_adj;
3074    int         i, rv = 0;
3075
3076    /* Pointer to the receive descriptor being examined. */
3077    struct em_rx_desc   *desc;
3078    struct em_packet    *pkt;
3079    u_int8_t        status;
3080
3081    if (if_rxr_inuse(&que->rx.sc_rx_ring) == 0)
3082        return (0);
3083
3084    i = que->rx.sc_rx_desc_tail;
3085
3086    bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
3087        0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
3088        BUS_DMASYNC_POSTREAD);
3089
3090    do {
3091        m = NULL;
3092
3093        pkt = &que->rx.sc_rx_pkts_ring[i];
3094        desc = &que->rx.sc_rx_desc_ring[i];
3095
3096        status = desc->status;
3097        if (!ISSET(status, E1000_RXD_STAT_DD))
3098            break;
3099
3100        /* pull the mbuf off the ring */
3101        bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
3102            0, pkt->pkt_map->dm_mapsize,
3103            BUS_DMASYNC_POSTREAD);
3104        bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
3105        m = pkt->pkt_m;
3106        pkt->pkt_m = NULL;
3107
3108        KASSERT(m != NULL);
3109
3110        if_rxr_put(&que->rx.sc_rx_ring, 1);
3111        rv = 1;
3112
3113        accept_frame = 1;
3114        prev_len_adj = 0;
3115        desc_len = letoh16(desc->length);
3116
3117        if (status & E1000_RXD_STAT_EOP) {
3118            eop = 1;
3119            if (sc->hw.mac_type == em_i210 ||
3120                sc->hw.mac_type == em_i350) {
3121                /* crc has already been stripped */
3122                len = desc_len;
3123            } else if (desc_len < ETHER_CRC_LEN) {
3124                len = 0;
3125                prev_len_adj = ETHER_CRC_LEN - desc_len;
3126            } else
3127                len = desc_len - ETHER_CRC_LEN;
3128        } else {
3129            eop = 0;
3130            len = desc_len;
3131        }
3132
3133        if (desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3134            u_int8_t last_byte;
3135            u_int32_t pkt_len = desc_len;
3136
3137            if (que->rx.fmp != NULL)
3138                pkt_len += que->rx.fmp->m_pkthdr.len;
3139
3140            last_byte = *(mtod(m, caddr_t) + desc_len - 1);
3141            if (TBI_ACCEPT(&sc->hw, status, desc->errors,
3142                pkt_len, last_byte)) {
3143#if NKSTAT > 0
3144                em_tbi_adjust_stats(sc,
3145                    pkt_len, sc->hw.mac_addr);
3146#endif
3147                if (len > 0)
3148                    len--;
3149            } else
3150                accept_frame = 0;
3151        }
3152
3153        if (accept_frame) {
3154            /* Assign correct length to the current fragment */
3155            m->m_len = len;
3156
3157            if (que->rx.fmp == NULL) {
3158                m->m_pkthdr.len = m->m_len;
3159                que->rx.fmp = m;     /* Store the first mbuf */
3160                que->rx.lmp = m;
3161            } else {
3162                /* Chain mbuf's together */
3163                m->m_flags &= ~M_PKTHDR;
3164                /*
3165                 * Adjust length of previous mbuf in chain if
3166                 * we received less than 4 bytes in the last
3167                 * descriptor.
3168                 */
3169                if (prev_len_adj > 0) {
3170                    que->rx.lmp->m_len -= prev_len_adj;
3171                    que->rx.fmp->m_pkthdr.len -= prev_len_adj;
3172                }
3173                que->rx.lmp->m_next = m;
3174                que->rx.lmp = m;
3175                que->rx.fmp->m_pkthdr.len += m->m_len;
3176            }
3177
3178            if (eop) {
3179                m = que->rx.fmp;
3180
3181                em_receive_checksum(sc, desc, m);
3182#if NVLAN > 0
3183                if (desc->status & E1000_RXD_STAT_VP) {
3184                    m->m_pkthdr.ether_vtag =
3185                        letoh16(desc->special);
3186                    m->m_flags |= M_VLANTAG;
3187                }
3188#endif
3189                ml_enqueue(&ml, m);
3190
3191                que->rx.fmp = NULL;
3192                que->rx.lmp = NULL;
3193            }
3194        } else {
3195            que->rx.dropped_pkts++;
3196
3197            if (que->rx.fmp != NULL) {
3198                m_freem(que->rx.fmp);
3199                que->rx.fmp = NULL;
3200                que->rx.lmp = NULL;
3201            }
3202
3203            m_freem(m);
3204        }
3205
3206        /* Advance our pointers to the next descriptor. */
3207        if (++i == sc->sc_rx_slots)
3208            i = 0;
3209    } while (if_rxr_inuse(&que->rx.sc_rx_ring) > 0);
3210
3211    bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
3212        0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
3213        BUS_DMASYNC_PREREAD);
3214
3215    que->rx.sc_rx_desc_tail = i;
3216
3217    if (ifiq_input(&ifp->if_rcv, &ml))
3218        if_rxr_livelocked(&que->rx.sc_rx_ring);
3219
3220    return (rv);
3221}
3222
3223/*********************************************************************
3224 *
3225 *  Verify that the hardware indicated that the checksum is valid.
3226 *  Inform the stack about the status of checksum so that stack
3227 *  doesn't spend time verifying the checksum.
3228 *
3229 *********************************************************************/
3230void
3231em_receive_checksum(struct em_softc *sc, struct em_rx_desc *rx_desc,
3232    struct mbuf *mp)
3233{
3234    /* 82543 or newer only */
3235    if ((sc->hw.mac_type < em_82543) ||
3236        /* Ignore Checksum bit is set */
3237        (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3238        mp->m_pkthdr.csum_flags = 0;
3239        return;
3240    }
3241
3242    if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3243        /* Did it pass? */
3244        if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3245            /* IP Checksum Good */
3246            mp->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK;
3247
3248        } else
3249            mp->m_pkthdr.csum_flags = 0;
3250    }
3251
3252    if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3253        /* Did it pass? */
3254        if (!(rx_desc->errors & E1000_RXD_ERR_TCPE))
3255            mp->m_pkthdr.csum_flags |=
3256                M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
3257    }
3258}
3259
3260/*
3261 * This turns on the hardware offload of the VLAN
3262 * tag insertion and strip
3263 */
3264void
3265em_enable_hw_vlans(struct em_softc *sc)
3266{
3267    uint32_t ctrl;
3268
3269    ctrl = E1000_READ_REG(&sc->hw, CTRL);
3270    ctrl |= E1000_CTRL_VME;
3271    E1000_WRITE_REG(&sc->hw, CTRL, ctrl);
3272}
3273
3274void
3275em_enable_intr(struct em_softc *sc)
3276{
3277    uint32_t mask;
3278
3279    if (sc->msix) {
3280        mask = sc->msix_queuesmask | sc->msix_linkmask;
3281        E1000_WRITE_REG(&sc->hw, EIAC, mask);
3282        E1000_WRITE_REG(&sc->hw, EIAM, mask);
3283        E1000_WRITE_REG(&sc->hw, EIMS, mask);
3284        E1000_WRITE_REG(&sc->hw, IMS, E1000_IMS_LSC);
3285    } else
3286        E1000_WRITE_REG(&sc->hw, IMS, (IMS_ENABLE_MASK));
3287}
3288
3289void
3290em_disable_intr(struct em_softc *sc)
3291{
3292    /*
3293     * The first version of 82542 had an errata where when link
3294     * was forced it would stay up even if the cable was disconnected
3295     * Sequence errors were used to detect the disconnect and then
3296     * the driver would unforce the link.  This code is in the ISR.
3297     * For this to work correctly the Sequence error interrupt had
3298     * to be enabled all the time.
3299     */
3300    if (sc->msix) {
3301        E1000_WRITE_REG(&sc->hw, EIMC, ~0);
3302        E1000_WRITE_REG(&sc->hw, EIAC, 0);
3303    } else if (sc->hw.mac_type == em_82542_rev2_0)
3304        E1000_WRITE_REG(&sc->hw, IMC, (0xffffffff & ~E1000_IMC_RXSEQ));
3305    else
3306        E1000_WRITE_REG(&sc->hw, IMC, 0xffffffff);
3307}
3308
3309void
3310em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3311{
3312    struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3313    pcireg_t val;
3314
3315    val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3316    if (reg & 0x2) {
3317        val &= 0x0000ffff;
3318        val |= (*value << 16);
3319    } else {
3320        val &= 0xffff0000;
3321        val |= *value;
3322    }
3323    pci_conf_write(pa->pa_pc, pa->pa_tag, reg & ~0x3, val);
3324}
3325
3326void
3327em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3328{
3329    struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3330    pcireg_t val;
3331
3332    val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3333    if (reg & 0x2)
3334        *value = (val >> 16) & 0xffff;
3335    else
3336        *value = val & 0xffff;
3337}
3338
3339void
3340em_pci_set_mwi(struct em_hw *hw)
3341{
3342    struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3343
3344    pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3345        (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE));
3346}
3347
3348void
3349em_pci_clear_mwi(struct em_hw *hw)
3350{
3351    struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3352
3353    pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3354        (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE));
3355}
3356
3357/*
3358 * We may eventually really do this, but its unnecessary
3359 * for now so we just return unsupported.
3360 */
3361int32_t
3362em_read_pcie_cap_reg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3363{
3364    return -E1000_NOT_IMPLEMENTED;
3365}
3366
3367/*********************************************************************
3368* 82544 Coexistence issue workaround.
3369*    There are 2 issues.
3370*       1. Transmit Hang issue.
3371*    To detect this issue, following equation can be used...
3372*          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3373*          If SUM[3:0] is in between 1 to 4, we will have this issue.
3374*
3375*       2. DAC issue.
3376*    To detect this issue, following equation can be used...
3377*          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3378*          If SUM[3:0] is in between 9 to c, we will have this issue.
3379*
3380*
3381*    WORKAROUND:
3382*          Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c (DAC)
3383*
3384*** *********************************************************************/
3385u_int32_t
3386em_fill_descriptors(u_int64_t address, u_int32_t length,
3387    PDESC_ARRAY desc_array)
3388{
3389        /* Since issue is sensitive to length and address.*/
3390        /* Let us first check the address...*/
3391        u_int32_t safe_terminator;
3392        if (length <= 4) {
3393                desc_array->descriptor[0].address = address;
3394                desc_array->descriptor[0].length = length;
3395                desc_array->elements = 1;
3396                return desc_array->elements;
3397        }
3398        safe_terminator = (u_int32_t)((((u_int32_t)address & 0x7) + (length & 0xF)) & 0xF);
3399        /* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */
3400        if (safe_terminator == 0   ||
3401        (safe_terminator > 4   &&
3402        safe_terminator < 9)   ||
3403        (safe_terminator > 0xC &&
3404        safe_terminator <= 0xF)) {
3405                desc_array->descriptor[0].address = address;
3406                desc_array->descriptor[0].length = length;
3407                desc_array->elements = 1;
3408                return desc_array->elements;
3409        }
3410
3411        desc_array->descriptor[0].address = address;
3412        desc_array->descriptor[0].length = length - 4;
3413        desc_array->descriptor[1].address = address + (length - 4);
3414        desc_array->descriptor[1].length = 4;
3415        desc_array->elements = 2;
3416        return desc_array->elements;
3417}
3418
3419/*
3420 * Disable the L0S and L1 LINK states.
3421 */
3422void
3423em_disable_aspm(struct em_softc *sc)
3424{
3425    int offset;
3426    pcireg_t val;
3427
3428    switch (sc->hw.mac_type) {
3429        case em_82571:
3430        case em_82572:
3431        case em_82573:
3432        case em_82574:
3433            break;
3434        default:
3435            return;
3436    }
3437
3438    if (!pci_get_capability(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3439        PCI_CAP_PCIEXPRESS, &offset, NULL))
3440        return;
3441
3442    /* Disable PCIe Active State Power Management (ASPM). */
3443    val = pci_conf_read(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3444        offset + PCI_PCIE_LCSR);
3445
3446    switch (sc->hw.mac_type) {
3447        case em_82571:
3448        case em_82572:
3449            val &= ~PCI_PCIE_LCSR_ASPM_L1;
3450            break;
3451        case em_82573:
3452        case em_82574:
3453            val &= ~(PCI_PCIE_LCSR_ASPM_L0S |
3454                PCI_PCIE_LCSR_ASPM_L1);
3455            break;
3456        default:
3457            break;
3458    }
3459
3460    pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3461        offset + PCI_PCIE_LCSR, val);
3462}
3463
3464/*
3465 * em_flush_tx_ring - remove all descriptors from the tx_ring
3466 *
3467 * We want to clear all pending descriptors from the TX ring.
3468 * zeroing happens when the HW reads the regs. We assign the ring itself as
3469 * the data of the next descriptor. We don't care about the data we are about
3470 * to reset the HW.
3471 */
3472void
3473em_flush_tx_ring(struct em_queue *que)
3474{
3475    struct em_softc     *sc = que->sc;
3476    uint32_t         tctl, txd_lower = E1000_TXD_CMD_IFCS;
3477    uint16_t         size = 512;
3478    struct em_tx_desc   *txd;
3479
3480    KASSERT(que->tx.sc_tx_desc_ring != NULL);
3481
3482    tctl = EM_READ_REG(&sc->hw, E1000_TCTL);
3483    EM_WRITE_REG(&sc->hw, E1000_TCTL, tctl | E1000_TCTL_EN);
3484
3485    KASSERT(EM_READ_REG(&sc->hw, E1000_TDT(que->me)) == que->tx.sc_tx_desc_head);
3486
3487    txd = &que->tx.sc_tx_desc_ring[que->tx.sc_tx_desc_head];
3488    txd->buffer_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
3489    txd->lower.data = htole32(txd_lower | size);
3490    txd->upper.data = 0;
3491
3492    /* flush descriptors to memory before notifying the HW */
3493    bus_space_barrier(sc->osdep.mem_bus_space_tag,
3494        sc->osdep.mem_bus_space_handle, 0, 0, BUS_SPACE_BARRIER_WRITE);
3495
3496    if (++que->tx.sc_tx_desc_head == sc->sc_tx_slots)
3497        que->tx.sc_tx_desc_head = 0;
3498
3499    EM_WRITE_REG(&sc->hw, E1000_TDT(que->me), que->tx.sc_tx_desc_head);
3500    bus_space_barrier(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
3501        0, 0, BUS_SPACE_BARRIER_READ|BUS_SPACE_BARRIER_WRITE);
3502    usec_delay(250);
3503}
3504
3505/*
3506 * em_flush_rx_ring - remove all descriptors from the rx_ring
3507 *
3508 * Mark all descriptors in the RX ring as consumed and disable the rx ring
3509 */
3510void
3511em_flush_rx_ring(struct em_queue *que)
3512{
3513    uint32_t    rctl, rxdctl;
3514    struct em_softc *sc = que->sc;
3515
3516    rctl = EM_READ_REG(&sc->hw, E1000_RCTL);
3517    EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3518    E1000_WRITE_FLUSH(&sc->hw);
3519    usec_delay(150);
3520
3521    rxdctl = EM_READ_REG(&sc->hw, E1000_RXDCTL(que->me));
3522    /* zero the lower 14 bits (prefetch and host thresholds) */
3523    rxdctl &= 0xffffc000;
3524    /*
3525     * update thresholds: prefetch threshold to 31, host threshold to 1
3526     * and make sure the granularity is "descriptors" and not "cache lines"
3527     */
3528    rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3529    EM_WRITE_REG(&sc->hw, E1000_RXDCTL(que->me), rxdctl);
3530
3531    /* momentarily enable the RX ring for the changes to take effect */
3532    EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3533    E1000_WRITE_FLUSH(&sc->hw);
3534    usec_delay(150);
3535    EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3536}
3537
3538/*
3539 * em_flush_desc_rings - remove all descriptors from the descriptor rings
3540 *
3541 * In i219, the descriptor rings must be emptied before resetting the HW
3542 * or before changing the device state to D3 during runtime (runtime PM).
3543 *
3544 * Failure to do this will cause the HW to enter a unit hang state which can
3545 * only be released by PCI reset on the device
3546 *
3547 */
3548void
3549em_flush_desc_rings(struct em_softc *sc)
3550{
3551    struct em_queue     *que = sc->queues; /* Use only first queue. */
3552    struct pci_attach_args  *pa = &sc->osdep.em_pa;
3553    uint32_t         fextnvm11, tdlen;
3554    uint16_t         hang_state;
3555
3556    /* First, disable MULR fix in FEXTNVM11 */
3557    fextnvm11 = EM_READ_REG(&sc->hw, E1000_FEXTNVM11);
3558    fextnvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3559    EM_WRITE_REG(&sc->hw, E1000_FEXTNVM11, fextnvm11);
3560
3561    /* do nothing if we're not in faulty state, or if the queue is empty */
3562    tdlen = EM_READ_REG(&sc->hw, E1000_TDLEN(que->me));
3563    hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3564    if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3565        return;
3566    em_flush_tx_ring(que);
3567
3568    /* recheck, maybe the fault is caused by the rx ring */
3569    hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3570    if (hang_state & FLUSH_DESC_REQUIRED)
3571        em_flush_rx_ring(que);
3572}
3573
3574int
3575em_allocate_legacy(struct em_softc *sc)
3576{
3577    pci_intr_handle_t    ih;
3578    const char      *intrstr = NULL;
3579    struct pci_attach_args  *pa = &sc->osdep.em_pa;
3580    pci_chipset_tag_t    pc = pa->pa_pc;
3581
3582    if (pci_intr_map_msi(pa, &ih)) {
3583        if (pci_intr_map(pa, &ih)) {
3584            printf(": couldn't map interrupt\n");
3585            return (ENXIO);
3586        }
3587        sc->legacy_irq = 1;
3588    }
3589
3590    intrstr = pci_intr_string(pc, ih);
3591    sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3592        em_intr, sc, DEVNAME(sc));
3593    if (sc->sc_intrhand == NULL) {
3594        printf(": couldn't establish interrupt");
3595        if (intrstr != NULL)
3596            printf(" at %s", intrstr);
3597        printf("\n");
3598        return (ENXIO);
3599    }
3600    printf(": %s", intrstr);
3601
3602    return (0);
3603}
3604
3605#if NKSTAT > 0
3606/* this is used to look up the array of kstats quickly */
3607enum em_stat {
3608    em_stat_crcerrs,
3609    em_stat_algnerrc,
3610    em_stat_symerrs,
3611    em_stat_rxerrc,
3612    em_stat_mpc,
3613    em_stat_scc,
3614    em_stat_ecol,
3615    em_stat_mcc,
3616    em_stat_latecol,
3617    em_stat_colc,
3618    em_stat_dc,
3619    em_stat_tncrs,
3620    em_stat_sec,
3621    em_stat_cexterr,
3622    em_stat_rlec,
3623    em_stat_xonrxc,
3624    em_stat_xontxc,
3625    em_stat_xoffrxc,
3626    em_stat_xofftxc,
3627    em_stat_fcruc,
3628    em_stat_prc64,
3629    em_stat_prc127,
3630    em_stat_prc255,
3631    em_stat_prc511,
3632    em_stat_prc1023,
3633    em_stat_prc1522,
3634    em_stat_gprc,
3635    em_stat_bprc,
3636    em_stat_mprc,
3637    em_stat_gptc,
3638    em_stat_gorc,
3639    em_stat_gotc,
3640    em_stat_rnbc,
3641    em_stat_ruc,
3642    em_stat_rfc,
3643    em_stat_roc,
3644    em_stat_rjc,
3645    em_stat_mgtprc,
3646    em_stat_mgtpdc,
3647    em_stat_mgtptc,
3648    em_stat_tor,
3649    em_stat_tot,
3650    em_stat_tpr,
3651    em_stat_tpt,
3652    em_stat_ptc64,
3653    em_stat_ptc127,
3654    em_stat_ptc255,
3655    em_stat_ptc511,
3656    em_stat_ptc1023,
3657    em_stat_ptc1522,
3658    em_stat_mptc,
3659    em_stat_bptc,
3660#if 0
3661    em_stat_tsctc,
3662    em_stat_tsctf,
3663#endif
3664
3665    em_stat_count,
3666};
3667
3668struct em_counter {
3669    const char      *name;
3670    enum kstat_kv_unit   unit;
3671    uint32_t         reg;
3672};
3673
3674static const struct em_counter em_counters[em_stat_count] = {
3675    [em_stat_crcerrs] =
3676        { "rx crc errs",    KSTAT_KV_U_PACKETS, E1000_CRCERRS },
3677    [em_stat_algnerrc] = /* >= em_82543 */
3678        { "rx align errs",  KSTAT_KV_U_PACKETS, 0 },
3679    [em_stat_symerrs] = /* >= em_82543 */
3680        { "rx align errs",  KSTAT_KV_U_PACKETS, 0 },
3681    [em_stat_rxerrc] =
3682        { "rx errs",    KSTAT_KV_U_PACKETS, E1000_RXERRC },
3683    [em_stat_mpc] =
3684        { "rx missed",  KSTAT_KV_U_PACKETS, E1000_MPC },
3685    [em_stat_scc] =
3686        { "tx single coll", KSTAT_KV_U_PACKETS, E1000_SCC },
3687    [em_stat_ecol] =
3688        { "tx excess coll", KSTAT_KV_U_PACKETS, E1000_ECOL },
3689    [em_stat_mcc] =
3690        { "tx multi coll",  KSTAT_KV_U_PACKETS, E1000_MCC },
3691    [em_stat_latecol] =
3692        { "tx late coll",   KSTAT_KV_U_PACKETS, E1000_LATECOL },
3693    [em_stat_colc] =
3694        { "tx coll",    KSTAT_KV_U_NONE,    E1000_COLC },
3695    [em_stat_dc] =
3696        { "tx defers",  KSTAT_KV_U_NONE,    E1000_DC },
3697    [em_stat_tncrs] = /* >= em_82543 */
3698        { "tx no CRS",  KSTAT_KV_U_PACKETS, 0 },
3699    [em_stat_sec] =
3700        { "seq errs",   KSTAT_KV_U_NONE,    E1000_SEC },
3701    [em_stat_cexterr] = /* >= em_82543 */
3702        { "carr ext errs",  KSTAT_KV_U_PACKETS, 0 },
3703    [em_stat_rlec] =
3704        { "rx len errs",    KSTAT_KV_U_PACKETS, E1000_RLEC },
3705    [em_stat_xonrxc] =
3706        { "rx xon",     KSTAT_KV_U_PACKETS, E1000_XONRXC },
3707    [em_stat_xontxc] =
3708        { "tx xon",     KSTAT_KV_U_PACKETS, E1000_XONTXC },
3709    [em_stat_xoffrxc] =
3710        { "rx xoff",    KSTAT_KV_U_PACKETS, E1000_XOFFRXC },
3711    [em_stat_xofftxc] =
3712        { "tx xoff",    KSTAT_KV_U_PACKETS, E1000_XOFFTXC },
3713    [em_stat_fcruc] =
3714        { "FC unsupported", KSTAT_KV_U_PACKETS, E1000_FCRUC },
3715    [em_stat_prc64] =
3716        { "rx 64B",     KSTAT_KV_U_PACKETS, E1000_PRC64 },
3717    [em_stat_prc127] =
3718        { "rx 65-127B", KSTAT_KV_U_PACKETS, E1000_PRC127 },
3719    [em_stat_prc255] =
3720        { "rx 128-255B",    KSTAT_KV_U_PACKETS, E1000_PRC255 },
3721    [em_stat_prc511] =
3722        { "rx 256-511B",    KSTAT_KV_U_PACKETS, E1000_PRC511 },
3723    [em_stat_prc1023] =
3724        { "rx 512-1023B",   KSTAT_KV_U_PACKETS, E1000_PRC1023 },
3725    [em_stat_prc1522] =
3726        { "rx 1024-maxB",   KSTAT_KV_U_PACKETS, E1000_PRC1522 },
3727    [em_stat_gprc] =
3728        { "rx good",    KSTAT_KV_U_PACKETS, E1000_GPRC },
3729    [em_stat_bprc] =
3730        { "rx bcast",   KSTAT_KV_U_PACKETS, E1000_BPRC },
3731    [em_stat_mprc] =
3732        { "rx mcast",   KSTAT_KV_U_PACKETS, E1000_MPRC },
3733    [em_stat_gptc] =
3734        { "tx good",    KSTAT_KV_U_PACKETS, E1000_GPTC },
3735    [em_stat_gorc] = /* 64bit */
3736        { "rx good",    KSTAT_KV_U_BYTES,   0 },
3737    [em_stat_gotc] = /* 64bit */
3738        { "tx good",    KSTAT_KV_U_BYTES,   0 },
3739    [em_stat_rnbc] =
3740        { "rx no buffers",  KSTAT_KV_U_PACKETS, E1000_RNBC },
3741    [em_stat_ruc] =
3742        { "rx undersize",   KSTAT_KV_U_PACKETS, E1000_RUC },
3743    [em_stat_rfc] =
3744        { "rx fragments",   KSTAT_KV_U_PACKETS, E1000_RFC },
3745    [em_stat_roc] =
3746        { "rx oversize",    KSTAT_KV_U_PACKETS, E1000_ROC },
3747    [em_stat_rjc] =
3748        { "rx jabbers", KSTAT_KV_U_PACKETS, E1000_RJC },
3749    [em_stat_mgtprc] =
3750        { "rx mgmt",    KSTAT_KV_U_PACKETS, E1000_MGTPRC },
3751    [em_stat_mgtpdc] =
3752        { "rx mgmt drops",  KSTAT_KV_U_PACKETS, E1000_MGTPDC },
3753    [em_stat_mgtptc] =
3754        { "tx mgmt",    KSTAT_KV_U_PACKETS, E1000_MGTPTC },
3755    [em_stat_tor] = /* 64bit */
3756        { "rx total",   KSTAT_KV_U_BYTES,   0 },
3757    [em_stat_tot] = /* 64bit */
3758        { "tx total",   KSTAT_KV_U_BYTES,   0 },
3759    [em_stat_tpr] =
3760        { "rx total",   KSTAT_KV_U_PACKETS, E1000_TPR },
3761    [em_stat_tpt] =
3762        { "tx total",   KSTAT_KV_U_PACKETS, E1000_TPT },
3763    [em_stat_ptc64] =
3764        { "tx 64B",     KSTAT_KV_U_PACKETS, E1000_PTC64 },
3765    [em_stat_ptc127] =
3766        { "tx 65-127B", KSTAT_KV_U_PACKETS, E1000_PTC127 },
3767    [em_stat_ptc255] =
3768        { "tx 128-255B",    KSTAT_KV_U_PACKETS, E1000_PTC255 },
3769    [em_stat_ptc511] =
3770        { "tx 256-511B",    KSTAT_KV_U_PACKETS, E1000_PTC511 },
3771    [em_stat_ptc1023] =
3772        { "tx 512-1023B",   KSTAT_KV_U_PACKETS, E1000_PTC1023 },
3773    [em_stat_ptc1522] =
3774        { "tx 1024-maxB",   KSTAT_KV_U_PACKETS, E1000_PTC1522 },
3775    [em_stat_mptc] =
3776        { "tx mcast",   KSTAT_KV_U_PACKETS, E1000_MPTC },
3777    [em_stat_bptc] =
3778        { "tx bcast",   KSTAT_KV_U_PACKETS, E1000_BPTC },
3779};
3780
3781/**********************************************************************
3782 *
3783 *  Update the board statistics counters.
3784 *
3785 **********************************************************************/
3786int
3787em_kstat_read(struct kstat *ks)
3788{
3789    struct em_softc *sc = ks->ks_softc;
3790    struct em_hw *hw = &sc->hw;
3791    struct kstat_kv *kvs = ks->ks_data;
3792    uint32_t lo, hi;
3793    unsigned int i;
3794
3795    for (i = 0; i < nitems(em_counters); i++) {
3796        const struct em_counter *c = &em_counters[i];
3797        if (c->reg == 0)
3798            continue;
3799
3800        kstat_kv_u64(&kvs[i]) += EM_READ_REG(hw,
3801            E1000_REG_TR(hw, c->reg)); /* wtf */
3802    }
3803
3804    /* Handle the exceptions. */
3805
3806    if (sc->hw.mac_type >= em_82543) {
3807        kstat_kv_u64(&kvs[em_stat_algnerrc]) +=
3808            E1000_READ_REG(hw, ALGNERRC);
3809        kstat_kv_u64(&kvs[em_stat_rxerrc]) +=
3810            E1000_READ_REG(hw, RXERRC);
3811        kstat_kv_u64(&kvs[em_stat_cexterr]) +=
3812            E1000_READ_REG(hw, CEXTERR);
3813        kstat_kv_u64(&kvs[em_stat_tncrs]) +=
3814            E1000_READ_REG(hw, TNCRS);
3815#if 0
3816        sc->stats.tsctc +=
3817        E1000_READ_REG(hw, TSCTC);
3818        sc->stats.tsctfc +=
3819        E1000_READ_REG(hw, TSCTFC);
3820#endif
3821    }
3822
3823    /* For the 64-bit byte counters the low dword must be read first. */
3824    /* Both registers clear on the read of the high dword */
3825
3826    lo = E1000_READ_REG(hw, GORCL);
3827    hi = E1000_READ_REG(hw, GORCH);
3828    kstat_kv_u64(&kvs[em_stat_gorc]) +=
3829        ((uint64_t)hi << 32) | (uint64_t)lo;
3830
3831    lo = E1000_READ_REG(hw, GOTCL);
3832    hi = E1000_READ_REG(hw, GOTCH);
3833    kstat_kv_u64(&kvs[em_stat_gotc]) +=
3834        ((uint64_t)hi << 32) | (uint64_t)lo;
3835
3836    lo = E1000_READ_REG(hw, TORL);
3837    hi = E1000_READ_REG(hw, TORH);
3838    kstat_kv_u64(&kvs[em_stat_tor]) +=
3839        ((uint64_t)hi << 32) | (uint64_t)lo;
3840
3841    lo = E1000_READ_REG(hw, TOTL);
3842    hi = E1000_READ_REG(hw, TOTH);
3843    kstat_kv_u64(&kvs[em_stat_tot]) +=
3844        ((uint64_t)hi << 32) | (uint64_t)lo;
3845
3846    getnanouptime(&ks->ks_updated);
3847
3848    return (0);
3849}
3850
3851void
3852em_kstat_attach(struct em_softc *sc)
3853{
3854    struct kstat *ks;
3855    struct kstat_kv *kvs;
3856    unsigned int i;
3857
3858    mtx_init(&sc->kstat_mtx, IPL_SOFTCLOCK);
3859
3860    ks = kstat_create(DEVNAME(sc), 0, "em-stats", 0,
3861        KSTAT_T_KV, 0);
3862    if (ks == NULL)
3863        return;
3864
3865    kvs = mallocarray(nitems(em_counters), sizeof(*kvs),
3866        M_DEVBUF, M_WAITOK|M_ZERO);
3867    for (i = 0; i < nitems(em_counters); i++) {
3868        const struct em_counter *c = &em_counters[i];
3869        kstat_kv_unit_init(&kvs[i], c->name,
3870            KSTAT_KV_T_COUNTER64, c->unit);
3871    }
3872
3873    ks->ks_softc = sc;
3874    ks->ks_data = kvs;
3875    ks->ks_datalen = nitems(em_counters) * sizeof(*kvs);
3876    ks->ks_read = em_kstat_read;
3877    kstat_set_mutex(ks, &sc->kstat_mtx);
3878
3879    kstat_install(ks);
3880}
3881
3882/******************************************************************************
3883 * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT
3884 *****************************************************************************/
3885void
3886em_tbi_adjust_stats(struct em_softc *sc, uint32_t frame_len, uint8_t *mac_addr)
3887{
3888    struct em_hw *hw = &sc->hw;
3889    struct kstat *ks = sc->kstat;
3890    struct kstat_kv *kvs;
3891
3892    if (ks == NULL)
3893        return;
3894
3895    /* First adjust the frame length. */
3896    frame_len--;
3897
3898    mtx_enter(&sc->kstat_mtx);
3899    kvs = ks->ks_data;
3900
3901    /*
3902     * We need to adjust the statistics counters, since the hardware
3903     * counters overcount this packet as a CRC error and undercount the
3904     * packet as a good packet
3905     */
3906
3907    /* This packet should not be counted as a CRC error.    */
3908    kstat_kv_u64(&kvs[em_stat_crcerrs])--;
3909    /* This packet does count as a Good Packet Received.    */
3910    kstat_kv_u64(&kvs[em_stat_gprc])++;
3911
3912    /* Adjust the Good Octets received counters     */
3913    kstat_kv_u64(&kvs[em_stat_gorc]) += frame_len;
3914
3915    /*
3916     * Is this a broadcast or multicast?  Check broadcast first, since
3917     * the test for a multicast frame will test positive on a broadcast
3918     * frame.
3919     */
3920    if (ETHER_IS_BROADCAST(mac_addr)) {
3921        /* Broadcast packet */
3922        kstat_kv_u64(&kvs[em_stat_bprc])++;
3923    } else if (ETHER_IS_MULTICAST(mac_addr)) {
3924        /* Multicast packet */
3925        kstat_kv_u64(&kvs[em_stat_mprc])++;
3926    }
3927
3928    if (frame_len == hw->max_frame_size) {
3929        /*
3930         * In this case, the hardware has overcounted the number of
3931         * oversize frames.
3932         */
3933        kstat_kv_u64(&kvs[em_stat_roc])--;
3934    }
3935
3936    /*
3937     * Adjust the bin counters when the extra byte put the frame in the
3938     * wrong bin. Remember that the frame_len was adjusted above.
3939     */
3940    if (frame_len == 64) {
3941        kstat_kv_u64(&kvs[em_stat_prc64])++;
3942        kstat_kv_u64(&kvs[em_stat_prc127])--;
3943    } else if (frame_len == 127) {
3944        kstat_kv_u64(&kvs[em_stat_prc127])++;
3945        kstat_kv_u64(&kvs[em_stat_prc255])--;
3946    } else if (frame_len == 255) {
3947        kstat_kv_u64(&kvs[em_stat_prc255])++;
3948        kstat_kv_u64(&kvs[em_stat_prc511])--;
3949    } else if (frame_len == 511) {
3950        kstat_kv_u64(&kvs[em_stat_prc511])++;
3951        kstat_kv_u64(&kvs[em_stat_prc1023])--;
3952    } else if (frame_len == 1023) {
3953        kstat_kv_u64(&kvs[em_stat_prc1023])++;
3954        kstat_kv_u64(&kvs[em_stat_prc1522])--;
3955    } else if (frame_len == 1522) {
3956        kstat_kv_u64(&kvs[em_stat_prc1522])++;
3957    }
3958
3959    mtx_leave(&sc->kstat_mtx);
3960}
3961#endif /* NKSTAT > 0 */
3962
3963#ifndef SMALL_KERNEL
3964int
3965em_allocate_msix(struct em_softc *sc)
3966{
3967    pci_intr_handle_t    ih;
3968    const char      *intrstr = NULL;
3969    struct pci_attach_args  *pa = &sc->osdep.em_pa;
3970    pci_chipset_tag_t    pc = pa->pa_pc;
3971    struct em_queue     *que = sc->queues; /* Use only first queue. */
3972    int          vec;
3973
3974    if (!em_enable_msix)
3975        return (ENODEV);
3976
3977    switch (sc->hw.mac_type) {
3978    case em_82576:
3979    case em_82580:
3980    case em_i350:
3981    case em_i210:
3982        break;
3983    default:
3984        return (ENODEV);
3985    }
3986
3987    vec = 0;
3988    if (pci_intr_map_msix(pa, vec, &ih))
3989        return (ENODEV);
3990    sc->msix = 1;
3991
3992    que->me = vec;
3993    que->eims = 1 << vec;
3994    snprintf(que->name, sizeof(que->name), "%s:%d", DEVNAME(sc), vec);
3995
3996    intrstr = pci_intr_string(pc, ih);
3997    que->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3998        em_queue_intr_msix, que, que->name);
3999    if (que->tag == NULL) {
4000        printf(": couldn't establish interrupt");
4001        if (intrstr != NULL)
4002            printf(" at %s", intrstr);
4003        printf("\n");
4004        return (ENXIO);
4005    }
4006
4007    /* Setup linkvector, use last queue vector + 1 */
4008    vec++;
4009    sc->msix_linkvec = vec;
4010    if (pci_intr_map_msix(pa, sc->msix_linkvec, &ih)) {
4011        printf(": couldn't map link vector\n");
4012        return (ENXIO);
4013    }
4014
4015    intrstr = pci_intr_string(pc, ih);
4016    sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
4017        em_link_intr_msix, sc, DEVNAME(sc));
4018    if (sc->sc_intrhand == NULL) {
4019        printf(": couldn't establish interrupt");
4020        if (intrstr != NULL)
4021            printf(" at %s", intrstr);
4022        printf("\n");
4023        return (ENXIO);
4024    }
4025    printf(", %s, %d queue%s", intrstr, vec, (vec > 1) ? "s" : "");
4026
4027    return (0);
4028}
4029
4030/*
4031 * Interrupt for a specific queue, (not link interrupts). The EICR bit which
4032 * maps to the EIMS bit expresses both RX and TX, therefore we can't
4033 * distinguish if this is a RX completion of TX completion and must do both.
4034 * The bits in EICR are autocleared and we _cannot_ read EICR.
4035 */
4036int
4037em_queue_intr_msix(void *vque)
4038{
4039    struct em_queue *que = vque;
4040    struct em_softc *sc = que->sc;
4041    struct ifnet   *ifp = &sc->sc_ac.ac_if;
4042
4043    if (ifp->if_flags & IFF_RUNNING) {
4044        em_txeof(que);
4045        if (em_rxeof(que))
4046            em_rxrefill_locked(que);
4047    }
4048
4049    em_enable_queue_intr_msix(que);
4050
4051    return (1);
4052}
4053
4054int
4055em_link_intr_msix(void *arg)
4056{
4057    struct em_softc *sc = arg;
4058    uint32_t icr;
4059
4060    icr = E1000_READ_REG(&sc->hw, ICR);
4061
4062    /* Link status change */
4063    if (icr & E1000_ICR_LSC) {
4064        KERNEL_LOCK();
4065        sc->hw.get_link_status = 1;
4066        em_check_for_link(&sc->hw);
4067        em_update_link_status(sc);
4068        KERNEL_UNLOCK();
4069    }
4070
4071    /* Re-arm unconditionally */
4072    E1000_WRITE_REG(&sc->hw, IMS, E1000_ICR_LSC);
4073    E1000_WRITE_REG(&sc->hw, EIMS, sc->msix_linkmask);
4074
4075    return (1);
4076}
4077
4078/*
4079 * Maps queues into msix interrupt vectors.
4080 */
4081int
4082em_setup_queues_msix(struct em_softc *sc)
4083{
4084    uint32_t ivar, newitr, index;
4085    struct em_queue *que;
4086
4087    KASSERT(sc->msix);
4088
4089    /* First turn on RSS capability */
4090    if (sc->hw.mac_type != em_82575)
4091        E1000_WRITE_REG(&sc->hw, GPIE,
4092            E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
4093            E1000_GPIE_PBA | E1000_GPIE_NSICR);
4094
4095    /* Turn on MSIX */
4096    switch (sc->hw.mac_type) {
4097    case em_82580:
4098    case em_i350:
4099    case em_i210:
4100        /* RX entries */
4101        /*
4102         * Note, this maps Queues into MSIX vectors, it works fine.
4103         * The funky calculation of offsets and checking if que->me is
4104         * odd is due to the weird register distribution, the datasheet
4105         * explains it well.
4106         */
4107        FOREACH_QUEUE(sc, que) {
4108            index = que->me >> 1;
4109            ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
4110            if (que->me & 1) {
4111                ivar &= 0xFF00FFFF;
4112                ivar |= (que->me | E1000_IVAR_VALID) << 16;
4113            } else {
4114                ivar &= 0xFFFFFF00;
4115                ivar |= que->me | E1000_IVAR_VALID;
4116            }
4117            E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
4118        }
4119
4120        /* TX entries */
4121        FOREACH_QUEUE(sc, que) {
4122            index = que->me >> 1;
4123            ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
4124            if (que->me & 1) {
4125                ivar &= 0x00FFFFFF;
4126                ivar |= (que->me | E1000_IVAR_VALID) << 24;
4127            } else {
4128                ivar &= 0xFFFF00FF;
4129                ivar |= (que->me | E1000_IVAR_VALID) << 8;
4130            }
4131            E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
4132            sc->msix_queuesmask |= que->eims;
4133        }
4134
4135        /* And for the link interrupt */
4136        ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
4137        sc->msix_linkmask = 1 << sc->msix_linkvec;
4138        E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
4139        break;
4140    case em_82576:
4141        /* RX entries */
4142        FOREACH_QUEUE(sc, que) {
4143            index = que->me & 0x7; /* Each IVAR has two entries */
4144            ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
4145            if (que->me < 8) {
4146                ivar &= 0xFFFFFF00;
4147                ivar |= que->me | E1000_IVAR_VALID;
4148            } else {
4149                ivar &= 0xFF00FFFF;
4150                ivar |= (que->me | E1000_IVAR_VALID) << 16;
4151            }
4152            E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
4153            sc->msix_queuesmask |= que->eims;
4154        }
4155        /* TX entries */
4156        FOREACH_QUEUE(sc, que) {
4157            index = que->me & 0x7; /* Each IVAR has two entries */
4158            ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
4159            if (que->me < 8) {
4160                ivar &= 0xFFFF00FF;
4161                ivar |= (que->me | E1000_IVAR_VALID) << 8;
4162            } else {
4163                ivar &= 0x00FFFFFF;
4164                ivar |= (que->me | E1000_IVAR_VALID) << 24;
4165            }
4166            E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
4167            sc->msix_queuesmask |= que->eims;
4168        }
4169
4170        /* And for the link interrupt */
4171        ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
4172        sc->msix_linkmask = 1 << sc->msix_linkvec;
4173        E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
4174        break;
4175    default:
4176        panic("unsupported mac");
4177        break;
4178    }
4179
4180    /* Set the starting interrupt rate */
4181    newitr = (4000000 / MAX_INTS_PER_SEC) & 0x7FFC;
4182
4183    if (sc->hw.mac_type == em_82575)
4184        newitr |= newitr << 16;
4185    else
4186        newitr |= E1000_EITR_CNT_IGNR;
4187
4188    FOREACH_QUEUE(sc, que)
4189        E1000_WRITE_REG(&sc->hw, EITR(que->me), newitr);
4190
4191    return (0);
4192}
4193
4194void
4195em_enable_queue_intr_msix(struct em_queue *que)
4196{
4197    E1000_WRITE_REG(&que->sc->hw, EIMS, que->eims);
4198}
4199#endif /* !SMALL_KERNEL */
4200
4201int
4202em_allocate_desc_rings(struct em_softc *sc)
4203{
4204    struct em_queue *que;
4205
4206    FOREACH_QUEUE(sc, que) {
4207        /* Allocate Transmit Descriptor ring */
4208        if (em_dma_malloc(sc, sc->sc_tx_slots * sizeof(struct em_tx_desc),
4209            &que->tx.sc_tx_dma) != 0) {
4210            printf("%s: Unable to allocate tx_desc memory\n",
4211                DEVNAME(sc));
4212            return (ENOMEM);
4213        }
4214        que->tx.sc_tx_desc_ring =
4215            (struct em_tx_desc *)que->tx.sc_tx_dma.dma_vaddr;
4216
4217        /* Allocate Receive Descriptor ring */
4218        if (em_dma_malloc(sc, sc->sc_rx_slots * sizeof(struct em_rx_desc),
4219            &que->rx.sc_rx_dma) != 0) {
4220            printf("%s: Unable to allocate rx_desc memory\n",
4221                DEVNAME(sc));
4222            return (ENOMEM);
4223        }
4224        que->rx.sc_rx_desc_ring =
4225            (struct em_rx_desc *)que->rx.sc_rx_dma.dma_vaddr;
4226    }
4227
4228    return (0);
4229}
4230
4231int
4232em_get_sffpage(struct em_softc *sc, struct if_sffpage *sff)
4233{
4234    struct em_hw *hw = &sc->hw;
4235    size_t i;
4236    int off;
4237
4238    if (hw->mac_type != em_82575 && hw->mac_type != em_82580 &&
4239        hw->mac_type != em_82576 &&
4240        hw->mac_type != em_i210 && hw->mac_type != em_i350)
4241        return (ENODEV);
4242
4243    if (sff->sff_addr == IFSFF_ADDR_EEPROM)
4244        off = E1000_I2CCMD_SFP_DATA_ADDR(0);
4245    else if (sff->sff_addr == IFSFF_ADDR_DDM)
4246        off = E1000_I2CCMD_SFP_DIAG_ADDR(0);
4247    else
4248        return (EIO);
4249
4250    for (i = 0; i < sizeof(sff->sff_data); i++) {
4251        if (em_read_sfp_data_byte(hw, off + i,
4252            &sff->sff_data[i]) != E1000_SUCCESS)
4253            return (EIO);
4254    }
4255
4256    return (0);
4257}
4258