/*  $OpenBSD: nvme.c,v 1.124 2024/10/08 19:41:23 kettenis Exp $ */

/*
 * Copyright (c) 2014 David Gwynne <[email protected]>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bio.h"

#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/queue.h>
#include <sys/mutex.h>
#include <sys/pool.h>
#include <sys/disk.h>

#include <sys/atomic.h>

#include <machine/bus.h>

#include <scsi/scsi_all.h>
#include <scsi/scsi_disk.h>
#include <scsi/scsiconf.h>
#include <scsi/sdvar.h>

#include <dev/biovar.h>
#include <dev/ic/nvmereg.h>
#include <dev/ic/nvmevar.h>
#include <dev/ic/nvmeio.h>

struct cfdriver nvme_cd = {
    NULL,
    "nvme",
    DV_DULL
};

int nvme_ready(struct nvme_softc *, u_int32_t);
int nvme_enable(struct nvme_softc *);
int nvme_disable(struct nvme_softc *);
int nvme_shutdown(struct nvme_softc *);
int nvme_resume(struct nvme_softc *);

void    nvme_dumpregs(struct nvme_softc *);
int nvme_identify(struct nvme_softc *, u_int);
void    nvme_fill_identify(struct nvme_softc *, struct nvme_ccb *, void *);

#ifndef SMALL_KERNEL
void    nvme_refresh_sensors(void *);
#endif

int nvme_ccbs_alloc(struct nvme_softc *, u_int);
void    nvme_ccbs_free(struct nvme_softc *, u_int);

void *  nvme_ccb_get(void *);
void    nvme_ccb_put(void *, void *);

int nvme_poll(struct nvme_softc *, struct nvme_queue *, struct nvme_ccb *,
        void (*)(struct nvme_softc *, struct nvme_ccb *, void *), u_int32_t);
void    nvme_poll_fill(struct nvme_softc *, struct nvme_ccb *, void *);
void    nvme_poll_done(struct nvme_softc *, struct nvme_ccb *,
        struct nvme_cqe *);
void    nvme_sqe_fill(struct nvme_softc *, struct nvme_ccb *, void *);
void    nvme_empty_done(struct nvme_softc *, struct nvme_ccb *,
        struct nvme_cqe *);

struct nvme_queue *
    nvme_q_alloc(struct nvme_softc *, u_int16_t, u_int, u_int);
int nvme_q_create(struct nvme_softc *, struct nvme_queue *);
int nvme_q_reset(struct nvme_softc *, struct nvme_queue *);
int nvme_q_delete(struct nvme_softc *, struct nvme_queue *);
void    nvme_q_submit(struct nvme_softc *,
        struct nvme_queue *, struct nvme_ccb *,
        void (*)(struct nvme_softc *, struct nvme_ccb *, void *));
int nvme_q_complete(struct nvme_softc *, struct nvme_queue *);
void    nvme_q_free(struct nvme_softc *, struct nvme_queue *);

void    nvme_scsi_cmd(struct scsi_xfer *);
void    nvme_minphys(struct buf *, struct scsi_link *);
int nvme_scsi_probe(struct scsi_link *);
void    nvme_scsi_free(struct scsi_link *);
uint64_t nvme_scsi_size(const struct nvm_identify_namespace *);
int nvme_scsi_ioctl(struct scsi_link *, u_long, caddr_t, int);
int nvme_passthrough_cmd(struct nvme_softc *, struct nvme_pt_cmd *,
    int, int);

#ifdef HIBERNATE
#include <uvm/uvm_extern.h>
#include <sys/hibernate.h>
#include <sys/disklabel.h>

int nvme_hibernate_io(dev_t, daddr_t, vaddr_t, size_t, int, void *);
#endif

#if NBIO > 0
void    nvme_bio_status(struct bio_status *, const char *, ...);

const char *nvme_bioctl_sdname(const struct nvme_softc *, int);

int nvme_bioctl(struct device *, u_long, caddr_t);
int nvme_bioctl_inq(struct nvme_softc *, struct bioc_inq *);
int nvme_bioctl_vol(struct nvme_softc *, struct bioc_vol *);
int nvme_bioctl_disk(struct nvme_softc *, struct bioc_disk *);
#endif  /* NBIO > 0 */

const struct scsi_adapter nvme_switch = {
    nvme_scsi_cmd, nvme_minphys, nvme_scsi_probe, nvme_scsi_free,
    nvme_scsi_ioctl
};

void    nvme_scsi_io(struct scsi_xfer *, int);
void    nvme_scsi_io_fill(struct nvme_softc *, struct nvme_ccb *, void *);
void    nvme_scsi_io_done(struct nvme_softc *, struct nvme_ccb *,
        struct nvme_cqe *);

void    nvme_scsi_sync(struct scsi_xfer *);
void    nvme_scsi_sync_fill(struct nvme_softc *, struct nvme_ccb *, void *);
void    nvme_scsi_sync_done(struct nvme_softc *, struct nvme_ccb *,
        struct nvme_cqe *);

void    nvme_scsi_inq(struct scsi_xfer *);
void    nvme_scsi_inquiry(struct scsi_xfer *);
void    nvme_scsi_capacity16(struct scsi_xfer *);
void    nvme_scsi_capacity(struct scsi_xfer *);

uint32_t    nvme_op_sq_enter(struct nvme_softc *,
            struct nvme_queue *, struct nvme_ccb *);
void        nvme_op_sq_leave(struct nvme_softc *,
            struct nvme_queue *, struct nvme_ccb *);
uint32_t    nvme_op_sq_enter_locked(struct nvme_softc *,
            struct nvme_queue *, struct nvme_ccb *);
void        nvme_op_sq_leave_locked(struct nvme_softc *,
            struct nvme_queue *, struct nvme_ccb *);

void        nvme_op_cq_done(struct nvme_softc *,
            struct nvme_queue *, struct nvme_ccb *);

static const struct nvme_ops nvme_ops = {
    .op_sq_enter        = nvme_op_sq_enter,
    .op_sq_leave        = nvme_op_sq_leave,
    .op_sq_enter_locked = nvme_op_sq_enter_locked,
    .op_sq_leave_locked = nvme_op_sq_leave_locked,

    .op_cq_done     = nvme_op_cq_done,
};

#define NVME_TIMO_QOP           5000    /* ms to create/delete queue */
#define NVME_TIMO_PT            5000    /* ms to complete passthrough */
#define NVME_TIMO_IDENT         10000   /* ms to probe/identify */
#define NVME_TIMO_LOG_PAGE      5000    /* ms to read log pages */
#define NVME_TIMO_DELAYNS       10  /* ns to delay() in poll loop */

/*
 * Some controllers, at least Apple NVMe, always require split
 * transfers, so don't use bus_space_{read,write}_8() on LP64.
 */
u_int64_t
nvme_read8(struct nvme_softc *sc, bus_size_t r)
{
    u_int64_t v;

    v = (u_int64_t)nvme_read4(sc, r) |
        (u_int64_t)nvme_read4(sc, r + 4) << 32;

    return (v);
}

void
nvme_write8(struct nvme_softc *sc, bus_size_t r, u_int64_t v)
{
    nvme_write4(sc, r, v);
    nvme_write4(sc, r + 4, v >> 32);
}

void
nvme_dumpregs(struct nvme_softc *sc)
{
    u_int64_t r8;
    u_int32_t r4;

    r8 = nvme_read8(sc, NVME_CAP);
    printf("%s: cap  0x%016llx\n", DEVNAME(sc), nvme_read8(sc, NVME_CAP));
    printf("%s:  mpsmax %u (%u)\n", DEVNAME(sc),
        (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8)));
    printf("%s:  mpsmin %u (%u)\n", DEVNAME(sc),
        (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8)));
    printf("%s:  css %llu\n", DEVNAME(sc), NVME_CAP_CSS(r8));
    printf("%s:  nssrs %llu\n", DEVNAME(sc), NVME_CAP_NSSRS(r8));
    printf("%s:  dstrd %u\n", DEVNAME(sc), NVME_CAP_DSTRD(r8));
    printf("%s:  to %llu msec\n", DEVNAME(sc), NVME_CAP_TO(r8));
    printf("%s:  ams %llu\n", DEVNAME(sc), NVME_CAP_AMS(r8));
    printf("%s:  cqr %llu\n", DEVNAME(sc), NVME_CAP_CQR(r8));
    printf("%s:  mqes %llu\n", DEVNAME(sc), NVME_CAP_MQES(r8));

    printf("%s: vs   0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS));

    r4 = nvme_read4(sc, NVME_CC);
    printf("%s: cc   0x%04x\n", DEVNAME(sc), r4);
    printf("%s:  iocqes %u\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4));
    printf("%s:  iosqes %u\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4));
    printf("%s:  shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4));
    printf("%s:  ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4));
    printf("%s:  mps %u\n", DEVNAME(sc), NVME_CC_MPS_R(r4));
    printf("%s:  css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4));
    printf("%s:  en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN));

    printf("%s: csts 0x%08x\n", DEVNAME(sc), nvme_read4(sc, NVME_CSTS));
    printf("%s: aqa  0x%08x\n", DEVNAME(sc), nvme_read4(sc, NVME_AQA));
    printf("%s: asq  0x%016llx\n", DEVNAME(sc), nvme_read8(sc, NVME_ASQ));
    printf("%s: acq  0x%016llx\n", DEVNAME(sc), nvme_read8(sc, NVME_ACQ));
}

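/*
 * Wait for CSTS.RDY to match the expected state, polling every
 * millisecond.  sc_rdy_to is derived from the ready timeout the
 * controller advertises in CAP.TO, so a wedged controller fails the
 * enable/disable instead of hanging attach forever.
 */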
int
nvme_ready(struct nvme_softc *sc, u_int32_t rdy)
{
    u_int i = 0;

    while ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) != rdy) {
        if (i++ > sc->sc_rdy_to)
            return (1);

        delay(1000);
        nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
    }

    return (0);
}

int
nvme_enable(struct nvme_softc *sc)
{
    u_int32_t cc;

    cc = nvme_read4(sc, NVME_CC);
    if (ISSET(cc, NVME_CC_EN))
        return (nvme_ready(sc, NVME_CSTS_RDY));

    if (sc->sc_ops->op_enable != NULL)
        sc->sc_ops->op_enable(sc);

    nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) |
        NVME_AQA_ASQS(sc->sc_admin_q->q_entries));
    nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);

    nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem));
    nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
    nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem));
    nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);

    CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK |
        NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK);
    SET(cc, NVME_CC_IOSQES(6)); /* Submission queue entry size == 2**6 (64) */
    SET(cc, NVME_CC_IOCQES(4)); /* Completion queue entry size == 2**4 (16) */
    SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE));
    SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM));
    SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR));
    SET(cc, NVME_CC_MPS(ffs(sc->sc_mps) - 1));
    SET(cc, NVME_CC_EN);

    nvme_write4(sc, NVME_CC, cc);
    nvme_barrier(sc, 0, sc->sc_ios,
        BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);

    return (nvme_ready(sc, NVME_CSTS_RDY));
}

int
nvme_disable(struct nvme_softc *sc)
{
    u_int32_t cc, csts;

    cc = nvme_read4(sc, NVME_CC);
    if (ISSET(cc, NVME_CC_EN)) {
        csts = nvme_read4(sc, NVME_CSTS);
        if (!ISSET(csts, NVME_CSTS_CFS) &&
            nvme_ready(sc, NVME_CSTS_RDY) != 0)
            return (1);
    }

    CLR(cc, NVME_CC_EN);

    nvme_write4(sc, NVME_CC, cc);
    nvme_barrier(sc, 0, sc->sc_ios,
        BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);

    return (nvme_ready(sc, 0));
}

int
nvme_attach(struct nvme_softc *sc)
{
    struct scsibus_attach_args saa;
    u_int64_t cap;
    u_int32_t reg;
    u_int nccbs = 0;

    mtx_init(&sc->sc_ccb_mtx, IPL_BIO);
    rw_init(&sc->sc_lock, "nvme_lock");
    SIMPLEQ_INIT(&sc->sc_ccb_list);
    scsi_iopool_init(&sc->sc_iopool, sc, nvme_ccb_get, nvme_ccb_put);
    if (sc->sc_ops == NULL)
        sc->sc_ops = &nvme_ops;
    if (sc->sc_openings == 0)
        sc->sc_openings = 64;

    reg = nvme_read4(sc, NVME_VS);
    if (reg == 0xffffffff) {
        printf("invalid mapping\n");
        return (1);
    }

    printf("NVMe %d.%d\n", NVME_VS_MJR(reg), NVME_VS_MNR(reg));

    cap = nvme_read8(sc, NVME_CAP);
    sc->sc_dstrd = NVME_CAP_DSTRD(cap);
    if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) {
        printf("%s: NVMe minimum page size %u "
            "is greater than CPU page size %u\n", DEVNAME(sc),
            1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT);
        return (1);
    }
    if (NVME_CAP_MPSMAX(cap) < PAGE_SHIFT)
        sc->sc_mps = 1 << NVME_CAP_MPSMAX(cap);
    else
        sc->sc_mps = 1 << PAGE_SHIFT;

    sc->sc_rdy_to = NVME_CAP_TO(cap);
    sc->sc_mdts = MAXPHYS;
    sc->sc_max_prpl = sc->sc_mdts / sc->sc_mps;

    if (nvme_disable(sc) != 0) {
        printf("%s: unable to disable controller\n", DEVNAME(sc));
        return (1);
    }

    sc->sc_admin_q = nvme_q_alloc(sc, NVME_ADMIN_Q, 128, sc->sc_dstrd);
    if (sc->sc_admin_q == NULL) {
        printf("%s: unable to allocate admin queue\n", DEVNAME(sc));
        return (1);
    }

    if (nvme_ccbs_alloc(sc, 16) != 0) {
        printf("%s: unable to allocate initial ccbs\n", DEVNAME(sc));
        goto free_admin_q;
    }
    nccbs = 16;

    if (nvme_enable(sc) != 0) {
        printf("%s: unable to enable controller\n", DEVNAME(sc));
        goto free_ccbs;
    }

    if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) {
        printf("%s: unable to identify controller\n", DEVNAME(sc));
        goto disable;
    }

    /* We now know the real values of sc_mdts and sc_max_prpl. */
    nvme_ccbs_free(sc, nccbs);
    if (nvme_ccbs_alloc(sc, 64) != 0) {
        printf("%s: unable to allocate ccbs\n", DEVNAME(sc));
        goto free_admin_q;
    }
    nccbs = 64;

    sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd);
    if (sc->sc_q == NULL) {
        printf("%s: unable to allocate io q\n", DEVNAME(sc));
        goto disable;
    }

    if (nvme_q_create(sc, sc->sc_q) != 0) {
        printf("%s: unable to create io q\n", DEVNAME(sc));
        goto free_q;
    }

#ifdef HIBERNATE
    sc->sc_hib_q = nvme_q_alloc(sc, NVME_HIB_Q, 4, sc->sc_dstrd);
    if (sc->sc_hib_q == NULL) {
        printf("%s: unable to allocate hibernate io queue\n",
            DEVNAME(sc));
        goto free_q;
    }
#endif

    nvme_write4(sc, NVME_INTMC, 1);

    sc->sc_namespaces = mallocarray(sc->sc_nn + 1,
        sizeof(*sc->sc_namespaces), M_DEVBUF, M_WAITOK|M_ZERO);

    saa.saa_adapter = &nvme_switch;
    saa.saa_adapter_softc = sc;
    saa.saa_adapter_buswidth = sc->sc_nn + 1;
    saa.saa_luns = 1;
    saa.saa_adapter_target = 0;
    saa.saa_openings = sc->sc_openings;
    saa.saa_pool = &sc->sc_iopool;
    saa.saa_quirks = saa.saa_flags = 0;
    saa.saa_wwpn = saa.saa_wwnn = 0;

#ifndef SMALL_KERNEL
    strlcpy(sc->sc_sensordev.xname, DEVNAME(sc),
        sizeof(sc->sc_sensordev.xname));

    sc->sc_temp_sensor.type = SENSOR_TEMP;
    sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN;
    sensor_attach(&sc->sc_sensordev, &sc->sc_temp_sensor);

    sc->sc_usage_sensor.type = SENSOR_PERCENT;
    sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN;
    strlcpy(sc->sc_usage_sensor.desc, "endurance used",
        sizeof(sc->sc_usage_sensor.desc));
    sensor_attach(&sc->sc_sensordev, &sc->sc_usage_sensor);

    sc->sc_spare_sensor.type = SENSOR_PERCENT;
    sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN;
    strlcpy(sc->sc_spare_sensor.desc, "available spare",
        sizeof(sc->sc_spare_sensor.desc));
    sensor_attach(&sc->sc_sensordev, &sc->sc_spare_sensor);

    if (sensor_task_register(sc, nvme_refresh_sensors, 60) == NULL)
        goto free_q;

    sensordev_install(&sc->sc_sensordev);
#endif

    sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
        &saa, scsiprint);
#if NBIO > 0
    if (bio_register(&sc->sc_dev, nvme_bioctl) != 0)
        printf("%s: unable to register bioctl\n", DEVNAME(sc));
#endif  /* NBIO > 0 */

    return (0);

free_q:
    nvme_q_free(sc, sc->sc_q);
disable:
    nvme_disable(sc);
free_ccbs:
    nvme_ccbs_free(sc, nccbs);
free_admin_q:
    nvme_q_free(sc, sc->sc_admin_q);

    return (1);
}

int
nvme_resume(struct nvme_softc *sc)
{
    if (nvme_disable(sc) != 0) {
        printf("%s: unable to disable controller\n", DEVNAME(sc));
        return (1);
    }

    if (nvme_q_reset(sc, sc->sc_admin_q) != 0) {
        printf("%s: unable to reset admin queue\n", DEVNAME(sc));
        return (1);
    }

    if (nvme_enable(sc) != 0) {
        printf("%s: unable to enable controller\n", DEVNAME(sc));
        return (1);
    }

    sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd);
    if (sc->sc_q == NULL) {
        printf("%s: unable to allocate io q\n", DEVNAME(sc));
        goto disable;
    }

    if (nvme_q_create(sc, sc->sc_q) != 0) {
        printf("%s: unable to create io q\n", DEVNAME(sc));
        goto free_q;
    }

    nvme_write4(sc, NVME_INTMC, 1);

    return (0);

free_q:
    nvme_q_free(sc, sc->sc_q);
disable:
    nvme_disable(sc);

    return (1);
}

int
nvme_scsi_probe(struct scsi_link *link)
{
    struct nvme_softc *sc = link->bus->sb_adapter_softc;
    struct nvme_sqe sqe;
    struct nvm_identify_namespace *identify;
    struct nvme_dmamem *mem;
    struct nvme_ccb *ccb;
    int rv;

    ccb = scsi_io_get(&sc->sc_iopool, 0);
    KASSERT(ccb != NULL);

    mem = nvme_dmamem_alloc(sc, sizeof(*identify));
    if (mem == NULL)
        return (ENOMEM);

    memset(&sqe, 0, sizeof(sqe));
    sqe.opcode = NVM_ADMIN_IDENTIFY;
    htolem32(&sqe.nsid, link->target);
    htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
    htolem32(&sqe.cdw10, 0);

    ccb->ccb_done = nvme_empty_done;
    ccb->ccb_cookie = &sqe;

    nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
    rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_IDENT);
    nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);

    scsi_io_put(&sc->sc_iopool, ccb);

    identify = NVME_DMA_KVA(mem);
    if (rv == 0) {
        if (nvme_scsi_size(identify) > 0) {
            /* Commit namespace if it has a size greater than zero. */
            identify = malloc(sizeof(*identify), M_DEVBUF, M_WAITOK);
            memcpy(identify, NVME_DMA_KVA(mem), sizeof(*identify));
            sc->sc_namespaces[link->target].ident = identify;
        } else {
            /* Don't attach a namespace if its size is zero. */
            rv = ENXIO;
        }
    }

    nvme_dmamem_free(sc, mem);

    return (rv);
}

int
nvme_shutdown(struct nvme_softc *sc)
{
    u_int32_t cc, csts;
    int i;

    nvme_write4(sc, NVME_INTMC, 0);

    if (nvme_q_delete(sc, sc->sc_q) != 0) {
        printf("%s: unable to delete q, disabling\n", DEVNAME(sc));
        goto disable;
    }

    cc = nvme_read4(sc, NVME_CC);
    CLR(cc, NVME_CC_SHN_MASK);
    SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL));
    nvme_write4(sc, NVME_CC, cc);

    for (i = 0; i < 4000; i++) {
        nvme_barrier(sc, 0, sc->sc_ios,
            BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
        csts = nvme_read4(sc, NVME_CSTS);
        if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE)
            return (0);

        delay(1000);
    }

    printf("%s: unable to shutdown, disabling\n", DEVNAME(sc));

disable:
    nvme_disable(sc);
    return (0);
}

int
nvme_activate(struct nvme_softc *sc, int act)
{
    int rv;

    switch (act) {
    case DVACT_POWERDOWN:
        rv = config_activate_children(&sc->sc_dev, act);
        nvme_shutdown(sc);
        break;
    case DVACT_RESUME:
        rv = nvme_resume(sc);
        if (rv == 0)
            rv = config_activate_children(&sc->sc_dev, act);
        break;
    default:
        rv = config_activate_children(&sc->sc_dev, act);
        break;
    }

    return (rv);
}

void
nvme_scsi_cmd(struct scsi_xfer *xs)
{
    switch (xs->cmd.opcode) {
    case READ_COMMAND:
    case READ_10:
    case READ_12:
    case READ_16:
        nvme_scsi_io(xs, SCSI_DATA_IN);
        return;
    case WRITE_COMMAND:
    case WRITE_10:
    case WRITE_12:
    case WRITE_16:
        nvme_scsi_io(xs, SCSI_DATA_OUT);
        return;

    case SYNCHRONIZE_CACHE:
        nvme_scsi_sync(xs);
        return;

    case INQUIRY:
        nvme_scsi_inq(xs);
        return;
    case READ_CAPACITY_16:
        nvme_scsi_capacity16(xs);
        return;
    case READ_CAPACITY:
        nvme_scsi_capacity(xs);
        return;

    case TEST_UNIT_READY:
    case PREVENT_ALLOW:
    case START_STOP:
        xs->error = XS_NOERROR;
        scsi_done(xs);
        return;

    default:
        break;
    }

    xs->error = XS_DRIVER_STUFFUP;
    scsi_done(xs);
}

void
nvme_minphys(struct buf *bp, struct scsi_link *link)
{
    struct nvme_softc *sc = link->bus->sb_adapter_softc;

    if (bp->b_bcount > sc->sc_mdts)
        bp->b_bcount = sc->sc_mdts;
}

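/*
 * Read/write path.  The first PRP entry always lives in the SQE
 * itself; a transfer with two DMA segments can use the SQE's second
 * PRP slot directly, while anything larger points that slot at this
 * ccb's slice of the shared PRP list, which is filled and synced
 * below before the command is submitted.
 */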
void
nvme_scsi_io(struct scsi_xfer *xs, int dir)
{
    struct scsi_link *link = xs->sc_link;
    struct nvme_softc *sc = link->bus->sb_adapter_softc;
    struct nvme_ccb *ccb = xs->io;
    bus_dmamap_t dmap = ccb->ccb_dmamap;
    int i;

    if ((xs->flags & (SCSI_DATA_IN|SCSI_DATA_OUT)) != dir)
        goto stuffup;

    ccb->ccb_done = nvme_scsi_io_done;
    ccb->ccb_cookie = xs;

    if (bus_dmamap_load(sc->sc_dmat, dmap,
        xs->data, xs->datalen, NULL, ISSET(xs->flags, SCSI_NOSLEEP) ?
        BUS_DMA_NOWAIT : BUS_DMA_WAITOK) != 0)
        goto stuffup;

    bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
        ISSET(xs->flags, SCSI_DATA_IN) ?
        BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);

    if (dmap->dm_nsegs > 2) {
        for (i = 1; i < dmap->dm_nsegs; i++) {
            htolem64(&ccb->ccb_prpl[i - 1],
                dmap->dm_segs[i].ds_addr);
        }
        bus_dmamap_sync(sc->sc_dmat,
            NVME_DMA_MAP(sc->sc_ccb_prpls),
            ccb->ccb_prpl_off,
            sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
            BUS_DMASYNC_PREWRITE);
    }

    if (ISSET(xs->flags, SCSI_POLL)) {
        nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_io_fill, xs->timeout);
        return;
    }

    nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_io_fill);
    return;

stuffup:
    xs->error = XS_DRIVER_STUFFUP;
    scsi_done(xs);
}

void
nvme_scsi_io_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
    struct nvme_sqe_io *sqe = slot;
    struct scsi_xfer *xs = ccb->ccb_cookie;
    struct scsi_link *link = xs->sc_link;
    bus_dmamap_t dmap = ccb->ccb_dmamap;
    u_int64_t lba;
    u_int32_t blocks;

    scsi_cmd_rw_decode(&xs->cmd, &lba, &blocks);

    sqe->opcode = ISSET(xs->flags, SCSI_DATA_IN) ?
        NVM_CMD_READ : NVM_CMD_WRITE;
    htolem32(&sqe->nsid, link->target);

    htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr);
    switch (dmap->dm_nsegs) {
    case 1:
        break;
    case 2:
        htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr);
        break;
    default:
        /* the prp list is already set up and synced */
        htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva);
        break;
    }

    htolem64(&sqe->slba, lba);
    htolem16(&sqe->nlb, blocks - 1);
}

void
nvme_scsi_io_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
    struct scsi_xfer *xs = ccb->ccb_cookie;
    bus_dmamap_t dmap = ccb->ccb_dmamap;
    u_int16_t flags;

    if (dmap->dm_nsegs > 2) {
        bus_dmamap_sync(sc->sc_dmat,
            NVME_DMA_MAP(sc->sc_ccb_prpls),
            ccb->ccb_prpl_off,
            sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
            BUS_DMASYNC_POSTWRITE);
    }

    bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
        ISSET(xs->flags, SCSI_DATA_IN) ?
        BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);

    bus_dmamap_unload(sc->sc_dmat, dmap);

    flags = lemtoh16(&cqe->flags);

    xs->error = (NVME_CQE_SC(flags) == NVME_CQE_SC_SUCCESS) ?
        XS_NOERROR : XS_DRIVER_STUFFUP;
    xs->status = SCSI_OK;
    xs->resid = 0;
    scsi_done(xs);
}

void
nvme_scsi_sync(struct scsi_xfer *xs)
{
    struct scsi_link *link = xs->sc_link;
    struct nvme_softc *sc = link->bus->sb_adapter_softc;
    struct nvme_ccb *ccb = xs->io;

    ccb->ccb_done = nvme_scsi_sync_done;
    ccb->ccb_cookie = xs;

    if (ISSET(xs->flags, SCSI_POLL)) {
        nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_sync_fill, xs->timeout);
        return;
    }

    nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_sync_fill);
}

void
nvme_scsi_sync_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
    struct nvme_sqe *sqe = slot;
    struct scsi_xfer *xs = ccb->ccb_cookie;
    struct scsi_link *link = xs->sc_link;

    sqe->opcode = NVM_CMD_FLUSH;
    htolem32(&sqe->nsid, link->target);
}

void
nvme_scsi_sync_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
    struct scsi_xfer *xs = ccb->ccb_cookie;
    u_int16_t flags;

    flags = lemtoh16(&cqe->flags);

    xs->error = (NVME_CQE_SC(flags) == NVME_CQE_SC_SUCCESS) ?
        XS_NOERROR : XS_DRIVER_STUFFUP;
    xs->status = SCSI_OK;
    xs->resid = 0;
    scsi_done(xs);
}

void
nvme_scsi_inq(struct scsi_xfer *xs)
{
    struct scsi_inquiry *inq = (struct scsi_inquiry *)&xs->cmd;

    if (!ISSET(inq->flags, SI_EVPD)) {
        nvme_scsi_inquiry(xs);
        return;
    }

    switch (inq->pagecode) {
    default:
        /* printf("%s: %d\n", __func__, inq->pagecode); */
        break;
    }

    xs->error = XS_DRIVER_STUFFUP;
    scsi_done(xs);
}

void
nvme_scsi_inquiry(struct scsi_xfer *xs)
{
    struct scsi_inquiry_data inq;
    struct scsi_link *link = xs->sc_link;
    struct nvme_softc *sc = link->bus->sb_adapter_softc;
    struct nvm_identify_namespace *ns;

    ns = sc->sc_namespaces[link->target].ident;

    memset(&inq, 0, sizeof(inq));

    inq.device = T_DIRECT;
    inq.version = SCSI_REV_SPC4;
    inq.response_format = SID_SCSI2_RESPONSE;
    inq.additional_length = SID_SCSI2_ALEN;
    inq.flags |= SID_CmdQue;
    memcpy(inq.vendor, "NVMe    ", sizeof(inq.vendor));
    memcpy(inq.product, sc->sc_identify.mn, sizeof(inq.product));
    memcpy(inq.revision, sc->sc_identify.fr, sizeof(inq.revision));

    scsi_copy_internal_data(xs, &inq, sizeof(inq));

    xs->error = XS_NOERROR;
    scsi_done(xs);
}

void
nvme_scsi_capacity16(struct scsi_xfer *xs)
{
    struct scsi_read_cap_data_16 rcd;
    struct scsi_link *link = xs->sc_link;
    struct nvme_softc *sc = link->bus->sb_adapter_softc;
    struct nvm_identify_namespace *ns;
    struct nvm_namespace_format *f;
    u_int64_t addr;
    u_int16_t tpe = READ_CAP_16_TPE;

    ns = sc->sc_namespaces[link->target].ident;

    if (xs->cmdlen != sizeof(struct scsi_read_capacity_16)) {
        xs->error = XS_DRIVER_STUFFUP;
        scsi_done(xs);
        return;
    }

    addr = nvme_scsi_size(ns) - 1;
    f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)];

    memset(&rcd, 0, sizeof(rcd));
    _lto8b(addr, rcd.addr);
    _lto4b(1 << f->lbads, rcd.length);
    _lto2b(tpe, rcd.lowest_aligned);

    memcpy(xs->data, &rcd, MIN(sizeof(rcd), xs->datalen));

    xs->error = XS_NOERROR;
    scsi_done(xs);
}

void
nvme_scsi_capacity(struct scsi_xfer *xs)
{
    struct scsi_read_cap_data rcd;
    struct scsi_link *link = xs->sc_link;
    struct nvme_softc *sc = link->bus->sb_adapter_softc;
    struct nvm_identify_namespace *ns;
    struct nvm_namespace_format *f;
    u_int64_t addr;

    ns = sc->sc_namespaces[link->target].ident;

    if (xs->cmdlen != sizeof(struct scsi_read_capacity)) {
        xs->error = XS_DRIVER_STUFFUP;
        scsi_done(xs);
        return;
    }

    addr = nvme_scsi_size(ns) - 1;
    if (addr > 0xffffffff)
        addr = 0xffffffff;

    f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)];

    memset(&rcd, 0, sizeof(rcd));
    _lto4b(addr, rcd.addr);
    _lto4b(1 << f->lbads, rcd.length);

    memcpy(xs->data, &rcd, MIN(sizeof(rcd), xs->datalen));

    xs->error = XS_NOERROR;
    scsi_done(xs);
}

void
nvme_scsi_free(struct scsi_link *link)
{
    struct nvme_softc *sc = link->bus->sb_adapter_softc;
    struct nvm_identify_namespace *identify;

    identify = sc->sc_namespaces[link->target].ident;
    sc->sc_namespaces[link->target].ident = NULL;

    free(identify, M_DEVBUF, sizeof(*identify));
}

uint64_t
nvme_scsi_size(const struct nvm_identify_namespace *ns)
{
    uint64_t        ncap, nsze;

    ncap = lemtoh64(&ns->ncap); /* Max allowed allocation. */
    nsze = lemtoh64(&ns->nsze);

    if ((ns->nsfeat & NVME_ID_NS_NSFEAT_THIN_PROV) && ncap < nsze)
        return ncap;
    else
        return nsze;
}

int
nvme_passthrough_cmd(struct nvme_softc *sc, struct nvme_pt_cmd *pt, int dv_unit,
    int nsid)
{
    struct nvme_pt_status        pt_status;
    struct nvme_sqe          sqe;
    struct nvme_dmamem      *mem = NULL;
    struct nvme_ccb         *ccb = NULL;
    int              flags;
    int              rv = 0;

    ccb = nvme_ccb_get(sc);
    if (ccb == NULL)
        panic("nvme_passthrough_cmd: nvme_ccb_get returned NULL");

    memset(&sqe, 0, sizeof(sqe));
    sqe.opcode = pt->pt_opcode;
    htolem32(&sqe.nsid, pt->pt_nsid);
    htolem32(&sqe.cdw10, pt->pt_cdw10);
    htolem32(&sqe.cdw11, pt->pt_cdw11);
    htolem32(&sqe.cdw12, pt->pt_cdw12);
    htolem32(&sqe.cdw13, pt->pt_cdw13);
    htolem32(&sqe.cdw14, pt->pt_cdw14);
    htolem32(&sqe.cdw15, pt->pt_cdw15);

    ccb->ccb_done = nvme_empty_done;
    ccb->ccb_cookie = &sqe;

    switch (pt->pt_opcode) {
    case NVM_ADMIN_IDENTIFY:
    case NVM_ADMIN_GET_LOG_PG:
    case NVM_ADMIN_SELFTEST:
        break;

    default:
        rv = ENOTTY;
        goto done;
    }

    if (pt->pt_databuflen > 0) {
        mem = nvme_dmamem_alloc(sc, pt->pt_databuflen);
        if (mem == NULL) {
            rv = ENOMEM;
            goto done;
        }
        htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
        nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
    }

    flags = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_PT);

    if (pt->pt_databuflen > 0) {
        nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
        if (flags == 0)
            rv = copyout(NVME_DMA_KVA(mem), pt->pt_databuf,
                pt->pt_databuflen);
    }

    if (rv == 0 && pt->pt_statuslen > 0) {
        pt_status.ps_dv_unit = dv_unit;
        pt_status.ps_nsid = nsid;
        pt_status.ps_flags = flags;
        pt_status.ps_cc = nvme_read4(sc, NVME_CC);
        pt_status.ps_csts = nvme_read4(sc, NVME_CSTS);
        rv = copyout(&pt_status, pt->pt_status, pt->pt_statuslen);
    }

 done:
    if (mem)
        nvme_dmamem_free(sc, mem);
    if (ccb)
        nvme_ccb_put(sc, ccb);

    return rv;
}

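/*
 * A minimal userland sketch of NVME_PASSTHROUGH_CMD (hypothetical fd
 * and buffer, assuming the kernel nvme headers are available; fields
 * as consumed by nvme_passthrough_cmd() above):
 *
 *    struct nvme_pt_cmd pt;
 *    char buf[4096];
 *
 *    memset(&pt, 0, sizeof(pt));
 *    pt.pt_opcode = NVM_ADMIN_IDENTIFY;
 *    pt.pt_cdw10 = 1;                   -- CNS 1: identify controller
 *    pt.pt_databuf = (caddr_t)buf;
 *    pt.pt_databuflen = sizeof(buf);
 *    if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) == -1)
 *        err(1, "NVME_PASSTHROUGH_CMD");
 */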
int
nvme_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag)
{
    struct nvme_softc       *sc = link->bus->sb_adapter_softc;
    struct nvme_pt_cmd      *pt = (struct nvme_pt_cmd *)addr;
    int              rv;

    switch (cmd) {
    case NVME_PASSTHROUGH_CMD:
        break;
    default:
        return ENOTTY;
    }

    if ((pt->pt_cdw10 & 0xff) == 0)
        pt->pt_nsid = link->target;

    rv = nvme_passthrough_cmd(sc, pt, sc->sc_dev.dv_unit, link->target);
    if (rv)
        goto done;

 done:
    return rv;
}

uint32_t
nvme_op_sq_enter(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
    mtx_enter(&q->q_sq_mtx);
    return (nvme_op_sq_enter_locked(sc, q, ccb));
}

uint32_t
nvme_op_sq_enter_locked(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
    return (q->q_sq_tail);
}

void
nvme_op_sq_leave_locked(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
    uint32_t tail;

    tail = ++q->q_sq_tail;
    if (tail >= q->q_entries)
        tail = 0;
    q->q_sq_tail = tail;
    nvme_write4(sc, q->q_sqtdbl, tail);
}

void
nvme_op_sq_leave(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
    nvme_op_sq_leave_locked(sc, q, ccb);
    mtx_leave(&q->q_sq_mtx);
}

void
nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
    void (*fill)(struct nvme_softc *, struct nvme_ccb *, void *))
{
    struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem);
    u_int32_t tail;

    tail = sc->sc_ops->op_sq_enter(sc, q, ccb);

    sqe += tail;

    bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
        sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
    memset(sqe, 0, sizeof(*sqe));
    (*fill)(sc, ccb, sqe);
    sqe->cid = ccb->ccb_id;
    bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
        sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);

    sc->sc_ops->op_sq_leave(sc, q, ccb);
}

struct nvme_poll_state {
    struct nvme_sqe s;
    struct nvme_cqe c;
};

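/*
 * nvme_poll() runs a command synchronously: it borrows the ccb's done
 * handler and cookie, submits a copy of the caller's SQE and spins on
 * nvme_q_complete() until nvme_poll_done() stashes the CQE into the
 * state and marks it with the phase bit.  A timeout of 0 means poll
 * forever.
 */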
int
nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
    void (*fill)(struct nvme_softc *, struct nvme_ccb *, void *), u_int32_t ms)
{
    struct nvme_poll_state state;
    void (*done)(struct nvme_softc *, struct nvme_ccb *, struct nvme_cqe *);
    void *cookie;
    int64_t us;
    u_int16_t flags;

    memset(&state, 0, sizeof(state));
    (*fill)(sc, ccb, &state.s);

    done = ccb->ccb_done;
    cookie = ccb->ccb_cookie;

    ccb->ccb_done = nvme_poll_done;
    ccb->ccb_cookie = &state;

    nvme_q_submit(sc, q, ccb, nvme_poll_fill);
    for (us = ms * 1000; ms == 0 || us > 0; us -= NVME_TIMO_DELAYNS) {
        if (ISSET(state.c.flags, htole16(NVME_CQE_PHASE)))
            break;
        if (nvme_q_complete(sc, q) == 0)
            delay(NVME_TIMO_DELAYNS);
        nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
    }

    ccb->ccb_cookie = cookie;
    done(sc, ccb, &state.c);

    flags = lemtoh16(&state.c.flags);

    return (flags & ~NVME_CQE_PHASE);
}

void
nvme_poll_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
    struct nvme_sqe *sqe = slot;
    struct nvme_poll_state *state = ccb->ccb_cookie;

    *sqe = state->s;
}

void
nvme_poll_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
    struct nvme_poll_state *state = ccb->ccb_cookie;

    state->c = *cqe;
    SET(state->c.flags, htole16(NVME_CQE_PHASE));
}

void
nvme_sqe_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
    struct nvme_sqe *src = ccb->ccb_cookie;
    struct nvme_sqe *dst = slot;

    *dst = *src;
}

void
nvme_empty_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
}

void
nvme_op_cq_done(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
    /* nop */
}

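/*
 * Drain a completion queue.  Each CQE carries a phase tag the
 * controller inverts on every lap around the ring, so an entry is
 * valid only while its phase matches q_cq_phase; the head doorbell
 * is written once after the loop rather than per entry.
 */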
int
nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
{
    struct nvme_ccb *ccb;
    struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
    u_int32_t head;
    u_int16_t flags;
    int rv = 0;

    if (!mtx_enter_try(&q->q_cq_mtx))
        return (-1);

    head = q->q_cq_head;

    nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
    for (;;) {
        cqe = &ring[head];
        flags = lemtoh16(&cqe->flags);
        if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
            break;

        membar_consumer();

        ccb = &sc->sc_ccbs[cqe->cid];
        sc->sc_ops->op_cq_done(sc, q, ccb);
        ccb->ccb_done(sc, ccb, cqe);

        if (++head >= q->q_entries) {
            head = 0;
            q->q_cq_phase ^= NVME_CQE_PHASE;
        }

        rv = 1;
    }
    nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

    if (rv)
        nvme_write4(sc, q->q_cqhdbl, q->q_cq_head = head);
    mtx_leave(&q->q_cq_mtx);

    return (rv);
}

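/*
 * IDENTIFY CONTROLLER.  MDTS is reported as a power of two in units
 * of the controller's minimum memory page size, which is why the
 * caller hands the MPSMIN shift down here to size sc_mdts.
 */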
int
nvme_identify(struct nvme_softc *sc, u_int mpsmin)
{
    char sn[41], mn[81], fr[17];
    struct nvm_identify_controller *identify;
    struct nvme_dmamem *mem;
    struct nvme_ccb *ccb;
    int rv = 1;

    ccb = nvme_ccb_get(sc);
    if (ccb == NULL)
        panic("nvme_identify: nvme_ccb_get returned NULL");

    mem = nvme_dmamem_alloc(sc, sizeof(*identify));
    if (mem == NULL)
        return (1);

    ccb->ccb_done = nvme_empty_done;
    ccb->ccb_cookie = mem;

    nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
    rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify,
        NVME_TIMO_IDENT);
    nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);

    nvme_ccb_put(sc, ccb);

    if (rv != 0)
        goto done;

    identify = NVME_DMA_KVA(mem);

    scsi_strvis(sn, identify->sn, sizeof(identify->sn));
    scsi_strvis(mn, identify->mn, sizeof(identify->mn));
    scsi_strvis(fr, identify->fr, sizeof(identify->fr));

    printf("%s: %s, firmware %s, serial %s\n", DEVNAME(sc), mn, fr, sn);

    if (identify->mdts > 0) {
        sc->sc_mdts = (1 << identify->mdts) * (1 << mpsmin);
        if (sc->sc_mdts > NVME_MAXPHYS)
            sc->sc_mdts = NVME_MAXPHYS;
        sc->sc_max_prpl = sc->sc_mdts / sc->sc_mps;
    }

    sc->sc_nn = lemtoh32(&identify->nn);

    /*
     * At least one Apple NVMe device presents a second, bogus disk that is
     * inaccessible, so cap targets at 1.
     *
     * sd1 at scsibus1 targ 2 lun 0: <NVMe, APPLE SSD AP0512, 16.1> [..]
     * sd1: 0MB, 4096 bytes/sector, 2 sectors
     */
    if (sc->sc_nn > 1 &&
        mn[0] == 'A' && mn[1] == 'P' && mn[2] == 'P' && mn[3] == 'L' &&
        mn[4] == 'E')
        sc->sc_nn = 1;

    memcpy(&sc->sc_identify, identify, sizeof(sc->sc_identify));

done:
    nvme_dmamem_free(sc, mem);

    return (rv);
}

int
nvme_q_create(struct nvme_softc *sc, struct nvme_queue *q)
{
    struct nvme_sqe_q sqe;
    struct nvme_ccb *ccb;
    int rv;

    ccb = scsi_io_get(&sc->sc_iopool, 0);
    KASSERT(ccb != NULL);

    ccb->ccb_done = nvme_empty_done;
    ccb->ccb_cookie = &sqe;

    memset(&sqe, 0, sizeof(sqe));
    sqe.opcode = NVM_ADMIN_ADD_IOCQ;
    htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem));
    htolem16(&sqe.qsize, q->q_entries - 1);
    htolem16(&sqe.qid, q->q_id);
    sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC;

    rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
    if (rv != 0)
        goto fail;

    ccb->ccb_done = nvme_empty_done;
    ccb->ccb_cookie = &sqe;

    memset(&sqe, 0, sizeof(sqe));
    sqe.opcode = NVM_ADMIN_ADD_IOSQ;
    htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem));
    htolem16(&sqe.qsize, q->q_entries - 1);
    htolem16(&sqe.qid, q->q_id);
    htolem16(&sqe.cqid, q->q_id);
    sqe.qflags = NVM_SQE_Q_PC;

    rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
    if (rv != 0)
        goto fail;

fail:
    scsi_io_put(&sc->sc_iopool, ccb);
    return (rv);
}

int
nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q)
{
    struct nvme_sqe_q sqe;
    struct nvme_ccb *ccb;
    int rv;

    ccb = scsi_io_get(&sc->sc_iopool, 0);
    KASSERT(ccb != NULL);

    ccb->ccb_done = nvme_empty_done;
    ccb->ccb_cookie = &sqe;

    memset(&sqe, 0, sizeof(sqe));
    sqe.opcode = NVM_ADMIN_DEL_IOSQ;
    htolem16(&sqe.qid, q->q_id);

    rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
    if (rv != 0)
        goto fail;

    ccb->ccb_done = nvme_empty_done;
    ccb->ccb_cookie = &sqe;

    memset(&sqe, 0, sizeof(sqe));
    sqe.opcode = NVM_ADMIN_DEL_IOCQ;
    htolem16(&sqe.qid, q->q_id);

    rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
    if (rv != 0)
        goto fail;

    nvme_q_free(sc, q);

fail:
    scsi_io_put(&sc->sc_iopool, ccb);
    return (rv);
}

void
nvme_fill_identify(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
    struct nvme_sqe *sqe = slot;
    struct nvme_dmamem *mem = ccb->ccb_cookie;

    sqe->opcode = NVM_ADMIN_IDENTIFY;
    htolem64(&sqe->entry.prp[0], NVME_DMA_DVA(mem));
    htolem32(&sqe->cdw10, 1);
}

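/*
 * The ccb array is backed by a single DMA allocation that is carved
 * into one PRP list of sc_max_prpl entries per ccb; each ccb records
 * the kernel va, the offset and the device address of its slice.
 */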
int
nvme_ccbs_alloc(struct nvme_softc *sc, u_int nccbs)
{
    struct nvme_ccb *ccb;
    bus_addr_t off;
    u_int64_t *prpl;
    u_int i;

    sc->sc_ccbs = mallocarray(nccbs, sizeof(*ccb), M_DEVBUF,
        M_WAITOK | M_CANFAIL);
    if (sc->sc_ccbs == NULL)
        return (1);

    sc->sc_ccb_prpls = nvme_dmamem_alloc(sc,
        sizeof(*prpl) * sc->sc_max_prpl * nccbs);

    prpl = NVME_DMA_KVA(sc->sc_ccb_prpls);
    off = 0;

    for (i = 0; i < nccbs; i++) {
        ccb = &sc->sc_ccbs[i];

        if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts,
            sc->sc_max_prpl + 1, /* we get a free prp in the sqe */
            sc->sc_mps, sc->sc_mps,
            BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
            &ccb->ccb_dmamap) != 0)
            goto free_maps;

        ccb->ccb_id = i;
        ccb->ccb_prpl = prpl;
        ccb->ccb_prpl_off = off;
        ccb->ccb_prpl_dva = NVME_DMA_DVA(sc->sc_ccb_prpls) + off;

        SIMPLEQ_INSERT_TAIL(&sc->sc_ccb_list, ccb, ccb_entry);

        prpl += sc->sc_max_prpl;
        off += sizeof(*prpl) * sc->sc_max_prpl;
    }

    return (0);

free_maps:
    nvme_ccbs_free(sc, nccbs);
    return (1);
}

void *
nvme_ccb_get(void *cookie)
{
    struct nvme_softc *sc = cookie;
    struct nvme_ccb *ccb;

    mtx_enter(&sc->sc_ccb_mtx);
    ccb = SIMPLEQ_FIRST(&sc->sc_ccb_list);
    if (ccb != NULL)
        SIMPLEQ_REMOVE_HEAD(&sc->sc_ccb_list, ccb_entry);
    mtx_leave(&sc->sc_ccb_mtx);

    return (ccb);
}

void
nvme_ccb_put(void *cookie, void *io)
{
    struct nvme_softc *sc = cookie;
    struct nvme_ccb *ccb = io;

    mtx_enter(&sc->sc_ccb_mtx);
    SIMPLEQ_INSERT_HEAD(&sc->sc_ccb_list, ccb, ccb_entry);
    mtx_leave(&sc->sc_ccb_mtx);
}

void
nvme_ccbs_free(struct nvme_softc *sc, unsigned int nccbs)
{
    struct nvme_ccb *ccb;

    while ((ccb = SIMPLEQ_FIRST(&sc->sc_ccb_list)) != NULL) {
        SIMPLEQ_REMOVE_HEAD(&sc->sc_ccb_list, ccb_entry);
        bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap);
    }

    nvme_dmamem_free(sc, sc->sc_ccb_prpls);
    free(sc->sc_ccbs, M_DEVBUF, nccbs * sizeof(*ccb));
}

struct nvme_queue *
nvme_q_alloc(struct nvme_softc *sc, u_int16_t id, u_int entries, u_int dstrd)
{
    struct nvme_queue *q;

    q = malloc(sizeof(*q), M_DEVBUF, M_WAITOK | M_CANFAIL);
    if (q == NULL)
        return (NULL);

    q->q_sq_dmamem = nvme_dmamem_alloc(sc,
        sizeof(struct nvme_sqe) * entries);
    if (q->q_sq_dmamem == NULL)
        goto free;

    q->q_cq_dmamem = nvme_dmamem_alloc(sc,
        sizeof(struct nvme_cqe) * entries);
    if (q->q_cq_dmamem == NULL)
        goto free_sq;

    memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
    memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));

    mtx_init(&q->q_sq_mtx, IPL_BIO);
    mtx_init(&q->q_cq_mtx, IPL_BIO);
    q->q_sqtdbl = NVME_SQTDBL(id, dstrd);
    q->q_cqhdbl = NVME_CQHDBL(id, dstrd);

    q->q_id = id;
    q->q_entries = entries;
    q->q_sq_tail = 0;
    q->q_cq_head = 0;
    q->q_cq_phase = NVME_CQE_PHASE;

    if (sc->sc_ops->op_q_alloc != NULL) {
        if (sc->sc_ops->op_q_alloc(sc, q) != 0)
            goto free_cq;
    }

    nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
    nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

    return (q);

free_cq:
    nvme_dmamem_free(sc, q->q_cq_dmamem);
free_sq:
    nvme_dmamem_free(sc, q->q_sq_dmamem);
free:
    free(q, M_DEVBUF, sizeof *q);

    return (NULL);
}

int
nvme_q_reset(struct nvme_softc *sc, struct nvme_queue *q)
{
    memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
    memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));

    q->q_sq_tail = 0;
    q->q_cq_head = 0;
    q->q_cq_phase = NVME_CQE_PHASE;

    nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
    nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

    return (0);
}

void
nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q)
{
    nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
    nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE);

    if (sc->sc_ops->op_q_free != NULL)
        sc->sc_ops->op_q_free(sc, q);

    nvme_dmamem_free(sc, q->q_cq_dmamem);
    nvme_dmamem_free(sc, q->q_sq_dmamem);
    free(q, M_DEVBUF, sizeof *q);
}

int
nvme_intr(void *xsc)
{
    struct nvme_softc *sc = xsc;
    int rv = 0;

    if (nvme_q_complete(sc, sc->sc_q))
        rv = 1;
    if (nvme_q_complete(sc, sc->sc_admin_q))
        rv = 1;

    return (rv);
}

int
nvme_intr_intx(void *xsc)
{
    struct nvme_softc *sc = xsc;
    int rv;

    nvme_write4(sc, NVME_INTMS, 1);
    rv = nvme_intr(sc);
    nvme_write4(sc, NVME_INTMC, 1);

    return (rv);
}

struct nvme_dmamem *
nvme_dmamem_alloc(struct nvme_softc *sc, size_t size)
{
    struct nvme_dmamem *ndm;
    int nsegs;

    ndm = malloc(sizeof(*ndm), M_DEVBUF, M_WAITOK | M_ZERO);
    if (ndm == NULL)
        return (NULL);

    ndm->ndm_size = size;

    if (bus_dmamap_create(sc->sc_dmat, size, 1, size, 0,
        BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
        &ndm->ndm_map) != 0)
        goto ndmfree;

    if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &ndm->ndm_seg,
        1, &nsegs, BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_64BIT) != 0)
        goto destroy;

    if (bus_dmamem_map(sc->sc_dmat, &ndm->ndm_seg, nsegs, size,
        &ndm->ndm_kva, BUS_DMA_WAITOK) != 0)
        goto free;

    if (bus_dmamap_load(sc->sc_dmat, ndm->ndm_map, ndm->ndm_kva, size,
        NULL, BUS_DMA_WAITOK) != 0)
        goto unmap;

    return (ndm);

unmap:
    bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, size);
free:
    bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
destroy:
    bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
ndmfree:
    free(ndm, M_DEVBUF, sizeof *ndm);

    return (NULL);
}

void
nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops)
{
    bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(mem),
        0, NVME_DMA_LEN(mem), ops);
}

void
nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm)
{
    bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map);
    bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size);
    bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
    bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
    free(ndm, M_DEVBUF, sizeof *ndm);
}

#ifdef HIBERNATE

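/*
 * Polled admin command for the hibernate path: with interrupts and
 * the scheduler unavailable, the SQE is copied into the admin ring
 * directly (via the locked sq_enter/sq_leave ops, without a ccb) and
 * the completion queue is busy-waited by hand instead of going
 * through nvme_poll().
 */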
int
nvme_hibernate_admin_cmd(struct nvme_softc *sc, struct nvme_sqe *sqe,
    struct nvme_cqe *cqe, int cid)
{
    struct nvme_sqe *asqe = NVME_DMA_KVA(sc->sc_admin_q->q_sq_dmamem);
    struct nvme_cqe *acqe = NVME_DMA_KVA(sc->sc_admin_q->q_cq_dmamem);
    struct nvme_queue *q = sc->sc_admin_q;
    int tail;
    u_int16_t flags;

    /* submit command */
    tail = sc->sc_ops->op_sq_enter_locked(sc, q, /* XXX ccb */ NULL);

    asqe += tail;
    bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
        sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
    *asqe = *sqe;
    asqe->cid = cid;
    bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
        sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);

    sc->sc_ops->op_sq_leave_locked(sc, q, /* XXX ccb */ NULL);

    /* wait for completion */
    acqe += q->q_cq_head;
    for (;;) {
        nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
        flags = lemtoh16(&acqe->flags);
        if ((flags & NVME_CQE_PHASE) == q->q_cq_phase)
            break;

        delay(10);
    }

    if (++q->q_cq_head >= q->q_entries) {
        q->q_cq_head = 0;
        q->q_cq_phase ^= NVME_CQE_PHASE;
    }
    nvme_write4(sc, q->q_cqhdbl, q->q_cq_head);
    if ((NVME_CQE_SC(flags) != NVME_CQE_SC_SUCCESS) || (acqe->cid != cid))
        return (EIO);

    return (0);
}

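/*
 * Hibernate write hook.  The scratch page handed in by the hibernate
 * code doubles as state: it carries the PRP list used for multi-page
 * writes plus the queue bookkeeping set up at HIB_INIT time, when the
 * dedicated NVME_HIB_Q submission/completion queue pair is created.
 */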
1713int
1714nvme_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size,
1715    int op, void *page)
1716{
1717    struct nvme_hibernate_page {
1718        u_int64_t       prpl[MAXPHYS / PAGE_SIZE];
1719
1720        struct nvme_softc   *sc;
1721        int         nsid;
1722        int         sq_tail;
1723        int         cq_head;
1724        int         cqe_phase;
1725
1726        daddr_t         poffset;
1727        size_t          psize;
1728        u_int32_t       secsize;
1729    } *my = page;
1730    struct nvme_sqe_io *isqe;
1731    struct nvme_cqe *icqe;
1732    paddr_t data_phys, page_phys;
1733    u_int64_t data_bus_phys, page_bus_phys;
1734    u_int16_t flags;
1735    int i;
1736    int error;
1737
1738    if (op == HIB_INIT) {
1739        struct device *disk;
1740        struct device *scsibus;
1741        struct nvm_identify_namespace *ns;
1742        struct nvm_namespace_format *f;
1743        extern struct cfdriver sd_cd;
1744        struct scsi_link *link;
1745        struct scsibus_softc *bus_sc;
1746        struct nvme_sqe_q qsqe;
1747        struct nvme_cqe qcqe;
1748
1749        /* find nvme softc */
1750        disk = disk_lookup(&sd_cd, DISKUNIT(dev));
1751        scsibus = disk->dv_parent;
1752        my->sc = (struct nvme_softc *)disk->dv_parent->dv_parent;
1753
1754        /* find scsi_link, which tells us the target */
1755        my->nsid = 0;
1756        bus_sc = (struct scsibus_softc *)scsibus;
1757        SLIST_FOREACH(link, &bus_sc->sc_link_list, bus_list) {
1758            if (link->device_softc == disk) {
1759                my->nsid = link->target;
1760                break;
1761            }
1762        }
1763        if (my->nsid == 0)
1764            return (EIO);
1765        ns = my->sc->sc_namespaces[my->nsid].ident;
1766        f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)];
1767
1768        my->poffset = blkno;
1769        my->psize = size;
1770        my->secsize = 1 << f->lbads;
1771
1772        memset(NVME_DMA_KVA(my->sc->sc_hib_q->q_cq_dmamem), 0,
1773            my->sc->sc_hib_q->q_entries * sizeof(struct nvme_cqe));
1774        memset(NVME_DMA_KVA(my->sc->sc_hib_q->q_sq_dmamem), 0,
1775            my->sc->sc_hib_q->q_entries * sizeof(struct nvme_sqe));
1776
1777        my->sq_tail = 0;
1778        my->cq_head = 0;
1779        my->cqe_phase = NVME_CQE_PHASE;
1780
1781        memset(&qsqe, 0, sizeof(qsqe));
1782        qsqe.opcode = NVM_ADMIN_ADD_IOCQ;
1783        htolem64(&qsqe.prp1,
1784            NVME_DMA_DVA(my->sc->sc_hib_q->q_cq_dmamem));
1785        htolem16(&qsqe.qsize, my->sc->sc_hib_q->q_entries - 1);
1786        htolem16(&qsqe.qid, my->sc->sc_hib_q->q_id);
1787        qsqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC;
1788        if (nvme_hibernate_admin_cmd(my->sc, (struct nvme_sqe *)&qsqe,
1789            &qcqe, 1) != 0)
1790            return (EIO);
1791
1792        memset(&qsqe, 0, sizeof(qsqe));
1793        qsqe.opcode = NVM_ADMIN_ADD_IOSQ;
1794        htolem64(&qsqe.prp1,
1795            NVME_DMA_DVA(my->sc->sc_hib_q->q_sq_dmamem));
1796        htolem16(&qsqe.qsize, my->sc->sc_hib_q->q_entries - 1);
1797        htolem16(&qsqe.qid, my->sc->sc_hib_q->q_id);
1798        htolem16(&qsqe.cqid, my->sc->sc_hib_q->q_id);
1799        qsqe.qflags = NVM_SQE_Q_PC;
1800        if (nvme_hibernate_admin_cmd(my->sc, (struct nvme_sqe *)&qsqe,
1801            &qcqe, 2) != 0)
1802            return (EIO);
1803
1804        return (0);
1805    }
1806
1807    if (op != HIB_W)
1808        return (0);
1809
1810    if (blkno + (size / DEV_BSIZE) > my->psize)
1811        return E2BIG;
1812
1813    isqe = NVME_DMA_KVA(my->sc->sc_hib_q->q_sq_dmamem);
1814    isqe += my->sq_tail;
1815    if (++my->sq_tail == my->sc->sc_hib_q->q_entries)
1816        my->sq_tail = 0;
1817
1818    memset(isqe, 0, sizeof(*isqe));
1819    isqe->opcode = NVM_CMD_WRITE;
1820    htolem32(&isqe->nsid, my->nsid);
1821
1822    pmap_extract(pmap_kernel(), addr, &data_phys);
1823    data_bus_phys = data_phys;
1824    htolem64(&isqe->entry.prp[0], data_bus_phys);
1825    if ((size > my->sc->sc_mps) && (size <= my->sc->sc_mps * 2)) {
1826        htolem64(&isqe->entry.prp[1], data_bus_phys + my->sc->sc_mps);
1827    } else if (size > my->sc->sc_mps * 2) {
1828        pmap_extract(pmap_kernel(), (vaddr_t)page, &page_phys);
1829        page_bus_phys = page_phys;
1830        htolem64(&isqe->entry.prp[1], page_bus_phys +
1831            offsetof(struct nvme_hibernate_page, prpl));
1832        for (i = 1; i < howmany(size, my->sc->sc_mps); i++) {
1833            htolem64(&my->prpl[i - 1], data_bus_phys +
1834                (i * my->sc->sc_mps));
1835        }
1836    }
1837
1838    isqe->slba = (blkno + my->poffset) / (my->secsize / DEV_BSIZE);
1839    isqe->nlb = (size / my->secsize) - 1;
1840    isqe->cid = blkno % 0xffff;
1841
    nvme_write4(my->sc, NVME_SQTDBL(NVME_HIB_Q, my->sc->sc_dstrd),
        my->sq_tail);
    nvme_barrier(my->sc, NVME_SQTDBL(NVME_HIB_Q, my->sc->sc_dstrd), 4,
        BUS_SPACE_BARRIER_WRITE);

    error = 0;

    icqe = NVME_DMA_KVA(my->sc->sc_hib_q->q_cq_dmamem);
    icqe += my->cq_head;

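    /*
     * Poll the completion queue.  An entry is new once its phase bit
     * matches the expected phase; anything other than a successful
     * status carrying our command id is an error.
     */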
    nvme_dmamem_sync(my->sc, my->sc->sc_hib_q->q_cq_dmamem,
        BUS_DMASYNC_POSTREAD);
    for (;;) {
        flags = lemtoh16(&icqe->flags);
        if ((flags & NVME_CQE_PHASE) == my->cqe_phase) {
            if ((NVME_CQE_SC(flags) != NVME_CQE_SC_SUCCESS) ||
                (icqe->cid != blkno % 0xffff))
                error = EIO;

            break;
        }

        delay(1);
        nvme_dmamem_sync(my->sc, my->sc->sc_hib_q->q_cq_dmamem,
            BUS_DMASYNC_PREREAD|BUS_DMASYNC_POSTREAD);
    }
    nvme_dmamem_sync(my->sc, my->sc->sc_hib_q->q_cq_dmamem,
        BUS_DMASYNC_PREREAD);

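    /*
     * Advance the completion queue head, flipping the expected phase
     * on wrap, and tell the device how far we have consumed.
     */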
    if (++my->cq_head == my->sc->sc_hib_q->q_entries) {
        my->cq_head = 0;
        my->cqe_phase ^= NVME_CQE_PHASE;
    }

    nvme_write4(my->sc, NVME_CQHDBL(NVME_HIB_Q, my->sc->sc_dstrd),
        my->cq_head);
    nvme_barrier(my->sc, NVME_CQHDBL(NVME_HIB_Q, my->sc->sc_dstrd), 4,
        BUS_SPACE_BARRIER_WRITE);

    return (error);
}

#endif /* HIBERNATE */

#if NBIO > 0
int
nvme_bioctl(struct device *self, u_long cmd, caddr_t data)
{
    struct nvme_softc   *sc = (struct nvme_softc *)self;
    struct nvme_pt_cmd  *pt;
    int          error = 0;

    rw_enter_write(&sc->sc_lock);

    switch (cmd) {
    case BIOCINQ:
        error = nvme_bioctl_inq(sc, (struct bioc_inq *)data);
        break;
    case BIOCVOL:
        error = nvme_bioctl_vol(sc, (struct bioc_vol *)data);
        break;
    case BIOCDISK:
        error = nvme_bioctl_disk(sc, (struct bioc_disk *)data);
        break;
    case NVME_PASSTHROUGH_CMD:
        pt = (struct nvme_pt_cmd *)data;
        error = nvme_passthrough_cmd(sc, pt, sc->sc_dev.dv_unit, -1);
        break;
    default:
        printf("nvme_bioctl() Unknown command (%lu)\n", cmd);
        error = ENOTTY;
    }

    rw_exit_write(&sc->sc_lock);

    return error;
}

void
nvme_bio_status(struct bio_status *bs, const char *fmt, ...)
{
    va_list         ap;

    va_start(ap, fmt);
    bio_status(bs, 0, BIO_MSG_INFO, fmt, &ap);
    va_end(ap);
}

const char *
nvme_bioctl_sdname(const struct nvme_softc *sc, int target)
{
    const struct scsi_link      *link;
    const struct sd_softc       *sd;

    link = scsi_get_link(sc->sc_scsibus, target, 0);
    if (link == NULL)
        return NULL;
    sd = (struct sd_softc *)(link->device_softc);
    if (ISSET(link->state, SDEV_S_DYING) || sd == NULL ||
        ISSET(sd->flags, SDF_DYING))
        return NULL;

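    /*
     * A Version register of all ones means the register mapping is
     * gone, typically because the device has dropped off the bus.
     */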
    if (nvme_read4(sc, NVME_VS) == 0xffffffff)
        return NULL;

    return DEVNAME(sd);
}

int
nvme_bioctl_inq(struct nvme_softc *sc, struct bioc_inq *bi)
{
    char                 sn[41], mn[81], fr[17];
    struct nvm_identify_controller  *idctrl = &sc->sc_identify;
    struct bio_status       *bs;
    unsigned int             nn;
    uint32_t             cc, csts, vs;

    /* Don't tell bioctl about namespaces > last configured namespace. */
    for (nn = sc->sc_nn; nn > 0; nn--) {
        if (sc->sc_namespaces[nn].ident)
            break;
    }
    bi->bi_novol = bi->bi_nodisk = nn;
    strlcpy(bi->bi_dev, DEVNAME(sc), sizeof(bi->bi_dev));

    bs = &bi->bi_bio.bio_status;
    bio_status_init(bs, &sc->sc_dev);
    bs->bs_status = BIO_STATUS_SUCCESS;

    scsi_strvis(sn, idctrl->sn, sizeof(idctrl->sn));
    scsi_strvis(mn, idctrl->mn, sizeof(idctrl->mn));
    scsi_strvis(fr, idctrl->fr, sizeof(idctrl->fr));

    nvme_bio_status(bs, "%s, %s, %s", mn, fr, sn);
    nvme_bio_status(bs, "Max i/o %zu bytes%s%s%s, Sanitize 0x%b",
        sc->sc_mdts,
        ISSET(idctrl->lpa, NVM_ID_CTRL_LPA_PE) ?
        ", Persistent Event Log" : "",
        ISSET(idctrl->fna, NVM_ID_CTRL_FNA_CRYPTOFORMAT) ?
        ", CryptoFormat" : "",
        ISSET(idctrl->vwc, NVM_ID_CTRL_VWC_PRESENT) ?
        ", Volatile Write Cache" : "",
        lemtoh32(&idctrl->sanicap), NVM_ID_CTRL_SANICAP_FMT
    );

    if (idctrl->ctratt != 0)
        nvme_bio_status(bs, "Features 0x%b", lemtoh32(&idctrl->ctratt),
            NVM_ID_CTRL_CTRATT_FMT);

    if (idctrl->oacs || idctrl->oncs) {
        nvme_bio_status(bs, "Admin commands 0x%b, NVM commands 0x%b",
            lemtoh16(&idctrl->oacs), NVM_ID_CTRL_OACS_FMT,
            lemtoh16(&idctrl->oncs), NVM_ID_CTRL_ONCS_FMT);
    }

    cc = nvme_read4(sc, NVME_CC);
    csts = nvme_read4(sc, NVME_CSTS);
    vs = nvme_read4(sc, NVME_VS);

    if (vs == 0xffffffff) {
        nvme_bio_status(bs, "Invalid PCIe register mapping");
        return 0;
    }

    nvme_bio_status(bs, "NVMe %u.%u%s%s%sabled, %sReady%s%s%s%s",
        NVME_VS_MJR(vs), NVME_VS_MNR(vs),
        (NVME_CC_CSS_R(cc) == NVME_CC_CSS_NVM) ? ", NVM I/O command set" : "",
        (NVME_CC_CSS_R(cc) == 0x7) ? ", Admin command set only" : "",
        ISSET(cc, NVME_CC_EN) ? ", En" : "Dis",
        ISSET(csts, NVME_CSTS_RDY) ? "" : "Not ",
        ISSET(csts, NVME_CSTS_CFS) ? ", Fatal Error, " : "",
        (NVME_CC_SHN_R(cc) == NVME_CC_SHN_NORMAL) ? ", Normal shutdown" : "",
        (NVME_CC_SHN_R(cc) == NVME_CC_SHN_ABRUPT) ? ", Abrupt shutdown" : "",
        ISSET(csts, NVME_CSTS_SHST_DONE) ? " complete" : "");

    return 0;
}

int
nvme_bioctl_vol(struct nvme_softc *sc, struct bioc_vol *bv)
{
    const struct nvm_identify_namespace *idns;
    const char              *sd;
    int                  target;
    unsigned int                 lbaf;

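    /* NSIDs are 1-based; volume 0 is namespace 1. */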
    target = bv->bv_volid + 1;
    if (target > sc->sc_nn) {
        bv->bv_status = BIOC_SVINVALID;
        return 0;
    }

    bv->bv_level = 'c';
    bv->bv_nodisk = 1;

    idns = sc->sc_namespaces[target].ident;
    if (idns == NULL) {
        bv->bv_status = BIOC_SVINVALID;
        return 0;
    }

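    /*
     * With more than 16 LBA formats the format index gains two
     * extra bits taken from FLBAS bits 6:5.
     */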
    lbaf = NVME_ID_NS_FLBAS(idns->flbas);
    if (idns->nlbaf > 16)
        lbaf |= (idns->flbas >> 1) & 0x30;
    bv->bv_size = nvme_scsi_size(idns) << idns->lbaf[lbaf].lbads;

    sd = nvme_bioctl_sdname(sc, target);
    if (sd) {
        strlcpy(bv->bv_dev, sd, sizeof(bv->bv_dev));
        bv->bv_status = BIOC_SVONLINE;
    } else
        bv->bv_status = BIOC_SVOFFLINE;

    return 0;
}

int
nvme_bioctl_disk(struct nvme_softc *sc, struct bioc_disk *bd)
{
    const char          *rpdesc[4] = {
        " (Best)",
        " (Better)",
        " (Good)",
        " (Degraded)"
    };
    const char          *protection[4] = {
        "not enabled",
        "Type 1",
        "Type 2",
        "Type 3",
    };
    char                 buf[32], msg[BIO_MSG_LEN];
    struct nvm_identify_namespace   *idns;
    struct bio_status       *bs;
    uint64_t             id1, id2;
    unsigned int             i, lbaf, target;
    uint16_t             ms;
    uint8_t              dps;

    target = bd->bd_volid + 1;
    if (target > sc->sc_nn)
        return EINVAL;
    bd->bd_channel = sc->sc_scsibus->sc_dev.dv_unit;
    bd->bd_target = target;
    bd->bd_lun = 0;
    snprintf(bd->bd_procdev, sizeof(bd->bd_procdev), "Namespace %u", target);

    bs = &bd->bd_bio.bio_status;
    bs->bs_status = BIO_STATUS_SUCCESS;
    snprintf(bs->bs_controller, sizeof(bs->bs_controller), "%11u",
        bd->bd_diskid);

    idns = sc->sc_namespaces[target].ident;
    if (idns == NULL) {
        bd->bd_status = BIOC_SDUNUSED;
        return 0;
    }

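    /* Same extended-FLBAS decoding as in nvme_bioctl_vol() above. */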
    lbaf = NVME_ID_NS_FLBAS(idns->flbas);
    if (idns->nlbaf > 16)
        lbaf |= (idns->flbas >> 1) & 0x30;
    bd->bd_size = lemtoh64(&idns->nsze) << idns->lbaf[lbaf].lbads;

    if (memcmp(idns->nguid, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16)) {
        memcpy(&id1, idns->nguid, sizeof(uint64_t));
        memcpy(&id2, idns->nguid + sizeof(uint64_t), sizeof(uint64_t));
        snprintf(bd->bd_serial, sizeof(bd->bd_serial), "%08llx%08llx",
            id1, id2);
    } else if (memcmp(idns->eui64, "\0\0\0\0\0\0\0\0", 8)) {
        memcpy(&id1, idns->eui64, sizeof(uint64_t));
        snprintf(bd->bd_serial, sizeof(bd->bd_serial), "%08llx", id1);
    }

    msg[0] = '\0';
    for (i = 0; i <= idns->nlbaf; i++) {
        if (idns->lbaf[i].lbads == 0)
            continue;
        snprintf(buf, sizeof(buf), "%s%s%u",
            strlen(msg) ? ", " : "", (i == lbaf) ? "*" : "",
            1 << idns->lbaf[i].lbads);
        strlcat(msg, buf, sizeof(msg));
        ms = lemtoh16(&idns->lbaf[i].ms);
        if (ms) {
            snprintf(buf, sizeof(buf), "+%u", ms);
            strlcat(msg, buf, sizeof(msg));
        }
        strlcat(msg, rpdesc[idns->lbaf[i].rp], sizeof(msg));
    }
    nvme_bio_status(bs, "Formats %s", msg);

    if (idns->nsfeat)
        nvme_bio_status(bs, "Features 0x%b", idns->nsfeat,
            NVME_ID_NS_NSFEAT_FMT);

    if (idns->dps) {
        dps = idns->dps;
        snprintf(msg, sizeof(msg), "Data Protection (0x%02x) "
            "Protection Data in ", dps);
        if (ISSET(dps, NVME_ID_NS_DPS_PIP))
            strlcat(msg, "first", sizeof(msg));
        else
            strlcat(msg, "last", sizeof(msg));
        strlcat(msg, " bytes of metadata, Protection ", sizeof(msg));
        if (NVME_ID_NS_DPS_TYPE(dps) >= nitems(protection))
            strlcat(msg, "Type unknown", sizeof(msg));
        else
            strlcat(msg, protection[NVME_ID_NS_DPS_TYPE(dps)],
                sizeof(msg));
        nvme_bio_status(bs, "%s", msg);
    }

    if (nvme_bioctl_sdname(sc, target) == NULL)
        bd->bd_status = BIOC_SDOFFLINE;
    else
        bd->bd_status = BIOC_SDONLINE;

    return 0;
}
#endif  /* NBIO > 0 */

#ifndef SMALL_KERNEL
void
nvme_refresh_sensors(void *arg)
{
    struct nvme_softc       *sc = arg;
    struct nvme_sqe          sqe;
    struct nvme_dmamem      *mem = NULL;
    struct nvme_ccb         *ccb = NULL;
    struct nvm_smart_health     *health;
    uint32_t             dwlen;
    uint8_t              cw;
    int              flags;
    int64_t              temp;

    ccb = nvme_ccb_get(sc);
    if (ccb == NULL)
        goto failed;

    mem = nvme_dmamem_alloc(sc, sizeof(*health));
    if (mem == NULL)
        goto failed;
    nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);

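    /*
     * Fetch the SMART / Health Information log page.  The length in
     * cdw10 is a zero-based dword count, and an nsid of 0xffffffff
     * asks for the controller-wide log.
     */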
    dwlen = (sizeof(*health) >> 2) - 1;
    memset(&sqe, 0, sizeof(sqe));
    sqe.opcode = NVM_ADMIN_GET_LOG_PG;
    htolem32(&sqe.nsid, 0xffffffff);
    htolem32(&sqe.cdw10, (dwlen << 16 | NVM_LOG_PAGE_SMART_HEALTH));
    htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));

    ccb->ccb_done = nvme_empty_done;
    ccb->ccb_cookie = &sqe;
    flags = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill,
        NVME_TIMO_LOG_PAGE);

    nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);

    if (flags != 0)
        goto failed;

    health = NVME_DMA_KVA(mem);
    cw = health->critical_warning;

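    /*
     * The log reports the composite temperature in Kelvin; the
     * sensors framework stores temperatures in microKelvin.
     */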
    sc->sc_temp_sensor.status = (cw & NVM_HEALTH_CW_TEMP) ?
        SENSOR_S_CRIT : SENSOR_S_OK;
    temp = letoh16(health->temperature);
    sc->sc_temp_sensor.value = (temp * 1000000) + 150000;

    sc->sc_spare_sensor.status = (cw & NVM_HEALTH_CW_SPARE) ?
        SENSOR_S_CRIT : SENSOR_S_OK;
    sc->sc_spare_sensor.value = health->avail_spare * 1000;

    sc->sc_usage_sensor.status = SENSOR_S_OK;
    sc->sc_usage_sensor.value = health->percent_used * 1000;
    goto done;

 failed:
    sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN;
    sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN;
    sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN;
 done:
    if (mem != NULL)
        nvme_dmamem_free(sc, mem);
    if (ccb != NULL)
        nvme_ccb_put(sc, ccb);
}
#endif /* SMALL_KERNEL */
