appneta/tcpreplay

View on GitHub
src/common/netmap.c

Summary

Maintainability
Test Coverage
/*
 *   Copyright (c) 2013 Fred Klassen <fklassen at appneta dot com> - AppNeta
 *
 *   The Tcpreplay Suite of tools is free software: you can redistribute it
 *   and/or modify it under the terms of the GNU General Public License as
 *   published by the Free Software Foundation, either version 3 of the
 *   License, or with the authors permission any later version.
 *
 *   The Tcpreplay Suite is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with the Tcpreplay Suite.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "netmap.h"
#include "config.h"
#include "common.h"
#include "tcpreplay_api.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

static int nm_do_ioctl(sendpacket_t *sp, u_long what, int subcmd);

/**
 * Method takes an open "/dev/netmap" file descriptor and returns
 * the netmap version.
 *
 * Returns -1 on error
 */
int
get_netmap_version()
{
    u_int32_t netmap_version = -1;
    nmreq_t nmr;
    int fd;

    if ((fd = open("/dev/netmap", O_RDWR)) < 0)
        return -1;

    /* netmap version discovery */
    bzero(&nmr, sizeof(nmr));
    nmr.nr_version = NETMAP_API;

    /* attempt using the netmap API version that this was compiled under */
    if (ioctl(fd, NIOCGINFO, &nmr) == 0) {
        netmap_version = nmr.nr_version;
        dbgx(1, "netmap detected API version %d which matches compiled version\n", netmap_version);
    } else {
        /* failed.
         *
         * Try other versions in an attempt to find the version
         * that matches this system.
         */
        int x;
        for (x = 0; x < 50; ++x) {
            bzero(&nmr, sizeof(nmr));
            nmr.nr_version = x;
            if (ioctl(fd, NIOCGINFO, &nmr) == 0) {
                netmap_version = nmr.nr_version;
                dbgx(1,
                     "netmap detected API version %d which doesn't match compiled version %d\n",
                     netmap_version,
                     NETMAP_API);
                break;
            }
        }
    }

    close(fd);

    return (int)netmap_version;
}

/**
 * ioctl support for netmap
 */
static int
nm_do_ioctl(sendpacket_t *sp, u_long what, int subcmd)
{
    struct ifreq ifr;
    int error;
    int fd;
#ifdef linux
    struct ethtool_value eval;
#endif

    assert(sp);

    fd = socket(AF_INET, SOCK_DGRAM, 0);
    if (fd < 0) {
        dbg(1, "ioctl error: cannot get device control socket.\n");
        return -1;
    }

    bzero(&ifr, sizeof(ifr));
    strncpy(ifr.ifr_name, sp->device, sizeof(ifr.ifr_name));

    switch (what) {
    case SIOCSIFFLAGS:
        ifr.ifr_flags = sp->if_flags >> 16;
        ifr.ifr_flags = sp->if_flags & 0xffff;
        break;

#ifdef linux
    case SIOCETHTOOL:
        eval.cmd = subcmd;
        eval.data = sp->data;
        ifr.ifr_data = (caddr_t)&eval;
        dbgx(1, "ioctl SIOCETHTOOL subcmd=%d data=%u", subcmd, eval.data);
        break;
#endif
    }

    error = ioctl(fd, what, &ifr);
    if (error)
        goto done;

    switch (what) {
    case SIOCGIFFLAGS:
        sp->if_flags = (ifr.ifr_flags << 16) | (0xffff & ifr.ifr_flags);
        dbgx(1, "SIOCGIFFLAGS flags are 0x%x", sp->if_flags);
        break;

#ifdef linux
    case SIOCETHTOOL:
        switch (subcmd) {
        case ETHTOOL_GGSO:
            sp->gso = eval.data;
            dbgx(1, "ioctl SIOCETHTOOL ETHTOOL_GGSO=%u", eval.data);
            break;

        case ETHTOOL_GTSO:
            sp->tso = eval.data;
            dbgx(1, "ioctl SIOCETHTOOL ETHTOOL_GTSO=%u", eval.data);
            break;

        case ETHTOOL_GRXCSUM:
            sp->rxcsum = eval.data;
            dbgx(1, "ioctl SIOCETHTOOL ETHTOOL_GRXCSUM=%u", eval.data);
            break;

        case ETHTOOL_GTXCSUM:
            sp->txcsum = eval.data;
            dbgx(1, "ioctl SIOCETHTOOL ETHTOOL_GTXCSUM=%u", eval.data);
            break;
        default:
            return -1;
        }
        break;
#endif
    default:
        return -1;
    }

done:
    close(fd);

    if (error)
        warnx("ioctl error %d %lu:%d", error, what, subcmd);
    return error;
}

/**
 * Inner sendpacket_open() method for using netmap
 */
void *
sendpacket_open_netmap(const char *device, char *errbuf, void *arg)
{
    tcpreplay_t *ctx = (tcpreplay_t *)arg;
    sendpacket_t *sp = NULL;
    nmreq_t nmr;
    char ifname_buf[MAX_IFNAMELEN];
    const char *ifname;
    const char *port = NULL;
    size_t namelen;
    u_int32_t nr_ringid = 0;
    u_int32_t nr_flags = NR_REG_DEFAULT;
    int is_default = 0;

    assert(device);
    assert(errbuf);

    dbg(1, "sendpacket_open_netmap: using netmap");

    bzero(&nmr, sizeof(nmr));

    /* prep & return our sp handle */
    sp = (sendpacket_t *)safe_malloc(sizeof(sendpacket_t));

    if (strlen(device) > MAX_IFNAMELEN - 8) {
        snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "Interface name is to long: %s\n", device);
        goto IFACENAME_INVALID;
    }

    /* get the version of the netmap driver. If < 0, driver is not installed */
    sp->netmap_version = get_netmap_version();
    if (sp->netmap_version < 0) {
        snprintf(errbuf,
                 SENDPACKET_ERRBUF_SIZE,
                 "Unable to determine the running netmap version.\n"
                 "See INSTALL document for details on installing or upgrading netmap.");
        goto NETMAP_NOT_INSTALLED;
    }

    /*
     * Sort out interface names
     *
     * ifname   (foo, netmap:foo or vale:foo) is the port name
     *      foo         bind to a single NIC hardware queue
     *      netmap:foo  bind to a single NIC hardware queue
     *      vale:foo    bind to the Vale virtual interface
     *
     * for netmap version 10+ a suffix can indicate the following:
     *      netmap:foo!     bind to all NIC hardware queues (may cause TX reordering)
     *      netmap:foo^     bind to the host (sw) ring pair
     *      netmap:foo*     bind to the host (sw) and NIC ring pairs (transparent)
     *      netmap:foo-NN   bind to the individual NIC ring pair (queue) where NN = the ring number
     *      netmap:foo{NN   bind to the master side of pipe NN
     *      netmap:foo}NN   bind to the slave side of pipe NN
     */
    if (strncmp(device, "netmap:", 7) && strncmp(device, "vale", 4)) {
        snprintf(ifname_buf, sizeof(ifname_buf), "netmap:%s", device);
        ifname = ifname_buf;
    } else {
        ifname = device;
    }

    if (!strncmp("vale", device, 4))
        sp->is_vale = 1;

    if (ifname[0] == 'n')
        ifname += 7;

    /* scan for a separator */
    for (port = ifname; *port && !index("!-*^{}", *port); port++)
        ;

    namelen = port - ifname;
    if (namelen > sizeof(nmr.nr_name)) {
        snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "Interface name is to long: %s\n", device);
        goto IFACENAME_INVALID;
    }
    /*
     * Open the netmap device to fetch the number of queues of our
     * interface.
     *
     * The first NIOCREGIF also detaches the card from the
     * protocol stack and may cause a reset of the card,
     * which in turn may take some time for the PHY to
     * reconfigure.
     */
    if ((sp->handle.fd = open("/dev/netmap", O_RDWR)) < 0) {
        dbg(1, "sendpacket_open_netmap: Unable to access netmap");
        snprintf(errbuf,
                 SENDPACKET_ERRBUF_SIZE,
                 "Unable to access netmap.\n"
                 "See INSTALL to learn which NICs are supported and\n"
                 "how to set up netmap-capable network drivers.");
        goto OPEN_FAILED;
    }

    /*
     * The nmreq structure must have the NETMAP_API version for the running machine.
     * However the binary may have been compiled on a different machine than the
     * running machine. Discover the true netmap API version, and be careful to call
     * functions that are available on all netmap versions.
     */
    if (sp->netmap_version >= 10) {
        switch (*port) {
        case '-': /* one NIC */
            nr_flags = NR_REG_ONE_NIC;
            nr_ringid = strtol(port + 1, NULL, 10);
            break;

        case '*': /* NIC and SW, ignore port */
            nr_flags = NR_REG_NIC_SW;
            if (port[1]) {
                snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "invalid netmap port for nic+sw");
                goto NETMAP_IF_PARSE_FAIL;
            }
            break;

        case '^': /* only sw ring */
            nr_flags = NR_REG_SW;
            if (port[1]) {
                snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "invalid port for sw ring");
                goto NETMAP_IF_PARSE_FAIL;
            }
            break;

        case '{':
            nr_flags = NR_REG_PIPE_MASTER;
            nr_ringid = strtol(port + 1, NULL, 10);
            break;

        case '}':
            nr_flags = NR_REG_PIPE_SLAVE;
            nr_ringid = strtol(port + 1, NULL, 10);
            break;

        case '!':
            nr_flags = NR_REG_ALL_NIC;
            break;

        default: /* '\0', no suffix */
            nr_flags = NR_REG_ALL_NIC;
            is_default = 1;
            break;
        }

        if (nr_ringid >= NETMAP_RING_MASK) {
            snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "invalid ringid");
            goto NETMAP_IF_PARSE_FAIL;
        }

        nmr.nr_ringid = nr_ringid;
        nmr.nr_flags = nr_flags;
    }

    nmr.nr_version = sp->netmap_version;
    memcpy(nmr.nr_name, ifname, namelen);
    nmr.nr_name[namelen] = '\0';
    strlcpy(sp->device, nmr.nr_name, sizeof(sp->device));

    /*
     * Register the interface on the netmap device: from now on,
     * we can operate on the network interface without any
     * interference from the legacy network stack.
     *
     * Cards take a long time to reset the PHY.
     */
    fprintf(stderr, "Switching network driver for %s to netmap bypass mode... ", sp->device);
    fflush(NULL);
    sleep(1); /* ensure message prints when user is connected via ssh */

    if (ioctl(sp->handle.fd, NIOCREGIF, &nmr)) {
        snprintf(errbuf,
                 SENDPACKET_ERRBUF_SIZE,
                 "Failure accessing netmap.\n"
                 "\tRequest for netmap version %d failed.\n\tCompiled netmap driver is version %d.\n\tError=%s\n",
                 sp->netmap_version,
                 NETMAP_API,
                 strerror(errno));
        goto NETMAP_IF_FAILED;
    }

    if (!nmr.nr_memsize) {
        snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "Netmap interface '%s' not configured.\n", device);
        goto NETMAP_IF_FAILED;
    }

    sp->mmap_size = nmr.nr_memsize;
    sp->mmap_addr = (struct netmap_d *)mmap(0, sp->mmap_size, PROT_WRITE | PROT_READ, MAP_SHARED, sp->handle.fd, 0);

    if (!sp->mmap_addr || sp->mmap_addr == MAP_FAILED) {
        snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "mmap: %s", strerror(errno));
        goto MMAP_FAILED;
    }

    dbgx(1, "sendpacket_open_netmap: mapping %d Kbytes queues=%d", sp->mmap_size >> 10, nmr.nr_tx_rings);

    sp->nm_if = NETMAP_IF(sp->mmap_addr, nmr.nr_offset);
    sp->nmr = nmr;
    sp->handle_type = SP_TYPE_NETMAP;

    /* set up ring IDs */
    sp->cur_tx_ring = 0;
    switch (nr_flags) {
    case NR_REG_DEFAULT: /* only use one queue to prevent TX reordering */
        sp->first_tx_ring = sp->last_tx_ring = sp->cur_tx_ring = 0;
        break;

    case NR_REG_ALL_NIC:
        if (is_default) {
            sp->first_tx_ring = sp->last_tx_ring = sp->cur_tx_ring = 0;
        } else {
            sp->first_tx_ring = sp->cur_tx_ring = 0;
            sp->last_tx_ring = nmr.nr_tx_rings - 1;
        }
        break;

    case NR_REG_SW:
        sp->first_tx_ring = sp->last_tx_ring = sp->cur_tx_ring = nmr.nr_tx_rings;
        break;

    case NR_REG_NIC_SW:
        sp->first_tx_ring = sp->cur_tx_ring = 0;
        sp->last_tx_ring = nmr.nr_tx_rings;
        break;

    case NR_REG_ONE_NIC:
        sp->first_tx_ring = sp->last_tx_ring = sp->cur_tx_ring = nr_ringid;
        break;

    default:
        sp->first_tx_ring = sp->last_tx_ring = sp->cur_tx_ring = 0;
    }
    {
        /* debugging code */
        int i;

        dbgx(1, "%s tx first=%d last=%d  num=%d", ifname, sp->first_tx_ring, sp->last_tx_ring, sp->nmr.nr_tx_rings);
        for (i = 0; i <= sp->nmr.nr_tx_rings; i++) {
#ifdef HAVE_NETMAP_RING_HEAD_TAIL
            dbgx(1,
                 "TX%d 0x%p head=%d cur=%d tail=%d",
                 i,
                 NETMAP_TXRING(sp->nm_if, i),
                 (NETMAP_TXRING(sp->nm_if, i))->head,
                 (NETMAP_TXRING(sp->nm_if, i))->cur,
                 (NETMAP_TXRING(sp->nm_if, i))->tail);
#else
            dbgx(1,
                 "TX%d 0x%p cur=%d avail=%d",
                 i,
                 NETMAP_TXRING(sp->nm_if, i),
                 (NETMAP_TXRING(sp->nm_if, i))->cur,
                 (NETMAP_TXRING(sp->nm_if, i))->avail);
#endif
        }
    }

    dbgx(2, "Waiting %d seconds for phy reset...", ctx->options->netmap_delay);
    sleep(ctx->options->netmap_delay);
    dbg(2, "Ready!");

    if (!sp->is_vale) {
        if (nm_do_ioctl(sp, SIOCGIFFLAGS, 0) < 0)
            goto NM_DO_IOCTL_FAILED;

        if ((sp->if_flags & IFF_RUNNING) == 0) {
            dbgx(1, "sendpacket_open_netmap: %s is not running", sp->device);
            snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "interface %s is not running - check cables\n", sp->device);
            goto NETMAP_IF_NOT_RUNNING;
        }

        if ((sp->if_flags & IFF_UP) == 0) {
            dbgx(1, "%s is down, bringing up...", sp->device);
            sp->if_flags |= IFF_UP;
        }

        /* set promiscuous mode */
        sp->if_flags |= IFF_PROMISC;
        if (nm_do_ioctl(sp, SIOCSIFFLAGS, 0) < 0)
            goto NM_DO_IOCTL_FAILED;

#ifdef linux
        /* disable:
         * - generic-segmentation-offload
         * - tcp-segmentation-offload
         * - rx-checksumming
         * - tx-checksumming
         */
        if (nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_GGSO) < 0 || nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_GTSO) < 0 ||
            nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_GRXCSUM) < 0 || nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_GTXCSUM) < 0)
            goto NM_DO_IOCTL_FAILED;

        sp->data = 0;
        if (nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_SGSO) < 0 || nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_STSO) < 0 ||
            nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_SRXCSUM) < 0 || nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_STXCSUM))
            goto NM_DO_IOCTL_FAILED;
#endif
    }

    if (sp->abort)
        goto NETMAP_ABORT;

    notice("done!");

    return sp;

NM_DO_IOCTL_FAILED:
    snprintf(errbuf, SENDPACKET_ERRBUF_SIZE, "nm_do_ioctl: %s", strerror(errno));
NETMAP_IF_NOT_RUNNING:
    notice("failed!");
NETMAP_ABORT:
    fprintf(stderr, " Switching network driver for %s to normal mode... ", sp->device);
    fflush(NULL);
    munmap(sp->mmap_addr, sp->mmap_size);
MMAP_FAILED:
#if NETMAP_API < 10
    ioctl(sp->handle.fd, NIOCUNREGIF, NULL);
#endif
NETMAP_IF_FAILED:
NETMAP_IF_PARSE_FAIL:
    close(sp->handle.fd);
OPEN_FAILED:
    safe_free(sp);
IFACENAME_INVALID:
NETMAP_NOT_INSTALLED:

    return NULL;
}

void
sendpacket_close_netmap(void *p)
{
    sendpacket_t *sp = p;
    fprintf(stderr, "Switching network driver for %s to normal mode... ", sp->device);
    fflush(NULL);

    /* flush any remaining packets */
    ioctl(sp->handle.fd, NIOCTXSYNC, NULL);

    /* wait for traffic to be sent */
    dbgx(2, "Waiting %d seconds for phy reset...", sp->netmap_delay);
    sleep(sp->netmap_delay);
    dbg(2, "Ready!");

#ifdef linux
    if (!sp->is_vale) {
        /* restore original settings:
         * - generic-segmentation-offload
         * - tcp-segmentation-offload
         * - rx-checksumming
         * - tx-checksumming
         */
        sp->data = sp->gso;
        nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_SGSO);
        sp->data = sp->tso;
        nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_STSO);
        sp->data = sp->rxcsum;
        nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_SRXCSUM);
        sp->data = sp->txcsum;
        nm_do_ioctl(sp, SIOCETHTOOL, ETHTOOL_STXCSUM);
    }
#endif /* linux */

    /* restore interface to normal mode */
#if NETMAP_API < 10
    ioctl(sp->handle.fd, NIOCUNREGIF, NULL);
#endif
    if (sp->mmap_addr)
        munmap(sp->mmap_addr, sp->mmap_size);
    close(sp->handle.fd);
    notice("done!");
}

bool
netmap_tx_queues_empty(void *p)
{
    sendpacket_t *sp = p;
    struct netmap_ring *txring;

    assert(sp);

    sp->cur_tx_ring = 0;
    txring = NETMAP_TXRING(sp->nm_if, sp->cur_tx_ring);
    while (NETMAP_TX_RING_EMPTY(txring)) {
        /* current ring is empty- go to next */
        ++sp->cur_tx_ring;
        if (sp->cur_tx_ring > sp->last_tx_ring) {
            /* last ring */
            sp->cur_tx_ring = 0;
            return true;
        }

        txring = NETMAP_TXRING(sp->nm_if, sp->cur_tx_ring);
    }

    /*
     * send TX interrupt signal
     */
    ioctl(sp->handle.fd, NIOCTXSYNC, NULL);

    return false;
}

int
sendpacket_send_netmap(void *p, const u_char *data, size_t len)
{
    sendpacket_t *sp = p;
    struct netmap_ring *txring;
    struct netmap_slot *slot;
    char *pkt;
    uint32_t cur, avail;

    if (sp->abort)
        return 0;

    txring = NETMAP_TXRING(sp->nm_if, sp->cur_tx_ring);
    while ((avail = nm_ring_space(txring)) == 0) {
        /* out of space on current TX queue - go to next */
        ++sp->cur_tx_ring;
        if (sp->cur_tx_ring > sp->last_tx_ring) {
            /*
             * out of space on all queues
             *
             * we have looped through all configured TX queues
             * so we have to reset to the first queue and
             * wait for available space
             */
            sp->cur_tx_ring = sp->first_tx_ring;

            /* send TX interrupt signal
             *
             * On Linux this makes one slot free on the
             * ring, which increases speed by about 10Mbps.
             *
             * But it will never free up all the slots. For
             * that we must poll and call again.
             */
            ioctl(sp->handle.fd, NIOCTXSYNC, NULL);

            /* loop again */
            return -2;
        }

        txring = NETMAP_TXRING(sp->nm_if, sp->cur_tx_ring);
    }

    /*
     * send
     */
    cur = txring->cur;
    slot = &txring->slot[cur];
    slot->flags = 0;
    pkt = NETMAP_BUF(txring, slot->buf_idx);
    memcpy(pkt, data, min(len, txring->nr_buf_size));
    slot->len = len;

    if (avail <= 1)
        slot->flags = NS_REPORT;

    dbgx(3,
         "netmap cur=%d slot index=%d flags=0x%x empty=%d avail=%u bufsize=%d\n",
         cur,
         slot->buf_idx,
         slot->flags,
         NETMAP_TX_RING_EMPTY(txring),
         nm_ring_space(txring),
         txring->nr_buf_size);

    /* let kernel know that packet is available */
    cur = NETMAP_RING_NEXT(txring, cur);
#ifdef HAVE_NETMAP_RING_HEAD_TAIL
    txring->head = cur;
#else
    txring->avail--;
#endif
    txring->cur = cur;

    return len;
}