diff --git a/README.spdk b/README.spdk new file mode 100644 index 000000000..736f904f0 --- /dev/null +++ b/README.spdk @@ -0,0 +1,37 @@ +======================================================================== + README for mTCP development with SPDK +======================================================================== + +1. Why mTCP needs to change + +mTCP is a highly scalable user-level TCP stack for multicore systems. +It uses a per-thread architecture and starts "RunMainLoop" on each +thread, which runs it as a busy loop until the thread is told to stop. + +However, SPDK registers a poller on each thread to handle receiving and +events, so it requires mTCP to supply a separate recv/send handler. +RunMainLoop is therefore split into two APIs: RunMainLoop and +mtcp_run_instance. Original mTCP apps still use RunMainLoop and are not +affected, while SPDK uses mtcp_run_instance for its poller. + +2. How to enable SPDK support + +Pass "--enable-spdk" to configure to enable SPDK support (disabled by default). +The mtcp_crossbuild.sh script is provided as a reference build script. + +3. SPDK changes + +SPDK itself needs further changes: + +- Add an mTCP sock interface. Currently only the posix/vpp sock interfaces + are supported. Follow the existing framework to add the mTCP sock interface. +- Change the connection scheduling mode to support the mTCP per-thread architecture. + +Upstreaming of these changes is in progress. + +======================================================================== + + Contact: mtcp-user at list.ndsl.kaist.edu + April 2, 2015. + EunYoung Jeong + M. 
Asim Jamshed diff --git a/apps/example/Makefile.in b/apps/example/Makefile.in index 23544f309..e9086bebd 100644 --- a/apps/example/Makefile.in +++ b/apps/example/Makefile.in @@ -7,9 +7,13 @@ PS=@PSIO@ NETMAP=@NETMAP@ ONVM=@ONVM@ CFLAGS=@CFLAGS@ +LDFLAGS=@LDFLAGS@ + +# If ARCH is not defined, retrieve it from the system +ARCH ?= $(shell uname -m) # Add arch-specific optimization -ifeq ($(shell uname -m),x86_64) +ifeq ($(ARCH),x86_64) LIBS += -m64 endif @@ -44,7 +48,7 @@ endif ifeq ($(DPDK),1) DPDK_MACHINE_LINKER_FLAGS=$${RTE_SDK}/$${RTE_TARGET}/lib/ldflags.txt DPDK_MACHINE_LDFLAGS=$(shell cat ${DPDK_MACHINE_LINKER_FLAGS}) -LIBS += -g -O3 -pthread -lrt -march=native ${MTCP_FLD}/lib/libmtcp.a -lnuma -lmtcp -lpthread -lrt -ldl -lgmp -L${RTE_SDK}/${RTE_TARGET}/lib ${DPDK_MACHINE_LDFLAGS} +LIBS += -g -O3 -pthread -lrt -march=native ${MTCP_FLD}/lib/libmtcp.a -lnuma -lmtcp -lpthread -lrt -ldl -lgmp -L${RTE_SDK}/${RTE_TARGET}/lib ${DPDK_MACHINE_LDFLAGS} ${LDFLAGS} endif # onvm-specific variables diff --git a/configure.ac b/configure.ac index 69f60c584..48d6fa680 100644 --- a/configure.ac +++ b/configure.ac @@ -63,6 +63,17 @@ AC_FUNC_MMAP AC_CHECK_FUNC([clock_gettime],,AC_MSG_ERROR([librt library is missing])) AC_CHECK_FUNCS([bzero getpagesize gettimeofday memmove memset munmap select socket strchr strerror strstr strtol],,AC_MSG_ERROR([glibc library is missing])) +# Reset SPDK to 0 +AC_SUBST(SPDK, 0) + +dnl SPDK support is off by default; --enable-spdk sets SPDK to 1 +AC_ARG_ENABLE([spdk], + AS_HELP_STRING([--enable-spdk], [Enable SPDK Support])) + +AS_IF([test "x$enable_spdk" = "xyes"], [ + AC_SUBST(SPDK, 1) +]) + # Reset DPDK to 0 AC_SUBST(DPDK, 0) # Reset enforcement value diff --git a/dpdk-iface-kmod/Makefile b/dpdk-iface-kmod/Makefile index 93da3030c..a84732157 100644 --- a/dpdk-iface-kmod/Makefile +++ b/dpdk-iface-kmod/Makefile @@ -6,9 +6,12 @@ endif ifeq ($(RTE_TARGET),) $(error "Please define RTE_TARGET environment variable") endif + +RTE_KERNELDIR ?= /lib/modules/$(shell uname -r)/build/ + 
#-------------------------------------------------------------------------# include $(RTE_SDK)/mk/rte.vars.mk -CC=gcc +CC=$(CROSS)gcc obj-m=dpdk_iface.o DPDK_MACHINE_LINKER_FLAGS=$${RTE_SDK}/$${RTE_TARGET}/lib/ldflags.txt DPDK_MACHINE_LDFLAGS=$(shell cat ${DPDK_MACHINE_LINKER_FLAGS}) @@ -25,7 +28,7 @@ else endif #-------------------------------------------------------------------------# all: dpdk_iface.c $(appname) $(appname).c - make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) modules + $(MAKE) ARCH=${ARCH} CROSS_COMPILE=${CROSS} -C ${RTE_KERNELDIR} M=$(PWD) modules $(appname): $(appname).c $(MSG) " CC $<" @@ -34,7 +37,7 @@ $(appname): $(appname).c -L$(DPDK_LIB) ${DPDK_MACHINE_LDFLAGS} -lpthread clean: - make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) clean + $(MAKE) -C ${RTE_KERNELDIR} M=$(PWD) clean $(MSG) " CLEAN $(appname)" $(HIDE) rm -rf *~ *.o *.ko dpdk_iface_main diff --git a/dpdk-iface-kmod/dpdk_iface_main.c b/dpdk-iface-kmod/dpdk_iface_main.c index 143cea4d1..89f579a90 100644 --- a/dpdk-iface-kmod/dpdk_iface_main.c +++ b/dpdk-iface-kmod/dpdk_iface_main.c @@ -22,7 +22,11 @@ typedef struct { PciDevice pd; struct rte_eth_dev_info dev_details; +#if RTE_VERSION < RTE_VERSION_NUM(19, 8, 0, 0) struct ether_addr ports_eth_addr; +#else + struct rte_ether_addr ports_eth_addr; +#endif } DevInfo; static DevInfo di[RTE_MAX_ETHPORTS]; @@ -260,7 +264,11 @@ main(int argc, char **argv) ret = rte_eal_init(rte_argc, rte_argv); /* get total count of detected ethernet ports */ +#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0) num_devices = rte_eth_dev_count(); +#else + num_devices = rte_eth_dev_count_avail(); +#endif if (num_devices == 0) { fprintf(stderr, "No Ethernet port detected!\n"); exit(EXIT_FAILURE); diff --git a/mtcp/src/Makefile.in b/mtcp/src/Makefile.in index c703ae213..04936f1b9 100644 --- a/mtcp/src/Makefile.in +++ b/mtcp/src/Makefile.in @@ -4,6 +4,7 @@ ### TARGET ### PS=@PSIO@ +SPDK=@SPDK@ DPDK=@DPDK@ ENFORCE_RX_IDLE=@ENFORCE_RX_IDLE@ NETMAP=@NETMAP@ @@ 
-19,7 +20,10 @@ MTCP_HDR = mtcp_api.h mtcp_epoll.h GCC=@CC@ ### FLAGS ### -ifeq ($(shell uname -m),x86_64) + +# If ARCH is not defined, retrieve it from the system +ARCH ?= $(shell uname -m) +ifeq ($(ARCH),x86_64) GCC_OPT = -m64 else GCC_OPT = @@ -50,6 +54,10 @@ GCC_OPT += -DNDEBUG -g -O3 -DNETSTAT -DINFO -DDBGERR -DDBGCERR #GCC_OPT += -DNDEBUG -g -DNETSTAT -DINFO -DDBGERR -DDBGCERR GCC_OPT += $(DBG_OPT) +ifeq ($(SPDK),1) +GCC_OPT += -DENABLE_SPDK +endif + ifeq ($(LRO),1) GCC_OPT += -DENABLELRO endif diff --git a/mtcp/src/api.c b/mtcp/src/api.c index c53714bcf..3429f4476 100644 --- a/mtcp/src/api.c +++ b/mtcp/src/api.c @@ -1284,7 +1284,7 @@ mtcp_recv(mctx_t mctx, int sockid, char *buf, size_t len, int flags) return ret; } /*----------------------------------------------------------------------------*/ -inline ssize_t +ssize_t mtcp_read(mctx_t mctx, int sockid, char *buf, size_t len) { return mtcp_recv(mctx, sockid, buf, len, 0); diff --git a/mtcp/src/core.c b/mtcp/src/core.c index 7ce22c805..93b93634a 100644 --- a/mtcp/src/core.c +++ b/mtcp/src/core.c @@ -750,123 +750,140 @@ InterruptApplication(mtcp_manager_t mtcp) } } /*----------------------------------------------------------------------------*/ +#ifndef ENABLE_SPDK static void RunMainLoop(struct mtcp_thread_context *ctx) { mtcp_manager_t mtcp = ctx->mtcp_manager; - int i; - int recv_cnt; - int rx_inf, tx_inf; - struct timeval cur_ts = {0}; - uint32_t ts, ts_prev; - int thresh; - gettimeofday(&cur_ts, NULL); - TRACE_DBG("CPU %d: mtcp thread running.\n", ctx->cpu); - ts = ts_prev = 0; while ((!ctx->done || mtcp->flow_cnt) && !ctx->exit) { - - STAT_COUNT(mtcp->runstat.rounds); - recv_cnt = 0; - - gettimeofday(&cur_ts, NULL); - ts = TIMEVAL_TO_TS(&cur_ts); - mtcp->cur_ts = ts; - - for (rx_inf = 0; rx_inf < CONFIG.eths_num; rx_inf++) { - - static uint16_t len; - static uint8_t *pktbuf; - recv_cnt = mtcp->iom->recv_pkts(ctx, rx_inf); - STAT_COUNT(mtcp->runstat.rounds_rx_try); + mtcp_run_instance(ctx); + } - for (i = 0; i < 
recv_cnt; i++) { - pktbuf = mtcp->iom->get_rptr(mtcp->ctx, rx_inf, i, &len); - if (pktbuf != NULL) - ProcessPacket(mtcp, rx_inf, ts, pktbuf, len); -#ifdef NETSTAT - else - mtcp->nstat.rx_errors[rx_inf]++; +#if TESTING + DestroyRemainingFlows(mtcp); #endif - } - } - STAT_COUNT(mtcp->runstat.rounds_rx); - /* interaction with application */ - if (mtcp->flow_cnt > 0) { - - /* check retransmission timeout and timewait expire */ -#if 0 - thresh = (int)mtcp->flow_cnt / (TS_TO_USEC(PER_STREAM_TCHECK)); - assert(thresh >= 0); - if (thresh == 0) - thresh = 1; - if (recv_cnt > 0 && thresh > recv_cnt) - thresh = recv_cnt; + TRACE_DBG("MTCP thread %d out of main loop.\n", ctx->cpu); + /* flush logs */ + flush_log_data(mtcp); + TRACE_DBG("MTCP thread %d flushed logs.\n", ctx->cpu); + InterruptApplication(mtcp); + TRACE_INFO("MTCP thread %d finished.\n", ctx->cpu); +} #endif - thresh = CONFIG.max_concurrency; - - /* Eunyoung, you may fix this later - * if there is no rcv packet, we will send as much as possible - */ - if (thresh == -1) - thresh = CONFIG.max_concurrency; - - CheckRtmTimeout(mtcp, ts, thresh); - CheckTimewaitExpire(mtcp, ts, CONFIG.max_concurrency); - - if (CONFIG.tcp_timeout > 0 && ts != ts_prev) { - CheckConnectionTimeout(mtcp, ts, thresh); - } - } - - /* if epoll is in use, flush all the queued events */ - if (mtcp->ep) { - FlushEpollEvents(mtcp, ts); - } - - if (mtcp->flow_cnt > 0) { - /* hadnle stream queues */ - HandleApplicationCalls(mtcp, ts); - } - - WritePacketsToChunks(mtcp, ts); - /* send packets from write buffer */ - /* send until tx is available */ - for (tx_inf = 0; tx_inf < CONFIG.eths_num; tx_inf++) { - mtcp->iom->send_pkts(ctx, tx_inf); - } +RTE_DEFINE_PER_LCORE(uint32_t , ts_prev) = 0; - if (ts != ts_prev) { - ts_prev = ts; - if (ctx->cpu == mtcp_master) { - ARPTimer(mtcp, ts); +int mtcp_run_instance(void *tmp) +{ + struct mtcp_thread_context *ctx= (struct mtcp_thread_context *)tmp; + mtcp_manager_t mtcp = ctx->mtcp_manager; + int i; + int 
recv_cnt; + int rx_inf, tx_inf; + struct timeval cur_ts = {0}; + int thresh; + uint32_t ts=0; + + if ((!ctx->done || mtcp->flow_cnt) && !ctx->exit) { + + STAT_COUNT(mtcp->runstat.rounds); + recv_cnt = 0; + + gettimeofday(&cur_ts, NULL); + ts = TIMEVAL_TO_TS(&cur_ts); + mtcp->cur_ts = ts; + + for (rx_inf = 0; rx_inf < CONFIG.eths_num; rx_inf++) { + + uint16_t len = 0; /* automatic, not static: this function runs on every mTCP thread */ + uint8_t *pktbuf = NULL; /* static storage would be one copy shared by all of them */ + recv_cnt = mtcp->iom->recv_pkts(ctx, rx_inf); + STAT_COUNT(mtcp->runstat.rounds_rx_try); + + for (i = 0; i < recv_cnt; i++) { + pktbuf = mtcp->iom->get_rptr(mtcp->ctx, rx_inf, i, &len); + if (pktbuf != NULL) + ProcessPacket(mtcp, rx_inf, ts, pktbuf, len); #ifdef NETSTAT - PrintNetworkStats(mtcp, ts); + else + mtcp->nstat.rx_errors[rx_inf]++; #endif - } - } + } + } + STAT_COUNT(mtcp->runstat.rounds_rx); - mtcp->iom->select(ctx); + /* interaction with application */ + if (mtcp->flow_cnt > 0) { - if (ctx->interrupt) { - InterruptApplication(mtcp); - } - } + /* check retransmission timeout and timewait expire */ +#if 0 + thresh = (int)mtcp->flow_cnt / (TS_TO_USEC(PER_STREAM_TCHECK)); + assert(thresh >= 0); + if (thresh == 0) + thresh = 1; + if (recv_cnt > 0 && thresh > recv_cnt) + thresh = recv_cnt; +#endif + thresh = CONFIG.max_concurrency; + + /* Eunyoung, you may fix this later + * if there is no rcv packet, we will send as much as possible + */ + if (thresh == -1) + thresh = CONFIG.max_concurrency; + + CheckRtmTimeout(mtcp, ts, thresh); + CheckTimewaitExpire(mtcp, ts, CONFIG.max_concurrency); + + if (CONFIG.tcp_timeout > 0 && ts != RTE_PER_LCORE(ts_prev)) { + CheckConnectionTimeout(mtcp, ts, thresh); + } + } + + /* if epoll is in use, flush all the queued events */ + if (mtcp->ep) { + FlushEpollEvents(mtcp, ts); + } + + if (mtcp->flow_cnt > 0) { + /* handle stream queues */ + HandleApplicationCalls(mtcp, ts); + } + + WritePacketsToChunks(mtcp, ts); + + /* send packets from write buffer */ + /* send until tx is available */ + for (tx_inf = 0; tx_inf < CONFIG.eths_num; 
tx_inf++) { + mtcp->iom->send_pkts(ctx, tx_inf); + } + + if (ts != RTE_PER_LCORE(ts_prev)) { + RTE_PER_LCORE(ts_prev) = ts; + if (ctx->cpu == mtcp_master) { + ARPTimer(mtcp, ts); +#ifndef ENABLE_SPDK +#ifdef NETSTAT -#if TESTING - DestroyRemainingFlows(mtcp); + PrintNetworkStats(mtcp, ts); +#endif #endif + } + } - TRACE_DBG("MTCP thread %d out of main loop.\n", ctx->cpu); - /* flush logs */ - flush_log_data(mtcp); - TRACE_DBG("MTCP thread %d flushed logs.\n", ctx->cpu); - InterruptApplication(mtcp); - TRACE_INFO("MTCP thread %d finished.\n", ctx->cpu); + mtcp->iom->select(ctx); + + if (ctx->interrupt) { + InterruptApplication(mtcp); + } + } + + return 0; } + /*----------------------------------------------------------------------------*/ struct mtcp_sender * CreateMTCPSender(int ifidx) @@ -1075,7 +1092,7 @@ InitializeMTCPManager(struct mtcp_thread_context* ctx) return mtcp; } /*----------------------------------------------------------------------------*/ -static void * +void * MTCPRunThread(void *arg) { mctx_t mctx = (mctx_t)arg; @@ -1152,7 +1169,7 @@ MTCPRunThread(void *arg) fprintf(stderr, "CPU %d: initialization finished.\n", cpu); sem_post(&g_init_sem[ctx->cpu]); - +#ifndef ENABLE_SPDK /* start the main loop */ RunMainLoop(ctx); @@ -1164,8 +1181,20 @@ MTCPRunThread(void *arg) DestroyHashtable(g_mtcp[cpu]->listeners); TRACE_DBG("MTCP thread %d finished.\n", ctx->cpu); - return 0; +#else + sem_wait(&g_init_sem[ctx->cpu]); + sem_destroy(&g_init_sem[ctx->cpu]); + + running[ctx->cpu] = TRUE; + + if (mtcp_master < 0) { + mtcp_master = ctx->cpu; + TRACE_INFO("CPU %d is now the master thread.\n", mtcp_master); + } + + return ctx; +#endif } /*----------------------------------------------------------------------------*/ #ifndef DISABLE_DPDK @@ -1231,6 +1260,7 @@ mtcp_create_context(int cpu) return NULL; } #endif +#ifndef ENABLE_SPDK #ifndef DISABLE_DPDK /* Wake up mTCP threads (wake up I/O threads) */ if (current_iomodule_func == &dpdk_module_func) { @@ -1268,6 +1298,7 
@@ mtcp_create_context(int cpu) TRACE_INFO("CPU %d is now the master thread.\n", mtcp_master); } +#endif return mctx; } /*----------------------------------------------------------------------------*/ diff --git a/mtcp/src/dpdk_module.c b/mtcp/src/dpdk_module.c index ebf124ecb..0688a2098 100644 --- a/mtcp/src/dpdk_module.c +++ b/mtcp/src/dpdk_module.c @@ -87,7 +87,7 @@ #define ETHER_IFG 12 #define ETHER_PREAMBLE 8 -#define ETHER_OVR (ETHER_CRC_LEN + ETHER_PREAMBLE + ETHER_IFG) +#define ETHER_OVR (MTCP_ETHER_CRC_LEN + ETHER_PREAMBLE + ETHER_IFG) static const uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; static const uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; @@ -98,7 +98,7 @@ static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL}; //#define DEBUG 1 #ifdef DEBUG /* ethernet addresses of ports */ -static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; +static MTCP_ETHER_ADDR ports_eth_addr[RTE_MAX_ETHPORTS]; #endif static struct rte_eth_dev_info dev_info[RTE_MAX_ETHPORTS]; @@ -106,7 +106,7 @@ static struct rte_eth_dev_info dev_info[RTE_MAX_ETHPORTS]; static struct rte_eth_conf port_conf = { .rxmode = { .mq_mode = ETH_MQ_RX_RSS, - .max_rx_pkt_len = ETHER_MAX_LEN, + .max_rx_pkt_len = MTCP_ETHER_MAX_LEN, #if RTE_VERSION > RTE_VERSION_NUM(17, 8, 0, 0) .offloads = ( #if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0) @@ -412,7 +412,7 @@ dpdk_get_wptr(struct mtcp_thread_context *ctxt, int ifidx, uint16_t pktsize) m = dpc->wmbufs[ifidx].m_table[len_of_mbuf]; /* retrieve the right write offset */ - ptr = (void *)rte_pktmbuf_mtod(m, struct ether_hdr *); + ptr = (void *)rte_pktmbuf_mtod(m, MTCP_ETHER_HDR *); m->pkt_len = m->data_len = pktsize; m->nb_segs = 1; m->next = NULL; @@ -467,16 +467,16 @@ dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx) struct rte_mbuf * ip_reassemble(struct dpdk_private_context *dpc, struct rte_mbuf *m) { - struct ether_hdr *eth_hdr; + MTCP_ETHER_HDR *eth_hdr; struct rte_ip_frag_tbl *tbl; struct rte_ip_frag_death_row *dr; /* if packet is 
IPv4 */ if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { - struct ipv4_hdr *ip_hdr; + MTCP_IPV4_HDR *ip_hdr; - eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); - ip_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + eth_hdr = rte_pktmbuf_mtod(m, MTCP_ETHER_HDR *); + ip_hdr = (MTCP_IPV4_HDR *)(eth_hdr + 1); /* if it is a fragmented packet, then try to reassemble. */ if (rte_ipv4_frag_pkt_is_fragmented(ip_hdr)) { @@ -702,6 +702,10 @@ dpdk_load_module(void) /* init port */ printf("Initializing port %u... ", (unsigned) portid); fflush(stdout); + + if (!strncmp(dev_info[portid].driver_name, "net_bnxt", 8)) + port_conf.rx_adv_conf.rss_conf.rss_key_len = 40; + ret = rte_eth_dev_configure(portid, CONFIG.num_cores, CONFIG.num_cores, &port_conf); if (ret < 0) rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u, cores: %d\n", @@ -822,7 +826,7 @@ dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp) goto dev_ioctl_err; m = dpc->wmbufs[eidx].m_table[len_of_mbuf - 1]; m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4; - m->l2_len = sizeof(struct ether_hdr); + m->l2_len = sizeof(MTCP_ETHER_HDR); m->l3_len = (iph->ihl<<2); break; case PKT_TX_TCP_CSUM: @@ -831,19 +835,19 @@ dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp) m = dpc->wmbufs[eidx].m_table[len_of_mbuf - 1]; tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl<<2)); m->ol_flags |= PKT_TX_TCP_CKSUM; - tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags); + tcph->check = rte_ipv4_phdr_cksum((MTCP_IPV4_HDR *)iph, m->ol_flags); break; #ifdef ENABLELRO case PKT_RX_TCP_LROSEG: m = dpc->cur_rx_m; //if (m->next != NULL) // rte_prefetch0(rte_pktmbuf_mtod(m->next, void *)); - iph = rte_pktmbuf_mtod_offset(m, struct iphdr *, sizeof(struct ether_hdr)); + iph = rte_pktmbuf_mtod_offset(m, struct iphdr *, sizeof(MTCP_ETHER_HDR)); tcph = (struct tcphdr *)((u_char *)iph + (iph->ihl << 2)); payload = (uint8_t *)tcph + (tcph->doff << 2); seg_off = m->data_len - - 
sizeof(struct ether_hdr) - (iph->ihl << 2) - + sizeof(MTCP_ETHER_HDR) - (iph->ihl << 2) - (tcph->doff << 2); to = (uint8_t *) argp; @@ -866,13 +870,13 @@ dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp) if ((dev_info[nif].tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM) == 0) goto dev_ioctl_err; m = dpc->wmbufs[eidx].m_table[len_of_mbuf - 1]; - iph = rte_pktmbuf_mtod_offset(m, struct iphdr *, sizeof(struct ether_hdr)); + iph = rte_pktmbuf_mtod_offset(m, struct iphdr *, sizeof(MTCP_ETHER_HDR)); tcph = (struct tcphdr *)((uint8_t *)iph + (iph->ihl<<2)); - m->l2_len = sizeof(struct ether_hdr); + m->l2_len = sizeof(MTCP_ETHER_HDR); m->l3_len = (iph->ihl<<2); m->l4_len = (tcph->doff<<2); m->ol_flags = PKT_TX_TCP_CKSUM | PKT_TX_IP_CKSUM | PKT_TX_IPV4; - tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags); + tcph->check = rte_ipv4_phdr_cksum((MTCP_IPV4_HDR *)iph, m->ol_flags); break; case PKT_RX_IP_CSUM: if ((dev_info[nif].rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM) == 0) diff --git a/mtcp/src/include/mtcp.h b/mtcp/src/include/mtcp.h index ba07fd773..1e92c3433 100644 --- a/mtcp/src/include/mtcp.h +++ b/mtcp/src/include/mtcp.h @@ -39,6 +39,22 @@ #define ERROR (-1) #endif +#if RTE_VERSION < RTE_VERSION_NUM(19, 8, 0, 0) +typedef struct ether_addr MTCP_ETHER_ADDR; +typedef struct ipv4_hdr MTCP_IPV4_HDR; +typedef struct ether_hdr MTCP_ETHER_HDR; + +#define MTCP_ETHER_MAX_LEN ETHER_MAX_LEN +#define MTCP_ETHER_CRC_LEN ETHER_CRC_LEN +#else +typedef struct rte_ether_addr MTCP_ETHER_ADDR; +typedef struct rte_ipv4_hdr MTCP_IPV4_HDR; +typedef struct rte_ether_hdr MTCP_ETHER_HDR; + +#define MTCP_ETHER_MAX_LEN RTE_ETHER_MAX_LEN +#define MTCP_ETHER_CRC_LEN RTE_ETHER_CRC_LEN +#endif + #define ETHERNET_HEADER_LEN 14 // sizeof(struct ethhdr) #define IP_HEADER_LEN 20 // sizeof(struct iphdr) #define TCP_HEADER_LEN 20 // sizeof(struct tcphdr) diff --git a/mtcp/src/include/mtcp_api.h b/mtcp/src/include/mtcp_api.h index 0a1ee40fd..017cb51d7 100644 --- 
a/mtcp/src/include/mtcp_api.h +++ b/mtcp/src/include/mtcp_api.h @@ -46,7 +46,7 @@ int mtcp_init(const char *config_file); void -mtcp_destroy(); +mtcp_destroy(void); int mtcp_getconf(struct mtcp_conf *conf); @@ -68,6 +68,9 @@ typedef void (*mtcp_sighandler_t)(int); mtcp_sighandler_t mtcp_register_signal(int signum, mtcp_sighandler_t handler); +void * +MTCPRunThread(void *arg); + int mtcp_pipe(mctx_t mctx, int pipeid[2]); @@ -87,6 +90,9 @@ mtcp_setsock_nonblock(mctx_t mctx, int sockid); int mtcp_socket_ioctl(mctx_t mctx, int sockid, int request, void *argp); +int +mtcp_run_instance(void *tmp); + int mtcp_socket(mctx_t mctx, int domain, int type, int protocol); @@ -124,7 +130,7 @@ int mtcp_getpeername(mctx_t mctx, int sockid, struct sockaddr *addr, socklen_t *addrlen); -inline ssize_t +ssize_t mtcp_read(mctx_t mctx, int sockid, char *buf, size_t len); ssize_t diff --git a/mtcp/src/io_module.c b/mtcp/src/io_module.c index 126c94f46..9864ad12a 100644 --- a/mtcp/src/io_module.c +++ b/mtcp/src/io_module.c @@ -260,7 +260,11 @@ SetNetEnv(char *dev_name_list, char *port_stat_list) char socket_mem_str[32] = ""; // int i; int ret, socket_mem; +#if RTE_VERSION < RTE_VERSION_NUM(19, 8, 0, 0) static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; +#else + static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; +#endif /* STEP 1: first determine CPU mask */ mpz_init(_cpumask); @@ -343,8 +347,10 @@ SetNetEnv(char *dev_name_list, char *port_stat_list) /* initialize the dpdk eal env */ ret = rte_eal_init(argc, argv); if (ret < 0) { - TRACE_ERROR("Invalid EAL args!\n"); - exit(EXIT_FAILURE); + if(rte_errno != EALREADY) { + TRACE_ERROR("Invalid EAL args!\n"); + exit(EXIT_FAILURE); + } } /* give me the count of 'detected' ethernet ports */ #if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0) diff --git a/mtcp/src/onvm_module.c b/mtcp/src/onvm_module.c index eed55f263..4d8a8a5ea 100644 --- a/mtcp/src/onvm_module.c +++ b/mtcp/src/onvm_module.c @@ -63,7 +63,11 @@ static struct 
rte_mempool *pktmbuf_pool = NULL; //#define DEBUG 1 #ifdef DEBUG /* ethernet addresses of ports */ +#if RTE_VERSION < RTE_VERSION_NUM(19, 8, 0, 0) static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; +#else +static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; +#endif #endif static struct rte_eth_dev_info dev_info[RTE_MAX_ETHPORTS]; diff --git a/mtcp_crossbuild.sh b/mtcp_crossbuild.sh new file mode 100755 index 000000000..64bf43ec0 --- /dev/null +++ b/mtcp_crossbuild.sh @@ -0,0 +1,110 @@ +#!/bin/bash + +args="$*" + +usage () +{ + echo "args=$args" + echo + echo "`basename $0` -h -c -k -i -d -e" + echo + echo "Helper script, used to build dpdk." + echo + echo " -h Help Usage" + echo " -c specify build platform" + echo " -k Directory that kernel builds if enable LKM build option" + echo " -i If needed introduce external lib dependencies" + echo " -d Directory that dpdk builds" + echo " -e Enable SPDK Support" + echo +} + +while getopts "hc:k:i:d:e" opt; do + case $opt in + h) show_usage=1 + ;; + c) build_arch="$OPTARG" + ;; + k) kernel_build="$OPTARG" + ;; + i) ext_lib="$OPTARG" + ;; + d) dpdk_build="$OPTARG" + ;; + e) spdk_enable=1 + ;; + \?) 
+ echo "Invalid option: -$OPTARG" >&2 + show_usage=1 + ;; + esac +done + +if [ "$show_usage" == "1" ]; then + usage + exit 1 +fi + +CUR_PATH=`pwd` + +if [ "$build_arch" == "aarch64" ]; then + CROSS=aarch64-linux-gnu- + RTE_TARGET=arm64-stingray-linuxapp-gcc +else + RTE_TARGET=x86_64-native-linuxapp-gcc +fi + +if [ -z "$kernel_build" ]; then + RTE_KERNELDIR=/lib/modules/`uname -r`/build +else + RTE_KERNELDIR=$kernel_build +fi + +if [ -z "$ext_lib" ]; then + EXT_LIB_DIR=$CUR_PATH/../ext_lib +else + EXT_LIB_DIR=$ext_lib +fi + +if [ "$spdk_enable" == "1" ]; then + SPDK_OPTION="--enable-spdk" +fi + +if [ -z $dpdk_build ]; then + RTE_SDK=$CUR_PATH/../dpdk +else + RTE_SDK=$dpdk_build +fi +DPDK_BUILD=$RTE_SDK/$RTE_TARGET +export RTE_SDK RTE_TARGET DPDK_BUILD RTE_KERNELDIR + +# check ldflags.txt +if grep "ldflags.txt" $RTE_SDK/mk/rte.app.mk > /dev/null +then + : +else + sed -i -e 's/O_TO_EXE_STR =/\$(shell if [ \! -d \${RTE_SDK}\/\${RTE_TARGET}\/lib ]\; then mkdir \${RTE_SDK}\/\${RTE_TARGET}\/lib\; fi)\nLINKER_FLAGS = \$(call linkerprefix,\$(LDLIBS))\n\$(shell echo \${LINKER_FLAGS} \> \${RTE_SDK}\/\${RTE_TARGET}\/lib\/ldflags\.txt)\nO_TO_EXE_STR =/g' $RTE_SDK/mk/rte.app.mk + echo "Need to rebuild dpdk." 
+ exit 1 +fi + +echo +echo "======================================" +echo "Build_arch : $build_arch" +echo "RTE_TARGET : $RTE_TARGET" +echo "DPDK Build : $RTE_SDK" +echo "SPDK Support: $spdk_enable" +echo "======================================" +echo + +# Request a cross build (configure --host, make ARCH) only when -c aarch64 was given +if [ "$build_arch" == "aarch64" ]; then HOST_OPTION="--host=aarch64"; MAKE_ARCH="ARCH=arm64"; else HOST_OPTION=""; MAKE_ARCH=""; fi +# build kernel module +cd $CUR_PATH/dpdk-iface-kmod && make CROSS=$CROSS RTE_KERNELDIR=$RTE_KERNELDIR V=1 + +# build application +cd $CUR_PATH && autoreconf -f -i && \ + ./configure $HOST_OPTION CC=${CROSS}gcc LD=${CROSS}ld --with-dpdk=$RTE_SDK/$RTE_TARGET --with-dpdk-lib=$RTE_SDK/$RTE_TARGET/lib $SPDK_OPTION CFLAGS="-I$EXT_LIB_DIR/include" LDFLAGS="-L$EXT_LIB_DIR/lib64 -L$EXT_LIB_DIR/lib" && \ +make -j `grep -c ^processor /proc/cpuinfo` $MAKE_ARCH CC=${CROSS}gcc LD=${CROSS}ld V=0 + +# end of file diff --git a/util/Makefile.in b/util/Makefile.in index 0d03bf271..f48fcafe2 100644 --- a/util/Makefile.in +++ b/util/Makefile.in @@ -1,7 +1,9 @@ ### GCC ### GCC=@CC@ -ifeq ($(shell uname -m),x86_64) +# If ARCH is not defined, retrieve it from the system +ARCH ?= $(shell uname -m) +ifeq ($(ARCH),x86_64) GCC_OPT = -m64 else GCC_OPT =