Bootstrap

DPDK——TCP/UDP协议栈服务端实现之UDP实现(二)

一、概述

《DPDK——TCP/UDP协议栈服务端实现(一)》中已经讲述TCP/UDP简易协议栈项目的框架和基本信息,本篇文章要完成的工作如下:

  1. udp包结构设计,包括udp控制块结构、协议栈与应用层传输包结构
  2. ARP表处理
  3. UDP套接字函数实现
  4. UDP包处理

二、UDP数据结构

如下图所示,UDP的数据结构主要分为两个部分:数据传输块与控制块。
在这里插入图片描述

UDP控制块是在应用层创建socket的同时生成的,主要包含一个发送队列和接收队列,线程同步变量,以及相关参数,如下:

/*
 * Per-socket control block. nsocket() allocates one for every SOCK_DGRAM
 * socket and links it into the g_pstHost list; nclose() unlinks and frees it.
 */
struct localhost 
{
	// Descriptor returned to the application by nsocket().
	int fd;
	uint32_t localip; // local IPv4 address, copied from sin_addr.s_addr by nbind()
	// Local MAC address, copied from g_stCpuMac by nbind().
	unsigned char localmac[RTE_ETHER_ADDR_LEN];
	// Local port, stored exactly as supplied in sin_port (no byte-order conversion).
	uint16_t localport;

	// Transport protocol of the socket; nsocket() sets IPPROTO_UDP.
	unsigned char protocol;

	// Send queue: nsendto() enqueues offload blocks, udp_out() dequeues them.
	struct rte_ring *sndbuf;
	// Receive queue: udp_process() enqueues offload blocks, nrecvfrom() dequeues them.
	struct rte_ring *rcvbuf;

	// Links used by the LL_ADD / LL_REMOVE list macros.
	struct localhost *prev; 
	struct localhost *next;

	// nrecvfrom() waits on cond (holding mutex) until udp_process() signals new data.
	pthread_cond_t cond;
	pthread_mutex_t mutex;
};

传输块是协议栈与UDP应用之间传递数据时使用的封装:DPDK协议栈收到网卡上送的数据后,按照传输块的结构封装数据,并放入UDP控制块的接收队列,结构如下:

/*
 * Transfer block exchanged between the protocol stack and the application
 * through the per-socket rings (one block per queued datagram).
 */
struct offload 
{ 
	// Source / destination IPv4 address, copied unchanged from the IP header
	// (receive path) or from the socket and sockaddr_in (send path).
	uint32_t sip;
	uint32_t dip;

	// Source / destination port, copied without byte-order conversion.
	uint16_t sport;
	uint16_t dport; 

	// Transport protocol; udp_process() sets IPPROTO_UDP.
	int protocol;

	// rte_malloc()-ed buffer owned by this block; whoever consumes the block
	// releases it with rte_free().
	unsigned char *data;
	// Length in bytes of the data carried by this block.
	uint16_t length;
	
};

三、arp表处理

在网络传输中,ARP协议承担着“护送数据包最后一公里”的任务:数据包到达目的局域网后,链路层只按MAC地址转发数据帧,仅凭ip地址无法把数据包交付给具体的目的主机,这个时候我们就要获知目的主机的MAC地址了。具体做法就是向局域网(同一广播域)内的所有设备广播ARP请求,内容就是“你们谁是ip为xxx.xxx.xxx.xxx的主机呀?看到消息请马上告知你的MAC地址,你的快递到了哈!”

所以,本项目也必须维护一个ip和mac映射的arp表,这样才能准确将数据包发给对方主机。

arp表的维护主要发生在两处:协议栈收到网卡数据包时,记录对端的ip和mac地址信息;协议栈发送数据包之前,查询arp表,若查不到对端的mac地址,则先广播arp请求包。

主要函数如下:

/**
 * Record an IP -> MAC mapping in the ARP table unless the IP is already known.
 *
 * @param ip   IPv4 address used as the key; it is handed unchanged to
 *             ng_get_dst_macaddr() for the lookup.
 * @param mac  Hardware address; RTE_ETHER_ADDR_LEN bytes are copied from it.
 * @return 1 when a new entry was added, 0 when the IP already had an entry,
 *         -1 when mac is NULL, the table is unavailable or the entry could
 *         not be allocated (previously an allocation failure was reported
 *         as a successful insert).
 *
 * NOTE(review): the lookup runs before the spinlock is taken, so two threads
 * inserting the same IP at the same time could both add an entry — confirm
 * whether more than one thread ever calls this.
 */
int ng_arp_entry_insert(uint32_t ip, unsigned char *mac)
{
    struct arp_table *pstTbl = arp_table_instance();
    struct arp_entry *pstEntry;

    if (pstTbl == NULL || mac == NULL)
        return -1;

    /* Already resolved: nothing to add. */
    if (ng_get_dst_macaddr(ip) != NULL)
        return 0;

    pstEntry = rte_malloc("arp_entry", sizeof(struct arp_entry), 0);
    if (pstEntry == NULL)
        return -1;

    memset(pstEntry, 0, sizeof(struct arp_entry));
    pstEntry->ip = ip;
    rte_memcpy(pstEntry->hwaddr, mac, RTE_ETHER_ADDR_LEN);
    pstEntry->type = 0;

    /* The list and its counter are only modified under the table spinlock. */
    pthread_spin_lock(&pstTbl->spinlock);
    LL_ADD(pstEntry, pstTbl->entries);
    pstTbl->count ++;
    pthread_spin_unlock(&pstTbl->spinlock);

    return 1;
}

四、UDP套接字函数实现

4.1 socket函数

该函数主要实现获取fd、创建控制块。

/**
 * Allocate a descriptor and, for SOCK_DGRAM sockets, the UDP control block
 * (send/receive rings, condition variable and mutex) that goes with it.
 *
 * @param domain    Unused.
 * @param type      Socket type; only SOCK_DGRAM gets a control block here.
 * @param protocol  Unused.
 * @return the new descriptor, or -1 on failure. On failure everything that
 *         was acquired so far — including the descriptor — is released.
 */
int nsocket(__attribute__((unused)) int domain, int type, __attribute__((unused))  int protocol)
{
    struct localhost *pstHost;
    char acRingName[32];
    int iFd;

    iFd = get_fd_frombitmap();
    /* NOTE(review): assumes a negative value means "no descriptor available" — confirm. */
    if (iFd < 0)
        return -1;

    if (type != SOCK_DGRAM)
        return iFd;

    pstHost = rte_malloc("localhost", sizeof(struct localhost), 0);
    if (pstHost == NULL)
    {
        printf("[%s][%d]: rte_malloc fail!\n", __FUNCTION__, __LINE__);
        goto err_release_fd;
    }

    memset(pstHost, 0x00, sizeof(struct localhost));
    pstHost->fd = iFd;
    pstHost->protocol = IPPROTO_UDP;

    /*
     * Ring names have to be unique: with one fixed name the second socket's
     * rte_ring_create() fails. Deriving the name from the descriptor keeps
     * every socket's rings distinct.
     */
    snprintf(acRingName, sizeof(acRingName), "udp_rcv_%d", iFd);
    pstHost->rcvbuf = rte_ring_create(acRingName, D_RING_SIZE, rte_socket_id(), RING_F_SP_ENQ | RING_F_SC_DEQ);
    if (pstHost->rcvbuf == NULL)
    {
        printf("[%s][%d]: rte_ring_create fail!\n", __FUNCTION__, __LINE__);
        goto err_free_host;
    }

    snprintf(acRingName, sizeof(acRingName), "udp_snd_%d", iFd);
    pstHost->sndbuf = rte_ring_create(acRingName, D_RING_SIZE, rte_socket_id(), RING_F_SP_ENQ | RING_F_SC_DEQ);
    if (pstHost->sndbuf == NULL)
    {
        printf("[%s][%d]: rte_ring_create fail!\n", __FUNCTION__, __LINE__);
        goto err_free_rcvbuf;
    }

    /* Initialise the objects in place rather than copying an initializer image. */
    if (pthread_cond_init(&pstHost->cond, NULL) != 0)
        goto err_free_sndbuf;
    if (pthread_mutex_init(&pstHost->mutex, NULL) != 0)
    {
        pthread_cond_destroy(&pstHost->cond);
        goto err_free_sndbuf;
    }

    LL_ADD(pstHost, g_pstHost);

    return iFd;

err_free_sndbuf:
    rte_ring_free(pstHost->sndbuf);
err_free_rcvbuf:
    rte_ring_free(pstHost->rcvbuf);
err_free_host:
    rte_free(pstHost);
err_release_fd:
    /* Same call nclose() uses to hand a descriptor back. */
    set_fd_frombitmap(iFd);
    return -1;
}

4.2 bind函数

bind函数的任务是将ip和端口信息绑定到socket函数创建的控制块结构当中。

/**
 * Attach a local address to the control block that belongs to sockfd.
 *
 * For UDP sockets the port and IPv4 address are taken from addr (read as a
 * struct sockaddr_in) and the MAC address from g_stCpuMac. Control blocks of
 * any other protocol are left untouched.
 *
 * @param sockfd   Descriptor whose control block is looked up.
 * @param addr     Address to bind.
 * @param addrlen  Unused.
 * @return 0 on success, -1 when sockfd has no control block.
 */
int nbind(int sockfd, const struct sockaddr *addr, __attribute__((unused))  socklen_t addrlen)
{
    struct localhost *pstCtl = (struct localhost *)get_hostinfo_fromfd(sockfd);
    const struct sockaddr_in *pstIn;

    if (pstCtl == NULL)
        return -1;

    /* Only UDP control blocks carry a bindable address here. */
    if (pstCtl->protocol != IPPROTO_UDP)
        return 0;

    pstIn = (const struct sockaddr_in *)addr;

    rte_memcpy(pstCtl->localmac, &g_stCpuMac, RTE_ETHER_ADDR_LEN);
    rte_memcpy(&pstCtl->localip, &pstIn->sin_addr.s_addr, sizeof(uint32_t));
    pstCtl->localport = pstIn->sin_port;

    return 0;
}

4.3 recvfrom函数

目前实现的recvfrom函数为阻塞式的,使用条件变量+互斥量等待接收队列中数据到来。

/**
 * Blocking receive: wait until the socket's receive ring holds a transfer
 * block, then copy its payload into buf.
 *
 * When buf is smaller than the queued payload, the first len bytes are
 * returned and the rest is queued again for a later call. If the rest cannot
 * be kept (allocation failure or ring full) it is dropped instead of being
 * leaked.
 *
 * @param sockfd    Descriptor whose control block is looked up.
 * @param buf       Destination buffer; may be NULL only when len is 0.
 * @param len       Capacity of buf in bytes.
 * @param flags     Unused.
 * @param src_addr  Optional; when non-NULL it is written as a struct
 *                  sockaddr_in with the sender's port and IPv4 address.
 * @param addrlen   Unused.
 * @return number of bytes copied into buf, or -1 on invalid arguments or
 *         when sockfd has no control block.
 */
ssize_t nrecvfrom(int sockfd, void *buf, size_t len, __attribute__((unused))  int flags,
                        struct sockaddr *src_addr, __attribute__((unused))  socklen_t *addrlen)
{
    struct localhost *pstHostInfo;
    struct offload *pstOffLoad = NULL;
    int iLen;

    pstHostInfo = (struct localhost *)get_hostinfo_fromfd(sockfd);
    if (pstHostInfo == NULL)
        return -1;
    if (buf == NULL && len != 0)
        return -1;

    /* Sleep until the stack signals that something was queued. */
    pthread_mutex_lock(&pstHostInfo->mutex);
    while (rte_ring_mc_dequeue(pstHostInfo->rcvbuf, (void **)&pstOffLoad) < 0)
    {
        pthread_cond_wait(&pstHostInfo->cond, &pstHostInfo->mutex);
    }
    pthread_mutex_unlock(&pstHostInfo->mutex);

    /* The sender address is optional. */
    if (src_addr != NULL)
    {
        struct sockaddr_in *pstAddr = (struct sockaddr_in *)src_addr;

        pstAddr->sin_port = pstOffLoad->sport;
        rte_memcpy(&pstAddr->sin_addr.s_addr, &pstOffLoad->sip, sizeof(uint32_t));
    }

    if (len < pstOffLoad->length)
    {
        uint16_t usRemain = (uint16_t)(pstOffLoad->length - len);
        unsigned char *pucRest;

        rte_memcpy(buf, pstOffLoad->data, len);

        /* Keep the unread tail in a right-sized buffer and queue it again. */
        pucRest = rte_malloc("unsigned char *", usRemain, 0);
        if (pucRest != NULL)
        {
            rte_memcpy(pucRest, pstOffLoad->data + len, usRemain);
            rte_free(pstOffLoad->data);
            pstOffLoad->data = pucRest;
            pstOffLoad->length = usRemain;

            if (rte_ring_mp_enqueue(pstHostInfo->rcvbuf, pstOffLoad) == 0)
                return (ssize_t)len;
        }

        /* The tail could not be preserved: drop it. */
        rte_free(pstOffLoad->data);
        rte_free(pstOffLoad);

        return (ssize_t)len;
    }

    iLen = pstOffLoad->length;
    rte_memcpy(buf, pstOffLoad->data, pstOffLoad->length);

    rte_free(pstOffLoad->data);
    rte_free(pstOffLoad);

    return iLen;
}

4.4 sendto函数

sendto函数则是将待发送数据封装成传输块,放入发送队列当中,交由协议栈发送至网卡。

/**
 * Queue one datagram for transmission: the payload is copied into a transfer
 * block and placed on the socket's send ring, from which udp_out() picks it up.
 *
 * @param sockfd     Descriptor whose control block is looked up.
 * @param buf        Payload to send.
 * @param len        Payload size in bytes; must fit a single IPv4 UDP datagram.
 * @param flags      Unused.
 * @param dest_addr  Destination, read as a struct sockaddr_in.
 * @param addrlen    Unused.
 * @return len on success; -1 on invalid arguments, oversized payload,
 *         allocation failure or when the send ring is full.
 *
 * NOTE(review): a zero-length payload still ends in -1 because the payload
 * buffer is requested with size 0 — confirm whether empty datagrams should
 * be supported.
 */
ssize_t nsendto(int sockfd, const void *buf, size_t len, __attribute__((unused))  int flags,
                      const struct sockaddr *dest_addr, __attribute__((unused))  socklen_t addrlen)
{
    /* 65535 (IPv4 total length) - 20 (IPv4 header) - 8 (UDP header). */
    const size_t uMaxPayload = 65507;
    const struct sockaddr_in *pstAddr = (const struct sockaddr_in *)dest_addr;
    struct localhost *pstHostInfo;
    struct offload *pstOffLoad;

    if (pstAddr == NULL || buf == NULL)
        return -1;

    /* offload.length is 16 bits wide; a larger len would be silently truncated. */
    if (len > uMaxPayload)
        return -1;

    pstHostInfo = (struct localhost *)get_hostinfo_fromfd(sockfd);
    if (pstHostInfo == NULL)
        return -1;

    pstOffLoad = rte_malloc("offload", sizeof(struct offload), 0);
    if (pstOffLoad == NULL)
        return -1;

    pstOffLoad->dip = pstAddr->sin_addr.s_addr;
    pstOffLoad->dport = pstAddr->sin_port;
    pstOffLoad->sip = pstHostInfo->localip;
    pstOffLoad->sport = pstHostInfo->localport;
    pstOffLoad->protocol = IPPROTO_UDP;
    pstOffLoad->length = (uint16_t)len;

    pstOffLoad->data = rte_malloc("unsigned char *", len, 0);
    if (pstOffLoad->data == NULL)
    {
        rte_free(pstOffLoad);
        return -1;
    }

    rte_memcpy(pstOffLoad->data, buf, len);

    /* A full ring must not be reported as success, nor leak the block. */
    if (rte_ring_mp_enqueue(pstHostInfo->sndbuf, pstOffLoad) != 0)
    {
        rte_free(pstOffLoad->data);
        rte_free(pstOffLoad);
        return -1;
    }

    return (ssize_t)len;
}

4.5 close函数

close函数则是将创建的控制块进行释放。

int nclose(int fd)
{
    void *info = NULL;

    info = (struct localhost *)get_hostinfo_fromfd(fd);
    if(info == NULL) 
        return -1;

    struct localhost *pstHostInfo = (struct localhost *)info;
    if(pstHostInfo->protocol == IPPROTO_UDP)
    {
        LL_REMOVE(pstHostInfo, g_pstHost);

        if (pstHostInfo->rcvbuf)
			rte_ring_free(pstHostInfo->rcvbuf);
		if (pstHostInfo->sndbuf) 
			rte_ring_free(pstHostInfo->sndbuf);

		rte_free(pstHostInfo);

		set_fd_frombitmap(fd);
    }

    return 0;
}

五、UDP包处理

5.1 协议栈接收

协议栈一直接收来自网卡的数据包,我们需要筛选出需要的协议数据,这主要是通过网络层中的IP数据头来分析,代码如下:

int pkt_process(void *arg)
{
    struct rte_mempool *pstMbufPool;
    int iRxNum;
	int i;
	struct rte_mbuf *pstMbuf[32];
	struct rte_ether_hdr *pstEthHdr;
    struct rte_ipv4_hdr *pstIpHdr;

    pstMbufPool = (struct rte_mempool *)arg;
    while(1)
    {
        iRxNum = rte_ring_mc_dequeue_burst(g_pstRingIns->pstInRing, (void**)pstMbuf, D_BURST_SIZE, NULL);
        
        if(iRxNum <= 0)
			continue;
        
        for(i = 0; i < iRxNum; ++i)
        {
            pstEthHdr = rte_pktmbuf_mtod_offset(pstMbuf[i], struct rte_ether_hdr *, 0);
            if (pstEthHdr->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))   //IPv4: 0800 
            {
                pstIpHdr = (struct rte_ipv4_hdr *)(pstEthHdr + 1);
                
				// 维护一个arp表
				ng_arp_entry_insert(pstIpHdr->src_addr, pstEthHdr->s_addr.addr_bytes);
                if(pstIpHdr->next_proto_id == IPPROTO_UDP) // udp 
                {
                    // udp process
                    udp_process(pstMbuf[i]);
                }
                else if(pstIpHdr->next_proto_id == IPPROTO_TCP)  // tcp
                {
                    printf("tcp_process ---\n");
					tcp_process(pstMbuf[i]);
                }
            }   
        }

        // to send
        udp_out(pstMbufPool);
        tcp_out(pstMbufPool);
    }
    return 0;
}

其中,对于UDP数据包而言,如果应用层已经创建了控制块,那么协议栈这边主要是组装好数据,再发送到控制块中的接收队列,并通知阻塞在recvfrom函数中的应用层进行处理。

int udp_process(struct rte_mbuf *pstUdpMbuf) 
{
    struct rte_ipv4_hdr *pstIpHdr;
    struct rte_udp_hdr *pstUdpHdr;
    struct localhost *pstHost;
    struct offload *pstOffLoad;

    pstIpHdr = rte_pktmbuf_mtod_offset(pstUdpMbuf, struct rte_ipv4_hdr *, sizeof(struct rte_ether_hdr));
	pstUdpHdr = (struct rte_udp_hdr *)(pstIpHdr + 1);

    
	struct in_addr addr;
	addr.s_addr = pstIpHdr->src_addr;
	printf("udp_process ---> src: %s:%d \n", inet_ntoa(addr), ntohs(pstUdpHdr->src_port));
	

    pstHost = get_hostinfo_fromip_port(pstIpHdr->dst_addr, pstUdpHdr->dst_port, pstIpHdr->next_proto_id);
    if (pstHost == NULL) 
    {
		rte_pktmbuf_free(pstUdpMbuf);
		return -3;
	} 

    pstOffLoad = rte_malloc("offload", sizeof(struct offload), 0);
	if (pstOffLoad == NULL) 
    {
		rte_pktmbuf_free(pstUdpMbuf);
		return -1;
	}

    pstOffLoad->dip = pstIpHdr->dst_addr;
	pstOffLoad->sip = pstIpHdr->src_addr;
	pstOffLoad->sport = pstUdpHdr->src_port;
	pstOffLoad->dport = pstUdpHdr->dst_port;
    pstOffLoad->protocol = IPPROTO_UDP;
	pstOffLoad->length = ntohs(pstUdpHdr->dgram_len);
    pstOffLoad->data = rte_malloc("unsigned char*", pstOffLoad->length - sizeof(struct rte_udp_hdr), 0);
	if (pstOffLoad->data == NULL) 
    {
		rte_pktmbuf_free(pstUdpMbuf);
		rte_free(pstOffLoad);
		return -2;
	}

    rte_memcpy(pstOffLoad->data, (unsigned char *)(pstUdpHdr+1), pstOffLoad->length - sizeof(struct rte_udp_hdr));

	rte_ring_mp_enqueue(pstHost->rcvbuf, pstOffLoad);  // recv buffer

	pthread_mutex_lock(&pstHost->mutex);
	pthread_cond_signal(&pstHost->cond);
	pthread_mutex_unlock(&pstHost->mutex);

	rte_pktmbuf_free(pstUdpMbuf);

    return 0;
}

5.2 协议栈发送

协议栈遍历应用层控制块,如果发送队列中有数据,协议栈的任务就是将其准确地发送给网卡,这包括广播arp请求包、组装标准UDP数据包,代码如下:

/**
 * Send stage for UDP: for every control block on g_pstHost take at most one
 * transfer block from its send ring and turn it into a frame on the output
 * ring.
 *
 * If the destination MAC is not in the ARP table yet, an ARP request is
 * sent instead and the transfer block is put back on the send ring so that
 * it is retried on a later call.
 *
 * Frames and transfer blocks that cannot be queued are released instead of
 * being leaked, and a NULL frame is never placed on the output ring. The log
 * line now labels the printed address as the destination, which it is.
 *
 * @param pstMbufPool  Pool handed to the packet builders.
 * @return always 0.
 */
int udp_out(struct rte_mempool *pstMbufPool) 
{
    struct localhost *pstHost;

    for(pstHost = g_pstHost; pstHost != NULL; pstHost = pstHost->next)
    {
        struct offload *pstOffLoad = NULL;
        struct rte_mbuf *pstOutMbuf;
        unsigned char *dstmac;
        struct in_addr addr;

        if (rte_ring_mc_dequeue(pstHost->sndbuf, (void **)&pstOffLoad) < 0)
            continue;

        addr.s_addr = pstOffLoad->dip;
        printf("udp_out ---> dst: %s:%d \n", inet_ntoa(addr), ntohs(pstOffLoad->dport));

        /* Look up the peer's MAC address. */
        dstmac = ng_get_dst_macaddr(pstOffLoad->dip);
        if (dstmac == NULL)
        {
            /* Unknown peer: broadcast an ARP request first. */
            pstOutMbuf = ng_send_arp(pstMbufPool, RTE_ARP_OP_REQUEST, g_aucDefaultArpMac,
                pstOffLoad->sip, pstOffLoad->dip);

            /* NOTE(review): presumably NULL when no mbuf could be built — confirm. */
            if (pstOutMbuf != NULL
                && rte_ring_mp_enqueue_burst(g_pstRingIns->pstOutRing, (void **)&pstOutMbuf, 1, NULL) == 0)
            {
                rte_pktmbuf_free(pstOutMbuf);
            }

            /* Put the datagram back so it is retried once the MAC is known. */
            if (rte_ring_mp_enqueue(pstHost->sndbuf, pstOffLoad) == 0)
                continue;

            /* Send ring full: the datagram is dropped below. */
        }
        else
        {
            pstOutMbuf = ng_udp_pkt(pstMbufPool, pstOffLoad->sip, pstOffLoad->dip,
                    pstOffLoad->sport, pstOffLoad->dport, pstHost->localmac,
                    dstmac, pstOffLoad->data, pstOffLoad->length);

            if (pstOutMbuf != NULL
                && rte_ring_mp_enqueue_burst(g_pstRingIns->pstOutRing, (void **)&pstOutMbuf, 1, NULL) == 0)
            {
                rte_pktmbuf_free(pstOutMbuf);
            }
        }

        /* The transfer block has been consumed (sent or dropped). */
        rte_free(pstOffLoad->data);
        rte_free(pstOffLoad);
    }

    return 0;
}

5.3 项目地址及相关文章

项目地址:https://github.com/hjlogzw/DPDK-TCP-UDP_Protocol_Stack
DPDK——TCP/UDP协议栈服务端实现(一)

;