文章目录
一、概述
在《DPDK——TCP/UDP协议栈服务端实现(一)》中已经讲述TCP/UDP简易协议栈项目的框架和基本信息,本篇文章要完成的工作如下:
- UDP包结构设计,包括UDP控制块结构、协议栈与应用层之间的传输包结构
- ARP表处理
- UDP套接字函数实现
- UDP包处理
二、UDP数据结构
如下图所示,UDP的数据结构主要分为两个部分:数据传输块与控制块。
UDP控制块是在应用层创建socket的同时生成的,主要包含一个发送队列和接收队列,线程同步变量,以及相关参数,如下:
/*
 * UDP control block. nsocket() allocates one per SOCK_DGRAM socket and links
 * it into the global g_pstHost list; nclose() unlinks it and frees it.
 */
struct localhost
{
int fd; // descriptor obtained from get_fd_frombitmap() in nsocket()
uint32_t localip; // local IPv4 address, copied from sin_addr.s_addr by nbind()
unsigned char localmac[RTE_ETHER_ADDR_LEN]; // local MAC, copied from g_stCpuMac by nbind()
uint16_t localport; // local port, copied verbatim from sin_port by nbind()
unsigned char protocol; // nsocket() stores IPPROTO_UDP here
struct rte_ring *sndbuf; // struct offload * queued by nsendto(), drained by udp_out()
struct rte_ring *rcvbuf; // struct offload * queued by udp_process(), drained by nrecvfrom()
struct localhost *prev; // presumably maintained by LL_ADD / LL_REMOVE — confirm against the macros
struct localhost *next; // udp_out() walks the list through this link
pthread_cond_t cond; // signalled by udp_process() after it queues a datagram
pthread_mutex_t mutex; // held around the wait in nrecvfrom() and the signal in udp_process()
};
传输块作为协议栈向UDP应用通信的数据封装,DPDK协议栈在收到网卡发送的数据后,按照传输块的结构来封装数据,并发送至UDP控制块中的接收队列,结构如下:
/*
 * Transfer block exchanged between the protocol stack and the application
 * through the sndbuf / rcvbuf rings of a struct localhost.
 * Both the block and its data buffer are allocated with rte_malloc() and are
 * released with rte_free() by whoever dequeues the block last.
 */
struct offload
{
uint32_t sip; // source IPv4 address (IP header src_addr on receive, localip on send)
uint32_t dip; // destination IPv4 address (IP header dst_addr on receive, sin_addr on send)
uint16_t sport; // source port, stored exactly as found in the UDP header / control block
uint16_t dport; // destination port, stored exactly as found in the UDP header / sockaddr_in
int protocol; // udp_process() stores IPPROTO_UDP here
unsigned char *data; // rte_malloc()'d buffer holding the bytes to deliver
uint16_t length; // byte count carried with this block (nsendto() stores the payload size)
};
三、ARP表处理
在网络传输中,ARP协议承担着“护送数据包最后一公里”的任务:数据包到达目的局域网后,仅凭ip地址无法在链路层完成投递,必须知道目的主机的MAC地址。具体做法就是在局域网(同一广播域)内向所有设备广播一个ARP请求,内容就是“你们谁是ip为xxx.xxx.xxx.xxx的主机呀?看到消息请马上告知你的MAC地址,你的快递到了哈!”,对应的主机收到后会回复自己的MAC地址。
所以,本项目也必须维护一个ip和mac映射的arp表,这样才能准确将数据包发给对方主机。
arp表的维护主要发生在两个地方:协议栈收到网卡数据包时,记录源ip和mac地址的映射;协议栈发送数据包之前,查询arp表,若查不到对端mac地址,则先广播arp请求包。
主要函数如下:
/**
 * Add an ip -> mac mapping to the ARP table unless the ip is already present.
 *
 * @param ip   IPv4 address; passed unchanged to ng_get_dst_macaddr() for the
 *             lookup and stored unchanged in the new entry.
 * @param mac  Hardware address; RTE_ETHER_ADDR_LEN bytes are copied from it.
 *
 * @return 1 when a new entry was linked into the table,
 *         0 when the ip already had an entry (that entry is left untouched),
 *        -1 when mac is NULL or the entry could not be allocated.
 */
int ng_arp_entry_insert(uint32_t ip, unsigned char *mac)
{
    struct arp_table *pstTbl = arp_table_instance();
    struct arp_entry *pstEntry = NULL;

    /* Already known: nothing to do. */
    if (ng_get_dst_macaddr(ip) != NULL)
        return 0;

    if (mac == NULL)
        return -1;

    pstEntry = rte_malloc("arp_entry", sizeof(struct arp_entry), 0);
    if (pstEntry == NULL)
        return -1; /* do not report an insertion that never happened */

    memset(pstEntry, 0, sizeof(struct arp_entry));
    pstEntry->ip = ip;
    rte_memcpy(pstEntry->hwaddr, mac, RTE_ETHER_ADDR_LEN);
    pstEntry->type = 0;

    /*
     * NOTE(review): the lookup above runs outside this lock, so two callers
     * racing on the same ip could both insert — confirm whether more than one
     * thread can call this function.
     */
    pthread_spin_lock(&pstTbl->spinlock);
    LL_ADD(pstEntry, pstTbl->entries);
    pstTbl->count ++;
    pthread_spin_unlock(&pstTbl->spinlock);

    return 1;
}
四、UDP套接字函数实现
4.1 socket函数
该函数主要实现获取fd、创建控制块。
/**
 * Create a socket descriptor and, for SOCK_DGRAM, its UDP control block.
 *
 * For SOCK_DGRAM a struct localhost is allocated, given a receive ring and a
 * send ring, and linked into g_pstHost. For any other type only the
 * descriptor is reserved and returned.
 *
 * @param domain    unused.
 * @param type      socket type; only SOCK_DGRAM gets a control block here.
 * @param protocol  unused.
 *
 * @return the new descriptor, or -1 on failure. On failure the descriptor is
 *         handed back with set_fd_frombitmap() and nothing stays allocated.
 */
int nsocket(__attribute__((unused)) int domain, int type, __attribute__((unused)) int protocol)
{
    struct localhost *pstHost;
    char acRingName[24]; /* large enough for "udp_rcv_" / "udp_snd_" plus any int */
    int iFd;

    iFd = get_fd_frombitmap();
    /* NOTE(review): assumes a negative value means no descriptor is free — confirm. */
    if (iFd < 0)
        return -1;

    if (type != SOCK_DGRAM)
        return iFd;

    pstHost = rte_malloc("localhost", sizeof(struct localhost), 0);
    if (pstHost == NULL)
    {
        printf("[%s][%d]: rte_malloc fail!\n", __FUNCTION__, __LINE__);
        goto err_fd;
    }
    memset(pstHost, 0x00, sizeof(struct localhost));
    pstHost->fd = iFd;
    pstHost->protocol = IPPROTO_UDP;

    /*
     * Ring names must be unique, so the descriptor is made part of the name;
     * a fixed name would make the second socket's rte_ring_create() fail.
     */
    snprintf(acRingName, sizeof(acRingName), "udp_rcv_%d", iFd);
    pstHost->rcvbuf = rte_ring_create(acRingName, D_RING_SIZE, rte_socket_id(), RING_F_SP_ENQ | RING_F_SC_DEQ);
    if (pstHost->rcvbuf == NULL)
    {
        printf("[%s][%d]: rte_ring_create fail!\n", __FUNCTION__, __LINE__);
        goto err_host;
    }

    snprintf(acRingName, sizeof(acRingName), "udp_snd_%d", iFd);
    pstHost->sndbuf = rte_ring_create(acRingName, D_RING_SIZE, rte_socket_id(), RING_F_SP_ENQ | RING_F_SC_DEQ);
    if (pstHost->sndbuf == NULL)
    {
        printf("[%s][%d]: rte_ring_create fail!\n", __FUNCTION__, __LINE__);
        goto err_rcv;
    }

    /* Initialise the objects in place instead of byte-copying an initialised one. */
    if (pthread_cond_init(&pstHost->cond, NULL) != 0)
        goto err_snd;
    if (pthread_mutex_init(&pstHost->mutex, NULL) != 0)
    {
        pthread_cond_destroy(&pstHost->cond);
        goto err_snd;
    }

    LL_ADD(pstHost, g_pstHost);
    return iFd;

err_snd:
    rte_ring_free(pstHost->sndbuf);
err_rcv:
    rte_ring_free(pstHost->rcvbuf);
err_host:
    rte_free(pstHost);
err_fd:
    /* Same call nclose() makes when it retires a descriptor. */
    set_fd_frombitmap(iFd);
    return -1;
}
4.2 bind函数
bind函数的任务是将ip和端口信息绑定到socket函数创建的控制块结构当中。
/**
 * Attach a local address to the control block that belongs to sockfd.
 *
 * For a UDP control block the port and IPv4 address are taken from addr
 * (read as a struct sockaddr_in) and the local MAC is copied from g_stCpuMac.
 * Control blocks of any other protocol are left untouched.
 *
 * @param sockfd   descriptor returned by nsocket().
 * @param addr     address to bind; interpreted as struct sockaddr_in.
 * @param addrlen  unused.
 *
 * @return 0 on success (including the non-UDP no-op case), -1 when no control
 *         block is found for sockfd.
 */
int nbind(int sockfd, const struct sockaddr *addr, __attribute__((unused)) socklen_t addrlen)
{
    struct localhost *pstHost = (struct localhost *)get_hostinfo_fromfd(sockfd);
    const struct sockaddr_in *pstIn;

    if (pstHost == NULL)
        return -1;

    /* Only UDP control blocks carry bind information here. */
    if (pstHost->protocol != IPPROTO_UDP)
        return 0;

    pstIn = (const struct sockaddr_in *)addr;
    pstHost->localport = pstIn->sin_port;
    rte_memcpy(&pstHost->localip, &pstIn->sin_addr.s_addr, sizeof(uint32_t));
    rte_memcpy(pstHost->localmac, &g_stCpuMac, RTE_ETHER_ADDR_LEN);

    return 0;
}
4.3 recvfrom函数
目前实现的recvfrom函数为阻塞式的,使用条件变量+互斥量等待接收队列中数据到来。
/**
 * Blocking receive: wait for a transfer block on the socket's receive ring
 * and copy its data into buf.
 *
 * When buf is smaller than the queued data, the first len bytes are returned
 * and the remainder is put back on the receive ring so a later call can read
 * it. The remainder is appended at the tail, i.e. behind any blocks that
 * arrived in the meantime.
 *
 * @param sockfd    descriptor returned by nsocket().
 * @param buf       destination buffer.
 * @param len       capacity of buf in bytes.
 * @param flags     unused.
 * @param src_addr  if not NULL, receives the sender's port and IPv4 address
 *                  (written as a struct sockaddr_in).
 * @param addrlen   unused.
 *
 * @return number of bytes copied into buf, or -1 when no control block is
 *         found for sockfd.
 */
ssize_t nrecvfrom(int sockfd, void *buf, size_t len, __attribute__((unused)) int flags,
                  struct sockaddr *src_addr, __attribute__((unused)) socklen_t *addrlen)
{
    struct localhost *pstHostInfo = NULL;
    struct offload *pstOffLoad = NULL;
    struct sockaddr_in *pstAddr = NULL;
    uint16_t usRest;
    ssize_t iLen;

    pstHostInfo = (struct localhost *)get_hostinfo_fromfd(sockfd);
    if (pstHostInfo == NULL)
        return -1;

    /* Sleep on the condition variable until the stack has queued a block. */
    pthread_mutex_lock(&pstHostInfo->mutex);
    while (rte_ring_mc_dequeue(pstHostInfo->rcvbuf, (void **)&pstOffLoad) < 0)
    {
        pthread_cond_wait(&pstHostInfo->cond, &pstHostInfo->mutex);
    }
    pthread_mutex_unlock(&pstHostInfo->mutex);

    /* The caller may pass NULL when it does not care about the sender. */
    if (src_addr != NULL)
    {
        pstAddr = (struct sockaddr_in *)src_addr;
        pstAddr->sin_port = pstOffLoad->sport;
        rte_memcpy(&pstAddr->sin_addr.s_addr, &pstOffLoad->sip, sizeof(uint32_t));
    }

    if (len >= pstOffLoad->length)
    {
        /* Whole block fits: deliver it and release it. */
        iLen = pstOffLoad->length;
        rte_memcpy(buf, pstOffLoad->data, pstOffLoad->length);
        rte_free(pstOffLoad->data);
        rte_free(pstOffLoad);
        return iLen;
    }

    /*
     * Short buffer: hand out the first len bytes, then slide the unread tail
     * to the front of the same buffer. Reusing the buffer avoids a second
     * allocation that could fail half-way through.
     */
    rte_memcpy(buf, pstOffLoad->data, len);
    usRest = (uint16_t)(pstOffLoad->length - len);
    memmove(pstOffLoad->data, pstOffLoad->data + len, usRest);
    pstOffLoad->length = usRest;

    if (rte_ring_mp_enqueue(pstHostInfo->rcvbuf, pstOffLoad) != 0)
    {
        /* Ring is full: the remainder cannot be kept, so release it. */
        rte_free(pstOffLoad->data);
        rte_free(pstOffLoad);
    }

    return (ssize_t)len;
}
4.4 sendto函数
sendto函数则是将待发送数据封装成传输块,放入发送队列当中,交由协议栈发送至网卡。
/**
 * Queue a datagram for transmission.
 *
 * The payload is copied into a freshly allocated transfer block which is
 * placed on the socket's send ring; udp_out() later turns it into a packet.
 *
 * @param sockfd     descriptor returned by nsocket().
 * @param buf        payload to send.
 * @param len        payload size in bytes; must fit the 16-bit length field
 *                   of struct offload.
 * @param flags      unused.
 * @param dest_addr  destination, interpreted as struct sockaddr_in.
 * @param addrlen    unused.
 *
 * @return len on success; -1 when the socket is unknown, an argument is
 *         invalid, memory cannot be allocated, or the send ring is full.
 */
ssize_t nsendto(int sockfd, const void *buf, size_t len, __attribute__((unused)) int flags,
                const struct sockaddr *dest_addr, __attribute__((unused)) socklen_t addrlen)
{
    struct localhost *pstHostInfo = NULL;
    struct offload *pstOffLoad = NULL;
    const struct sockaddr_in *pstAddr = (const struct sockaddr_in *)dest_addr;

    if (pstAddr == NULL || buf == NULL)
        return -1;

    /* length is a uint16_t; refuse anything that would be silently truncated. */
    if (len > UINT16_MAX)
        return -1;

    pstHostInfo = (struct localhost *)get_hostinfo_fromfd(sockfd);
    if (pstHostInfo == NULL)
        return -1;

    pstOffLoad = rte_malloc("offload", sizeof(struct offload), 0);
    if (pstOffLoad == NULL)
        return -1;

    pstOffLoad->dip = pstAddr->sin_addr.s_addr;
    pstOffLoad->dport = pstAddr->sin_port;
    pstOffLoad->sip = pstHostInfo->localip;
    pstOffLoad->sport = pstHostInfo->localport;
    pstOffLoad->protocol = IPPROTO_UDP; /* the block is not zeroed, so set every field */
    pstOffLoad->length = (uint16_t)len;

    pstOffLoad->data = rte_malloc("unsigned char *", len, 0);
    if (pstOffLoad->data == NULL)
    {
        rte_free(pstOffLoad);
        return -1;
    }
    rte_memcpy(pstOffLoad->data, buf, len);

    /* A full ring must not be reported as a successful send, nor leak the block. */
    if (rte_ring_mp_enqueue(pstHostInfo->sndbuf, pstOffLoad) != 0)
    {
        rte_free(pstOffLoad->data);
        rte_free(pstOffLoad);
        return -1;
    }

    return (ssize_t)len;
}
4.5 close函数
close函数则是将创建的控制块进行释放。
int nclose(int fd)
{
void *info = NULL;
info = (struct localhost *)get_hostinfo_fromfd(fd);
if(info == NULL)
return -1;
struct localhost *pstHostInfo = (struct localhost *)info;
if(pstHostInfo->protocol == IPPROTO_UDP)
{
LL_REMOVE(pstHostInfo, g_pstHost);
if (pstHostInfo->rcvbuf)
rte_ring_free(pstHostInfo->rcvbuf);
if (pstHostInfo->sndbuf)
rte_ring_free(pstHostInfo->sndbuf);
rte_free(pstHostInfo);
set_fd_frombitmap(fd);
}
return 0;
}
五、UDP包处理
5.1 协议栈接收
协议栈一直接收来自网卡的数据包,我们需要筛选出需要的协议数据,这主要是通过网络层中的IP数据头来分析,代码如下:
/**
 * Main loop of the protocol stack.
 *
 * Each iteration pulls a burst of received mbufs from the in-ring,
 * dispatches IPv4 UDP/TCP packets to udp_process()/tcp_process(), and then
 * runs udp_out()/tcp_out() so that data queued by the application is sent.
 * The source ip/mac of every IPv4 packet is recorded in the ARP table.
 *
 * The output stage runs on every iteration, including those where nothing
 * was received; otherwise queued outbound data would wait for unrelated
 * inbound traffic.
 *
 * @param arg  the struct rte_mempool * used to build outgoing packets.
 *
 * @return never returns in practice (infinite loop); 0 only nominally.
 */
int pkt_process(void *arg)
{
    struct rte_mempool *pstMbufPool = (struct rte_mempool *)arg;
    /* Sized by the burst constant so the dequeue can never overrun the array. */
    struct rte_mbuf *pstMbuf[D_BURST_SIZE];
    struct rte_ether_hdr *pstEthHdr;
    struct rte_ipv4_hdr *pstIpHdr;
    int iRxNum;
    int i;

    while (1)
    {
        iRxNum = rte_ring_mc_dequeue_burst(g_pstRingIns->pstInRing, (void **)pstMbuf, D_BURST_SIZE, NULL);

        for (i = 0; i < iRxNum; ++i)
        {
            pstEthHdr = rte_pktmbuf_mtod_offset(pstMbuf[i], struct rte_ether_hdr *, 0);

            /* Only IPv4 (0x0800) is handled here; release everything else. */
            if (pstEthHdr->ether_type != rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
            {
                rte_pktmbuf_free(pstMbuf[i]);
                continue;
            }

            pstIpHdr = (struct rte_ipv4_hdr *)(pstEthHdr + 1);

            /* Maintain the ARP table from the sender's ip/mac. */
            ng_arp_entry_insert(pstIpHdr->src_addr, pstEthHdr->s_addr.addr_bytes);

            if (pstIpHdr->next_proto_id == IPPROTO_UDP)
            {
                /* udp_process() releases the mbuf on every path. */
                udp_process(pstMbuf[i]);
            }
            else if (pstIpHdr->next_proto_id == IPPROTO_TCP)
            {
                printf("tcp_process ---\n");
                /* NOTE(review): assumes tcp_process() takes ownership of the mbuf, as udp_process() does — confirm. */
                tcp_process(pstMbuf[i]);
            }
            else
            {
                /* No handler for this protocol: release the mbuf here. */
                rte_pktmbuf_free(pstMbuf[i]);
            }
        }

        /* Flush data queued by the application. */
        udp_out(pstMbufPool);
        tcp_out(pstMbufPool);
    }

    return 0;
}
其中,对于UDP数据包而言,如果应用层已经创建了控制块,那么协议栈这边主要是组装好数据,再发送到控制块中的接收队列,并通知阻塞在recvfrom函数中的应用层进行处理。
int udp_process(struct rte_mbuf *pstUdpMbuf)
{
struct rte_ipv4_hdr *pstIpHdr;
struct rte_udp_hdr *pstUdpHdr;
struct localhost *pstHost;
struct offload *pstOffLoad;
pstIpHdr = rte_pktmbuf_mtod_offset(pstUdpMbuf, struct rte_ipv4_hdr *, sizeof(struct rte_ether_hdr));
pstUdpHdr = (struct rte_udp_hdr *)(pstIpHdr + 1);
struct in_addr addr;
addr.s_addr = pstIpHdr->src_addr;
printf("udp_process ---> src: %s:%d \n", inet_ntoa(addr), ntohs(pstUdpHdr->src_port));
pstHost = get_hostinfo_fromip_port(pstIpHdr->dst_addr, pstUdpHdr->dst_port, pstIpHdr->next_proto_id);
if (pstHost == NULL)
{
rte_pktmbuf_free(pstUdpMbuf);
return -3;
}
pstOffLoad = rte_malloc("offload", sizeof(struct offload), 0);
if (pstOffLoad == NULL)
{
rte_pktmbuf_free(pstUdpMbuf);
return -1;
}
pstOffLoad->dip = pstIpHdr->dst_addr;
pstOffLoad->sip = pstIpHdr->src_addr;
pstOffLoad->sport = pstUdpHdr->src_port;
pstOffLoad->dport = pstUdpHdr->dst_port;
pstOffLoad->protocol = IPPROTO_UDP;
pstOffLoad->length = ntohs(pstUdpHdr->dgram_len);
pstOffLoad->data = rte_malloc("unsigned char*", pstOffLoad->length - sizeof(struct rte_udp_hdr), 0);
if (pstOffLoad->data == NULL)
{
rte_pktmbuf_free(pstUdpMbuf);
rte_free(pstOffLoad);
return -2;
}
rte_memcpy(pstOffLoad->data, (unsigned char *)(pstUdpHdr+1), pstOffLoad->length - sizeof(struct rte_udp_hdr));
rte_ring_mp_enqueue(pstHost->rcvbuf, pstOffLoad); // recv buffer
pthread_mutex_lock(&pstHost->mutex);
pthread_cond_signal(&pstHost->cond);
pthread_mutex_unlock(&pstHost->mutex);
rte_pktmbuf_free(pstUdpMbuf);
return 0;
}
5.2 协议栈发送
协议栈遍历应用层控制块,如果发送队列中有数据,则协议栈的任务就是将其准确地发送给网卡,这包括广播arp请求包、组装标准UDP数据包,代码如下:
/**
 * Send stage for UDP: take at most one queued transfer block from every
 * control block in g_pstHost and turn it into a packet on the out-ring.
 *
 * If the destination MAC is not yet in the ARP table, an ARP request is
 * queued instead and the transfer block is put back on the send ring so it
 * is retried on a later call.
 *
 * @param pstMbufPool  pool used to allocate outgoing mbufs.
 *
 * @return always 0.
 */
int udp_out(struct rte_mempool *pstMbufPool)
{
    struct localhost *pstHost;

    for (pstHost = g_pstHost; pstHost != NULL; pstHost = pstHost->next)
    {
        struct offload *pstOffLoad = NULL;
        struct rte_mbuf *pstPkt;
        struct in_addr addr;
        unsigned char *dstmac;

        if (rte_ring_mc_dequeue(pstHost->sndbuf, (void **)&pstOffLoad) < 0)
            continue;

        /* NOTE(review): the label says "src" but the values printed are the destination ip/port. */
        addr.s_addr = pstOffLoad->dip;
        printf("udp_out ---> src: %s:%d \n", inet_ntoa(addr), ntohs(pstOffLoad->dport));

        dstmac = ng_get_dst_macaddr(pstOffLoad->dip); /* look up the peer's MAC address */
        if (dstmac == NULL)
        {
            /* Peer MAC unknown: broadcast an ARP request first. */
            pstPkt = ng_send_arp(pstMbufPool, RTE_ARP_OP_REQUEST, g_aucDefaultArpMac,
                                 pstOffLoad->sip, pstOffLoad->dip);
            /* NOTE(review): assumes ng_send_arp() returns NULL on failure — confirm. */
            if (pstPkt != NULL &&
                rte_ring_mp_enqueue_burst(g_pstRingIns->pstOutRing, (void **)&pstPkt, 1, NULL) == 0)
            {
                rte_pktmbuf_free(pstPkt); /* out-ring full: do not leak the mbuf */
            }

            /* Put the datagram back so it is retried once the MAC is known. */
            if (rte_ring_mp_enqueue(pstHost->sndbuf, pstOffLoad) != 0)
            {
                rte_free(pstOffLoad->data);
                rte_free(pstOffLoad);
            }
            continue;
        }

        pstPkt = ng_udp_pkt(pstMbufPool, pstOffLoad->sip, pstOffLoad->dip,
                            pstOffLoad->sport, pstOffLoad->dport, pstHost->localmac,
                            dstmac, pstOffLoad->data, pstOffLoad->length);
        /* NOTE(review): assumes ng_udp_pkt() returns NULL on failure — confirm. */
        if (pstPkt != NULL &&
            rte_ring_mp_enqueue_burst(g_pstRingIns->pstOutRing, (void **)&pstPkt, 1, NULL) == 0)
        {
            rte_pktmbuf_free(pstPkt); /* out-ring full: do not leak the mbuf */
        }

        /* The transfer block has been consumed either way. */
        rte_free(pstOffLoad->data);
        rte_free(pstOffLoad);
    }

    return 0;
}
5.3 项目地址及相关文章
项目地址:https://github.com/hjlogzw/DPDK-TCP-UDP_Protocol_Stack
DPDK——TCP/UDP协议栈服务端实现(一)