Bootstrap

一种程序结构设计:JSON 解析、多线程数据采集,以及如何避免数据竞争


前言

我已经学会了C语言,json解析程序就是用C语言的基本语法写的,那么我应该可以解析json了

此外还提供了一种避免数据竞争的多线程程序


一、json解析

{
    "abc": 1,
    "a1": true,
    "a2":  "sd",
    "a3": [1, {"abc":4}],
    "a4":{}
}

设计一个数据结构,实现json的数据存储

节点设计:

// Kind of value stored in a JsonNode. JINVAL is the kind a freshly
// constructed node starts with.
enum TYPE { JINVAL, JOBJ, JARR, JSTR, JNUM, JBOOL };

// One JSON value. Members of an object or array are chained as siblings
// through left/right; a container points at its first member through child.
struct JsonNode
{
    TYPE type = JINVAL;                 // which of the d* payload fields is meaningful
    struct JsonNode *left = nullptr;    // previous sibling
    struct JsonNode *right = nullptr;   // next sibling
    struct JsonNode *child = nullptr;   // first member of an object/array
    std::string key;                    // member name when the node lives inside an object
    double dval = 0;                    // payload for JNUM
    bool dbool = false;                 // payload for JBOOL
    std::string dstr;                   // payload for JSTR

    JsonNode() = default;
};

其中type为json的值类型,这里只解析5种:JOBJ, JARR, JSTR, JNUM, JBOOL

type为值类型,所有的节点都会有一个唯一的值类型,其它字段可能有也可能没有(key,val,*ptr等都不一定会有值)

以上json对应的数据结构如下(图中的 val 按节点类型分别对应 dval、dbool 或 dstr):

root.type=JOBJ
root.child===first1.type=JNUM
            first1.right===first2
            first1.key="abc"
            first1.val=1

            first2.type=JBOOL   
            first2.right===first3 
            first2.key="a1"    
            first2.val=true    

            first3.type=JSTR   
            first3.right===first4 
            first3.key="a2"    
            first3.val="sd"    

            first4.type=JARR  
            first4.right===first5 
            first4.child =============second1.type=JNUM
            first4.key="a3"           second1.right===second2
                                      second1.val=1
            first5.type=JOBJ
            first5.key="a4"           second2.type=JOBJ
                                      second2.child=============third1.type=JNUM
                                                                third1.key="abc"
                                                                third1.val=4
// Parser entry points. Each takes the current read position and the node to
// fill, and returns the position after the consumed text, or 0 on a syntax error.

// Parses one value at ptr by dispatching to the read_* helper that matches
// its first character.
char* parse_json_value(char* ptr, JsonNode* node);
// Parses an object; sets node->type = JOBJ.
char* read_obj(char* ptr, JsonNode* node);
// Parses an array; sets node->type = JARR.
char* read_arr(char* ptr, JsonNode* node);
// Parses a string; sets node->type = JSTR.
char* read_str(char* ptr, JsonNode* node);
// Parses a number; sets node->type = JNUM.
char* read_num(char* ptr, JsonNode* node);
#include <stdio.h>
#include <string.h>

#include <cstdlib>
#include <iostream>
#include <new>
#include <string>

// Advances `ptr` past JSON insignificant whitespace: space, tab, line feed and
// carriage return. Stops at the first other character, which includes the NUL
// terminator. `ptr` is evaluated more than once, so pass a plain pointer
// variable (no side effects). Expands to a single statement.
#define SKIP_SPACE(ptr) do { while (*(ptr) == ' ' || *(ptr) == '\t' || *(ptr) == '\n' || *(ptr) == '\r') ++(ptr); } while (0)

/*

{
    "abc": 1,
    "a1": true,
    "a2":  "sd",
    "a3": [1, {"abc":4}],
    "a4":{}
}

*/
// Kind of value stored in a JsonNode. JINVAL is the kind a freshly
// constructed node starts with.
enum TYPE { JINVAL, JOBJ, JARR, JSTR, JNUM, JBOOL };

// One JSON value. Members of an object or array are chained as siblings
// through left/right; a container points at its first member through child.
struct JsonNode
{
    TYPE type = JINVAL;                 // which of the d* payload fields is meaningful
    struct JsonNode *left = nullptr;    // previous sibling
    struct JsonNode *right = nullptr;   // next sibling
    struct JsonNode *child = nullptr;   // first member of an object/array
    std::string key;                    // member name when the node lives inside an object
    double dval = 0;                    // payload for JNUM
    bool dbool = false;                 // payload for JBOOL
    std::string dstr;                   // payload for JSTR

    JsonNode() = default;
};

// Forward declarations: the parser is mutually recursive (parse_json_value
// calls the read_* helpers, and read_obj/read_arr call parse_json_value).
// Each returns the position after the consumed text, or 0 on a syntax error.
char* parse_json_value(char* ptr, JsonNode* node);
char* read_obj(char* ptr, JsonNode* node);
char* read_arr(char* ptr, JsonNode* node);
char* read_str(char* ptr, JsonNode* node);
char* read_num(char* ptr, JsonNode* node);

// Heap-allocates a default-constructed JsonNode.
// Uses the non-throwing form of new, so the result is a null pointer when the
// allocation fails.
JsonNode* new_jnode()
{
    JsonNode* fresh = new (std::nothrow) JsonNode;
    return fresh;
}
// One integer per thread, initialised to 0.
// NOTE(review): presumably meant as a parse error code, but none of the
// parser functions in this file read or write it -- confirm whether it is needed.
thread_local int json_error = 0;
// Parses a JSON object starting at `ptr` (leading whitespace allowed) into `node`.
//
// `node->type` becomes JOBJ. Each member gets its own JsonNode: the first is
// stored in `node->child`, later ones are chained through right/left. The
// member name goes into `key` (copied verbatim, escapes are not interpreted)
// and the member value is parsed by parse_json_value into the same node.
//
// Returns the position just past the closing '}', or 0 on a syntax error,
// an unterminated key, a failed member value, or an allocation failure.
// Nodes created before the error stay attached to `node`.
char* read_obj(char* ptr, JsonNode* node)
{
    SKIP_SPACE(ptr);
    if (*ptr != '{') return 0;
    ++ptr;
    node->type = JOBJ;
    SKIP_SPACE(ptr);
    if (*ptr == 0) return 0;
    if (*ptr == '}') return ptr + 1;    // empty object: no child node

    JsonNode* curr = new_jnode();
    if (curr == 0) return 0;
    node->child = curr;

    for (;;)
    {
        // Member name: "...".
        SKIP_SPACE(ptr);
        if (*ptr != '\"') return 0;
        ++ptr;
        while (*ptr != 0 && *ptr != '\"')
        {
            curr->key += *ptr++;
        }
        if (*ptr != '\"') return 0;     // input ended inside the key
        ++ptr;

        SKIP_SPACE(ptr);
        if (*ptr != ':') return 0;
        ++ptr;
        SKIP_SPACE(ptr);

        // Member value; a null result must not be dereferenced below.
        ptr = parse_json_value(ptr, curr);
        if (ptr == 0) return 0;

        SKIP_SPACE(ptr);
        if (*ptr != ',') break;
        ++ptr;

        // Another member follows: append a sibling node.
        JsonNode* next_node = new_jnode();
        if (next_node == 0) return 0;
        curr->right = next_node;
        next_node->left = curr;
        curr = next_node;
    }

    if (*ptr != '}') return 0;
    return ptr + 1;
}
// Parses a JSON array whose '[' is at `ptr` into `node`.
//
// `node->type` becomes JARR. Each element gets its own JsonNode: the first is
// stored in `node->child`, later ones are chained through right/left.
//
// Returns the position just past the closing ']', or 0 on a syntax error,
// a failed element, or an allocation failure. Nodes created before the
// error stay attached to `node`.
char* read_arr(char* ptr, JsonNode* node)
{
    if (*ptr != '[') return 0;
    ++ptr;
    node->type = JARR;
    SKIP_SPACE(ptr);
    if (*ptr == 0) return 0;
    if (*ptr == ']') return ptr + 1;    // empty array: no child node

    JsonNode* curr = new_jnode();
    if (curr == 0) return 0;
    node->child = curr;

    // A null result from parse_json_value must not reach SKIP_SPACE.
    ptr = parse_json_value(ptr, curr);
    if (ptr == 0) return 0;
    SKIP_SPACE(ptr);

    while (*ptr != ']')
    {
        // Also rejects end of input: NUL is not ','.
        if (*ptr != ',') return 0;
        ++ptr;
        SKIP_SPACE(ptr);

        JsonNode* next_node = new_jnode();
        if (next_node == 0) return 0;
        curr->right = next_node;
        next_node->left = curr;
        curr = next_node;

        ptr = parse_json_value(ptr, curr);
        if (ptr == 0) return 0;
        SKIP_SPACE(ptr);
    }

    return ptr + 1;
}
// Parses a JSON string whose opening '"' is at `ptr` into `node`.
//
// `node->type` becomes JSTR and `node->dstr` receives the raw characters
// between the quotes. Backslash escapes are not interpreted: the first '"'
// after the opening one ends the string.
//
// Returns the position just past the closing '"', or 0 when `ptr` is not at a
// '"' or the input ends before the closing quote.
char* read_str(char* ptr, JsonNode* node)
{
    if (*ptr != '\"') return 0;
    ++ptr;
    node->type = JSTR;

    char* const str_start = ptr;
    // Stop at NUL as well, so an unterminated string cannot run off the buffer.
    while (*ptr != 0 && *ptr != '\"')
    {
        ++ptr;
    }
    if (*ptr == 0) return 0;

    node->dstr.assign(str_start, static_cast<size_t>(ptr - str_start));
    return ptr + 1;
}
// Parses an unsigned JSON number at `ptr` (leading whitespace allowed) into `node`.
//
// Accepted form: digits, then an optional fraction ('.' followed by digits),
// then an optional exponent ('e' or 'E', optional sign, digits). A '.' or 'e'
// that is not followed by the required digits is left unconsumed. A leading
// '-' is not accepted.
//
// `node->type` becomes JNUM and `node->dval` the converted value. The
// conversion goes through std::strtod, which follows the C locale's decimal
// point.
//
// Returns the position after the number and any trailing whitespace, or 0
// when `ptr` does not start with a digit.
char* read_num(char* ptr, JsonNode* node)
{
    SKIP_SPACE(ptr);
    if (*ptr < '0' || *ptr > '9') return 0;
    node->type = JNUM;

    char* const num_start = ptr;
    while (*ptr >= '0' && *ptr <= '9')
    {
        ++ptr;
    }

    // Fraction: only consumed when at least one digit follows the '.'.
    if (*ptr == '.' && ptr[1] >= '0' && ptr[1] <= '9')
    {
        ++ptr;
        while (*ptr >= '0' && *ptr <= '9')
        {
            ++ptr;
        }
    }

    // Exponent: only consumed when at least one digit follows the marker/sign.
    if (*ptr == 'e' || *ptr == 'E')
    {
        char* exp_end = ptr + 1;
        if (*exp_end == '+' || *exp_end == '-') ++exp_end;
        if (*exp_end >= '0' && *exp_end <= '9')
        {
            while (*exp_end >= '0' && *exp_end <= '9')
            {
                ++exp_end;
            }
            ptr = exp_end;
        }
    }

    // Copy the exact span so strtod cannot read past what was validated.
    const std::string tmp(num_start, static_cast<size_t>(ptr - num_start));
    node->dval = std::strtod(tmp.c_str(), 0);

    SKIP_SPACE(ptr);
    return ptr;
}
// Parses one JSON value at `ptr` into `node`.
//
// Dispatches on the first non-whitespace character: '{' -> read_obj,
// '[' -> read_arr, '"' -> read_str, a digit -> read_num; the literals
// `true` / `false` are handled here (type JBOOL). Anything else, including
// `null` and negative numbers, is rejected.
//
// Returns the position after the value and any trailing whitespace, or 0 when
// `ptr` is at the end of input, the value kind is not recognised, or the
// selected helper fails.
char* parse_json_value(char* ptr, JsonNode* node)
{
    if (*ptr == 0) return 0;
    SKIP_SPACE(ptr);

    char* next = 0;     // stays 0 when nothing matches
    if (*ptr == '{')
    {
        next = read_obj(ptr, node);
    }
    else if (*ptr == '[')
    {
        next = read_arr(ptr, node);
    }
    else if (*ptr == '\"')
    {
        next = read_str(ptr, node);
    }
    else if (strncmp(ptr, "true", 4) == 0)
    {
        node->type = JBOOL;
        node->dbool = true;
        next = ptr + 4;
    }
    else if (strncmp(ptr, "false", 5) == 0)
    {
        node->type = JBOOL;
        node->dbool = false;
        next = ptr + 5;
    }
    else if (*ptr >= '0' && *ptr <= '9')
    {
        next = read_num(ptr, node);
    }

    // A failed helper returns 0; it must not be handed to SKIP_SPACE.
    if (next == 0) return 0;
    SKIP_SPACE(next);
    return next;
}
JsonNode* parse_json(char* ptr)
{
    JsonNode* root = new_jnode();
    ptr = parse_json_value(ptr, root);
    return root;
}

int main() {

    char * buf = ""
    "{"
   " \"abc\": 1,"
    "\"a1\": false,"
    "\"a2\":  \"sd\","
    "\"a3\": [1, {\"abc\":4}],"
    "\"a4\":{}"
    "}";
    printf("%s\n", buf);
    printf("jsdlkjfl\n");
    JsonNode* root = parse_json(buf);
    // printf("%s: %f\n", root->child->key.c_str(), root->child->dval);
    // printf("%s: %d\n", root->child->right->key.c_str(), root->child->right->dbool);
    printf("hello\n");
    auto first = root->child;
    for (int i=0; i < 5; ++i)
    {
        printf("[%d]\n", i);
        if (i == 0) std::cout << first->key << " : " << first->dval << std::endl;
        if (i == 1) std::cout << first->key << " : " << first->dbool << std::endl;
        if (i == 2) std::cout << first->key << " : " << first->dstr << std::endl;
        if (i == 3)
        {
            std::cout << first->key << " :[" << first->child->dval << first->child->right->child->key << first->child->right->child->dval << std::endl;
        }
        if (i == 4)
        {
            std::cout << "a4: " << first->key << " : " << first->child << first->left << first->right << "type:" << first->type << std::endl;
        }

        first = first->right;
    }

    return 0;
}

二、通讯协议设计

                                       data1                                          datan                                    
                                      ┌────────────────┐──────┌──────────────────┐   ┌───────────────┐                         
                                      │                │      │                  │   │               │                         
                                      │                │      │                  │   │               │                         
    ┌─────────────────┌───────────────│                │      │                  │   │               │                         
    │                 │               │                │      │                  │   │               ┌──────────┐              
    │                 │               │                │      │                  │   │               │          │              
    │                 │               │                │      │                  │   │               │          │              
    │                 │               │  DATA TYPE     │length│      DATA        │   │  DATA TYPE2   │          │              
    │   version       │   number      │                │      │                  │...│               │ CRC      │              
    │                 │   of          │                │      │                  │   │               │          │              
    │                 │   data        │                │      │                  │   │               │          │              
    │                 │               │                │      │                  │   │               │          │              
    │                 │               │                │      │                  │   │               └──────────┘              
    │                 │               │                │      │                  │   │               │                         
    └─────────────────└───────────────│                │      │                  │   │               │                         
                                      │                │      │                  │   │               │                         
                                      │                │      │                  │   │               │                         
                                      └────────────────┘──────┼──────────────────┤   └───────────────┘                         
                                                              │                  │                                             
                                                              │                  │                                             
                                     ┌────────────────────────┘                  └───────────────────────────────┐             
                                     │                                                                           │             
                                     │                                                                           │             
                                     ▼──────────┬───────────┬────────────┐────────┌─────────┬───────────┬────────▼             
                                     │          │           │            │        │         │           │        │             
                                     │          │           │            │        │         │           │        │             
                                     │ timestamp│   length  │  real-data │ ...... │timestamp│ length    │real-d  │             
                                     │          │           │            │        │         │           │        │             
                                     │          │           │            │        │         │           │        │             
                                     │          │           │            │        │         │           │        │             
                                     └──────────┴───────────┴────────────┘────────└─────────┴───────────┴────────┘             
                                                                                                       

设计一个数据结构,实现上图的数据存储格式

// One cached sample, kept in a singly linked list. The node and its payload
// are allocated as a single block (see get_cache_new_node).
struct CacheNode{
  struct CacheNode* next;     // next node in the list; nullptr at the end
  uint64_t timestamp;         // written by copy_data_to_node from its ts argument
  uint32_t data_length;       // written by copy_data_to_node from CacheData.data_length
  uint8_t data[0];            // payload bytes that follow the header in the same allocation
};
// Two singly linked lists of CacheNode: the nodes currently holding data
// ("used") and the nodes available for reuse ("free").
struct CacheList{
  struct CacheNode* used_node_head;  // oldest used node; expiry removes from here
  struct CacheNode* used_node_tail;  // the last used node itself, not one-past-the-end
  struct CacheNode* free_node_head;  // head of the reuse list
  uint32_t          used_node_cnt;   // number of nodes in the used list
};
// Cache for one data type (topic) inside a Package.
struct CacheData{
  uint32_t data_type; // static; identifies the topic (matched in get_topic_idx_on_package_cache)
  uint32_t data_len;  // dynamic; create_task fills it from get_recv_data_len_from_taskinfo()
  struct CacheList cache_lists; // used/free node lists for this data type
  uint32_t data_length; // static; payload size of every node, equal to CacheNode.data_length
};
struct TaskInfo;
// Values used for Package.status and as return codes of the push functions.
#define OK 0
#define NOTOK 1
// Runtime state of one task: one CacheData per data type plus header fields.
struct Package{
  struct TaskInfo *task_info; // back pointer to the owning task (set in create_task)
  uint32_t version; // static
  uint32_t num;  // number of data entries, computed from the actual data
  // uint32_t real_num; // // number of data, dynamic
  uint64_t timestamp; // 0 until on_listen_source_topic records a trigger time
  int status; // 0: OK, 1: NOTOK
  struct CacheData* cache_data; // array of num entries; could become a pointer-to-pointer if CacheData grows large
};

有以下配置文件

{
    "task":[
        {
            "name": "task1",
            "version": 1010,
            "flags":0,
            "collect_time": 30,
            "datas":[
                {
                    "topic":"CEMERA",
                    "data type": 34,
                    "data":[
                        32, // offset
                        8,  // len
                        64, // offset
                        4   // len
                    ]
                },{
                    "topic":"RADAR",
                    "data type": 35,
                    "data":[
                        32, // offset
                        64   // len
                    ]
                },
                {
                    "topic":"SENSOR",
                    "data type": 36,
                    "data":[
                        0, // offset
                        64,   // len
                        128, //offset
                        4  // len
                    ]
                }
            ]
        },
        {
            "name": "task2",
            "version": 1010,
            "flags":0,
            "collect_time": 5,
            "datas":[
                {
                    "topic":"CEMERA",
                    "data type": 34,
                    "data":[
                        32, // offset
                        8  // len
                    ]
                },
                {
                    "topic":"SENSOR",
                    "data type": 36,
                    "data":[
                        0, // offset
                        64,   // len
                        128, //offset
                        8  // len
                    ]
                }
            ]
        }
    ]
}

其中一个task对应一个Package,以上有两个不同的task

下面需要设计一种数据结构,可以方便的表示上述的json配置

// One (offset, len) pair from a topic's "data" array: the byte range that
// copy_data_to_node copies out of an incoming message.
struct DataUnit{
  uint32_t offset; // start of the range inside the incoming message
  uint32_t len;    // number of bytes to copy
};
// Configuration of one topic of a task: one entry of the "datas" array.
// Allocated with room for datas_number DataUnit entries after the header.
struct RegTopicInfo{
  const char* topic;        // topic name ("topic" in the config)
  uint32_t data_type;       // numeric type id ("data type" in the config)
  uint32_t datas_number;    // number of entries in datas[]
  struct DataUnit datas[0]; // the (offset, len) pairs, stored in the same allocation
};
// Configuration of one task: one entry of the top-level task array.
// Each task is paired with exactly one Package.
struct TaskInfo{
  const char* source_topic;     // when non-null, create_task registers it as the source topic
  struct Package* package;      // runtime state for this task (set in create_task)
  const char* task_name;        // task name from the config
  uint32_t version;             // "version" in the config
  uint32_t flags;               // "flags" in the config
  uint32_t collect_half_time;   // passed as time_duration to push_data_to_package
  uint32_t task_counter;        // NOTE(review): not used by any code in this file
  uint32_t topic_number;        // number of entries in topics[]
  struct RegTopicInfo** topics; // one RegTopicInfo per configured topic
};

上面的结构完全表示了json的配置,只需读取一次

一个task对应着一个package

解析json获取配置信息

// Builds a RegTopicInfo from one entry of a task's "datas" array.
//
// Expects `task_obj` to be an object with a string "topic", an unsigned
// "data type", and an array "data" of unsigned integers laid out as
// offset, len, offset, len, ... (an odd trailing element is ignored).
//
// The result is a single malloc block holding the header, the DataUnit
// array and a private copy of the topic name, so one free() releases all of
// it. Returns nullptr when a field is missing or has the wrong type, or when
// the allocation fails.
//
// NOTE(review): written against the RapidJSON Value API, which the
// Value/Document/Size() usage in this file appears to be -- confirm.
RegTopicInfo* parse_topic(const Value& task_obj)
{
    if (!task_obj.IsObject()) return nullptr;
    if (!task_obj.HasMember("topic") || !task_obj["topic"].IsString()) return nullptr;
    if (!task_obj.HasMember("data type") || !task_obj["data type"].IsUint()) return nullptr;
    if (!task_obj.HasMember("data") || !task_obj["data"].IsArray()) return nullptr;

    const Value& name_val = task_obj["topic"];
    const Value& data_arr = task_obj["data"];
    const uint32_t unit_count = data_arr.Size() / 2;

    // Validate every element up front so nothing has to be undone later.
    for (uint32_t i = 0; i < 2 * unit_count; ++i)
    {
        if (!data_arr[i].IsUint()) return nullptr;
    }

    const size_t name_len = name_val.GetStringLength();
    const size_t units_bytes = unit_count * sizeof(DataUnit);
    RegTopicInfo* topic_info = static_cast<RegTopicInfo*>(
        malloc(sizeof(*topic_info) + units_bytes + name_len + 1)
    );
    if (topic_info == nullptr) return nullptr;

    // The name lives right behind the DataUnit array; the pointer handed out
    // by the JSON value would only stay valid as long as the document does.
    char* name_copy = reinterpret_cast<char*>(topic_info->datas + unit_count);
    memcpy(name_copy, name_val.GetString(), name_len);
    name_copy[name_len] = '\0';

    topic_info->topic = name_copy;
    topic_info->data_type = task_obj["data type"].GetUint();
    topic_info->datas_number = unit_count;
    for (uint32_t i = 0; i < unit_count; ++i)
    {
        topic_info->datas[i].offset = data_arr[2 * i].GetUint();
        topic_info->datas[i].len = data_arr[2 * i + 1].GetUint();
    }
    return topic_info;
}
// Builds a TaskInfo from one entry of the top-level task array.
//
// Requires a string "name" and an array "datas"; "version", "flags" and
// "collect_time" are read when present as unsigned integers and otherwise
// stay 0. Every entry of "datas" is converted with parse_topic.
//
// The TaskInfo and a private copy of the task name share one malloc block.
// Returns 0 when a required field is missing or mistyped, an allocation
// fails, or any topic fails to parse; everything allocated so far is freed.
//
// NOTE(review): written against the RapidJSON Value API -- confirm.
// NOTE(review): "collect_time" is stored unchanged in collect_half_time;
// confirm whether the field is meant to hold half of the configured value.
TaskInfo* parse_task_cfg(const Value& task_obj)
{
    if (!task_obj.IsObject()) return 0;
    if (!task_obj.HasMember("name") || !task_obj["name"].IsString()) return 0;
    if (!task_obj.HasMember("datas") || !task_obj["datas"].IsArray()) return 0;

    const Value& name_val = task_obj["name"];
    const Value& topics_arr = task_obj["datas"];
    const size_t name_len = name_val.GetStringLength();

    TaskInfo* task = static_cast<TaskInfo*>(malloc(sizeof(*task) + name_len + 1));
    if (task == nullptr) return 0;
    memset(task, 0, sizeof(*task));

    // The name is stored directly behind the struct.
    char* name_copy = reinterpret_cast<char*>(task + 1);
    memcpy(name_copy, name_val.GetString(), name_len);
    name_copy[name_len] = '\0';
    task->task_name = name_copy;

    if (task_obj.HasMember("version") && task_obj["version"].IsUint())
    {
        task->version = task_obj["version"].GetUint();
    }
    if (task_obj.HasMember("flags") && task_obj["flags"].IsUint())
    {
        task->flags = task_obj["flags"].GetUint();
    }
    if (task_obj.HasMember("collect_time") && task_obj["collect_time"].IsUint())
    {
        task->collect_half_time = task_obj["collect_time"].GetUint();
    }

    task->topic_number = topics_arr.Size();
    if (task->topic_number == 0) return task;   // no topics: topics stays nullptr

    task->topics = static_cast<RegTopicInfo**>(
        malloc(sizeof(*task->topics) * task->topic_number)
    );
    if (task->topics == nullptr)
    {
        free(task);
        return 0;
    }

    for (uint32_t i = 0; i < task->topic_number; ++i)
    {
        task->topics[i] = parse_topic(topics_arr[i]);
        if (task->topics[i] == nullptr)
        {
            // Undo the topics parsed so far, then the task itself.
            for (uint32_t j = 0; j < i; ++j)
            {
                free(task->topics[j]);
            }
            free(task->topics);
            free(task);
            return 0;
        }
    }
    return task;
}
int get_task_infos(struct TaskInfo*** _infos)
{
    struct TaskInfo** taskinfos;
    //json parse
    int task_number = 2;
    Document document; 
    const Value& tasks_arr = document["tasks"];
    //
    taskinfos = (TaskInfo**)malloc(sizeof(*taskinfos) * task_number);
    if (taskinfos == nullptr) return 0;
    for (int i=0; i < task_number; ++task_number)
    {
        taskinfos[i] = parse_task_cfg(tasks_arr[i]);
    }
    *_infos = taskinfos;
    return task_number;
}

注册相关信息

uint32_t get_recv_data_len_from_taskinfo();
void create_task(struct TaskInfo* taskinfo, int i)
{
    if (taskinfo == nullptr) return;
    Package* package = (Package*)malloc(sizeof(*package));
    taskinfo->package = package;
    if (package == nullptr) return;
    memset(package, 0, sizeof(*package));
    package->task_info = taskinfo;

    package->num;
    package->cache_data = (CacheData*)malloc(sizeof(CacheData)*package->num);
    if (!package->cache_data) return;
    for (int j=0; j<package->num; ++j)
    {
        auto& cache_data = package->cache_data[j];
        cache_data.data_type;
        cache_data.data_len = get_recv_data_len_from_taskinfo();
        cache_data.cache_lists;
    }
    // do some configuration
    if ( taskinfo->source_topic )
    {
        // register source topic
    }
    for (size_t j = 0; j < package->num; j++)
    {
        package->cache_data[i].data_type; // register topic
    }
    // push on_listen_source_topic into thread
    // push on_listen_topic into thread
    // start task running
}

将收到的数据存入数据结构

int get_topic_data();
void on_listen_source_topic(struct TaskInfo** task)
{
    if (*task == nullptr) return;
    int next_step_flag = 0;
    int data = get_topic_data();
    if (data == 1)
    {
        next_step_flag = 1;
    }
    else if (data == 2)
    {
        next_step_flag = 1;
    }
    if (next_step_flag)
    {
        (*task)->package->timestamp = time();
    }
}
// Body of the collection thread: serialises `package`, then re-arms the task.
//
// on_listen_topic is expected to have set `*task` to nullptr before starting
// this thread, so it returns early while the package is being processed. Once
// all work is done, `*task` is restored from the package's back pointer and
// on_listen_topic resumes normal operation.
//
// NOTE(review): `*task` is a plain pointer written here and read on the
// listener thread; consider std::atomic if the two threads can overlap.
void collection_package(Package* package, struct TaskInfo** task)
{
    if (package == nullptr || task == nullptr) return;

    do_package(package);
    // Any further post-processing of the package belongs here.

    // Clear the trigger time; otherwise the next push_data_to_package call
    // would see it as elapsed and report NOTOK again straight away.
    package->timestamp = 0;

    // Last step: make the task visible to on_listen_topic again.
    *task = package->task_info;
}
// Runs collection_package(package, task) on a new detached thread and
// returns immediately. Both pointers must stay valid until that thread ends.
void start_collection_package(Package* package, struct TaskInfo** task)
{
    std::thread(collection_package, package, task).detach();
}
char* get_data_from_external(int);

// Finds the cache slot of `package` whose data_type equals
// `topic_from_external`. Returns its index, or -1 when no slot matches.
int get_topic_idx_on_package_cache(Package*package, int topic_from_external)
{
    int slot = 0;
    while (slot < package->num)
    {
        if (package->cache_data[slot].data_type == topic_from_external)
        {
            return slot;
        }
        ++slot;
    }
    return -1;
}
// Stores one incoming message in cache slot `idx` of `package`.
//
// Returns NOTOK, without storing, when the package status is NOTOK or when a
// trigger time is set (package->timestamp != 0) and more than `time_duration`
// has passed since it. Otherwise nodes older than `time_duration` are
// recycled, a node is taken for the new message, and OK is returned. The
// message is dropped silently (still OK) when no node can be obtained or the
// copy fails.
//
// Times are milliseconds of UTC time, matching the `curr_ms` parameter of
// free_package_expired_data.
// NOTE(review): `time_duration` is compared against millisecond values --
// confirm the unit of the configured collect time.
int push_data_to_package(Package* package, int idx, char* data, int time_duration)
{
    struct timespec ts;
    timespec_get(&ts, TIME_UTC); // high-resolution clock introduced with C11
    const uint64_t now_ms = static_cast<uint64_t>(ts.tv_sec) * 1000u
                          + static_cast<uint64_t>(ts.tv_nsec) / 1000000u;
    const uint64_t window = (time_duration > 0) ? static_cast<uint64_t>(time_duration) : 0;

    if (package->status == NOTOK) return NOTOK;
    if (package->timestamp != 0)  // trigger seen: caching is about to end
    {
        // The first comparison keeps the unsigned subtraction from wrapping.
        if (now_ms > package->timestamp && now_ms - package->timestamp > window)
        {
            return NOTOK;
        }
    }

    struct CacheData& slot = package->cache_data[idx];
    if (package->status == OK)
    {
        free_package_expired_data(slot, now_ms, window);
    }

    struct CacheNode* node = get_cache_new_node(slot);
    if (node != nullptr)
    {
        if (copy_data_to_node(package, idx, node, data, now_ms) == 0)
        {
            push_node_into_list(package, idx, node);
        }
        else
        {
            // Copy rejected: return the node to the reuse list.
            node->next = slot.cache_lists.free_node_head;
            slot.cache_lists.free_node_head = node;
        }
    }
    return OK;
}
// Fetches the message for `topic_from_external` and stores it in the matching
// cache slot of `package`.
//
// Returns the result of push_data_to_package (OK / NOTOK), or -1 when
// `package` is null, no message is available, or the package has no slot for
// this topic.
int push_data_into_package(Package* package, int topic_from_external)
{
    if (package == nullptr || package->task_info == nullptr) return -1;

    char* data = get_data_from_external(topic_from_external);
    if (data == nullptr) return -1;     // nothing to copy from

    const int idx = get_topic_idx_on_package_cache(package, topic_from_external);
    if (idx < 0) return -1;

    return push_data_to_package(package, idx, data, package->task_info->collect_half_time);
}
// Handler for a data topic. Assumes a single listener thread; the design is
// meant to avoid data races by parking the task while its package is being
// collected.
//
// Stores the incoming message in the task's package. When the package
// reports NOTOK (caching must stop), `*task` is set to nullptr so that later
// calls return immediately, and the package is handed to a collection thread
// which restores `*task` once it has finished.
//
// NOTE(review): `xxx` is a placeholder from the original signature and is
// kept as is; replace it with the real parameter(s).
// NOTE(review): `*task` is a plain pointer shared with the collection
// thread; consider std::atomic if the two threads can overlap.
void on_listen_topic(struct TaskInfo** task, int topic_from_external, xxx)
{
    if (task == nullptr || (*task) == nullptr) return;
    if ((*task)->package == nullptr) return;
    Package* package = (*task)->package;

    // The return value tells which state the package is in: OK / NOTOK.
    const int ret = push_data_into_package(package, topic_from_external);
    if (ret == NOTOK)
    {   // stop caching data
        *task = nullptr;
        start_collection_package(package, task);
    }
}
// Placeholder thread body: intentionally does nothing.
void running_thread()
{
}

aux func

void free_package_expired_data(struct CacheData& cache_data, uint64_t curr_ms, uint64_t duration)
{
    struct CacheNode* node = cache_data.cache_lists.used_node_head;
    while(node != nullptr)
    {
        if (curr_ms - node->timestamp > duration)
        {
 		    CacheNode next = node->next;
 		    node->next = cache_data.cache_lists.free_node_head;
 		    cache_data.cache_lists.free_node_head = node;
 		    node = next;
 		    cache_data.cache_lists.used_node_head = next;
 		    cache_data.cache_lists.used_node_cnt--;
 		}
 		else break;
    }
}
struct CacheNode* get_cache_new_node((struct CacheData& cache_data)
{
    if (cache_data.cache_lists.free_node_head != nullptr)
    {
    	auto node = cache_data.cache_lists.free_node_head;
    	cache_data.cache_lists.free_node_head = node->next;
    	node->next = nullptr;
    	return node;
    }
    CacheNode* node = (CacheNode*)malloc(sizeof(CacheNode) + cache_data.data_length);
    if (ndoe != nullptr)
    {
		node->next = nullptr;
	}
	return node;
}
// Fills `node` with the configured byte ranges of the message `data`.
//
// For every DataUnit of topic `idx`, `len` bytes starting at `data + offset`
// are appended to node->data. The node's timestamp is set to `ts` and its
// data_length to the slot's data_length; payload bytes not covered by any
// range are zeroed.
//
// Returns 0 on success. Returns -1 without touching the node when an
// argument or the topic description is null, or when the ranges together
// would exceed the node's payload capacity. The size of `data` is not known
// here, so reads from it cannot be bounds-checked.
int copy_data_to_node(Package* package, int idx, CacheNode* node, char* data, uint64_t ts)
{
    if (package == nullptr || node == nullptr || data == nullptr) return -1;
    if (package->task_info == nullptr || package->task_info->topics == nullptr) return -1;

    struct RegTopicInfo* topic_info = package->task_info->topics[idx];
    if (topic_info == nullptr) return -1;

    // Nodes are allocated with exactly this many payload bytes.
    const uint32_t capacity = package->cache_data[idx].data_length;

    uint32_t total = 0;
    for (uint32_t i = 0; i < topic_info->datas_number; ++i)
    {
        const uint32_t len = topic_info->datas[i].len;
        if (len > capacity - total) return -1;  // would overflow the payload
        total += len;
    }

    node->timestamp = ts;
    node->data_length = capacity;

    uint8_t* dst = node->data;
    for (uint32_t i = 0; i < topic_info->datas_number; ++i)
    {
        const struct DataUnit& unit = topic_info->datas[i];
        memcpy(dst, data + unit.offset, unit.len);
        dst += unit.len;
    }
    // Keep stale heap bytes out of the part that is serialised later.
    memset(dst, 0, capacity - total);
    return 0;
}
// Appends `node` to the end of the used list of cache slot `idx` and bumps
// the list's node counter.
void push_node_into_list(Package* package, int idx, CacheNode* node)
{
    CacheList& list = package->cache_data[idx].cache_lists;
    node->next = nullptr;

    // Either the list's head (empty list) or the current tail's next link.
    CacheNode** link = (list.used_node_head == nullptr)
                           ? &list.used_node_head
                           : &list.used_node_tail->next;
    *link = node;

    list.used_node_tail = node;
    ++list.used_node_cnt;
}

数据序列化

// Serialises `package` into a byte string and recycles its cached nodes.
//
// Layout, all integers in host byte order:
//   version | num | per data type: data_type | data_len | records... | crc
// where each record is timestamp | data_length | payload, and data_len is the
// total size of that data type's records. After a data type has been
// written, all of its used nodes are moved to the free list and its used
// list is reset.
//
// NOTE(review): the serialised buffer is local and dropped on return, as in
// the original; sending or storing it is still to be added.
void do_package(Package* package)
{
    if (package == nullptr) return;

    std::string out;
    const auto append_raw = [&out](const void* bytes, size_t count) {
        out.append(static_cast<const char*>(bytes), count);
    };

    append_raw(&package->version, sizeof(package->version));
    append_raw(&package->num, sizeof(package->num)); // todo: compute from the actual data

    for (uint32_t i = 0; i < package->num; ++i)
    {
        struct CacheData& cache_data = package->cache_data[i];
        struct CacheList& list = cache_data.cache_lists;

        append_raw(&cache_data.data_type, sizeof(cache_data.data_type));

        const uint32_t record_len = static_cast<uint32_t>(
            sizeof(uint64_t) + sizeof(uint32_t) + cache_data.data_length);
        const uint32_t data_len = list.used_node_cnt * record_len;
        append_raw(&data_len, sizeof(data_len));

        // Fields are appended one by one: the node starts with its `next`
        // pointer, which must not end up in the output.
        struct CacheNode* node = list.used_node_head;
        for (uint32_t n = 0; n < list.used_node_cnt && node != nullptr; ++n)
        {
            append_raw(&node->timestamp, sizeof(node->timestamp));
            append_raw(&node->data_length, sizeof(node->data_length));
            append_raw(node->data, node->data_length);
            node = node->next;
        }

        // Recycle the whole used list in one splice; skip when it is empty.
        if (list.used_node_head != nullptr && list.used_node_tail != nullptr)
        {
            list.used_node_tail->next = list.free_node_head;
            list.free_node_head = list.used_node_head;
        }
        list.used_node_head = nullptr;
        list.used_node_tail = nullptr;
        list.used_node_cnt = 0;
    }

    uint32_t crc = CalcuCrc32(0, out.c_str(), out.size());
    append_raw(&crc, sizeof(crc));
}

总结

a TASK —— package —— once thread —— avoid data race

This is a good design

;