From ec61d20b29232a748e402757b9b433b0e79b0204 Mon Sep 17 00:00:00 2001
From: Rokas Puzonas
Date: Tue, 6 Jun 2023 00:35:26 +0300
Subject: [PATCH] minimal json parser for generated data

---
 Makefile          |  17 +-
 src/json_parser.c | 507 ++++++++++++++++++++++++++++++++++++++++++++++
 src/json_parser.h |  63 +++++++
 src/main.c        |  53 +++++++
 4 files changed, 634 insertions(+), 6 deletions(-)
 create mode 100644 src/json_parser.c
 create mode 100644 src/json_parser.h
 create mode 100644 src/main.c

diff --git a/Makefile b/Makefile
index fb71766..d36c479 100644
--- a/Makefile
+++ b/Makefile
@@ -1,12 +1,17 @@
-CFLAGS=-g -Wall
+CFLAGS=-lm -g -Wall -Walloc-size-larger-than=-1
 
-.PHONY := gen_data
+.PHONY: gen_data main clean
 
-gen_data: build/gen_data.exe
+gen_data: build/gen_data
+main: build/main
 
-build/gen_data.exe: src/gen_data.c
+build/gen_data: src/gen_data.c
 	mkdir -p build
-	gcc -o build/gen_data.exe src/gen_data.c $(CFLAGS) -O2
+	gcc -o build/gen_data src/gen_data.c $(CFLAGS) -O2
+
+build/main: src/main.c src/json_parser.c src/json_parser.h
+	mkdir -p build
+	gcc -o build/main src/main.c $(CFLAGS) -O2
 
 clean:
-	rm -r build
\ No newline at end of file
+	rm -r build
diff --git a/src/json_parser.c b/src/json_parser.c
new file mode 100644
index 0000000..d8aa853
--- /dev/null
+++ b/src/json_parser.c
@@ -0,0 +1,507 @@
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "json_parser.h"
+
+#define ARRAY_LEN(x) (sizeof(x)/sizeof((x)[0]))
+
+typedef uint8_t u8;
+typedef uint32_t u32;
+
+// Allocation helpers: running out of memory is fatal for this parser.
+// `size` must not be 0, a NULL result would then be ambiguous.
+static void *xmalloc(size_t size)
+{
+    void *ptr = malloc(size);
+    if (ptr == NULL) abort();
+    return ptr;
+}
+
+static void *xrealloc(void *ptr, size_t size)
+{
+    void *resized = realloc(ptr, size);
+    if (resized == NULL) abort();
+    return resized;
+}
+
+// Frees an object, every key and value in it, and the object itself.
+void free_json_object(struct json_object *object)
+{
+    if (object == NULL) return;
+
+    for (size_t i = 0; i < object->count; i++) {
+        free_json_string(object->keys[i]);
+        free_json_value(object->values[i]);
+    }
+    free(object->keys);
+    free(object->values);
+    free(object);
+}
+
+// Frees whatever `value` owns and then `value` itself.
+void free_json_value(struct json_value *value)
+{
+    if (value == NULL) return;
+
+    if (value->type == JSON_TYPE_OBJECT) {
+        free_json_object(value->object);
+    } else if (value->type == JSON_TYPE_ARRAY) {
+        free_json_array(value->array);
+    } else if (value->type == JSON_TYPE_STRING) {
+        free_json_string(value->string);
+    }
+    free(value);
+}
+
+// Frees the string's bytes and the string itself.
+void free_json_string(struct json_string *string)
+{
+    if (string == NULL) return;
+    free(string->str);
+    free(string);
+}
+
+// Frees an array, every value in it, and the array itself.
+void free_json_array(struct json_array *array)
+{
+    if (array == NULL) return;
+    for (size_t i = 0; i < array->count; i++) {
+        free_json_value(array->values[i]);
+    }
+    free(array->values);
+    free(array);
+}
+
+// Advances the cursor by `amount` bytes, clamped to the bytes that are left.
+// Returns how far the cursor actually moved.
+static size_t inc_cursor(char **data, size_t *data_size, size_t amount)
+{
+    size_t bytes_parsed = 0;
+    if (amount > *data_size) {
+        bytes_parsed = *data_size;
+        (*data) += *data_size;
+        (*data_size) = 0;
+    } else {
+        bytes_parsed = amount;
+        (*data) += amount;
+        (*data_size) -= amount;
+    }
+    return bytes_parsed;
+}
+
+// NUL bytes are treated as whitespace too.
+static bool is_whitespace(char c)
+{
+    return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\0';
+}
+
+// Moves the cursor past leading whitespace and returns the bytes skipped.
+static size_t skip_ws(char **data, size_t *data_size)
+{
+    size_t ws_count = 0;
+    // Test the remaining byte count, not the pointer to it
+    while (*data_size > 0) {
+        if (!is_whitespace(**data)) break;
+        inc_cursor(data, data_size, 1);
+        ws_count++;
+    }
+    return ws_count;
+}
+
+// Returns the size in bytes of the string literal at `data`, both quotes
+// included, or -1 if it is not closed within `data_size` bytes.
+static int get_json_string_size(char *data, size_t data_size)
+{
+    if (data_size == 0) return -1;
+
+    assert(data[0] == '"');
+    for (size_t i = 1; i < data_size; i++) {
+        if (data[i] == '\\') {
+            i++; // Whatever follows a backslash can not close the string
+        } else if (data[i] == '"') {
+            return (int)(i+1);
+        }
+    }
+    return -1;
+}
+
+// Converts one hex digit to its value, aborts on any other character.
+static u8 decode_hex(char hex)
+{
+    if ('a' <= hex && hex <= 'f') {
+        return hex - 'a' + 10;
+    } else if ('A' <= hex && hex <= 'F') {
+        return hex - 'A' + 10;
+    } else if ('0' <= hex && hex <= '9') {
+        return hex - '0';
+    } else {
+        abort();
+    }
+}
+
+// Reads the 4 hex digits of a `\uXXXX` escape as one 16-bit code unit.
+static u32 read_hex_symbol(char *data)
+{
+    u32 symbol = 0;
+    for (int i = 0; i < 4; i++) {
+        symbol = (symbol << 4) | decode_hex(data[i]);
+    }
+    return symbol;
+}
+
+// Writes `symbol` to `out` as UTF-8 and returns the bytes written (1 to 4).
+static size_t encode_utf8(char *out, u32 symbol)
+{
+    if (symbol < 0x80) {
+        out[0] = (char)symbol;
+        return 1;
+    } else if (symbol < 0x800) {
+        out[0] = (char)(0xC0 | (symbol >> 6));
+        out[1] = (char)(0x80 | (symbol & 0x3F));
+        return 2;
+    } else if (symbol < 0x10000) {
+        out[0] = (char)(0xE0 | (symbol >> 12));
+        out[1] = (char)(0x80 | ((symbol >> 6) & 0x3F));
+        out[2] = (char)(0x80 | (symbol & 0x3F));
+        return 3;
+    } else {
+        out[0] = (char)(0xF0 | (symbol >> 18));
+        out[1] = (char)(0x80 | ((symbol >> 12) & 0x3F));
+        out[2] = (char)(0x80 | ((symbol >> 6) & 0x3F));
+        out[3] = (char)(0x80 | (symbol & 0x3F));
+        return 4;
+    }
+}
+
+// True when there is at least one byte left and it equals `expected`.
+static bool expect_char(char *data, size_t data_size, char expected)
+{
+    if (data_size == 0) return false;
+    return data[0] == expected;
+}
+
+// True for digits, `.` and `-`.
+static bool is_number_char(char c)
+{
+    char number_chars[] = "0123456789.-";
+    for (size_t i = 0; i < ARRAY_LEN(number_chars)-1; i++) {
+        if (number_chars[i] == c) return true;
+    }
+    return false;
+}
+
+// True when there is at least one byte left and it is a number character.
+static bool expect_number_char(char *data, size_t data_size)
+{
+    if (data_size == 0) return false;
+    return is_number_char(data[0]);
+}
+
+// Parses any JSON value at the start of `data`; leading whitespace is allowed.
+// Returns the bytes consumed, whitespace included. Invalid input trips an
+// assert, or returns -1 when asserts are compiled out.
+int parse_json_value(struct json_value *result, char *data, size_t data_size)
+{
+    size_t bytes_parsed = 0;
+    bytes_parsed += skip_ws(&data, &data_size);
+
+    // Also the type that is left in place for `null` and unrecognised input
+    result->type = JSON_TYPE_NULL;
+
+    int value_size = -1;
+    if (expect_char(data, data_size, '{')) {
+        result->type = JSON_TYPE_OBJECT;
+        result->object = xmalloc(sizeof(struct json_object));
+        value_size = parse_json_object(result->object, data, data_size);
+    } else if (expect_char(data, data_size, '[')) {
+        result->type = JSON_TYPE_ARRAY;
+        result->array = xmalloc(sizeof(struct json_array));
+        value_size = parse_json_array(result->array, data, data_size);
+    } else if (expect_char(data, data_size, '"')) {
+        result->type = JSON_TYPE_STRING;
+        result->string = xmalloc(sizeof(struct json_string));
+        value_size = parse_json_string(result->string, data, data_size);
+    } else if (expect_number_char(data, data_size)) {
+        result->type = JSON_TYPE_NUMBER;
+        value_size = parse_json_number(&result->number, data, data_size);
+    } else if (expect_char(data, data_size, 't') || expect_char(data, data_size, 'f')) {
+        result->type = JSON_TYPE_BOOLEAN;
+        value_size = parse_json_bool(&result->boolean, data, data_size);
+    } else if (expect_char(data, data_size, 'n')) {
+        value_size = parse_json_null(data, data_size);
+    }
+
+    assert(value_size >= 0);
+    if (value_size < 0) return -1;
+
+    return bytes_parsed + value_size;
+}
+
+// Parses an object starting at its `{`. Returns the bytes consumed up to and
+// including the closing `}`. Malformed input trips an assert.
+int parse_json_object(struct json_object *result, char *data, size_t data_size)
+{
+    assert(expect_char(data, data_size, '{'));
+    size_t bytes_parsed = 0;
+    bytes_parsed += inc_cursor(&data, &data_size, 1);
+    bytes_parsed += skip_ws(&data, &data_size);
+
+    result->count = 0;
+    result->keys = NULL;
+    result->values = NULL;
+
+    if (expect_char(data, data_size, '}')) { // Empty object
+        return bytes_parsed + 1;
+    }
+
+    while (data_size > 0) {
+        size_t idx = result->count;
+        result->count++;
+        result->keys = xrealloc(result->keys, result->count*sizeof(struct json_string*));
+        result->values = xrealloc(result->values, result->count*sizeof(struct json_value*));
+        result->keys[idx] = xmalloc(sizeof(struct json_string));
+        result->values[idx] = xmalloc(sizeof(struct json_value));
+
+        int key_size = parse_json_string(result->keys[idx], data, data_size);
+        assert(key_size >= 0);
+
+        bytes_parsed += inc_cursor(&data, &data_size, key_size);
+        bytes_parsed += skip_ws(&data, &data_size);
+        assert(expect_char(data, data_size, ':'));
+        bytes_parsed += inc_cursor(&data, &data_size, 1);
+        bytes_parsed += skip_ws(&data, &data_size);
+
+        int value_size = parse_json_value(result->values[idx], data, data_size);
+        assert(value_size >= 0);
+
+        bytes_parsed += inc_cursor(&data, &data_size, value_size);
+        bytes_parsed += skip_ws(&data, &data_size);
+
+        if (!expect_char(data, data_size, ',')) {
+            break;
+        }
+
+        bytes_parsed += inc_cursor(&data, &data_size, 1);
+        bytes_parsed += skip_ws(&data, &data_size);
+    }
+
+    assert(expect_char(data, data_size, '}'));
+
+    return bytes_parsed+1;
+}
+
+// Parses an array starting at its `[`. Returns the bytes consumed up to and
+// including the closing `]`. Malformed input trips an assert.
+int parse_json_array(struct json_array *result, char *data, size_t data_size)
+{
+    assert(expect_char(data, data_size, '['));
+    size_t bytes_parsed = 0;
+    bytes_parsed += inc_cursor(&data, &data_size, 1);
+    bytes_parsed += skip_ws(&data, &data_size);
+
+    result->count = 0;
+    result->values = NULL;
+
+    if (expect_char(data, data_size, ']')) { // Empty array
+        return bytes_parsed + 1;
+    }
+
+    while (data_size > 0) {
+        size_t idx = result->count;
+        result->count++;
+        result->values = xrealloc(result->values, result->count*sizeof(struct json_value*));
+        result->values[idx] = xmalloc(sizeof(struct json_value));
+
+        int value_size = parse_json_value(result->values[idx], data, data_size);
+        assert(value_size >= 0);
+
+        bytes_parsed += inc_cursor(&data, &data_size, value_size);
+        bytes_parsed += skip_ws(&data, &data_size);
+
+        if (!expect_char(data, data_size, ',')) {
+            break;
+        }
+
+        bytes_parsed += inc_cursor(&data, &data_size, 1);
+        bytes_parsed += skip_ws(&data, &data_size);
+    }
+
+    assert(expect_char(data, data_size, ']'));
+
+    return bytes_parsed+1;
+}
+
+// Parses a number at the start of `data`. Returns the bytes consumed, or -1
+// when no number could be read.
+int parse_json_number(f64 *result, char *data, size_t data_size)
+{
+    // strtod() needs a terminated string, but `data` is only bounded by
+    // `data_size`, so the candidate characters are copied out first.
+    char number[512];
+    size_t number_size = 0;
+    while (number_size < data_size) {
+        char c = data[number_size];
+        bool is_exponent_char = c == 'e' || c == 'E' || c == '+';
+        if (!is_number_char(c) && !is_exponent_char) break;
+        if (number_size == ARRAY_LEN(number)-1) return -1; // Too long
+        number[number_size] = c;
+        number_size++;
+    }
+    number[number_size] = '\0';
+
+    char *number_end = NULL;
+    *result = strtod(number, &number_end);
+    if (number_end == number) return -1;
+
+    return (int)(number_end - number);
+}
+
+// Parses a string literal starting at its opening quote and decodes its
+// escape sequences (`\uXXXX` becomes UTF-8). `result->str` is NUL terminated
+// for convenience, `result->size` does not count the terminator.
+// Returns the size of the literal, quotes included, or -1 if it is unterminated.
+int parse_json_string(struct json_string *result, char *data, size_t data_size)
+{
+    assert(expect_char(data, data_size, '"'));
+
+    result->str = NULL;
+    result->size = 0;
+
+    int json_string_size = get_json_string_size(data, data_size);
+    if (json_string_size < 0) return -1;
+
+    // Decoding never grows the text (every escape is at least as long as what
+    // it decodes to), so the raw length plus a terminator is always enough.
+    size_t closing_quote = json_string_size-1;
+    result->str = xmalloc(closing_quote*sizeof(char));
+
+    for (size_t i = 1; i < closing_quote; i++) {
+        if (data[i] != '\\') {
+            result->str[result->size] = data[i];
+            result->size++;
+            continue;
+        }
+
+        i++; // Move onto the character that names the escape
+        if (data[i] == 'u') {
+            if (i+4 >= closing_quote) abort(); // Truncated \uXXXX
+            u32 symbol = read_hex_symbol(data + i + 1);
+            i += 4;
+
+            // A high surrogate followed by a low one encodes a single symbol
+            bool is_high = 0xD800 <= symbol && symbol <= 0xDBFF;
+            if (is_high && i+6 < closing_quote && data[i+1] == '\\' && data[i+2] == 'u') {
+                u32 low = read_hex_symbol(data + i + 3);
+                if (0xDC00 <= low && low <= 0xDFFF) {
+                    symbol = 0x10000 + ((symbol - 0xD800) << 10) + (low - 0xDC00);
+                    i += 6;
+                }
+            }
+
+            result->size += encode_utf8(result->str + result->size, symbol);
+            continue;
+        }
+
+        char decoded = 0;
+        switch (data[i]) {
+        case '\\': decoded = '\\'; break;
+        case '"':  decoded = '"';  break;
+        case '/':  decoded = '/';  break;
+        case 'b':  decoded = '\b'; break;
+        case 'f':  decoded = '\f'; break;
+        case 'n':  decoded = '\n'; break;
+        case 'r':  decoded = '\r'; break;
+        case 't':  decoded = '\t'; break;
+        default: abort(); // Unknown escape sequence
+        }
+        result->str[result->size] = decoded;
+        result->size++;
+    }
+
+    result->str[result->size] = '\0';
+    result->str = xrealloc(result->str, (result->size+1)*sizeof(char));
+
+    return json_string_size;
+}
+
+// Matches the literal `null`. Returns its length (4), or -1 if it is absent.
+int parse_json_null(char *data, size_t data_size)
+{
+    if (data_size < 4 || memcmp(data, "null", 4) != 0) return -1;
+    return 4;
+}
+
+// Matches `true` or `false` and stores it in `result`. Returns the length of
+// the literal, or -1 (with `result` set to false) if neither is present.
+int parse_json_bool(bool *result, char *data, size_t data_size)
+{
+    *result = false;
+    if (data_size >= 4 && memcmp(data, "true", 4) == 0) {
+        *result = true;
+        return 4;
+    }
+    if (data_size >= 5 && memcmp(data, "false", 5) == 0) {
+        return 5;
+    }
+    return -1;
+}
+
+// Prints the array as JSON to stdout.
+void printf_json_array(struct json_array *array)
+{
+    printf("[");
+    for (size_t i = 0; i < array->count; i++) {
+        printf_json_value(array->values[i]);
+        if (i+1 < array->count) printf(",");
+    }
+    printf("]");
+}
+
+// Prints the object as JSON to stdout.
+void printf_json_object(struct json_object *object)
+{
+    printf("{");
+    for (size_t i = 0; i < object->count; i++) {
+        printf_json_string(object->keys[i]);
+        printf(":");
+        printf_json_value(object->values[i]);
+        if (i+1 < object->count) printf(",");
+    }
+    printf("}");
+}
+
+// Prints the string in double quotes to stdout.
+void printf_json_string(struct json_string *string)
+{
+    // TODO: Support to display escaped sequences
+    printf("\"%.*s\"", (int)string->size, string->str);
+}
+
+// Prints any value as JSON to stdout.
+void printf_json_value(struct json_value *value)
+{
+    switch(value->type) {
+    case JSON_TYPE_NULL:
+        printf("null");
+        break;
+    case JSON_TYPE_OBJECT:
+        printf_json_object(value->object);
+        break;
+    case JSON_TYPE_ARRAY:
+        printf_json_array(value->array);
+        break;
+    case JSON_TYPE_STRING:
+        printf_json_string(value->string);
+        break;
+    case JSON_TYPE_NUMBER:
+        printf("%.16f", value->number);
+        break;
+    case JSON_TYPE_BOOLEAN:
+        printf("%s", value->boolean ? "true" : "false");
+        break;
+    }
+}
diff --git a/src/json_parser.h b/src/json_parser.h
new file mode 100644
index 0000000..de2ec70
--- /dev/null
+++ b/src/json_parser.h
@@ -0,0 +1,63 @@
+#ifndef JSON_PARSER_H
+#define JSON_PARSER_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+typedef double f64;
+
+enum json_type {
+    JSON_TYPE_NULL,
+    JSON_TYPE_OBJECT,
+    JSON_TYPE_ARRAY,
+    JSON_TYPE_STRING,
+    JSON_TYPE_NUMBER,
+    JSON_TYPE_BOOLEAN,
+};
+
+struct json_string {
+    char *str;
+    size_t size;
+};
+
+struct json_array {
+    struct json_value **values;
+    size_t count;
+};
+
+struct json_object {
+    struct json_string **keys;
+    struct json_value **values;
+    size_t count;
+};
+
+struct json_value {
+    enum json_type type;
+    union {
+        bool boolean;
+        f64 number;
+        struct json_string *string;
+        struct json_array *array;
+        struct json_object *object;
+    };
+};
+
+void free_json_value(struct json_value *value);
+void free_json_array(struct json_array *array);
+void free_json_string(struct json_string *string);
+void free_json_object(struct json_object *object);
+
+int parse_json_null(char *data, size_t data_size);
+int parse_json_bool(bool *result, char *data, size_t data_size);
+int parse_json_number(f64 *result, char *data, size_t data_size);
+int parse_json_value(struct json_value *result, char *data, size_t data_size);
+int parse_json_object(struct json_object *result, char *data, size_t data_size);
+int parse_json_array(struct json_array *result, char *data, size_t data_size);
+int parse_json_string(struct json_string *result, char *data, size_t data_size);
+
+void printf_json_value(struct json_value *result);
+void printf_json_string(struct json_string *string);
+void printf_json_object(struct json_object *object);
+void printf_json_array(struct json_array *array);
+
+#endif // JSON_PARSER_H
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..72ea432
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,53 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "json_parser.c"
+
+void print_usage(char *program)
+{
+    printf("Usage: %s [test-bin-file]\n", program);
+}
+
+// Returns the size of `f` in bytes and rewinds it, or 0 if it is unknown.
+size_t get_file_size(FILE *f)
+{
+    long size = fseek(f, 0, SEEK_END) == 0 ? ftell(f) : -1;
+    rewind(f);
+    return size < 0 ? 0 : (size_t)size;
+}
+
+// Parses the JSON file named by argv[1] and prints it back to stdout.
+int main(int argc, char **argv)
+{
+    if (argc < 2) {
+        print_usage(argv[0]);
+        return -1;
+    }
+
+    FILE *f = fopen(argv[1], "rb");
+    if (f == NULL) {
+        printf("Failed to open: %s\n", argv[1]);
+        return -1;
+    }
+
+    // Heap buffer (a VLA could overflow the stack), zeroed so the text ends in NUL
+    size_t json_size = get_file_size(f);
+    char *json_data = calloc(json_size + 1, 1);
+    struct json_value *parsed = calloc(1, sizeof(*parsed));
+    size_t bytes_read = json_data ? fread(json_data, 1, json_size, f) : 0;
+    fclose(f);
+    if (json_data == NULL || parsed == NULL || bytes_read != json_size) {
+        printf("Failed to read all contents of file\n");
+        free(json_data);
+        free(parsed);
+        return -1;
+    }
+
+    int bytes_parsed = parse_json_value(parsed, json_data, json_size);
+    printf_json_value(parsed);
+    printf("\n");
+    free_json_value(parsed);
+    free(json_data);
+    return bytes_parsed < 0 ? -1 : 0;
+}