1
0

minimal json parser for generated data

This commit is contained in:
Rokas Puzonas 2023-06-06 00:35:26 +03:00
parent 97be659f53
commit ec61d20b29
4 changed files with 515 additions and 6 deletions

View File

@ -1,12 +1,17 @@
CFLAGS=-g -Wall
CFLAGS=-lm -g -Wall -Walloc-size-larger-than=-1
.PHONY := gen_data
.PHONY := gen_data main
gen_data: build/gen_data.exe
gen_data: build/gen_data
main: build/main
build/gen_data.exe: src/gen_data.c
build/gen_data: src/gen_data.c
mkdir -p build
gcc -o build/gen_data.exe src/gen_data.c $(CFLAGS) -O2
gcc -o build/gen_data src/gen_data.c $(CFLAGS) -O2
build/main: src/main.c src/json_parser.c
mkdir -p build
gcc -o build/main src/main.c $(CFLAGS) -O2
clean:
rm -r build

393
src/json_parser.c Normal file
View File

@ -0,0 +1,393 @@
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "json_parser.h"
// Element count of a real array (never pass a pointer). The argument is
// parenthesized before indexing so that expression arguments bind correctly.
#define ARRAY_LEN(x) (sizeof(x)/sizeof((x)[0]))
// Shorthand for an unsigned 8-bit value.
typedef uint8_t u8;
// Release an object together with every key and value it owns.
// Passing NULL is a no-op.
void free_json_object(struct json_object *object)
{
    if (object == NULL) return;

    // `count` is a size_t, so the index is one too; an `int` index would be
    // compared after an implicit conversion and could overflow.
    for (size_t i = 0; i < object->count; i++) {
        free_json_string(object->keys[i]);
        free_json_value(object->values[i]);
    }
    free(object->keys);
    free(object->values);
    free(object);
}
// Release a value and whatever heap payload its type carries.
// Only objects, arrays and strings own a payload; other types just free
// the value itself. Passing NULL is a no-op.
void free_json_value(struct json_value *value)
{
    if (value == NULL) {
        return;
    }

    switch (value->type) {
    case JSON_TYPE_OBJECT:
        free_json_object(value->object);
        break;
    case JSON_TYPE_ARRAY:
        free_json_array(value->array);
        break;
    case JSON_TYPE_STRING:
        free_json_string(value->string);
        break;
    default:
        // Remaining types store their data inline in the value.
        break;
    }

    free(value);
}
// Release a string and its character buffer. Passing NULL is a no-op.
void free_json_string(struct json_string *string)
{
    if (string != NULL) {
        free(string->str);
        free(string);
    }
}
// Release an array together with every element it owns.
// Passing NULL is a no-op.
void free_json_array(struct json_array *array)
{
    if (array == NULL) return;

    // Index with size_t to match the type of `count`.
    for (size_t i = 0; i < array->count; i++) {
        free_json_value(array->values[i]);
    }
    free(array->values);
    free(array);
}
// Advance the cursor `*data` by up to `amount` bytes, never past the end of
// the remaining input. `*data_size` shrinks by the same distance.
// Returns how many bytes the cursor actually moved.
static size_t inc_cursor(char **data, size_t *data_size, size_t amount)
{
    const size_t step = (amount > *data_size) ? *data_size : amount;

    *data += step;
    *data_size -= step;
    return step;
}
// True for the characters the parser skips between tokens: space, tab,
// line feed, carriage return, and the NUL byte.
static bool is_whitespace(char c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\0':
        return true;
    default:
        return false;
    }
}
// Advance the cursor past any leading whitespace.
// Returns the number of bytes skipped.
static size_t skip_ws(char **data, size_t *data_size)
{
    size_t ws_count = 0;
    // Test the remaining byte count itself. The pointer `data_size` is never
    // NULL, so comparing it with 0 would let the loop read past the buffer.
    while (*data_size > 0 && is_whitespace(**data)) {
        inc_cursor(data, data_size, 1);
        ws_count++;
    }
    return ws_count;
}
// Measure a JSON string literal that starts at data[0] (the opening quote).
// Returns the total size in bytes including both quotes, or (size_t)-1 when
// the input is empty or no closing quote is found within `data_size` bytes.
static size_t get_json_string_size(char *data, size_t data_size)
{
    if (data_size == 0) return (size_t)-1;
    assert(data[0] == '"');

    for (size_t i = 1; i < data_size; i++) {
        if (data[i] == '\\') {
            // A backslash always escapes the next byte, so skip the pair.
            // Scanning forward handles runs of backslashes such as `\\\"`,
            // which a fixed two-character look-behind gets wrong.
            i++;
            continue;
        }
        if (data[i] == '"') return i + 1;
    }
    return (size_t)-1;
}
// Convert one hexadecimal digit (either case) to its value 0..15.
// Any other character terminates the program via abort().
static u8 decode_hex(char hex)
{
    if (hex >= '0' && hex <= '9') {
        return hex - '0';
    }
    if (hex >= 'a' && hex <= 'f') {
        return hex - 'a' + 10;
    }
    if (hex >= 'A' && hex <= 'F') {
        return hex - 'A' + 10;
    }
    abort();
}
// Decode the four hex digits of a `\uXXXX` escape starting at data[0].
// Each digit contributes 4 bits, most significant digit first.
// The result is narrowed to `char`, so only code points that fit in a char
// survive (e.g. \u0041 -> 'A'); larger code points are truncated.
// TODO: emit UTF-8 for code points that do not fit in one byte.
static char read_hex_symbol(char *data)
{
    unsigned code_point = 0;
    for (size_t i = 0; i < 4; i++) {
        code_point = (code_point << 4) | decode_hex(data[i]);
    }
    return (char)code_point;
}
// True when at least one byte remains and the first byte equals `expected`.
static bool expect_char(char *data, size_t data_size, char expected)
{
    return data_size != 0 && *data == expected;
}
// True for the characters accepted inside a number token: the decimal
// digits, '.' and '-'.
static bool is_number_char(char c)
{
    if (c >= '0' && c <= '9') {
        return true;
    }
    return c == '.' || c == '-';
}
// True when at least one byte remains and the first byte can appear in a
// number token.
static bool expect_number_char(char *data, size_t data_size)
{
    return data_size != 0 && is_number_char(*data);
}
int parse_json_value(struct json_value *result, char *data, size_t data_size)
{
size_t bytes_parsed = 0;
bytes_parsed += skip_ws(&data, &data_size);
if (expect_char(data, data_size, '{')) {
result->type = JSON_TYPE_OBJECT;
result->object = malloc(sizeof(struct json_object));
int object_size = parse_json_object(result->object, data, data_size);
assert(object_size >= 0);
bytes_parsed += object_size;
} else if (expect_char(data, data_size, '[')) {
result->type = JSON_TYPE_ARRAY;
result->object = malloc(sizeof(struct json_array));
int array_size = parse_json_array(result->array, data, data_size);
assert(array_size >= 0);
bytes_parsed += array_size;
} else if (expect_number_char(data, data_size)) {
result->type = JSON_TYPE_NUMBER;
int number_size = parse_json_number(&result->number, data, data_size);
assert(number_size >= 0);
bytes_parsed += number_size;
} else {
// TODO: number, null, boolean, string
assert(false && "todo");
}
return bytes_parsed;
}
// Parse a JSON object starting at the '{' in data[0] into `result`.
// Keys and values are heap-allocated and owned by `result`; release them
// with free_json_object().
// Returns the number of bytes consumed, including both braces.
// Malformed input trips an assertion; allocation failure calls abort().
int parse_json_object(struct json_object *result, char *data, size_t data_size)
{
    assert(expect_char(data, data_size, '{'));
    size_t bytes_parsed = 0;
    bytes_parsed += inc_cursor(&data, &data_size, 1);
    bytes_parsed += skip_ws(&data, &data_size);

    result->count = 0;
    result->keys = NULL;
    result->values = NULL;

    if (expect_char(data, data_size, '}')) { // Empty object
        return (int)(bytes_parsed + 1);
    }

    while (data_size > 0) {
        size_t idx = result->count;
        size_t new_count = idx + 1;

        // Grow through temporaries so a failed realloc cannot overwrite the
        // only pointer to the existing storage.
        struct json_string **keys = realloc(result->keys, new_count*sizeof(*keys));
        if (keys == NULL) abort();
        result->keys = keys;

        struct json_value **values = realloc(result->values, new_count*sizeof(*values));
        if (values == NULL) abort();
        result->values = values;

        result->keys[idx] = malloc(sizeof(*result->keys[idx]));
        result->values[idx] = malloc(sizeof(*result->values[idx]));
        if (result->keys[idx] == NULL || result->values[idx] == NULL) abort();
        result->count = new_count;

        int key_size = parse_json_string(result->keys[idx], data, data_size);
        assert(key_size >= 0);
        bytes_parsed += inc_cursor(&data, &data_size, key_size);
        bytes_parsed += skip_ws(&data, &data_size);

        assert(expect_char(data, data_size, ':'));
        bytes_parsed += inc_cursor(&data, &data_size, 1);
        bytes_parsed += skip_ws(&data, &data_size);

        int value_size = parse_json_value(result->values[idx], data, data_size);
        assert(value_size >= 0);
        bytes_parsed += inc_cursor(&data, &data_size, value_size);
        bytes_parsed += skip_ws(&data, &data_size);

        // No comma means this was the last member.
        if (!expect_char(data, data_size, ',')) {
            break;
        }
        bytes_parsed += inc_cursor(&data, &data_size, 1);
        bytes_parsed += skip_ws(&data, &data_size);
    }

    assert(expect_char(data, data_size, '}'));
    // +1 accounts for the closing brace, which the cursor has not passed.
    return (int)(bytes_parsed + 1);
}
// Parse a JSON array starting at the '[' in data[0] into `result`.
// Elements are heap-allocated and owned by `result`; release them with
// free_json_array().
// Returns the number of bytes consumed, including both brackets.
// Malformed input trips an assertion; allocation failure calls abort().
int parse_json_array(struct json_array *result, char *data, size_t data_size)
{
    assert(expect_char(data, data_size, '['));
    size_t bytes_parsed = 0;
    bytes_parsed += inc_cursor(&data, &data_size, 1);
    bytes_parsed += skip_ws(&data, &data_size);

    result->count = 0;
    result->values = NULL;

    if (expect_char(data, data_size, ']')) { // Empty array
        return (int)(bytes_parsed + 1);
    }

    while (data_size > 0) {
        size_t idx = result->count;
        size_t new_count = idx + 1;

        // Grow through a temporary so a failed realloc cannot overwrite the
        // only pointer to the existing storage.
        struct json_value **values = realloc(result->values, new_count*sizeof(*values));
        if (values == NULL) abort();
        result->values = values;

        result->values[idx] = malloc(sizeof(*result->values[idx]));
        if (result->values[idx] == NULL) abort();
        result->count = new_count;

        int value_size = parse_json_value(result->values[idx], data, data_size);
        assert(value_size >= 0);
        bytes_parsed += inc_cursor(&data, &data_size, value_size);
        bytes_parsed += skip_ws(&data, &data_size);

        // No comma means this was the last element.
        if (!expect_char(data, data_size, ',')) {
            break;
        }
        bytes_parsed += inc_cursor(&data, &data_size, 1);
        bytes_parsed += skip_ws(&data, &data_size);
    }

    assert(expect_char(data, data_size, ']'));
    // +1 accounts for the closing bracket, which the cursor has not passed.
    return (int)(bytes_parsed + 1);
}
// Parse a JSON number at the start of `data` into `*result`.
// The numeric token is copied into a NUL-terminated scratch buffer before
// conversion, because `data` is not guaranteed to be terminated and strtod
// would otherwise read beyond `data_size`.
// Returns the number of bytes strtod actually consumed, or -1 when no number
// could be read. Allocation failure calls abort().
int parse_json_number(f64 *result, char *data, size_t data_size)
{
    // Collect the longest run of characters that can belong to a number.
    // Exponent characters are accepted only after the first character.
    size_t token_len = 0;
    while (token_len < data_size) {
        char c = data[token_len];
        bool exponent_char = token_len > 0 && (c == 'e' || c == 'E' || c == '+');
        if (!is_number_char(c) && !exponent_char) break;
        token_len++;
    }
    if (token_len == 0) return -1;

    char *token = malloc(token_len + 1);
    if (token == NULL) abort();
    memcpy(token, data, token_len);
    token[token_len] = '\0';

    // strtod may stop before the end of the token (e.g. "1-2"); report what
    // it consumed so the caller's cursor matches the parsed value.
    char *end = token;
    *result = strtod(token, &end);
    int bytes_parsed = (int)(end - token);
    free(token);

    return bytes_parsed > 0 ? bytes_parsed : -1;
}
// Parse a JSON string literal starting at the '"' in data[0] into `result`.
// On success `result->str` is a heap buffer holding `result->size` decoded
// bytes followed by a NUL terminator (not counted in `size`), and the return
// value is the size of the literal in bytes including both quotes.
// On malformed input (no closing quote, truncated or unknown escape) the
// function returns -1 and leaves `result` as an empty string (NULL, 0).
// Allocation failure calls abort().
int parse_json_string(struct json_string *result, char *data, size_t data_size)
{
    assert(expect_char(data, data_size, '"'));

    result->str = NULL;
    result->size = 0;

    size_t json_string_size = get_json_string_size(data, data_size);
    if (json_string_size == (size_t)-1) return -1; // No closing quote

    // Index of the closing quote; everything in (0, closing_quote) is content.
    size_t closing_quote = json_string_size - 1;

    // Decoding never produces more bytes than the raw content, which is
    // closing_quote - 1 bytes long. One extra byte holds the terminator and
    // keeps the allocation non-zero for empty strings.
    char *decoded = malloc(closing_quote);
    if (decoded == NULL) abort();
    size_t decoded_len = 0;

    for (size_t i = 1; i < closing_quote; i++) {
        char c = data[i];
        if (c == '"') {
            break;
        }
        if (c == '\\') {
            // The escaped character must lie before the closing quote.
            if (i + 1 >= closing_quote) goto malformed;
            i++;
            switch (data[i]) {
            case '\\': c = '\\'; break;
            case '"':  c = '"';  break;
            case '/':  c = '/';  break;
            case 'b':  c = '\b'; break;
            case 'f':  c = '\f'; break;
            case 'n':  c = '\n'; break;
            case 'r':  c = '\r'; break;
            case 't':  c = '\t'; break;
            case 'u':
                // Four hex digits must follow, all inside the literal.
                if (i + 4 >= closing_quote) goto malformed;
                c = read_hex_symbol(data + i + 1);
                i += 4;
                break;
            default:
                goto malformed; // Unknown escape sequence
            }
        }
        decoded[decoded_len++] = c;
    }
    decoded[decoded_len] = '\0';

    // Shrinking is best-effort; on failure the larger buffer is kept.
    char *shrunk = realloc(decoded, decoded_len + 1);
    if (shrunk != NULL) decoded = shrunk;

    result->str = decoded;
    result->size = decoded_len;
    return (int)json_string_size;

malformed:
    free(decoded);
    return -1;
}
// Recognize the literal `null` at the start of `data`.
// Returns the number of bytes consumed (4), or -1 when the input does not
// start with `null` or is too short.
int parse_json_null(char *data, size_t data_size)
{
    static const char literal[] = "null";
    const size_t literal_len = sizeof(literal) - 1;

    if (data_size < literal_len || memcmp(data, literal, literal_len) != 0) {
        return -1;
    }
    return (int)literal_len;
}
// Recognize the literal `true` or `false` at the start of `data` and store
// the corresponding value in `*result`.
// Returns the number of bytes consumed (4 or 5), or -1 when neither literal
// is present; `*result` is left untouched in that case.
int parse_json_bool(bool *result, char *data, size_t data_size)
{
    static const char true_literal[] = "true";
    static const char false_literal[] = "false";
    const size_t true_len = sizeof(true_literal) - 1;
    const size_t false_len = sizeof(false_literal) - 1;

    if (data_size >= true_len && memcmp(data, true_literal, true_len) == 0) {
        *result = true;
        return (int)true_len;
    }
    if (data_size >= false_len && memcmp(data, false_literal, false_len) == 0) {
        *result = false;
        return (int)false_len;
    }
    return -1;
}
// Print an array to stdout as `[v0,v1,...]` with no extra whitespace.
void printf_json_array(struct json_array *array)
{
    printf("[");
    // Index with size_t to match the type of `count`; the separator goes
    // before every element except the first.
    for (size_t i = 0; i < array->count; i++) {
        if (i > 0) printf(",");
        printf_json_value(array->values[i]);
    }
    printf("]");
}
// Print an object to stdout as `{"k0":v0,"k1":v1,...}` with no extra
// whitespace.
void printf_json_object(struct json_object *object)
{
    printf("{");
    // Index with size_t to match the type of `count`; the separator goes
    // before every member except the first.
    for (size_t i = 0; i < object->count; i++) {
        if (i > 0) printf(",");
        printf_json_string(object->keys[i]);
        printf(":");
        printf_json_value(object->values[i]);
    }
    printf("}");
}
// Print a string to stdout as a quoted JSON string literal.
// Quotes, backslashes and control characters are escaped so the output is
// valid JSON. All `size` bytes are written, including embedded NUL bytes
// (as \u0000).
void printf_json_string(struct json_string *string)
{
    putchar('"');
    for (size_t i = 0; i < string->size; i++) {
        // Work on the unsigned value so bytes >= 0x80 are not mistaken for
        // control characters on platforms where `char` is signed.
        unsigned char c = (unsigned char)string->str[i];
        switch (c) {
        case '"':  fputs("\\\"", stdout); break;
        case '\\': fputs("\\\\", stdout); break;
        case '\b': fputs("\\b", stdout);  break;
        case '\f': fputs("\\f", stdout);  break;
        case '\n': fputs("\\n", stdout);  break;
        case '\r': fputs("\\r", stdout);  break;
        case '\t': fputs("\\t", stdout);  break;
        default:
            if (c < 0x20) {
                printf("\\u%04x", (unsigned)c);
            } else {
                putchar(c);
            }
            break;
        }
    }
    putchar('"');
}
void printf_json_value(struct json_value *value)
{
switch(value->type) {
case JSON_TYPE_NULL:
printf("null");
break;
case JSON_TYPE_OBJECT:
printf_json_object(value->object);
break;
case JSON_TYPE_ARRAY:
printf_json_array(value->array);
break;
case JSON_TYPE_STRING:
printf_json_string(value->string);
break;
case JSON_TYPE_NUMBER:
printf("%.16f", value->number);
break;
case JSON_TYPE_BOOLEAN:
printf("%s", value->boolean ? "true" : "false");
break;
}
}

58
src/json_parser.h Normal file
View File

@ -0,0 +1,58 @@
// Include guard: this header defines types, so including it twice in one
// translation unit would otherwise be a redefinition error.
#pragma once
#include <stdbool.h>
#include <stddef.h>
// Double-precision floating point type used for JSON numbers.
typedef double f64;
// Type tag stored in `json_value.type`. It tells which member of the
// value's union is meaningful.
enum json_type {
// No payload.
JSON_TYPE_NULL,
// Payload is `json_value.object`.
JSON_TYPE_OBJECT,
// Payload is `json_value.array`.
JSON_TYPE_ARRAY,
// Payload is `json_value.string`.
JSON_TYPE_STRING,
// Payload is `json_value.number`.
JSON_TYPE_NUMBER,
// Payload is `json_value.boolean`.
JSON_TYPE_BOOLEAN,
};
// A decoded string. Code that reads it uses `size` to know how many bytes
// are valid rather than looking for a terminator.
struct json_string {
// Heap buffer with the decoded bytes; released by free_json_string().
char *str;
// Number of valid bytes in `str`.
size_t size;
};
// An ordered list of values.
struct json_array {
// Heap array of `count` pointers, each to a separately allocated value;
// all of it is released by free_json_array().
struct json_value **values;
// Number of elements in `values`.
size_t count;
};
// A list of key/value members stored as two parallel arrays: keys[i]
// belongs with values[i].
struct json_object {
// Heap array of `count` pointers to separately allocated key strings.
struct json_string **keys;
// Heap array of `count` pointers to separately allocated values.
struct json_value **values;
// Number of members (length of both arrays).
size_t count;
};
// A single JSON value: a type tag plus a union holding the payload for
// that type.
struct json_value {
// Selects which union member below is in use.
enum json_type type;
// Only the member matching `type` is meaningful. The pointer members
// refer to heap allocations released by free_json_value().
union {
bool boolean;
f64 number;
struct json_string *string;
struct json_array *array;
struct json_object *object;
};
};
// Destructors. Each releases the given node and everything it owns, and
// accepts NULL as a no-op.
void free_json_value(struct json_value *value);
void free_json_array(struct json_array *array);
void free_json_string(struct json_string *string);
void free_json_object(struct json_object *object);
// Parsers. Each reads from `data`, which holds `data_size` bytes, writes into
// the caller-provided `result` (where present) and returns an int byte count;
// the parser's own callers assert that this count is not negative.
int parse_json_null(char *data, size_t data_size);
int parse_json_bool(bool *result, char *data, size_t data_size);
int parse_json_number(f64 *result, char *data, size_t data_size);
int parse_json_value(struct json_value *result, char *data, size_t data_size);
int parse_json_object(struct json_object *result, char *data, size_t data_size);
int parse_json_array(struct json_array *result, char *data, size_t data_size);
int parse_json_string(struct json_string *result, char *data, size_t data_size);
// Printers. Each writes a compact textual form of its argument to stdout.
void printf_json_value(struct json_value *result);
void printf_json_string(struct json_string *string);
void printf_json_object(struct json_object *object);
void printf_json_array(struct json_array *array);

53
src/main.c Normal file
View File

@ -0,0 +1,53 @@
#include <stdio.h>
#include <inttypes.h>
#include <inttypes.h>
#include "json_parser.c"
// Write the command-line synopsis to stdout, using `program` as the
// executable name.
void print_usage(char *program)
{
    fprintf(stdout, "Usage: %s <json-file> [test-bin-file]\n", program);
}
// Return the size of an open file in bytes and leave the stream positioned
// at its start.
// Returns 0 when the size cannot be determined (seek or tell failure), which
// callers cannot distinguish from an empty file.
size_t get_file_size(FILE *f)
{
    if (fseek(f, 0, SEEK_END) != 0) {
        return 0;
    }
    // ftell reports failure as -1L; converting that straight to size_t would
    // produce an enormous bogus size.
    long end = ftell(f);
    if (fseek(f, 0, SEEK_SET) != 0 || end < 0) {
        return 0;
    }
    return (size_t)end;
}
// Entry point: read the JSON file named by argv[1], parse it, and print the
// parsed value back to stdout followed by a newline.
// Returns 0 on success and -1 on any usage, I/O, allocation or parse error.
int main(int argc, char **argv)
{
    if (argc < 2) {
        print_usage(argv[0]);
        return -1;
    }

    char *json_filename = argv[1];
    // Binary mode: in text mode some platforms translate line endings, which
    // makes the byte count from fread disagree with the size from ftell.
    FILE *f = fopen(json_filename, "rb");
    if (f == NULL) {
        printf("Failed to open: %s\n", json_filename);
        return -1;
    }

    size_t json_size = get_file_size(f);
    // Reject an empty file and the (size_t)-1 that an unchecked ftell error
    // would produce.
    if (json_size == 0 || json_size == (size_t)-1) {
        printf("File is empty or its size could not be determined: %s\n", json_filename);
        fclose(f);
        return -1;
    }

    // Heap buffer instead of a variable-length array: the file size is
    // unbounded and a VLA that large can overflow the stack.
    char *json_data = malloc(json_size);
    if (json_data == NULL) {
        printf("Failed to allocate %zu bytes for file contents\n", json_size);
        fclose(f);
        return -1;
    }

    size_t bytes_read = fread(json_data, 1, json_size, f);
    fclose(f);
    if (bytes_read != json_size) {
        printf("Failed to read all contents of file\n");
        free(json_data);
        return -1;
    }

    struct json_value *parsed = malloc(sizeof(*parsed));
    if (parsed == NULL) {
        printf("Failed to allocate parse result\n");
        free(json_data);
        return -1;
    }

    int bytes_parsed = parse_json_value(parsed, json_data, json_size);
    if (bytes_parsed < 0) {
        printf("Failed to parse: %s\n", json_filename);
        // The value may be only partly initialized, so release just the
        // top-level node rather than walking it.
        free(parsed);
        free(json_data);
        return -1;
    }

    printf_json_value(parsed);
    printf("\n");

    free_json_value(parsed);
    free(json_data);
    return 0;
}