From 25730681a9fe333057709071dba2c2483999eda7 Mon Sep 17 00:00:00 2001
From: Rokas Puzonas
Date: Thu, 3 Oct 2024 19:20:11 +0300
Subject: [PATCH] first draft, parser working

---
 README.md        |  11 ++
 src/arena.c      | 123 +++++++++++++++++++++++
 src/byte_slice.c | 125 +++++++++++++++++++++++
 src/client.c     | 170 +++++++++++++++++++++++++++++++
 src/log.c        |   8 +
 src/main.c       |  39 ++++++-
 src/parser.c     | 167 +++++++++++++++++++++++++++++++
 src/request.c    | 118 ++++++++++++++++++++++
 src/server.c     | 245 +++++++++++++++++++++++++++++++++++++++++++++
 src/utils.c      |  14 +++
 10 files changed, 1019 insertions(+), 1 deletion(-)
 create mode 100644 README.md
 create mode 100644 src/arena.c
 create mode 100644 src/byte_slice.c
 create mode 100644 src/client.c
 create mode 100644 src/log.c
 create mode 100644 src/parser.c
 create mode 100644 src/request.c
 create mode 100644 src/server.c
 create mode 100644 src/utils.c

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4629ad5
--- /dev/null
+++ b/README.md
@@ -0,0 +1,11 @@
+# HTTP server in C
+
+Run program:
+```
+zig build run
+```
+
+## Resources
+
+* [Hypertext Transfer Protocol -- HTTP/1.1](https://www.rfc-editor.org/rfc/rfc2616)
+* [HTTP Working Group](https://httpwg.org/)
diff --git a/src/arena.c b/src/arena.c
new file mode 100644
index 0000000..be471f0
--- /dev/null
+++ b/src/arena.c
@@ -0,0 +1,123 @@
+#ifndef ARENA_
+#define ARENA_
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define ARENA_BLOCK_SIZE 256
+
+// Allocations are placed at multiples of this many bytes from the start of a
+// block, so the memory can hold any object (e.g. 'struct http_header' arrays).
+#define ARENA_ALIGNMENT 16
+
+// Each block starts with a pointer to the previous block. The header is padded
+// to the alignment, so the first allocation of a block is aligned as well.
+#define ARENA_HEADER_SIZE ARENA_ALIGNMENT
+
+#include "byte_slice.c"
+
+// Bump allocator backed by a chain of malloc'ed blocks.
+// Blocks are never moved or resized, so every pointer returned by
+// arena_alloc() stays valid until arena_deinit() is called.
+// A zero-initialized arena is valid, its first block is created on demand.
+struct arena {
+    uint8_t *ptr;    // Newest block, NULL while nothing has been allocated
+    size_t len;      // Bytes used in the newest block, including its header
+    size_t capacity; // Total size of the newest block
+};
+
+static size_t round_up_to_multiple(size_t value, size_t multiple) {
+    size_t remainder = value % multiple;
+    return remainder == 0 ? value : value + (multiple - remainder);
+}
+
+// Allocates a block with room for at least 'size' bytes and makes it the
+// newest block of the arena. Returns 0 on success and -1 on failure.
+static int arena_push_block(struct arena *arena, size_t size) {
+    if (size > SIZE_MAX - ARENA_HEADER_SIZE - ARENA_BLOCK_SIZE) {
+        return -1;
+    }
+
+    size_t capacity = round_up_to_multiple(ARENA_HEADER_SIZE + size, ARENA_BLOCK_SIZE);
+    uint8_t *block = malloc(capacity);
+    if (!block) {
+        return -1;
+    }
+
+    // Remember the previous block, arena_deinit() follows this chain
+    memcpy(block, &arena->ptr, sizeof(arena->ptr));
+
+    arena->ptr = block;
+    arena->len = ARENA_HEADER_SIZE;
+    arena->capacity = capacity;
+    return 0;
+}
+
+// Prepares an unused arena. Returns 0 on success and -1 when out of memory.
+int arena_init(struct arena *arena) {
+    arena->ptr = NULL;
+    arena->len = 0;
+    arena->capacity = 0;
+    return arena_push_block(arena, 0);
+}
+
+// Frees every block. All pointers handed out by the arena become invalid.
+void arena_deinit(struct arena *arena) {
+    uint8_t *block = arena->ptr;
+    while (block) {
+        uint8_t *prev = NULL;
+        memcpy(&prev, block, sizeof(prev));
+        free(block);
+        block = prev;
+    }
+
+    arena->ptr = NULL;
+    arena->len = 0;
+    arena->capacity = 0;
+}
+
+// Returns 'size' bytes of uninitialized memory, or NULL when out of memory.
+// A zero-initialized arena may be used directly, without arena_init().
+void *arena_alloc(struct arena *arena, size_t size) {
+    size_t offset = round_up_to_multiple(arena->len, ARENA_ALIGNMENT);
+    if (arena->ptr == NULL || size > arena->capacity - offset) {
+        // Start a new block instead of growing the old one with realloc(),
+        // which could move it and invalidate earlier allocations.
+        if (arena_push_block(arena, size)) {
+            return NULL;
+        }
+        offset = arena->len;
+    }
+
+    arena->len = offset + size;
+    return arena->ptr + offset;
+}
+
+// Copies 'len' bytes from 'ptr' into the arena. When 'ptr' is NULL the memory
+// is allocated but left uninitialized. Returns NULL when out of memory.
+void *arena_dupe(struct arena *arena, const void *ptr, size_t len) {
+    void *new_ptr = arena_alloc(arena, len);
+    if (!new_ptr) {
+        return NULL;
+    }
+
+    if (ptr) {
+        memcpy(new_ptr, ptr, len);
+    }
+
+    return new_ptr;
+}
+
+// Copies a slice into the arena. The result has a NULL 'ptr' when out of
+// memory, an empty slice is duplicated as a non-NULL pointer with length 0.
+struct byte_slice arena_dupe_slice(struct arena *arena, struct byte_slice slice) {
+    void *new_ptr = arena_dupe(arena, slice.ptr, slice.len);
+    if (!new_ptr) {
+        return byte_slice_init_zero();
+    }
+
+    return byte_slice_init(new_ptr, slice.len);
+}
+
+#endif //ARENA_
diff --git a/src/byte_slice.c b/src/byte_slice.c
new file mode 100644
index 0000000..8af377d
--- /dev/null
+++ b/src/byte_slice.c
@@ -0,0 +1,125 @@
+#ifndef BYTE_SLICE_
+#define BYTE_SLICE_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+// Non-owning view of 'len' bytes starting at 'ptr'
+struct byte_slice {
+    uint8_t *ptr;
+    size_t len;
+};
+
+struct byte_slice byte_slice_init(uint8_t *ptr, size_t len) {
+    return (struct byte_slice){ .ptr = ptr, .len = len };
+}
+
+struct byte_slice byte_slice_init_zero(void) {
+    return (struct byte_slice){ 0 };
+}
+
+struct byte_slice byte_slice_init_str(char *str) {
+    return byte_slice_init((uint8_t*)str, str ? strlen(str) : 0);
+}
+
+// Drops the first 'amount' bytes by moving the remaining bytes to the front
+void byte_slice_move_left(struct byte_slice *slice, size_t amount) {
+    amount = MIN(amount, slice->len);
+    memmove(slice->ptr, slice->ptr + amount, slice->len - amount);
+
+    slice->len -= amount;
+}
+
+// Returns the index of the first 'needle', or -1 when it is not present
+int byte_slice_find(struct byte_slice slice, uint8_t needle) {
+    for (size_t i = 0; i < slice.len; i++) {
+        if (slice.ptr[i] == needle) {
+            return (int)i;
+        }
+    }
+    return -1;
+}
+
+// Splits around the first 'separator', which is part of neither half.
+// Returns -1 and leaves 'left' and 'right' untouched when there is none.
+int byte_slice_split_once(struct byte_slice slice, uint8_t separator, struct byte_slice *left, struct byte_slice *right) {
+    int index = byte_slice_find(slice, separator);
+    if (index < 0) {
+        return -1;
+    }
+
+    *left = byte_slice_init(slice.ptr, index);
+    *right = byte_slice_init(slice.ptr + index + 1, slice.len - index - 1);
+
+    return 0;
+}
+
+bool byte_slice_eql(struct byte_slice a, struct byte_slice b) {
+    if (a.len != b.len) {
+        return false;
+    }
+    for (size_t i = 0; i < a.len; i++) {
+        if (a.ptr[i] != b.ptr[i]) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+bool byte_slice_eql_str(struct byte_slice slice, const char *str) {
+    // yes, the 'const' is cast away, but that is fine.
+    // Because I know that the string won't be modified.
+    struct byte_slice str_slice = byte_slice_init_str((char *)str);
+    return byte_slice_eql(slice, str_slice);
+}
+
+bool byte_slice_is_number(struct byte_slice slice) {
+    for (size_t i = 0; i < slice.len; i++) {
+        if (!('0' <= slice.ptr[i] && slice.ptr[i] <= '9')) {
+            return false;
+        }
+    }
+    return true;
+}
+
+void byte_slice_trim_left(struct byte_slice *slice, struct byte_slice to_trim) {
+    size_t left = 0;
+    // The bounds check must come first, 'slice->ptr[left]' is only readable
+    // while 'left' is inside of the slice.
+    while (left < slice->len && byte_slice_find(to_trim, slice->ptr[left]) != -1) {
+        left++;
+    }
+
+    slice->ptr += left;
+    slice->len -= left;
+}
+
+void byte_slice_trim_right(struct byte_slice *slice, struct byte_slice to_trim) {
+    size_t right = 0;
+    // The bounds check must come first, otherwise an empty slice would be
+    // read at index 'len - 1', which wraps around.
+    while (right < slice->len && byte_slice_find(to_trim, slice->ptr[slice->len - right - 1]) != -1) {
+        right++;
+    }
+
+    slice->len -= right;
+}
+
+void byte_slice_trim(struct byte_slice *slice, struct byte_slice to_trim) {
+    byte_slice_trim_right(slice, to_trim);
+    byte_slice_trim_left(slice, to_trim);
+}
+
+void byte_slice_trim_str(struct byte_slice *slice, const char *str) {
+    // yes, the 'const' is cast away, but that is fine.
+    // Because I know that the string won't be modified.
+    struct byte_slice str_slice = byte_slice_init_str((char *)str);
+    byte_slice_trim(slice, str_slice);
+}
+
+#endif //BYTE_SLICE_
diff --git a/src/client.c b/src/client.c
new file mode 100644
index 0000000..ba877c6
--- /dev/null
+++ b/src/client.c
@@ -0,0 +1,170 @@
+#ifndef CLIENT_
+#define CLIENT_
+
+#include <errno.h>
+#include <stdint.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "parser.c"
+#include "request.c"
+#include "log.c"
+
+#define CLIENT_READ_BUFFER_SIZE 4096
+
+enum parser_state {
+    PARSER_STATE_REQUEST_LINE,
+    PARSER_STATE_HEADERS,
+    PARSER_STATE_BODY,
+    PARSER_STATE_DONE,
+};
+
+struct http_client {
+    int fd;
+
+    struct sockaddr_storage addr;
+    socklen_t addr_len;
+
+    uint8_t read_buffer[CLIENT_READ_BUFFER_SIZE];
+    size_t read_buffer_len;
+
+    enum parser_state parse_state;
+    struct http_request parse_request;
+};
+
+// Returns true when the parsed headers announce a request body.
+// TODO: Handle 'Transfer-Encoding: chunked'
+static bool http_client_expects_body(struct http_request *request) {
+    struct byte_slice *content_length = http_request_find_header(request, byte_slice_init_str("Content-Length"));
+    return content_length != NULL && !byte_slice_eql_str(*content_length, "0");
+}
+
+// Runs the buffered bytes through the request parser. Everything that was
+// consumed is removed from the front of 'buffer'.
+// Returns -1 when the request is malformed or memory ran out, otherwise 0.
+static int http_client_parse(struct http_client *client, struct byte_slice *buffer) {
+    struct http_request *request = &client->parse_request;
+    struct byte_slice line = { 0 };
+
+    while (buffer->len > 0) {
+        if (client->parse_state == PARSER_STATE_REQUEST_LINE) {
+            if (find_next_line(*buffer, &line)) {
+                break;
+            }
+
+            // The precision of '%.*s' must be an 'int', not a 'size_t'
+            log_debug("request-line: %.*s", (int)line.len, (char*)line.ptr);
+
+            struct byte_slice method = { 0 };
+            struct byte_slice uri = { 0 };
+            if (parse_request_line(line, &method, &uri, &request->version)) {
+                return -1;
+            }
+
+            // 'method' and 'uri' point into the read buffer, which is about to
+            // be shifted. Keep copies that live as long as the request.
+            request->method = arena_dupe_slice(&request->arena, method);
+            request->uri = arena_dupe_slice(&request->arena, uri);
+            if (!request->method.ptr || !request->uri.ptr) {
+                return -1;
+            }
+
+            log_debug("- method: '%.*s'", (int)request->method.len, (char*)request->method.ptr);
+            log_debug("- uri: '%.*s'", (int)request->uri.len, (char*)request->uri.ptr);
+            log_debug("- version: 'HTTP/%d.%d'", request->version.major, request->version.minor);
+
+            remove_line(buffer, line);
+            client->parse_state = PARSER_STATE_HEADERS;
+
+        } else if (client->parse_state == PARSER_STATE_HEADERS) {
+            if (find_next_line(*buffer, &line)) {
+                break;
+            }
+
+            if (line.len == 0) {
+                remove_line(buffer, line);
+
+                if (!http_client_expects_body(request)) {
+                    // The request is complete, do not wait for body bytes
+                    // that will never arrive.
+                    client->parse_state = PARSER_STATE_DONE;
+                    break;
+                }
+
+                client->parse_state = PARSER_STATE_BODY;
+                continue;
+            }
+
+            struct http_header header = { 0 };
+            if (parse_header_line(line, &header)) {
+                return -1;
+            }
+
+            log_debug("header: %.*s: %.*s", (int)header.name.len, (char*)header.name.ptr, (int)header.value.len, (char*)header.value.ptr);
+
+            if (http_request_append_header(request, header)) {
+                return -1;
+            }
+
+            remove_line(buffer, line);
+
+        } else if (client->parse_state == PARSER_STATE_BODY) {
+            // TODO: Store the body and wait for 'Content-Length' bytes
+            log_debug("data: %.*s", (int)buffer->len, (char*)buffer->ptr);
+            buffer->len = 0;
+            client->parse_state = PARSER_STATE_DONE;
+            break;
+
+        } else {
+            // PARSER_STATE_DONE, the request has to be picked up first
+            break;
+        }
+    }
+
+    return 0;
+}
+
+// Reads the available bytes from the socket and advances the request parser.
+// Returns 0 when the connection should stay open and -1 when it should be
+// closed (peer disconnected, receive error or malformed request).
+int http_client_recv(struct http_client *client) {
+    ssize_t received = recv(
+        client->fd,
+        client->read_buffer + client->read_buffer_len,
+        CLIENT_READ_BUFFER_SIZE - client->read_buffer_len,
+        MSG_DONTWAIT
+    );
+
+    if (received == 0) {
+        // The peer closed the connection
+        return -1;
+    }
+
+    if (received < 0) {
+        if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
+            // Nothing to read right now, the next poll will try again
+            return 0;
+        }
+        return -1;
+    }
+
+    client->read_buffer_len += (size_t)received;
+    log_debug("Received %zd bytes", received);
+
+    struct byte_slice buffer = byte_slice_init(client->read_buffer, client->read_buffer_len);
+    if (http_client_parse(client, &buffer)) {
+        return -1;
+    }
+
+    client->read_buffer_len = buffer.len;
+    log_debug("Left over %zu bytes", client->read_buffer_len);
+
+    if (client->read_buffer_len == CLIENT_READ_BUFFER_SIZE) {
+        // A line that does not fit into the read buffer can never be parsed
+        return -1;
+    }
+
+    return 0;
+}
+
+#endif //CLIENT_
diff --git a/src/log.c b/src/log.c
new file mode 100644
index 0000000..a50bb7a
--- /dev/null
+++ b/src/log.c
@@ -0,0 +1,8 @@
+#ifndef LOG_
+#define LOG_
+
+#include <stdio.h>
+
+#define log_debug(fmt, ...) printf("DEBUG: " fmt "\n", ##__VA_ARGS__)
+
+#endif //LOG_
diff --git a/src/main.c b/src/main.c
index ef66efb..5f28cc6 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,6 +1,43 @@
+#include <stdbool.h>
+#include <stdint.h>
 #include <stdio.h>
+
+#include "server.c"
 
 int main() {
-    printf("Hello, World!\n");
+    uint16_t server_port = 8080;
+
+    struct http_server server = { 0 };
+    if (http_server_init(&server)) {
+        printf("Failed to initialize server\n");
+        return -1;
+    }
+
+    if (http_server_listen(&server, server_port)) {
+        printf("Failed to listen on port %d\n", server_port);
+        http_server_deinit(&server);
+        return -1;
+    }
+
+    printf("Listening on port %d\n", server_port);
+
+    while (true) {
+        struct http_request request = { 0 };
+        enum poll_result result = http_server_poll_request(&server, &request, -1);
+        if (result == POLL_RESULT_TIMEOUT) {
+            log_debug("Poll timeout");
+            continue;
+        } else if (result == POLL_RESULT_ERROR) {
+            log_debug("Poll failed");
+            break;
+        }
+
+        printf("Got request\n");
+
+        // The request owns its arena after it was handed out by the server
+        arena_deinit(&request.arena);
+    }
+
+    http_server_deinit(&server);
     return 0;
 }
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..1a0188b
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,167 @@
+#ifndef PARSER_
+#define PARSER_
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "byte_slice.c"
+#include "request.c"
+#include "utils.c"
+
+// Finds the first line of 'buffer', the line ending is not part of 'line'.
+// Returns -1 when the buffer does not hold a complete line yet.
+int find_next_line(struct byte_slice buffer, struct byte_slice *line) {
+    int newline_pos = byte_slice_find(buffer, '\n');
+    if (newline_pos == -1) {
+        return -1;
+    }
+
+    *line = byte_slice_init(buffer.ptr, newline_pos);
+    if (line->len > 0 && line->ptr[line->len-1] == '\r') {
+        line->len--;
+    }
+
+    return 0;
+}
+
+// Removes 'line' and its line ending from the front of 'buffer'.
+// 'line' must be the result of find_next_line() on the same buffer.
+void remove_line(struct byte_slice *buffer, struct byte_slice line) {
+    if (buffer->ptr[line.len] == '\r') {
+        byte_slice_move_left(buffer, line.len + 2);
+    } else {
+        byte_slice_move_left(buffer, line.len + 1);
+    }
+}
+
+bool is_visible_char(uint8_t c) {
+    // Constants '0x21' and '0x7E' from https://www.rfc-editor.org/rfc/rfc5234.html#appendix-B.1
+    return 0x21 <= c && c <= 0x7E;
+}
+
+bool is_obs_text(uint8_t c) {
+    // Constants '0x80' and '0xFF' from https://httpwg.org/specs/rfc9110.html#fields.values
+    // The upper bound needs no check, 0xFF is the largest 'uint8_t' value
+    return 0x80 <= c;
+}
+
+bool is_token_char(uint8_t c) {
+    const char allowed_symbols[] = "!#$%&'*+-.^_`|~";
+    return is_digit(c) || is_alpha(c) || memchr(allowed_symbols, c, sizeof(allowed_symbols)-1) != NULL;
+}
+
+bool is_token(struct byte_slice slice) {
+    if (slice.len == 0) {
+        return false;
+    }
+
+    for (size_t i = 0; i < slice.len; i++) {
+        if (!is_token_char(slice.ptr[i])) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Checks the bytes of a field value. Spaces and tabs are only accepted
+// between the first and the last byte.
+bool is_header_value(struct byte_slice slice) {
+    for (size_t i = 0; i < slice.len; i++) {
+        uint8_t c = slice.ptr[i];
+        if (i > 0 && i < slice.len-1) {
+            if (c == '\t' || c == ' ') {
+                continue;
+            }
+        }
+
+        bool field_vchar = is_visible_char(c) || is_obs_text(c);
+        if (!field_vchar) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Parses 'HTTP/<digit>.<digit>'. Returns 0 on success and -1 otherwise.
+int parse_http_version(struct http_version *version, struct byte_slice version_slice) {
+    struct byte_slice http_constant = { 0 };
+    struct byte_slice major_minor = { 0 };
+    if (byte_slice_split_once(version_slice, '/', &http_constant, &major_minor)) {
+        return -1;
+    }
+
+    if (!byte_slice_eql_str(http_constant, "HTTP")) {
+        return -1;
+    }
+
+    struct byte_slice major = { 0 };
+    struct byte_slice minor = { 0 };
+    if (byte_slice_split_once(major_minor, '.', &major, &minor)) {
+        return -1;
+    }
+
+    if (major.len != 1 || !is_digit(major.ptr[0])) {
+        return -1;
+    }
+    if (minor.len != 1 || !is_digit(minor.ptr[0])) {
+        return -1;
+    }
+
+    version->major = major.ptr[0] - '0';
+    version->minor = minor.ptr[0] - '0';
+
+    return 0;
+}
+
+// Splits 'METHOD URI HTTP/x.y'. 'method' and 'uri' point into 'line'.
+// Returns 0 on success and -1 when the line is malformed.
+int parse_request_line(struct byte_slice line, struct byte_slice *method, struct byte_slice *uri, struct http_version *version) {
+    // TODO: Ignore whitespace at start and end of line
+
+    // TODO: Split by whitespace, allow multiple whitespace characters in a row
+    struct byte_slice uri_version = { 0 };
+    if (byte_slice_split_once(line, ' ', method, &uri_version)) {
+        return -1;
+    }
+
+    // TODO: Split by whitespace, allow multiple whitespace characters in a row
+    struct byte_slice version_slice = { 0 };
+    if (byte_slice_split_once(uri_version, ' ', uri, &version_slice)) {
+        return -1;
+    }
+
+    // The method is a token and the request target is never empty
+    if (!is_token(*method) || uri->len == 0) {
+        return -1;
+    }
+
+    if (parse_http_version(version, version_slice)) {
+        return -1;
+    }
+
+    return 0;
+}
+
+// Splits 'name: value' and validates both parts. 'header' points into 'line'.
+// Returns 0 on success and -1 when the line is malformed.
+int parse_header_line(struct byte_slice line, struct http_header *header) {
+    if (byte_slice_split_once(line, ':', &header->name, &header->value)) {
+        return -1;
+    }
+
+    if (!is_token(header->name)) {
+        return -1;
+    }
+
+    byte_slice_trim_str(&header->value, " \t");
+
+    if (!is_header_value(header->value)) {
+        return -1;
+    }
+
+    return 0;
+}
+
+#endif //PARSER_
diff --git a/src/request.c b/src/request.c
new file mode 100644
index 0000000..7b223c5
--- /dev/null
+++ b/src/request.c
@@ -0,0 +1,118 @@
+#ifndef REQUEST_
+#define REQUEST_
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "byte_slice.c"
+#include "arena.c"
+#include "utils.c"
+
+struct http_version {
+    uint8_t major;
+    uint8_t minor;
+};
+
+struct http_header {
+    struct byte_slice name;
+    struct byte_slice value;
+};
+
+struct http_request {
+    struct arena arena;
+
+    // Request line:
+    struct byte_slice method;
+    struct byte_slice uri;
+    struct http_version version;
+
+    // Headers:
+    struct http_header *headers;
+    size_t headers_len;
+    size_t headers_capacity;
+
+    // Body:
+    struct byte_slice body;
+};
+
+// Appends a copy of 'header', its name and value are duplicated into the
+// arena of the request. Returns 0 on success and -1 when out of memory.
+int http_request_append_header(struct http_request *request, struct http_header header) {
+    if (request->headers_len >= request->headers_capacity) {
+        size_t new_capacity = request->headers_capacity + request->headers_capacity / 2;
+        if (new_capacity == 0) {
+            new_capacity = 16;
+        }
+
+        struct http_header *new_headers = arena_alloc(&request->arena, new_capacity * sizeof(struct http_header));
+        if (!new_headers) {
+            return -1;
+        }
+
+        if (request->headers) {
+            memcpy(new_headers, request->headers, request->headers_len * sizeof(struct http_header));
+        }
+
+        request->headers = new_headers;
+        request->headers_capacity = new_capacity;
+    }
+
+    // A failed copy is reported with a NULL pointer. The length can not be
+    // used to detect it, because an empty header value is valid.
+    struct byte_slice name = arena_dupe_slice(&request->arena, header.name);
+    if (!name.ptr) {
+        return -1;
+    }
+
+    struct byte_slice value = arena_dupe_slice(&request->arena, header.value);
+    if (!value.ptr) {
+        return -1;
+    }
+
+    request->headers[request->headers_len] = (struct http_header){
+        .name = name,
+        .value = value,
+    };
+    request->headers_len++;
+
+    return 0;
+}
+
+static bool case_insensitive_eql(struct byte_slice a, struct byte_slice b) {
+    if (a.len != b.len) {
+        return false;
+    }
+    for (size_t i = 0; i < a.len; i++) {
+        uint8_t a_char = a.ptr[i];
+        uint8_t b_char = b.ptr[i];
+
+        if (is_alpha(a_char)) {
+            a_char = toupper(a_char);
+        }
+
+        if (is_alpha(b_char)) {
+            b_char = toupper(b_char);
+        }
+
+        if (a_char != b_char) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Returns the value of the first header called 'name' (ignoring ASCII case),
+// or NULL when the request has no such header.
+struct byte_slice *http_request_find_header(struct http_request *request, struct byte_slice name) {
+    for (size_t i = 0; i < request->headers_len; i++) {
+        if (case_insensitive_eql(request->headers[i].name, name)) {
+            return &request->headers[i].value;
+        }
+    }
+
+    return NULL;
+}
+
+#endif //REQUEST_
diff --git a/src/server.c b/src/server.c
new file mode 100644
index 0000000..45bfec9
--- /dev/null
+++ b/src/server.c
@@ -0,0 +1,245 @@
+#include <arpa/inet.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "client.c"
+#include "log.c"
+
+enum poll_result {
+    POLL_RESULT_REQUEST,
+    POLL_RESULT_TIMEOUT,
+    POLL_RESULT_ERROR,
+};
+
+struct http_server {
+    // Listening socket, 0 while the server is not listening
+    int fd;
+
+    struct http_version version;
+
+    struct http_client *clients;
+    size_t clients_len;
+    size_t clients_capacity;
+};
+
+int http_server_init(struct http_server *server) {
+    memset(server, 0, sizeof(*server));
+
+    server->clients_capacity = 10;
+    server->clients = calloc(server->clients_capacity, sizeof(struct http_client));
+    if (!server->clients) {
+        return -1;
+    }
+
+    server->version.major = 1;
+    server->version.minor = 1;
+
+    return 0;
+}
+
+// Closes the connection of the client at 'index' and frees its slot.
+// The last client is moved into the freed slot to keep the array dense.
+static void http_server_close_client(struct http_server *server, size_t index) {
+    struct http_client *client = &server->clients[index];
+
+    log_debug("Closing connection");
+    close(client->fd);
+    arena_deinit(&client->parse_request.arena);
+
+    server->clients_len--;
+    if (index != server->clients_len) {
+        *client = server->clients[server->clients_len];
+    }
+}
+
+void http_server_deinit(struct http_server *server) {
+    if (!server) {
+        return;
+    }
+
+    while (server->clients_len > 0) {
+        http_server_close_client(server, server->clients_len - 1);
+    }
+
+    free(server->clients);
+    server->clients = NULL;
+
+    if (server->fd) {
+        close(server->fd);
+        server->fd = 0;
+    }
+}
+
+int http_server_listen(struct http_server *server, uint16_t port) {
+    int server_fd = socket(AF_INET, SOCK_STREAM, 0);
+    if (server_fd <= 0) {
+        // Descriptor 0 is refused too, 'server->fd == 0' means "not listening"
+        if (server_fd < 0) {
+            perror("socket");
+        }
+        goto err;
+    }
+
+    // Best effort, the server also works without address reuse
+    int on = 1;
+    setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+
+    struct sockaddr_in server_addr = { 0 };
+    server_addr.sin_family = AF_INET;
+    server_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+    server_addr.sin_port = htons(port);
+
+    if (bind(server_fd, (struct sockaddr*)&server_addr, sizeof(server_addr)) == -1) {
+        perror("bind");
+        goto err;
+    }
+
+    if (listen(server_fd, 5) == -1) {
+        perror("listen");
+        goto err;
+    }
+
+    server->fd = server_fd;
+
+    return 0;
+err:
+    // There is nothing to close when socket() itself failed
+    if (server_fd >= 0) {
+        close(server_fd);
+    }
+    return -1;
+}
+
+struct http_client *http_server_get_unused_client(struct http_server *server) {
+    if (server->clients_len >= server->clients_capacity) {
+        return NULL;
+    }
+
+    struct http_client *client = &server->clients[server->clients_len++];
+    memset(client, 0, sizeof(*client));
+    return client;
+}
+
+// Accepts one pending connection. It is refused when all slots are in use.
+static void http_server_accept_client(struct http_server *server) {
+    struct sockaddr_storage client_addr = { 0 };
+    socklen_t client_addr_len = sizeof(client_addr);
+
+    int client_fd = accept(server->fd, (struct sockaddr *)&client_addr, &client_addr_len);
+    if (client_fd == -1) {
+        perror("accept");
+        return;
+    }
+
+    struct http_client *client = http_server_get_unused_client(server);
+    if (!client) {
+        log_debug("Too many connections, refusing connection");
+        close(client_fd);
+        return;
+    }
+
+    log_debug("Accepted connection");
+    client->fd = client_fd;
+    client->addr = client_addr;
+    client->addr_len = client_addr_len;
+}
+
+// Waits until a client has sent a complete request and copies it into
+// 'result_request'. The caller owns the request afterwards and must release
+// it with arena_deinit(&result_request->arena).
+// 'timeout' is passed to poll(), a negative value waits forever.
+enum poll_result http_server_poll_request(struct http_server *server, struct http_request *result_request, int timeout) {
+    enum poll_result result = POLL_RESULT_ERROR;
+
+    // Declared before the first 'goto end', so that free() never sees an
+    // uninitialized pointer.
+    struct pollfd *pollfds = NULL;
+
+    if (server->fd == 0) {
+        goto end;
+    }
+
+    pollfds = calloc(1 + server->clients_capacity, sizeof(struct pollfd));
+    if (!pollfds) {
+        goto end;
+    }
+
+    while (true) {
+        for (size_t i = 0; i < server->clients_len; i++) {
+            struct http_client *client = &server->clients[i];
+            if (client->parse_state == PARSER_STATE_DONE) {
+                // Hand the request over and start parsing the next one,
+                // otherwise the same request would be returned forever.
+                *result_request = client->parse_request;
+                memset(&client->parse_request, 0, sizeof(client->parse_request));
+                client->parse_state = PARSER_STATE_REQUEST_LINE;
+
+                result = POLL_RESULT_REQUEST;
+                goto end;
+            }
+        }
+
+        size_t fds_count = 1 + server->clients_len;
+        memset(pollfds, 0, sizeof(struct pollfd) * fds_count);
+
+        pollfds[0].fd = server->fd;
+        pollfds[0].events = POLLIN;
+        for (size_t i = 0; i < server->clients_len; i++) {
+            struct http_client *client = &server->clients[i];
+            struct pollfd *pollfd = &pollfds[i+1];
+
+            pollfd->fd = client->fd;
+            if (client->read_buffer_len < CLIENT_READ_BUFFER_SIZE) {
+                pollfd->events = POLLIN;
+            }
+        }
+
+        int poll_count = poll(pollfds, fds_count, timeout);
+        if (poll_count == 0) {
+            log_debug("timeout");
+            result = POLL_RESULT_TIMEOUT;
+            goto end;
+        }
+        if (poll_count < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+            perror("poll");
+            goto end;
+        }
+
+        // Walk backwards, closing a client moves the last client into its
+        // slot and that one has already been handled.
+        for (size_t i = fds_count - 1; i >= 1; i--) {
+            short revents = pollfds[i].revents;
+            bool should_close = false;
+
+            if (revents & POLLIN) {
+                should_close = http_client_recv(&server->clients[i - 1]) != 0;
+            } else if (revents & (POLLHUP | POLLERR | POLLNVAL)) {
+                should_close = true;
+            }
+
+            if (should_close) {
+                http_server_close_client(server, i - 1);
+            }
+        }
+
+        // New connections are accepted last, so the clients still match the
+        // entries of 'pollfds' while they are being handled.
+        if (pollfds[0].revents & POLLIN) {
+            http_server_accept_client(server);
+        }
+    }
+
+end:
+    free(pollfds);
+    return result;
+}
diff --git a/src/utils.c b/src/utils.c
new file mode 100644
index 0000000..015699e
--- /dev/null
+++ b/src/utils.c
@@ -0,0 +1,14 @@
+#ifndef UTILS_
+#define UTILS_
+
+#include <stdbool.h>
+
+bool is_digit(char c) {
+    return '0' <= c && c <= '9';
+}
+
+bool is_alpha(char c) {
+    return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
+}
+
+#endif //UTILS_