diff --git a/parser_test/CMakeLists.txt b/parser_test/CMakeLists.txt index 554b37c27..4c6024bed 100644 --- a/parser_test/CMakeLists.txt +++ b/parser_test/CMakeLists.txt @@ -6,7 +6,9 @@ set(TEST_TARGETS mark_tests formula_inline_tests formula_block_tests - code_tests) + code_tests + table_tests + link_tests) foreach(TARGET ${TEST_TARGETS}) add_executable(${TARGET} ${TARGET}.c test_utils.c) diff --git a/parser_test/link_tests.c b/parser_test/link_tests.c new file mode 100644 index 000000000..825e6bb85 --- /dev/null +++ b/parser_test/link_tests.c @@ -0,0 +1,127 @@ +#include +#include "test_utils.h" + +int test_link_basic() { + return test_xml( + "This is a [link](http://example.com)", + "\n" + "\n" + "\n" + " \n" + " This is a \n" + " \n" + " link\n" + " \n" + " \n" + "\n", + CMARK_OPT_DEFAULT); +} + +int test_link_with_title() { + return test_xml( + "Here's a [link](http://example.com \"Example Site\")", + "\n" + "\n" + "\n" + " \n" + " Here's a \n" + " \n" + " link\n" + " \n" + " \n" + "\n", + CMARK_OPT_DEFAULT); +} + +int test_reference_link() { + return test_xml( + "This is a [reference link][1]\n\n[1]: http://example.com", + "\n" + "\n" + "\n" + " \n" + " This is a \n" + " \n" + " reference link\n" + " \n" + " \n" + "\n", + CMARK_OPT_DEFAULT); +} + +int test_link_with_escaped_brackets() { + return test_xml( + "This link has \\[escaped brackets\\]: [link](http://example.com)", + "\n" + "\n" + "\n" + " \n" + " This link has [escaped brackets]: \n" + " \n" + " link\n" + " \n" + " \n" + "\n", + CMARK_OPT_DEFAULT); +} + +int test_link_invalid() { + return test_xml( + "This is not a [link(http://example.com)", + "\n" + "\n" + "\n" + " \n" + " This is not a [link(http://example.com)\n" + " \n" + "\n", + CMARK_OPT_DEFAULT); +} + +int test_reference_definitions_only() { + return test_xml( + "[ref]: http://example.com\n" + "[other]: http://example.org", + "\n" + "\n" + "\n", + CMARK_OPT_DEFAULT); +} + +int test_reference_definitions_with_text() { + return test_xml( + 
"[ref]: http://example.com\n\n" + "This is a [ref] link\n\n" + "[other]: http://example.org\n\n" + "And [other] link", + "\n" + "\n" + "\n" + " \n" + " This is a \n" + " \n" + " ref\n" + " \n" + " link\n" + " \n" + " \n" + " And \n" + " \n" + " other\n" + " \n" + " link\n" + " \n" + "\n", + CMARK_OPT_DEFAULT); +} + +int main() { + CASE(test_link_basic); + CASE(test_link_with_title); + CASE(test_reference_link); + CASE(test_link_with_escaped_brackets); + CASE(test_link_invalid); + CASE(test_reference_definitions_only); + CASE(test_reference_definitions_with_text); + return 0; +} diff --git a/parser_test/table_tests.c b/parser_test/table_tests.c new file mode 100644 index 000000000..aea2f36d5 --- /dev/null +++ b/parser_test/table_tests.c @@ -0,0 +1,288 @@ +#include +#include "test_utils.h" + +int test_table_basic() { + return test_xml( + "the text before the table\n\n" + "| foo | bar | zoo |\n" + "| :--- | :---: | ---: |\n" + "| baz | bim | xyz |\n" + "the text after the table", + "\n" + "\n" + "\n" + " \n" + " the text before the table\n" + " \n" + " \n" + " \n" + " \n" + " foo\n" + " \n" + " \n" + " bar\n" + " \n" + " \n" + " zoo\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " baz\n" + " \n" + " \n" + " bim\n" + " \n" + " \n" + " xyz\n" + " \n" + " \n" + "
\n" + " \n" + " the text after the table\n" + " \n" + "
\n", + CMARK_OPT_SOURCEPOS); +} + +int test_table_alignments() { + return test_xml( + "| left | center | right |\n" + "|:-----|:------:|------:|\n" + "| a | b | c |", + "\n" + "\n" + "\n" + " \n" + " \n" + " \n" + " left\n" + " \n" + " \n" + " center\n" + " \n" + " \n" + " right\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " a\n" + " \n" + " \n" + " b\n" + " \n" + " \n" + " c\n" + " \n" + " \n" + "
\n" + "
\n", + CMARK_OPT_DEFAULT); +} + +int test_table_empty_cells() { + return test_xml( + "| a | b |\n" + "| --- | --- |\n" + "| | d |", + "\n" + "\n" + "\n" + " \n" + " \n" + " \n" + " a\n" + " \n" + " \n" + " b\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " d\n" + " \n" + " \n" + "
\n" + "
\n", + CMARK_OPT_DEFAULT); +} + +int test_table_escaped_pipes() { + return test_xml( + "| a \\| b | c |\n" + "| --- | --- |\n" + "| d | e \\| f |", + "\n" + "\n" + "\n" + " \n" + " \n" + " \n" + " a | b\n" + " \n" + " \n" + " c\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " d\n" + " \n" + " \n" + " e | f\n" + " \n" + " \n" + "
\n" + "
\n", + CMARK_OPT_DEFAULT); +} + +int test_table_invalid_no_header() { + return test_xml( + "| a | b |\n" + "| d | e |", + "\n" + "\n" + "\n" + " \n" + " | a | b |\n" + " \n" + " | d | e |\n" + " \n" + "\n", + CMARK_OPT_DEFAULT); +} + +int test_table_invalid_delimiter() { + return test_xml( + "| a | b |\n" + "| -- | --- - |\n" + "| c | d |", + "\n" + "\n" + "\n" + " \n" + " | a | b |\n" + " \n" + " | -- | --- - |\n" + " \n" + " | c | d |\n" + " \n" + "\n", + CMARK_OPT_DEFAULT); +} + +int test_table_no_leading_pipe() { + return test_xml( + "foo | bar |\n" + "--- | --- |\n" + "baz | bim |", + "\n" + "\n" + "\n" + " \n" + " foo | bar |\n" + " \n" + " --- | --- |\n" + " \n" + " baz | bim |\n" + " \n" + "\n", + CMARK_OPT_DEFAULT); +} + +int test_table_mismatched_columns() { + return test_xml( + "| a | b | c |\n" + "| --- | --- |\n" + "| d | e |", + "\n" + "\n" + "\n" + " \n" + " | a | b | c |\n" + " \n" + " | --- | --- |\n" + " \n" + " | d | e |\n" + " \n" + "\n", + CMARK_OPT_DEFAULT); +} + +int test_table_with_inline_markdown() { + return test_xml( + "| *em* | **strong** |\n" + "| --- | --- |\n" + "| `code` | [link](url) |", + "\n" + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " em\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " strong\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " code\n" + " \n" + " \n" + " \n" + " \n" + " link\n" + " \n" + " \n" + " \n" + "
\n" + "
\n", + CMARK_OPT_SOURCEPOS); +} + +int main() { + CASE(test_table_basic); + CASE(test_table_alignments); + CASE(test_table_empty_cells); + CASE(test_table_escaped_pipes); + CASE(test_table_invalid_no_header); + CASE(test_table_invalid_delimiter); + CASE(test_table_no_leading_pipe); + CASE(test_table_mismatched_columns); + CASE(test_table_with_inline_markdown); + return 0; +} diff --git a/src/blocks.c b/src/blocks.c index 6ac3ff14a..15690c1ce 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -23,6 +23,8 @@ #include "buffer.h" #include "chunk.h" +#define UNUSED(x) (void)(x) + #define CODE_INDENT 4 #define TAB_STOP 4 @@ -165,7 +167,9 @@ static inline bool can_contain(cmark_node_type parent_type, return (parent_type == CMARK_NODE_DOCUMENT || parent_type == CMARK_NODE_BLOCK_QUOTE || parent_type == CMARK_NODE_ITEM || - (parent_type == CMARK_NODE_LIST && child_type == CMARK_NODE_ITEM)); + (parent_type == CMARK_NODE_LIST && child_type == CMARK_NODE_ITEM) || + (parent_type == CMARK_NODE_TABLE && child_type == CMARK_NODE_TABLE_ROW) || + (parent_type == CMARK_NODE_TABLE_ROW && child_type == CMARK_NODE_TABLE_CELL)); } static inline bool accepts_lines(cmark_node_type block_type) { @@ -177,7 +181,8 @@ static inline bool accepts_lines(cmark_node_type block_type) { static inline bool contains_inlines(cmark_node_type block_type) { return (block_type == CMARK_NODE_PARAGRAPH || - block_type == CMARK_NODE_HEADING); + block_type == CMARK_NODE_HEADING || + block_type == CMARK_NODE_TABLE_CELL); } static void add_line(cmark_chunk *ch, cmark_parser *parser) { @@ -417,6 +422,10 @@ static void process_inlines(cmark_mem *mem, cmark_node *root, cur = cmark_iter_get_node(iter); if (ev_type == CMARK_EVENT_ENTER) { if (contains_inlines(S_type(cur))) { + // For table delimiter cell, do not parse inlines. 
+        if (S_type(cur) == CMARK_NODE_TABLE_CELL && cur->as.table_cell.is_delimiter) {
+          continue;  // skips cmark_parse_inlines and the cur->data release below
+        }
         cmark_parse_inlines(mem, cur, refmap, options);
         mem->free(cur->data);
         cur->data = NULL;
@@ -866,6 +875,146 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
   return res;
 }
 
+typedef struct {
+  cmark_parser *parser;       // set by parse_table_row_cells; stays NULL (zeroed) for the delimiter scan
+  cmark_node *container;      // row node that receives the TABLE_CELL children
+  const unsigned char *data;  // row text being scanned
+
+  // Per-cell progress flags for the delimiter-row check; cleared at every '|'.
+  // Indexed [leading space, leading ':', '-', trailing ':', trailing space].
+  bool delimiter_row_chars_met[5];
+
+  bufsize_t cell_start_offset;  // start of the current cell: just past the leading '|', then advanced by the cell-building callback
+} table_row_scan_context;
+
+// Scans one '|'-delimited row. Optional @on_char sees each byte except a '\\' escape and the byte it escapes; a false return rejects the row.
+// Returns the number of unescaped '|' after the leading one (the cell count) on a match, otherwise 0.
+static int scan_table_row_helper(const unsigned char *data, bufsize_t len,
+                                 table_row_scan_context *context,
+                                 bool (*on_char)(table_row_scan_context *, bufsize_t, int, char)) {
+  bufsize_t offset = 0;
+
+  // Unlike GFM, both the leading and the trailing '|' are required.
+  if (offset >= len || data[offset] != '|') {
+    return 0;
+  }
+
+  ++offset;
+  if (context) {
+    context->cell_start_offset = offset;
+  }
+
+  bool is_escaped = false;
+  bool nonspace_after_last_delimiter = false;
+  bool has_valid_delimiter = false;
+  int cols = 0;
+  while (offset < len) {
+    const char c = data[offset];
+    if (c == '\n' || c == '\r') {  // a line break ends the scan, even right after a backslash
+      break;
+    }
+    if (c == '\\' && !is_escaped) {
+      nonspace_after_last_delimiter = true;  // an escape counts as cell content
+      is_escaped = true;
+      ++offset;
+      continue;
+    }
+
+    if (is_escaped) {  // the escaped byte is skipped without reaching @on_char
+      is_escaped = false;
+      ++offset;
+      continue;
+    }
+
+    if (on_char && !on_char(context, offset, cols, c)) {
+      return 0;
+    }
+
+    if (c == '|') {
+      ++cols;  // incremented after @on_char ran, so the callback sees a 0-based cell index
+      has_valid_delimiter = true;
+      nonspace_after_last_delimiter = false;
+    } else if (!S_is_space_or_tab(c)) {
+      nonspace_after_last_delimiter = true;
+    }
+    ++offset;
+  }
+
+  if (nonspace_after_last_delimiter || !has_valid_delimiter) {  // text after the last '|', or no '|' beyond the first
+    return 0;
+  } else {
+    return cols;
+  }
+}
+
+static int scan_table_header(const unsigned char *data, bufsize_t len) {  // same shape check as scan_table_row
+  return scan_table_row_helper(data, len, NULL, NULL);
+}
+
+static int scan_table_row(const unsigned char *data, bufsize_t len) {
+  return scan_table_row_helper(data, len, NULL, NULL);
+}
+
+static bool scan_table_delimiter_row_scan_row_helper(table_row_scan_context *context,
+                                                     bufsize_t offset, int cols, char c) {
+  UNUSED(offset);
+  UNUSED(cols);
+  switch (c) {
+  case '-':
+    if (context->delimiter_row_chars_met[2] &&
+        (context->delimiter_row_chars_met[3] || context->delimiter_row_chars_met[4])) {
+      return false;  // dashes may not resume after a trailing ':' or trailing space
+    }
+    context->delimiter_row_chars_met[2] = true;
+    break;
+  case ':':
+    if (context->delimiter_row_chars_met[4] || context->delimiter_row_chars_met[3]) {
+      return false;  // no ':' after a trailing ':' or trailing space
+    } else if (context->delimiter_row_chars_met[2]) {
+      context->delimiter_row_chars_met[3] = true;  // ':' after dashes: trailing colon
+    } else if (context->delimiter_row_chars_met[1]) {
+      return false;  // second leading ':'
+    } else {
+      context->delimiter_row_chars_met[1] = true;  // leading colon
+    }
+    break;
+  case '|':
+    if (!context->delimiter_row_chars_met[2]) {
+      return false;  // every cell needs at least one '-'
+    }
+    memset(context->delimiter_row_chars_met, 0, sizeof(context->delimiter_row_chars_met));  // fresh state for the next cell
+    break;
+  default:
+    if (S_is_space_or_tab(c)) {
+      if (context->delimiter_row_chars_met[4] ||
+          context->delimiter_row_chars_met[3] ||
+          context->delimiter_row_chars_met[2]) {
+        context->delimiter_row_chars_met[4] = true;  // space after dashes/colon: trailing space
+      } else if (context->delimiter_row_chars_met[1]) {
+        return false;  // space between a leading ':' and the dashes
+      } else {
+        context->delimiter_row_chars_met[0] = true;  // leading space
+      }
+    } else {
+      return false;  // any other byte disqualifies the row
+    }
+  }
+  return true;
+}
+
+// Returns the column count when the line is a valid delimiter row. Otherwise, returns 0.
+static int scan_table_delimiter_row(const unsigned char *data, bufsize_t len) {
+  table_row_scan_context context;
+  memset(&context, 0, sizeof(context));  // all delimiter_row_chars_met flags start false
+  int ret = scan_table_row_helper(data, len, &context,
+                                  scan_table_delimiter_row_scan_row_helper);
+  if (ret > 0) {
+    return ret;
+  } else {
+    return 0;
+  }
+}
+
 static bool parse_formula_block_prefix(cmark_parser *parser, cmark_chunk *input,
                                        cmark_node *container, bool *should_continue) {
   bool res = false;
@@ -996,6 +1145,15 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
       if (parser->blank)
         goto done;
       break;
+    case CMARK_NODE_TABLE:
+      if (!scan_table_row(input->data + parser->first_nonspace, input->len - parser->first_nonspace))
+        goto done;
+      break;
+    case CMARK_NODE_TABLE_ROW:
+      // Fallthrough.
+    case CMARK_NODE_TABLE_CELL:
+      // A table row and cell can't contain more than one line.
+      goto done;
     default:
       break;
     }
@@ -1015,6 +1173,233 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
   return container;
 }
 
+// Row-scan callback: for every unescaped '|' after the leading one, appends a
+// TABLE_CELL child to context->container holding the text between the previous
+// pipe and this one. Always returns true, so it never rejects the row itself.
+static bool parse_table_row_cells_scan_row_helper(table_row_scan_context *context,
+                                                  bufsize_t offset, int cols, char c) {
+  if (c == '|') {
+    cmark_node *cell_node = add_child(context->parser, context->container,
+                                      CMARK_NODE_TABLE_CELL,
+                                      context->container->start_column + context->cell_start_offset);
+    cell_node->start_line = cell_node->end_line = context->container->start_line;
+    // Do not include the pipes.
+    cell_node->end_column = context->container->start_column + offset - 1;
+
+    // Hand the cell text over via a NUL-terminated scratch copy rather than
+    // patching '\0' into the caller's buffer through a const-discarding cast.
+    const bufsize_t cell_len = offset - context->cell_start_offset;
+    char *cell_text = (char *)context->parser->mem->calloc(cell_len + 1, 1);
+    memcpy(cell_text, context->data + context->cell_start_offset, cell_len);
+    cmark_node_set_literal(cell_node, cell_text);
+    context->parser->mem->free(cell_text);
+
+    cell_node->as.table_cell.idx = cols;
+    cell_node->as.table_cell.is_delimiter = false;
+
+    // For next cell.
+    context->cell_start_offset = offset + 1;
+  }
+  return true;
+}
+
+// Splits the row text data[0..len) into TABLE_CELL children of @container.
+// Returns false when the text does not start with '|' or the row scan rejects
+// it; cells created before a rejection stay attached to @container.
+static bool parse_table_row_cells(cmark_parser *parser, cmark_node *container,
+                                  const unsigned char *data, bufsize_t len) {
+  if (0 >= len || data[0] != '|') {
+    return false;
+  }
+
+  table_row_scan_context context;
+  memset(&context, 0, sizeof(context));
+  context.parser = parser;
+  context.container = container;
+  context.data = data;
+  return scan_table_row_helper(data, len, &context, parse_table_row_cells_scan_row_helper);
+}
+
+// Tries to append the current line to the open table @container as a DATA row.
+// Returns the new row, or NULL when the line is blank or is not a table row.
+static cmark_node *try_opening_new_table_row(cmark_parser *parser, cmark_node *container,
+                                             cmark_chunk *input) {
+  if (parser->blank) {
+    return NULL;
+  }
+
+  const unsigned char *data = input->data + parser->first_nonspace;
+  const bufsize_t len = input->len - parser->first_nonspace;
+
+  int matched = scan_table_row(data, len);
+  if (matched == 0) {
+    return NULL;
+  }
+
+  cmark_node *row = add_child(parser, container, CMARK_NODE_TABLE_ROW, container->start_column);
+  row->end_column = container->end_column;
+  row->as.table_row.type = CMARK_TABLE_ROW_TYPE_DATA;
+  if (!parse_table_row_cells(parser, row, data, len)) {
+    cmark_node_free(row);
+    return NULL;
+  }
+
+  // Minus the extra '\n'.
+  S_advance_offset(parser, input, input->len - parser->offset - 1, false);
+  return row;
+}
+
+// Derives a column's alignment from its delimiter cell text: a ':' before the
+// first '-' means left, a ':' after a '-' means right, both together mean center.
+static cmark_table_align parse_table_cell_alignment(const unsigned char *data, bufsize_t len) {
+  bool align_left = false;
+  bool align_right = false;
+  bool dash_met = false;
+  for (bufsize_t i = 0; i < len; ++i) {
+    switch (data[i]) {
+    case '-':
+      dash_met = true;
+      break;
+    case ':':
+      if (dash_met) {
+        align_right = true;
+      } else {
+        align_left = true;
+      }
+      break;
+    default:
+      assert(S_is_space_or_tab(data[i]));
+      break;
+    }
+  }
+  if (align_left && align_right) {
+    return CMARK_TABLE_ALIGN_CENTER;
+  } else if (align_left) {
+    return CMARK_TABLE_ALIGN_LEFT;
+  } else if (align_right) {
+    return CMARK_TABLE_ALIGN_RIGHT;
+  } else {
+    return CMARK_TABLE_ALIGN_NONE;
+  }
+}
+
+// Called with the open paragraph @container when the current line may be a
+// delimiter row. On a match the paragraph is retyped to TABLE, a HEADER row is
+// built from the buffered paragraph text and a DELIMITER row from the current
+// line, and the per-column alignments are recorded. Returns the delimiter row,
+// or NULL (with @container still a PARAGRAPH) when no table starts here.
+static cmark_node *try_opening_new_table_header(cmark_parser *parser, cmark_node *container,
+                                                cmark_chunk *input) {
+  const unsigned char *data = input->data + parser->first_nonspace;
+  const bufsize_t len = input->len - parser->first_nonspace;
+
+  // Check if current line is the delimiter row. If yes, change the type of the parent container
+  // from PARAGRAPH to TABLE.
+  int del_matched = scan_table_delimiter_row(data, len);
+  if (del_matched == 0) {
+    return NULL;
+  }
+
+  // The header must be the paragraph's only line: the scans below stop at the
+  // first line break, so any further buffered lines would be silently dropped
+  // when parser->content is cleared at the end of this function.
+  bufsize_t header_end = 0;
+  while (header_end < parser->content.size &&
+         parser->content.ptr[header_end] != '\n' &&
+         parser->content.ptr[header_end] != '\r') {
+    ++header_end;
+  }
+  while (header_end < parser->content.size &&
+         (parser->content.ptr[header_end] == '\n' ||
+          parser->content.ptr[header_end] == '\r')) {
+    ++header_end;
+  }
+  if (header_end < parser->content.size) {
+    return NULL;
+  }
+
+  // Try parsing parent node content as the header row.
+  int header_matched = scan_table_header(parser->content.ptr, parser->content.size);
+  if (header_matched == 0) {
+    return NULL;
+  }
+
+  // Columns count must match.
+  if (del_matched != header_matched) {
+    return NULL;
+  }
+
+  container->type = CMARK_NODE_TABLE;
+
+  cmark_node *header_row = add_child(parser, container, CMARK_NODE_TABLE_ROW, container->start_column);
+  header_row->start_line = header_row->end_line = container->start_line;
+  // Minus the extra '\n'.
+  header_row->end_column = container->start_column + parser->content.size - 2;
+  header_row->as.table_row.type = CMARK_TABLE_ROW_TYPE_HEADER;
+  if (!parse_table_row_cells(parser, header_row, parser->content.ptr, parser->content.size)) {
+    container->type = CMARK_NODE_PARAGRAPH;
+    cmark_node_free(header_row);
+    return NULL;
+  }
+
+  cmark_node *delimiter_row = add_child(parser, container, CMARK_NODE_TABLE_ROW, container->start_column);
+  delimiter_row->as.table_row.type = CMARK_TABLE_ROW_TYPE_DELIMITER;
+  if (!parse_table_row_cells(parser, delimiter_row, data, len)) {
+    container->type = CMARK_NODE_PARAGRAPH;
+    cmark_node_free(header_row);
+    cmark_node_free(delimiter_row);
+    return NULL;
+  }
+
+  // Init table data.
+  memset(&container->as.table, 0, sizeof(cmark_table));
+  container->as.table.columns_cnt = header_matched;
+  cmark_table_align *alignments = (cmark_table_align *)parser->mem
+      ->calloc(header_matched, sizeof(cmark_table_align));
+  cmark_node *cell_node = delimiter_row->first_child;
+  while (cell_node) {
+    assert(cell_node->as.table_cell.idx < header_matched);
+    cell_node->as.table_cell.is_delimiter = true;
+    alignments[cell_node->as.table_cell.idx] = parse_table_cell_alignment(cell_node->data, cell_node->len);
+    cell_node = cell_node->next;
+  }
+  container->as.table.alignments = alignments;
+
+  // Minus the extra '\n'.
+  S_advance_offset(parser, input, input->len - parser->offset - 1, false);
+
+  // Clear parser content after successfully creating table header and delimiter rows
+  cmark_strbuf_clear(&parser->content);
+
+  return delimiter_row;
+}
+
+// Table entry point for open_new_blocks: extends an open TABLE with a data row,
+// or promotes an open PARAGRAPH to a table when the current line is a delimiter
+// row. On success *container is moved to the new row and true is returned.
+static bool try_opening_new_table_blocks(cmark_parser *parser, cmark_node **container,
+                                         cmark_chunk *input, bool indented) {
+  if (indented) {
+    return false;
+  }
+
+  cmark_node_type parent_type = cmark_node_get_type(*container);
+  cmark_node *new_container = NULL;
+  if (parent_type == CMARK_NODE_TABLE) {
+    new_container = try_opening_new_table_row(parser, *container, input);
+  } else if (parent_type == CMARK_NODE_PARAGRAPH){
+    // Now we are checking the delimiter row and the header row has already
+    // been recognized as a paragraph.
+    new_container = try_opening_new_table_header(parser, *container, input);
+  }
+  if (new_container) {
+    *container = new_container;
+    return true;
+  } else {
+    return false;
+  }
+}
+
 static void open_new_blocks(cmark_parser *parser, cmark_node **container,
                             cmark_chunk *input, bool all_matched) {
   bool indented;
@@ -1030,7 +1415,8 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
 
   while (cont_type != CMARK_NODE_CODE_BLOCK &&
          cont_type != CMARK_NODE_FORMULA_BLOCK &&
-         cont_type != CMARK_NODE_HTML_BLOCK) {
+         cont_type != CMARK_NODE_HTML_BLOCK &&
+         cont_type != CMARK_NODE_TABLE_CELL) {
 
     S_find_first_nonspace(parser, input);
     indented = parser->indent >= CODE_INDENT;
@@ -1195,7 +1581,8 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
       (*container)->as.code.fence_length = 0;
       (*container)->as.code.fence_offset = 0;
       (*container)->as.code.info = NULL;
-
+    } else if (try_opening_new_table_blocks(parser, container, input, indented)) {
+      break;
     } else {
       break;
     }
diff --git a/src/cmark.h b/src/cmark.h
index d1cb79ba9..1e448f4f2 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -47,9 +47,12 @@ typedef enum {
   CMARK_NODE_HEADING,
   CMARK_NODE_THEMATIC_BREAK,
   CMARK_NODE_FORMULA_BLOCK,
+  CMARK_NODE_TABLE,
+  CMARK_NODE_TABLE_ROW,
+  CMARK_NODE_TABLE_CELL,
 
   CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
-  CMARK_NODE_LAST_BLOCK = CMARK_NODE_FORMULA_BLOCK,
+  CMARK_NODE_LAST_BLOCK = CMARK_NODE_TABLE_CELL,
 
   /* Inline */
   CMARK_NODE_TEXT,
@@ -681,6 +684,9 @@ const char *cmark_version_string(void);
 #define NODE_ITEM CMARK_NODE_ITEM
 #define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK
 #define NODE_FORMULA_BLOCK CMARK_NODE_FORMULA_BLOCK
+#define NODE_TABLE CMARK_NODE_TABLE
+#define NODE_TABLE_ROW CMARK_NODE_TABLE_ROW
+#define NODE_TABLE_CELL CMARK_NODE_TABLE_CELL
 #define NODE_HTML_BLOCK CMARK_NODE_HTML_BLOCK
 #define NODE_CUSTOM_BLOCK CMARK_NODE_CUSTOM_BLOCK
 #define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH
diff --git a/src/node.c b/src/node.c
index d6176331f..dd8bff7b8 100644
--- a/src/node.c
+++ b/src/node.c
@@ -84,6 +84,28 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) {
   case CMARK_NODE_CUSTOM_INLINE:
     return cmark_node_is_inline(child);
 
+  case CMARK_NODE_TABLE:
+    return child->type == CMARK_NODE_TABLE_ROW;  // a table holds rows only
+
+  case CMARK_NODE_TABLE_ROW:
+    return child->type == CMARK_NODE_TABLE_CELL;  // a row holds cells only
+
+  case CMARK_NODE_TABLE_CELL:
+    switch (child->type) {  // explicit allow-list of inline node types for a cell
+    case CMARK_NODE_TEXT:
+    case CMARK_NODE_CODE:
+    case CMARK_NODE_EMPH:
+    case CMARK_NODE_STRONG:
+    case CMARK_NODE_LINK:
+    case CMARK_NODE_IMAGE:
+    case CMARK_NODE_STRIKETHROUGH:
+    case CMARK_NODE_HTML_INLINE:
+    case CMARK_NODE_MARK:
+      return true;
+    default:
+      return false;  // NOTE(review): CMARK_NODE_FORMULA_INLINE is not in the list above; confirm that is intended
+    }
+
   default:
     break;
   }
@@ -137,6 +159,7 @@ static void S_free_nodes(cmark_node *e) {
     case CMARK_NODE_HTML_BLOCK:
     case CMARK_NODE_FORMULA_INLINE:
     case CMARK_NODE_FORMULA_BLOCK:
+    case CMARK_NODE_TABLE_CELL:  // a cell's literal text lives in e->data
       mem->free(e->data);
       break;
     case CMARK_NODE_LINK:
@@ -144,6 +167,10 @@ static void S_free_nodes(cmark_node *e) {
       mem->free(e->as.link.url);
       mem->free(e->as.link.title);
       break;
+    case CMARK_NODE_TABLE:
+      mem->free(e->as.table.alignments);  // per-column array allocated when the table header is recognised
+      e->as.table.alignments = NULL;
+      break;
     case CMARK_NODE_CUSTOM_BLOCK:
     case CMARK_NODE_CUSTOM_INLINE:
       mem->free(e->as.custom.on_enter);
@@ -229,6 +256,12 @@ const char *cmark_node_get_type_string(cmark_node *node) {
     return "formula_inline";
   case CMARK_NODE_FORMULA_BLOCK:
     return "formula_block";
+  case CMARK_NODE_TABLE:
+    return "table";
+  case CMARK_NODE_TABLE_ROW:
+    return "table_row";
+  case CMARK_NODE_TABLE_CELL:
+    return "table_cell";
   case CMARK_NODE_LINK:
     return "link";
   case CMARK_NODE_IMAGE:
@@ -327,6 +360,7 @@ const char *cmark_node_get_literal(cmark_node *node) {
   case CMARK_NODE_CODE_BLOCK:
   case CMARK_NODE_FORMULA_INLINE:
   case CMARK_NODE_FORMULA_BLOCK:
+  case CMARK_NODE_TABLE_CELL:  // yields "" when node->data is NULL
     return node->data ? (char *)node->data : "";
 
   default:
@@ -349,6 +383,7 @@ int cmark_node_set_literal(cmark_node *node, const char *content) {
   case CMARK_NODE_CODE_BLOCK:
   case CMARK_NODE_FORMULA_INLINE:
   case CMARK_NODE_FORMULA_BLOCK:
+  case CMARK_NODE_TABLE_CELL:
     node->len = cmark_set_cstr(node->mem, &node->data, content);
     return 1;
 
diff --git a/src/node.h b/src/node.h
index dd1c18cb4..c80311964 100644
--- a/src/node.h
+++ b/src/node.h
@@ -41,6 +41,38 @@ typedef struct {
   bool setext;
 } cmark_heading;
 
+// Horizontal alignment of one table column, taken from its delimiter cell.
+typedef enum {
+  CMARK_TABLE_ALIGN_NONE = 0,   // No alignment specified (---)
+  CMARK_TABLE_ALIGN_LEFT = 1,   // :---
+  CMARK_TABLE_ALIGN_CENTER = 2, // :---:
+  CMARK_TABLE_ALIGN_RIGHT = 3   // ---:
+} cmark_table_align;
+
+// Role of a TABLE_ROW node inside its table.
+typedef enum {
+  CMARK_TABLE_ROW_TYPE_HEADER,
+  CMARK_TABLE_ROW_TYPE_DELIMITER,
+  CMARK_TABLE_ROW_TYPE_DATA
+} cmark_table_row_type;
+
+// Payload of a TABLE node.
+typedef struct {
+  int columns_cnt;               // number of cells in the header row
+  cmark_table_align *alignments; // columns_cnt entries; released in S_free_nodes
+} cmark_table;
+
+// Payload of a TABLE_ROW node.
+typedef struct {
+  cmark_table_row_type type;
+} cmark_table_row;
+
+// Payload of a TABLE_CELL node.
+typedef struct {
+  int idx;           // 0-based position of the cell within its row
+  bool is_delimiter; // true for delimiter-row cells, whose text is not parsed as inlines
+} cmark_table_cell;
+
 typedef struct {
   unsigned char *url;
   unsigned char *title;
@@ -88,6 +120,9 @@ struct cmark_node {
     cmark_link link;
     cmark_custom custom;
     cmark_formula formula;
+    cmark_table table;
+    cmark_table_row table_row;
+    cmark_table_cell table_cell;
     int html_block_type;
   } as;
 };
diff --git a/src/xml.c b/src/xml.c
index a613d22f6..8e47c7d09 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -181,6 +181,56 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
     case CMARK_NODE_STRIKETHROUGH:
     case CMARK_NODE_MARK:
       break;
+    case CMARK_NODE_TABLE:
+      snprintf(buffer, BUFFER_SIZE, " columns=\"%d\"", node->as.table.columns_cnt);
+      cmark_strbuf_puts(xml, buffer);
+      break;
+
+    case CMARK_NODE_TABLE_ROW:
+      switch (node->as.table_row.type) {
+      case CMARK_TABLE_ROW_TYPE_HEADER:
+        cmark_strbuf_puts(xml, " type=\"header\"");
+        break;
+      case CMARK_TABLE_ROW_TYPE_DELIMITER:
+        cmark_strbuf_puts(xml, " type=\"delimiter\"");
+        break;
+      case CMARK_TABLE_ROW_TYPE_DATA:
+        cmark_strbuf_puts(xml, " type=\"data\"");
+        break;
+      }
+      break;
+
+    // The column index is this cell's position among its siblings; the
+    // alignment is looked up on the grandparent TABLE. This runs for cells of
+    // every row type, and cells at or beyond columns_cnt get no align attribute.
+    case CMARK_NODE_TABLE_CELL:
+      if (node->parent && node->parent->parent &&
+          node->parent->parent->type == CMARK_NODE_TABLE) {
+        int col_num = 0;
+        cmark_node *cell = node->parent->first_child;
+        while (cell && cell != node) {
+          col_num++;
+          cell = cell->next;
+        }
+        if (col_num < node->parent->parent->as.table.columns_cnt) {
+          switch(node->parent->parent->as.table.alignments[col_num]) {
+          case CMARK_TABLE_ALIGN_LEFT:
+            cmark_strbuf_puts(xml, " align=\"left\"");
+            break;
+          case CMARK_TABLE_ALIGN_CENTER:
+            cmark_strbuf_puts(xml, " align=\"center\"");
+            break;
+          case CMARK_TABLE_ALIGN_RIGHT:
+            cmark_strbuf_puts(xml, " align=\"right\"");
+            break;
+          case CMARK_TABLE_ALIGN_NONE:
+            cmark_strbuf_puts(xml, " align=\"none\"");
+            break;
+          }
+        }
+      }
+      break;
+
     case CMARK_NODE_LINK:
     case CMARK_NODE_IMAGE:
       cmark_strbuf_puts(xml, " destination=\"");