diff --git a/ltx/ltx.odin b/ltx/ltx.odin
new file mode 100644
index 0000000..7dfdf1d
--- /dev/null
+++ b/ltx/ltx.odin
@@ -0,0 +1,525 @@
+package ltx
+
+import "core:fmt"
+import "core:os"
+import "core:path/filepath"
+import "core:strings"
+import "core:unicode"
+
+// TODO(Paul): change the key-value format to allow for true arbitrary string values with the use of quotes (maybe?) and make the rules clean and easy to remember.
+
+// Command-line entry point. Parses the file named by the first argument, prints its
+// syntax tree to stdout, then prints the XML rendering of the same tree.
+// Exits with status 1 when the argument is missing or parsing fails.
+main :: proc() {
+	if len(os.args) < 2 {
+		fmt.eprintf(
+			"A naive implementation of TeX-based arbitrary markup. Prints syntax tree and space-preserving XML substitution\nUsage:\n\t%s <file>\n",
+			os.args[0],
+		)
+		os.exit(1)
+	}
+	ltx: Ltx
+
+	parse_err := ltx_parse_file(&ltx, os.args[1])
+	if parse_err != .None {
+		fmt.eprintln(ltx_get_error(&ltx, parse_err))
+		os.exit(1)
+	}
+	for node in ltx.nodes {
+		print_node(node)
+	}
+
+	sb := strings.builder_make()
+	defer strings.builder_destroy(&sb)
+	ltx_to_xml(&sb, ltx.nodes)
+	fmt.println(strings.to_string(sb))
+}
+
+// Kinds of characters that carry structural meaning in ltx source.
+Tokens :: enum {
+	Backslash,
+	LeftBrace,
+	RightBrace,
+	LeftBracket,
+	RightBracket,
+	Assign,
+	Quote,
+}
+
+// The source character for each token kind.
+TokenArray := [Tokens]rune {
+	.Assign = '=',
+	.Backslash = '\\',
+	.LeftBrace = '{',
+	.RightBrace = '}',
+	.LeftBracket = '[',
+	.RightBracket = ']',
+	.Quote = '"',
+}
+
+// Distinguishes plain text runs from tags.
+Node_Kind :: enum {
+	Text,
+	Tag,
+}
+
+// Whether a bracketed field was written as `[flag]` or as `[key=value]`.
+Attr_Type :: enum {
+	Flag,
+	Attribute,
+}
+
+// One bracketed field of a tag; `value` is left empty for flags.
+Field :: struct {
+	value: string,
+	type: Attr_Type,
+}
+
+// A syntax-tree node. Text nodes carry `text`; tag nodes carry `name`, `attributes`
+// and `children`. The strings are slices of `Ltx.source`, not copies.
+Node :: struct {
+	name: string,
+	kind: Node_Kind,
+	text: string,
+	attributes: map[string]Field,
+	children: [dynamic]Node,
+}
+
+// Result codes of the parser; `.None` means success.
+Ltx_Error :: enum u32 {
+	None = 0,
+	EOF,
+	KeyExpected,
+	ClosingBracketExpected,
+	ClosingBraceExpected,
+	UnexpectedRightBrace,
+	ValueExpected,
+	KeyAlreadyExists,
+	InvalidKey,
+	CannotReadFile,
+	TagNameExpected,
+	// InvalidKeyStart,
+}
+
+// Parser state: the source text, the byte offset of the cursor (`idx`), the zero-based
+// line/column of the cursor (`pos`) and the top-level nodes produced so far.
+// `idx == len(source)` is the end-of-input position.
+Ltx :: struct {
+	source: string,
+	source_path: string,
+	nodes: [dynamic]Node,
+	idx: int,
+	pos: struct {
+		line: u32,
+		col: u32,
+	},
+}
+
+// Reports whether the cursor is past the last byte of the source.
+ltx_at_end :: proc(ltx: ^Ltx) -> bool {
+	return ltx.idx >= len(ltx.source)
+}
+
+// Moves the cursor one byte forward, keeping `pos` in sync, and returns the byte that
+// is then under the cursor. Returns `.EOF` when no byte is available there: either the
+// cursor was already at the end, or this step consumed the final byte (the cursor
+// still moves in that case, so the final byte is not lost).
+ltx_seek :: proc(ltx: ^Ltx) -> (rune, Ltx_Error) {
+	if ltx_at_end(ltx) do return 0, .EOF
+	// A newline is accounted for when the cursor leaves it.
+	if ltx.source[ltx.idx] == '\n' {
+		ltx.pos = {
+			col = 0,
+			line = ltx.pos.line + 1,
+		}
+	} else {
+		ltx.pos.col += 1
+	}
+	ltx.idx += 1
+	if ltx_at_end(ltx) do return 0, .EOF
+	return ltx_current_char(ltx), .None
+}
+
+// Reports whether at least one byte follows the one under the cursor.
+ltx_has_next :: proc(ltx: ^Ltx) -> b32 {
+	return len(ltx.source) > ltx.idx + 1
+}
+
+// Returns the byte after the cursor without moving, or `.EOF` when there is none.
+ltx_peek :: proc(ltx: ^Ltx) -> (rune, Ltx_Error) {
+	if !ltx_has_next(ltx) do return 0, .EOF
+	return rune(ltx.source[ltx.idx + 1]), .None
+}
+
+// Returns the byte under the cursor widened to a rune. The source is read one byte
+// at a time, so a multi-byte UTF-8 sequence is seen as separate bytes.
+// The cursor must not be at the end of the source.
+ltx_current_char :: proc(ltx: ^Ltx) -> rune {
+	assert(ltx.idx < len(ltx.source), "index cannot be greater than string length")
+	return rune(ltx.source[ltx.idx])
+}
+
+// Advances the cursor past a run of whitespace. Running into the end of the source
+// just stops the scan, so the result is always `.None`; the return type is kept so
+// `or_return` call sites keep compiling.
+ltx_consume_whitespace :: proc(ltx: ^Ltx) -> Ltx_Error {
+	for !ltx_at_end(ltx) {
+		if !unicode.is_white_space(ltx_current_char(ltx)) do break
+		ltx_seek(ltx)
+	}
+	return .None
+}
+
+// Reports whether `c` is an ASCII decimal digit.
+is_numeric :: proc(c: rune) -> b32 {
+	return c >= '0' && c <= '9'
+}
+
+// Reports whether `c` is an ASCII letter.
+is_alpha :: proc(c: rune) -> b32 {
+	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+}
+
+// Reports whether `c` is one of the characters listed in `TokenArray`.
+@(private)
+is_token :: proc(c: rune) -> bool {
+	for token in TokenArray {
+		if c == token do return true
+	}
+	return false
+}
+
+// Checks that `key` starts with an ASCII letter and continues with ASCII letters,
+// digits or underscores. `key` must not be empty. Returns `.InvalidKey` otherwise.
+validate_key :: proc(key: string) -> Ltx_Error {
+	assert(len(key) > 0, "expected non-empty key")
+	if !is_alpha(rune(key[0])) do return .InvalidKey
+	for c in key {
+		if !is_alpha(c) && !is_numeric(c) && c != '_' do return .InvalidKey
+	}
+	return .None
+}
+
+// Appends `node` to the children of the innermost open tag, or to the top-level node
+// list when no tag is open.
+@(private)
+ltx_emit :: proc(ltx: ^Ltx, stack: ^[dynamic]Node, node: Node) {
+	if len(stack^) > 0 {
+		append(&stack^[len(stack^) - 1].children, node)
+	} else {
+		append(&ltx.nodes, node)
+	}
+}
+
+// Emits the source between `start` and the cursor as a text node; no-op when empty.
+@(private)
+ltx_flush_text :: proc(ltx: ^Ltx, stack: ^[dynamic]Node, start: int) {
+	if start >= ltx.idx do return
+	ltx_emit(ltx, stack, Node{kind = .Text, text = ltx.source[start:ltx.idx]})
+}
+
+// Parses one `[flag]` or `[key=value]` field into `node.attributes`. The cursor must
+// be on '[' and is left just past the matching ']'.
+// Only the first '=' separates key from value, so the value may itself contain '='.
+// Key and value are trimmed of spaces and tabs; keys are checked with validate_key,
+// flag names are stored as written.
+@(private)
+ltx_parse_field :: proc(ltx: ^Ltx, node: ^Node) -> Ltx_Error {
+	ltx_seek(ltx) // step over '['
+	content_start := ltx.idx
+	for !ltx_at_end(ltx) {
+		if ltx_current_char(ltx) == TokenArray[.RightBracket] do break
+		ltx_seek(ltx)
+	}
+	if ltx_at_end(ltx) do return .ClosingBracketExpected
+	content := ltx.source[content_start:ltx.idx]
+
+	key: string
+	field: Field
+	assign_at := strings.index_rune(content, TokenArray[.Assign])
+	if assign_at >= 0 {
+		key = strings.trim(content[:assign_at], " \t")
+		if len(key) == 0 do return .KeyExpected
+		validate_key(key) or_return
+		// '=' is a single byte, so the value starts one byte after it.
+		field.value = strings.trim(content[assign_at + 1:], " \t")
+		if len(field.value) == 0 do return .ValueExpected
+		field.type = .Attribute
+	} else {
+		key = strings.trim(content, " \t")
+		if len(key) == 0 do return .ValueExpected
+		field.type = .Flag
+	}
+	if key in node.attributes do return .KeyAlreadyExists
+	node.attributes[key] = field
+	ltx_seek(ltx) // step over ']'
+	return .None
+}
+
+// Parses a tag whose name starts at the cursor (the byte after '\'): the name, any
+// number of bracketed fields and, if present, the opening '{'. Whitespace may
+// separate these parts.
+// Returns the tag node, whether a '{' was consumed, and the offset at which text
+// resumes: just past the last part that was recognised, so whitespace that was
+// skipped while looking for a further part stays in the following text.
+@(private)
+ltx_parse_tag :: proc(ltx: ^Ltx) -> (node: Node, opens_body: bool, resume_idx: int, err: Ltx_Error) {
+	name_start := ltx.idx
+	// Tag names are ASCII letters; the source is scanned byte-wise, so a wider
+	// letter test would split multi-byte UTF-8 sequences.
+	for !ltx_at_end(ltx) {
+		if !is_alpha(ltx_current_char(ltx)) do break
+		ltx_seek(ltx)
+	}
+	if name_start == ltx.idx {
+		err = .TagNameExpected
+		return
+	}
+	node.kind = .Tag
+	node.name = ltx.source[name_start:ltx.idx]
+	resume_idx = ltx.idx
+
+	ltx_consume_whitespace(ltx) or_return
+	for !ltx_at_end(ltx) {
+		if ltx_current_char(ltx) != TokenArray[.LeftBracket] do break
+		ltx_parse_field(ltx, &node) or_return
+		resume_idx = ltx.idx
+		ltx_consume_whitespace(ltx) or_return
+	}
+	if !ltx_at_end(ltx) && ltx_current_char(ltx) == TokenArray[.LeftBrace] {
+		ltx_seek(ltx) // step over '{'
+		opens_body = true
+		resume_idx = ltx.idx
+	}
+	return
+}
+
+// Parses `ltx.source` from the start and appends the resulting top-level nodes to
+// `ltx.nodes`. Grammar, as implemented here:
+//   \name[flag][key=value]{children}   a tag; fields and body are optional
+//   \X                                  X is a token character: X taken literally
+//   anything else                       text, kept verbatim including whitespace
+// On failure the error code is returned and `ltx.pos` is left where parsing stopped.
+ltx_parse :: proc(ltx: ^Ltx) -> (err: Ltx_Error) {
+	ltx.idx = 0
+	ltx.pos = {}
+	stack := make([dynamic]Node) // tags whose '{' has been seen but not yet their '}'
+	defer delete(stack)
+
+	text_start := 0 // offset where the pending text run begins
+	for !ltx_at_end(ltx) {
+		c := ltx_current_char(ltx)
+		if c == TokenArray[.Backslash] {
+			ltx_flush_text(ltx, &stack, text_start)
+			ltx_seek(ltx) // step over '\'
+			if ltx_at_end(ltx) do return .TagNameExpected
+			if is_token(ltx_current_char(ltx)) {
+				// Escaped token: drop the backslash and start a text run at the token.
+				text_start = ltx.idx
+				ltx_seek(ltx)
+				continue
+			}
+			node, opens_body, resume_idx := ltx_parse_tag(ltx) or_return
+			if opens_body {
+				append(&stack, node)
+			} else {
+				ltx_emit(ltx, &stack, node)
+			}
+			text_start = resume_idx
+		} else if c == TokenArray[.RightBrace] {
+			if len(stack) == 0 do return .UnexpectedRightBrace
+			// Text pending before '}' belongs to the tag that is being closed.
+			ltx_flush_text(ltx, &stack, text_start)
+			node := pop(&stack)
+			ltx_emit(ltx, &stack, node)
+			ltx_seek(ltx) // step over '}'
+			text_start = ltx.idx
+		} else {
+			ltx_seek(ltx)
+		}
+	}
+	if len(stack) > 0 do return .ClosingBraceExpected
+	ltx_flush_text(ltx, &stack, text_start)
+	return .None
+}
+
+// Reads `file_path`, records its absolute path for error messages and parses it.
+// Returns `.CannotReadFile` when the file cannot be read or its path resolved.
+ltx_parse_file :: proc(ltx: ^Ltx, file_path: string) -> Ltx_Error {
+	source, ok := os.read_entire_file(file_path)
+	if !ok do return .CannotReadFile
+	abs_path, abs_ok := filepath.abs(file_path)
+	if !abs_ok {
+		delete(source) // nothing will reference the buffer, so do not leak it
+		return .CannotReadFile
+	}
+	ltx.source_path = abs_path
+	ltx.source = string(source)
+	return ltx_parse(ltx)
+}
+
+// Prints one space per indentation level to stdout.
+print_indent :: proc(level: int) {
+	for _ in 0 ..< level {
+		fmt.print(" ")
+	}
+}
+
+// Prints `node` and, recursively, its children to stdout, one node per line, indented
+// by nesting depth.
+print_node :: proc(node: Node, indent_level := 0) {
+	print_indent(indent_level)
+	switch node.kind {
+	case .Text:
+		escaped := escape_white_space(node.text)
+		defer delete(escaped) // escape_white_space hands ownership to the caller
+		fmt.printf("TEXT: \"%s\"\n", escaped)
+	case .Tag:
+		fmt.print("TAG:", node.name)
+		for k, v in node.attributes {
+			if v.type == .Flag {
+				fmt.printf(" (%s)", k)
+			} else {
+				fmt.printf(" {{%s: %s}}", k, v.value)
+			}
+		}
+		fmt.println()
+		for child in node.children {
+			print_node(child, indent_level + 1)
+		}
+	}
+}
+
+// Appends the text content of `nodes` to `sb`, leaving out all tag markup.
+// Always returns true.
+strip_ltx :: proc(sb: ^strings.Builder, nodes: [dynamic]Node) -> b32 {
+	for node in nodes {
+		switch node.kind {
+		case .Text:
+			strings.write_string(sb, node.text)
+		case .Tag:
+			strip_ltx(sb, node.children)
+		}
+	}
+	return true
+}
+
+// Appends an XML rendering of `nodes` to `sb`. Tags become elements, fields become
+// attributes (flags get an empty value) and tags without children are self-closed.
+// Text and attribute values are written verbatim; nothing is XML-escaped.
+// NOTE(review): the outermost call brackets its output with an empty sbprintln, i.e.
+// a bare newline. Presumably a wrapping root element was intended - confirm.
+ltx_to_xml :: proc(sb: ^strings.Builder, nodes: [dynamic]Node, depth := 0) {
+	if len(nodes) <= 0 do return
+	if depth == 0 do fmt.sbprintln(sb, "")
+	for node in nodes {
+		switch node.kind {
+		case .Text:
+			fmt.sbprint(sb, node.text)
+		case .Tag:
+			fmt.sbprintf(sb, "<%s", node.name)
+			for k, v in node.attributes do fmt.sbprintf(sb, " %s=\"%s\"", k, v.value)
+			if len(node.children) > 0 {
+				fmt.sbprint(sb, ">")
+				ltx_to_xml(sb, node.children, depth + 1)
+				fmt.sbprintf(sb, "</%s>", node.name)
+			} else {
+				fmt.sbprint(sb, " />")
+			}
+		}
+	}
+	if depth == 0 do fmt.sbprintln(sb, "")
+}
+
+// Returns the human-readable message for `error`; empty for `.None`.
+ltx_error_to_string :: proc(error: Ltx_Error) -> string {
+	switch error {
+	case .None:
+		return ""
+	case .EOF:
+		return "unexpected end of file"
+	case .KeyExpected:
+		return "key expected before '='"
+	case .ClosingBraceExpected:
+		return "closing brace expected"
+	case .ClosingBracketExpected:
+		return "closing bracket expected"
+	case .UnexpectedRightBrace:
+		return "unexpected '}'"
+	case .ValueExpected:
+		return "value expected after '='"
+	case .KeyAlreadyExists:
+		return "attribute key/flag already exists in attribute"
+	case .InvalidKey:
+		return "invalid key"
+	case .CannotReadFile:
+		return "cannot read file"
+	case .TagNameExpected:
+		return "tag name expected after '\\'"
+	}
+	return ""
+}
+
+// Formats `error` as `path(line,col): error: message` using one-based line and
+// column numbers taken from `ltx.pos`. Returns "" for `.None`.
+// The string comes from fmt.tprintf, so it lives in the temporary allocator.
+ltx_get_error :: proc(ltx: ^Ltx, error: Ltx_Error) -> string {
+	if error == .None do return ""
+	file_path := len(ltx.source_path) > 0 ? ltx.source_path : "[source]"
+	line := ltx.pos.line + 1
+	col := ltx.pos.col + 1
+	error_msg := ltx_error_to_string(error)
+	return fmt.tprintf("%s(%d,%d): error: %s", file_path, line, col, error_msg)
+}
+
+// Returns a copy of `s` in which tabs and newlines are spelled `\t` and `\n`.
+// The result is allocated with context.allocator and must be deleted by the caller.
+escape_white_space :: proc(s: string) -> string {
+	// The builder is deliberately not destroyed: the returned string is its buffer.
+	sb := strings.builder_make()
+	for c in s {
+		switch c {
+		case '\t':
+			strings.write_string(&sb, "\\t")
+		case '\n':
+			strings.write_string(&sb, "\\n")
+		case:
+			strings.write_rune(&sb, c)
+		}
+	}
+	return strings.to_string(sb)
+}
+
+// Returns `s` with, on every line, trailing spaces/tabs removed and each run of one
+// repeated whitespace character reduced to a single occurrence; lines that are
+// empty after trimming are dropped and every kept line ends with '\n'.
+// The result is allocated with context.allocator and must be deleted by the caller.
+process_white_space :: proc(s: string) -> string {
+	// Not destroyed here: the returned string is the builder's buffer, so freeing it
+	// would hand the caller a dangling string.
+	sb := strings.builder_make()
+	lines := strings.split(s, "\n")
+	defer delete(lines)
+	for _line in lines {
+		line := strings.trim_right(_line, "\t ")
+		if len(line) <= 0 do continue
+		last_idx := 0
+		last_char: u8 = 0
+		for i := 0; i < len(line); i += 1 {
+			if line[i] == '\r' do continue
+			if unicode.is_space(rune(line[i])) {
+				if last_char == line[i] {
+					last_idx = i
+				} else {
+					fmt.sbprint(&sb, line[last_idx:i])
+					last_idx = i
+				}
+			}
+			last_char = line[i]
+		}
+		fmt.sbprintln(&sb, line[last_idx:len(line)])
+	}
+	return strings.to_string(sb)
+}
+
diff --git a/ltx/sample.ltx b/ltx/sample.ltx
new file mode 100644
index 0000000..b76296b
--- /dev/null
+++ b/ltx/sample.ltx
@@ -0,0 +1,37 @@
+\note[key=value][key2=123][flag1][flag2]{Note Heading}
+
+\a[link_label]{A link}
+
+\section{
+    \title{Resources}
+    \list{
+        \item{\a[link_flag]{sdf}}
+        \item{\a[link_flag2]{}}
+    }
+}
+
+\lol
+
+\section{
+    \title{Arbitrary tags!}
+    \list{
+        \item{
+            \name{yo!}
+            \desc{yoyo!}
+        }
+        \item{
+            \name{oy!}
+            \desc{oyoy!}
+        }
+    }
+}
+
+\link[link_flag]{https://example.com}
+\link[link_flag]{https://example.com}
+
+\emphasize{Lorem ipsum} dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et
+dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
+Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+
+