1
0
Files
silly-stuff/ltx/ltx.odin
T
2026-04-27 00:22:40 +00:00

413 lines
9.9 KiB
Odin

package ltx
import "core:fmt"
import "core:os"
import "core:path/filepath"
import "core:strings"
import "core:unicode"
// TODO(Paul): change the key-value format to allow for true arbitrary string values with the use of quotes (maybe?) and make the rules clean and easy to remember.
// Entry point: parses the LTX file named by the first CLI argument, prints
// the resulting syntax tree to stdout, then prints a space-preserving XML
// rendering of the same tree. Errors go to stderr.
main :: proc() {
    if len(os.args) < 2 {
        fmt.eprintf("A naive implementation of TeX-based arbitrary markup. Prints syntax tree and space-preserving XML substitution\nUsage:\n\t%s <filepath>\n", os.args[0])
        return
    }
    ltx: Ltx
    parse_err := ltx_parse_file(&ltx, os.args[1])
    if parse_err != .None {
        fmt.eprintln(ltx_get_error(&ltx, parse_err))
        return
    }
    for node in ltx.nodes {
        print_node(node)
    }
    sb := strings.builder_make()
    defer strings.builder_destroy(&sb)
    // fix: a strip_ltx call used to fill sb here and its output was thrown
    // away by an immediate builder_reset — dead work, removed.
    ltx_to_xml(&sb, ltx.nodes)
    fmt.println(strings.to_string(sb))
}
// The special characters of the markup syntax.
// NOTE(review): .Quote is declared but not used by the parser yet — the
// file-header TODO mentions a planned quoted-value syntax.
Tokens :: enum {
Backslash,
LeftBrace,
RightBrace,
LeftBracket,
RightBracket,
Assign,
Quote,
}
// Maps each token to the character that spells it in source text.
TokenArray := [Tokens]rune {
.Assign = '=',
.Backslash = '\\',
.LeftBrace = '{',
.RightBrace = '}',
.LeftBracket = '[',
.RightBracket = ']',
.Quote = '"',
}
// The two shapes a parse-tree node can take.
Node_Kind :: enum {
Text,
Tag,
}
// Distinguishes bare "[flag]" groups from "[key = value]" attributes.
Attr_Type :: enum {
Flag,
Attribute,
}
// One parsed attribute: its value text (empty for flags) and its kind.
Field :: struct {
value: string,
type: Attr_Type,
}
// A node of the parse tree. .Text nodes use only text; .Tag nodes use name,
// attributes, and children. All strings are slices into Ltx.source.
Node :: struct {
name: string,
kind: Node_Kind,
text: string,
attributes: map[string]Field,
children: [dynamic]Node,
}
// All errors the parser and file loader can report; .None means success.
// NOTE(review): .ClosingBracketExpected is never returned anywhere in this
// file — confirm whether an unterminated "[...]" should produce it.
Ltx_Error :: enum u32 {
None = 0,
EOF,
KeyExpected,
ClosingBracketExpected,
ClosingBraceExpected,
UnexpectedRightBrace,
ValueExpected,
KeyAlreadyExists,
InvalidKey,
CannotReadFile,
// InvalidKeyStart,
}
// Advances the cursor one byte and returns the new current character.
// Updates the (line, col) position first: moving off a '\n' starts a new
// line, anything else bumps the column. Returns .EOF (cursor unchanged)
// when the cursor already sits on the last byte.
// NOTE(review): ltx.source is indexed per byte and cast to rune, so
// multi-byte UTF-8 characters are not decoded — assumes ASCII input.
ltx_seek :: proc(ltx: ^Ltx) -> (rune, Ltx_Error) {
next_idx := ltx.idx + 1
if next_idx >= len(ltx.source) do return 0, .EOF
if ltx_current_char(ltx) == '\n' {
ltx.pos = {
col = 0,
line = ltx.pos.line + 1,
}
} else {
ltx.pos.col += 1
}
ltx.idx = next_idx
c := ltx_current_char(ltx)
return c, .None
}
// Reports whether at least one more byte follows the cursor position.
ltx_has_next :: proc(ltx: ^Ltx) -> b32 {
    next := ltx.idx + 1
    return next < len(ltx.source)
}
// Returns the byte after the cursor (as a rune) without advancing,
// or .EOF when the cursor already sits on the last byte.
ltx_peek :: proc(ltx: ^Ltx) -> (rune, Ltx_Error) {
    if ltx_has_next(ltx) {
        return rune(ltx.source[ltx.idx + 1]), .None
    }
    return 0, .EOF
}
// Returns the byte under the cursor as a rune; the cursor must be in bounds.
ltx_current_char :: proc(ltx: ^Ltx) -> rune {
    assert(ltx.idx < len(ltx.source), "index cannot be greater than string length")
    c := ltx.source[ltx.idx]
    return rune(c)
}
// Advances the cursor past any run of Unicode whitespace; propagates .EOF
// from ltx_seek when the input ends inside the run.
ltx_consume_whitespace :: proc(ltx: ^Ltx) -> Ltx_Error {
for unicode.is_white_space(ltx_current_char(ltx)) do ltx_seek(ltx) or_return // TODO: do prop error handling
return .None
}
// Reports whether c is an ASCII decimal digit ('0'..'9').
is_numeric :: proc(c: rune) -> b32 {
    return '0' <= c && c <= '9'
}
// Reports whether c is an ASCII letter ('a'..'z' or 'A'..'Z').
is_alpha :: proc(c: rune) -> b32 {
    if c >= 'a' && c <= 'z' {
        return true
    }
    return c >= 'A' && c <= 'Z'
}
// Validates an attribute key: the first character must be an ASCII letter
// and every character a letter, digit, or underscore. Returns .InvalidKey
// on violation, .None otherwise. key must be non-empty.
validate_key :: proc(key: string) -> Ltx_Error {
    assert(len(key) > 0, "expected non-empty key")
    if !is_alpha(rune(key[0])) {
        return .InvalidKey
    }
    for c in key {
        if !(is_alpha(c) || is_numeric(c) || c == '_') {
            return .InvalidKey
        }
    }
    return .None
}
// Parser state: the raw source text, the path it came from (for error
// messages), the resulting top-level nodes, and the byte cursor.
Ltx :: struct {
source: string,
source_path: string,
nodes: [dynamic]Node,
idx: int,
// 0-based line/column of the cursor; reported 1-based by ltx_get_error.
pos: struct {
line: u32,
col: u32,
},
}
// Parses ltx.source into ltx.nodes.
// Grammar (informal): plain text becomes .Text nodes; "\name" starts a .Tag
// node; zero or more "[key = value]" / "[flag]" groups may follow the name;
// an optional "{ ... }" block nests child nodes. Duplicate attribute keys
// and unmatched '}' are errors; an unclosed '{' yields .ClosingBraceExpected.
// NOTE(review): indexing is byte-based, so non-ASCII input is not decoded
// correctly — assumes ASCII sources.
// NOTE(review): the loop never processes the final byte (ltx_has_next), so a
// document whose very last byte is '}' fails with .ClosingBraceExpected, and
// a trailing text run loses its last character — works only for inputs that
// end in a newline; verify this is intended.
ltx_parse :: proc(ltx: ^Ltx) -> (err: Ltx_Error) {
ltx.idx = 0
// Stack of currently-open tags: '{' seen, matching '}' not yet seen.
stack := make([dynamic]Node)
defer delete(stack)
// Start of the pending run of plain text not yet emitted as a .Text node.
text_node_start_idx := 0
for ltx_has_next(ltx) {
if ltx_current_char(ltx) == TokenArray[.Backslash] {
// TODO: check if the char after slash is a token
// Flush any plain text accumulated before this tag.
if text_node_start_idx < ltx.idx {
node := Node {
kind = .Text,
text = ltx.source[text_node_start_idx:ltx.idx],
}
if len(stack) == 0 do append(&ltx.nodes, node)
else do append(&stack[len(stack) - 1].children, node)
}
ltx_seek(ltx) or_return // skip \
tag_start_idx := ltx.idx
for unicode.is_letter(ltx_current_char(ltx)) do ltx_seek(ltx) or_return
assert(tag_start_idx < ltx.idx)
tag_name := ltx.source[tag_start_idx:ltx.idx]
node := Node {
kind = .Tag,
name = tag_name,
}
// Records where each syntactic part of the tag ended so the start of
// the following text run can be chosen afterwards (0 = part absent).
end_pos: struct {
tag_name: int,
fields: int,
content: int,
} = {
tag_name = ltx.idx,
fields = 0,
content = 0,
}
ltx_consume_whitespace(ltx) or_return
// Each "[...]" group contributes exactly one attribute or flag.
for ltx_current_char(ltx) == TokenArray[.LeftBracket] {
c := ltx_seek(ltx) or_return
field_start := ltx.idx
key: string
for ltx_has_next(ltx) {
if c == TokenArray[.Assign] {
// "key =" seen: validate and remember the key, then scan the value.
if field_start >= ltx.idx do return .KeyExpected
key = strings.trim(ltx.source[field_start:ltx.idx], " \t")
if len(key) <= 0 do return .KeyExpected
validate_key(key) or_return
ltx_seek(ltx) or_return
field_start = ltx.idx
} else if c == TokenArray[.RightBracket] {
// Group closed: with a key this is key=value, otherwise a bare flag
// (the scanned text becomes the flag's name, its value stays empty).
if field_start >= ltx.idx do return .ValueExpected
raw_value := strings.trim(ltx.source[field_start:ltx.idx], " \t")
if len(raw_value) <= 0 do return .ValueExpected
value := Field{}
if key != "" {
value.type = .Attribute
value.value = raw_value
} else {
key = raw_value
value.type = .Flag
}
_, _, found := map_upsert(&node.attributes, key, value)
if found do return .KeyAlreadyExists
ltx_seek(ltx) or_return
end_pos.fields = ltx.idx
break
}
c = ltx_seek(ltx) or_return
}
ltx_consume_whitespace(ltx) or_return
}
ltx_consume_whitespace(ltx) or_return
if ltx_current_char(ltx) == TokenArray[.LeftBrace] {
ltx_seek(ltx) or_return // consume {
append(&stack, node)
end_pos.content = ltx.idx
} else {
// NOTE(review): a childless tag is appended to the top level even when
// an enclosing tag is open on the stack — presumably it should join the
// open tag's children instead; verify against intended semantics.
append(&ltx.nodes, node)
}
// The next text run starts right after the last tag part actually present.
if end_pos.content != 0 do text_node_start_idx = end_pos.content
else if end_pos.fields != 0 do text_node_start_idx = end_pos.fields
else do text_node_start_idx = end_pos.tag_name
} else if ltx_current_char(ltx) == TokenArray[.RightBrace] {
if len(stack) <= 0 do return .UnexpectedRightBrace
node := pop(&stack)
// Flush trailing text inside the tag being closed.
if text_node_start_idx < ltx.idx {
text_node := Node {
kind = .Text,
text = ltx.source[text_node_start_idx:ltx.idx],
}
append(&node.children, text_node)
}
if len(stack) > 0 {
append(&stack[len(stack) - 1].children, node)
} else {
append(&ltx.nodes, node)
}
ltx_seek(ltx) or_return // consume }
text_node_start_idx = ltx.idx
} else {
ltx_seek(ltx) or_return
}
}
// Flush any plain text left after the final tag.
if text_node_start_idx < ltx.idx {
append(&ltx.nodes, Node{kind = .Text, text = ltx.source[text_node_start_idx:ltx.idx]})
}
if len(stack) > 0 do return .ClosingBraceExpected
return .None
}
// Reads the file at file_path into ltx.source, records its absolute path
// for error reporting, and parses it. Returns .CannotReadFile when the file
// cannot be read or its absolute path cannot be resolved; otherwise forwards
// the parser's result.
ltx_parse_file :: proc(ltx: ^Ltx, file_path: string) -> Ltx_Error {
    source, ok := os.read_entire_file(file_path)
    if !ok do return .CannotReadFile
    abs_path, abs_ok := filepath.abs(file_path)
    if !abs_ok {
        delete(source) // fix: the file contents were leaked on this early-out
        return .CannotReadFile
    }
    ltx.source_path = abs_path
    ltx.source = string(source)
    return ltx_parse(ltx)
}
// Prints one space per nesting level to stdout.
print_indent :: proc(level: int) {
    for _ in 0 ..< level {
        fmt.print(" ")
    }
}
// Pretty-prints node — and, recursively, its children one level deeper — to
// stdout as an indented tree. Text nodes show their content with visible
// whitespace escapes; tag nodes show the name followed by "(flag)" and
// "{key: value}" attribute annotations.
print_node :: proc(node: Node, indent_level := 0) {
    print_indent(indent_level)
    switch node.kind {
    case .Text:
        fmt.printf("TEXT: \"%s\"\n", escape_white_space(node.text))
    case .Tag:
        fmt.print("TAG:", node.name)
        for key, field in node.attributes {
            if field.type == .Flag {
                fmt.printf(" (%s)", key)
            } else {
                fmt.printf(" {{%s: %s}}", key, field.value)
            }
        }
        fmt.println()
        for child in node.children {
            print_node(child, indent_level + 1)
        }
    }
}
// Writes only the raw text content of nodes into sb, descending through tag
// nodes and discarding all markup. Always returns true.
strip_ltx :: proc(sb: ^strings.Builder, nodes: [dynamic]Node) -> b32 {
    for n in nodes {
        if n.kind == .Text {
            fmt.sbprint(sb, n.text)
        } else {
            strip_ltx(sb, n.children)
        }
    }
    return true
}
// Serializes the parsed node tree into sb as XML, wrapping the document in a
// synthetic <ltx> root at depth 0. Childless tags are emitted self-closing.
// fix: text content and attribute values are now XML-escaped; previously raw
// '&', '<', '>', '"' in the input produced malformed XML. Tag names and keys
// need no escaping: the lexer admits only letters for names and validate_key
// restricts keys to [A-Za-z0-9_].
ltx_to_xml :: proc(sb: ^strings.Builder, nodes: [dynamic]Node, depth := 0) {
    // Writes s with the five XML special characters replaced by entities.
    sbprint_escaped :: proc(sb: ^strings.Builder, s: string) {
        for c in s {
            switch c {
            case '&':  fmt.sbprint(sb, "&amp;")
            case '<':  fmt.sbprint(sb, "&lt;")
            case '>':  fmt.sbprint(sb, "&gt;")
            case '"':  fmt.sbprint(sb, "&quot;")
            case '\'': fmt.sbprint(sb, "&apos;")
            case:      fmt.sbprint(sb, c)
            }
        }
    }
    if len(nodes) <= 0 do return
    if depth == 0 do fmt.sbprintln(sb, "<ltx>")
    for node in nodes {
        switch node.kind {
        case .Text:
            sbprint_escaped(sb, node.text)
        case .Tag:
            fmt.sbprintf(sb, "<%s", node.name)
            // NOTE: flags carry an empty value and are emitted as key="",
            // unchanged from the original behavior.
            for k, v in node.attributes {
                fmt.sbprintf(sb, " %s=\"", k)
                sbprint_escaped(sb, v.value)
                fmt.sbprint(sb, "\"")
            }
            if len(node.children) > 0 {
                fmt.sbprint(sb, ">")
                ltx_to_xml(sb, node.children, depth + 1)
                fmt.sbprintf(sb, "</%s>", node.name)
            } else {
                fmt.sbprint(sb, " />")
            }
        }
    }
    if depth == 0 do fmt.sbprintln(sb, "</ltx>")
}
// Maps a parse error to its human-readable message ("" for .None).
ltx_error_to_string :: proc(error: Ltx_Error) -> string {
    switch error {
    case .None:                   return ""
    case .EOF:                    return "unexpected end of file"
    case .KeyExpected:            return "key expected before '='"
    case .ClosingBracketExpected: return "closing bracket expected"
    case .ClosingBraceExpected:   return "closing brace expected"
    case .UnexpectedRightBrace:   return "unexpected '}'"
    case .ValueExpected:          return "value expected after '='"
    case .KeyAlreadyExists:       return "attribute key/flag already exists in attribute"
    case .InvalidKey:             return "invalid key"
    case .CannotReadFile:         return "cannot read file"
    }
    // Unreachable while the switch covers every enum member.
    return "unknown error"
}
// Formats error with the source file path and the 1-based line/column where
// parsing stopped; returns "" for .None. Uses the temp allocator (tprintf).
ltx_get_error :: proc(ltx: ^Ltx, error: Ltx_Error) -> string {
    if error == .None {
        return ""
    }
    file_path := "[source]"
    if len(ltx.source_path) > 0 {
        file_path = ltx.source_path
    }
    line := ltx.pos.line + 1
    col := ltx.pos.col + 1
    error_msg := ltx_error_to_string(error)
    return fmt.tprintf("%s(%d,%d): error: %s", file_path, line, col, error_msg)
}
// Returns a copy of s with tabs and newlines replaced by the visible escape
// sequences "\t" and "\n". The backing builder is intentionally not
// destroyed: the returned string aliases its buffer.
escape_white_space :: proc(s: string) -> string {
    out := strings.builder_make()
    for r in s {
        if r == '\t' {
            fmt.sbprint(&out, "\\t")
        } else if r == '\n' {
            fmt.sbprint(&out, "\\n")
        } else {
            fmt.sbprint(&out, r)
        }
    }
    return strings.to_string(out)
}
// Normalizes whitespace per line: trims trailing tabs/spaces, drops lines
// that become empty, skips '\r', and collapses runs of a repeated identical
// whitespace character (e.g. "a   b" -> "a b"). Every surviving line is
// terminated with '\n'. Returns a newly built string.
// fix: the builder was destroyed by defer before returning to_string(sb),
// so the returned string aliased freed memory (use-after-free). The builder
// is now intentionally kept alive, matching escape_white_space.
process_white_space :: proc(s: string) -> string {
    sb: strings.Builder = strings.builder_make()
    lines := strings.split(s, "\n")
    defer delete(lines) // fix: the slice allocated by strings.split was leaked
    for _line in lines {
        line := strings.trim_right(_line, "\t ")
        if len(line) <= 0 do continue
        last_idx := 0  // start of the not-yet-flushed span of the line
        last_char: u8 = 0
        for i := 0; i < len(line); i += 1 {
            if line[i] == '\r' do continue
            if unicode.is_space(rune(line[i])) {
                if last_char == line[i] {
                    // Same whitespace char repeated: drop the buffered span
                    // (one copy of the char stays in the trailing flush).
                    last_idx = i
                } else {
                    fmt.sbprint(&sb, line[last_idx:i])
                    last_idx = i
                }
            }
            last_char = line[i]
        }
        fmt.sbprintln(&sb, line[last_idx:len(line)])
    }
    return strings.to_string(sb)
}