1
0
This commit is contained in:
2026-03-27 22:22:12 -04:00
parent d417793364
commit 2179b14048
2 changed files with 449 additions and 0 deletions
+412
View File
@@ -0,0 +1,412 @@
package ltx
import "core:fmt"
import "core:os"
import "core:path/filepath"
import "core:strings"
import "core:unicode"
// TODO(Paul): change the key-value format to allow for true arbitrary string values with the use of quotes (maybe?) and make the rules clean and easy to remember.
// Entry point: parses the ltx file named by the first CLI argument, prints
// its syntax tree to stdout, then prints an XML rendering of the same tree.
// Parse errors are reported on stderr via ltx_get_error.
main :: proc() {
	if len(os.args) < 2 {
		fmt.eprintf("A naive implementation of TeX-based arbitrary markup. Prints syntax tree and space-preserving XML substitution\nUsage:\n\t%s <filepath>\n", os.args[0])
		return
	}
	ltx: Ltx
	parse_err := ltx_parse_file(&ltx, os.args[1])
	if parse_err != .None {
		fmt.eprintln(ltx_get_error(&ltx, parse_err))
		return
	}
	for node in ltx.nodes {
		print_node(node)
	}
	sb := strings.builder_make()
	defer strings.builder_destroy(&sb)
	// NOTE(review): the stripped-text output is built and then immediately
	// discarded by the reset below — presumably this just exercises
	// strip_ltx; confirm it is intentional.
	strip_ltx(&sb, ltx.nodes)
	strings.builder_reset(&sb)
	ltx_to_xml(&sb, ltx.nodes)
	fmt.println(strings.to_string(sb))
}
// Tokens enumerates the single-rune syntax markers of the ltx format.
// The concrete rune for each token is looked up through TokenArray.
Tokens :: enum {
	Backslash,
	LeftBrace,
	RightBrace,
	LeftBracket,
	RightBracket,
	Assign,
	Quote,
}
// TokenArray maps each Tokens value to the rune that represents it in
// source text. Indexed by enum name, so entry order here is cosmetic.
TokenArray := [Tokens]rune {
	.Assign = '=',
	.Backslash = '\\',
	.LeftBrace = '{',
	.RightBrace = '}',
	.LeftBracket = '[',
	.RightBracket = ']',
	.Quote = '"',
}
// Node_Kind distinguishes plain text runs from \tag nodes in the tree.
Node_Kind :: enum {
	Text,
	Tag,
}
// Attr_Type distinguishes a bare bracket flag ([flag]) from a key=value
// attribute ([key=value]).
Attr_Type :: enum {
	Flag,
	Attribute,
}
// Field is one bracket entry on a tag: the value text (left empty for
// flags) plus whether it came from a flag or a key=value pair.
Field :: struct {
	value: string,
	type: Attr_Type,
}
// Node is one syntax-tree node. Text nodes use `text`; tag nodes use
// `name`, `attributes`, and `children`. All strings are slices into the
// parsed Ltx.source buffer, not copies.
Node :: struct {
	name: string,
	kind: Node_Kind,
	text: string,
	attributes: map[string]Field,
	children: [dynamic]Node,
}
// Ltx_Error enumerates every failure the parser can report.
// Backed by u32; .None = 0 means success.
Ltx_Error :: enum u32 {
	None = 0,
	EOF,
	KeyExpected,
	ClosingBracketExpected,
	ClosingBraceExpected,
	UnexpectedRightBrace,
	ValueExpected,
	KeyAlreadyExists,
	InvalidKey,
	CannotReadFile,
	// InvalidKeyStart,
}
// Advances the cursor one byte and returns the rune now under it.
// Returns .EOF without moving when no byte remains past the current one.
// Line/column tracking: stepping off a '\n' starts a new line at column 0;
// any other step increments the column.
ltx_seek :: proc(ltx: ^Ltx) -> (rune, Ltx_Error) {
	if ltx.idx + 1 >= len(ltx.source) {
		return 0, .EOF
	}
	if ltx_current_char(ltx) == '\n' {
		ltx.pos.line += 1
		ltx.pos.col = 0
	} else {
		ltx.pos.col += 1
	}
	ltx.idx += 1
	return ltx_current_char(ltx), .None
}
// Reports whether at least one more byte follows the current cursor position.
ltx_has_next :: proc(ltx: ^Ltx) -> b32 {
	return ltx.idx + 1 < len(ltx.source)
}
// Returns the rune one byte ahead of the cursor without advancing,
// or .EOF when the cursor is on the last byte.
// NOTE(review): indexes a single byte, so this only decodes ASCII correctly.
ltx_peek :: proc(ltx: ^Ltx) -> (rune, Ltx_Error) {
	if !ltx_has_next(ltx) {
		return 0, .EOF
	}
	next := ltx.source[ltx.idx + 1]
	return rune(next), .None
}
// Returns the byte under the cursor as a rune. Asserts the cursor is in
// bounds rather than returning an error.
// NOTE(review): single-byte indexing — multi-byte UTF-8 runes are not decoded.
ltx_current_char :: proc(ltx: ^Ltx) -> rune {
	assert(ltx.idx < len(ltx.source), "index cannot be greater than string length")
	return rune(ltx.source[ltx.idx])
}
// Skips the cursor past a run of whitespace, if any.
// Propagates .EOF when the input ends inside the run.
ltx_consume_whitespace :: proc(ltx: ^Ltx) -> Ltx_Error {
	for unicode.is_white_space(ltx_current_char(ltx)) {
		ltx_seek(ltx) or_return
	}
	return .None
}
// Reports whether c is an ASCII decimal digit ('0'..'9').
is_numeric :: proc(c: rune) -> b32 {
	return '0' <= c && c <= '9'
}
// Reports whether c is an ASCII letter ('a'..'z' or 'A'..'Z').
is_alpha :: proc(c: rune) -> b32 {
	lower := 'a' <= c && c <= 'z'
	upper := 'A' <= c && c <= 'Z'
	return b32(lower || upper)
}
// Checks that key is a legal attribute name: the first character must be an
// ASCII letter, and every character must be a letter, digit, or underscore.
// Returns .InvalidKey otherwise. Asserts on an empty key.
validate_key :: proc(key: string) -> Ltx_Error {
	assert(len(key) > 0, "expected non-empty key")
	if !is_alpha(rune(key[0])) {
		return .InvalidKey
	}
	for c in key {
		legal := is_alpha(c) || is_numeric(c) || c == '_'
		if !legal {
			return .InvalidKey
		}
	}
	return .None
}
// Ltx holds the parser state: the input text, the resulting top-level
// nodes, and the cursor (byte index plus 0-based line/column for errors).
Ltx :: struct {
	source: string, // full input text; Node strings are slices into it
	source_path: string, // absolute path of the input file, when read from disk
	nodes: [dynamic]Node, // top-level syntax-tree nodes in document order
	idx: int, // current byte offset into source
	pos: struct {
		line: u32, // 0-based; reported 1-based by ltx_get_error
		col: u32, // 0-based column within the current line
	},
}
// Parses ltx.source into ltx.nodes. Grammar as implemented:
//   \name[key=value][flag]...{ children }
// where the bracket groups and the braced body are both optional. Plain
// text between tags becomes .Text nodes. Nesting is tracked with an
// explicit stack of tags whose '{' has been seen but not yet matched.
ltx_parse :: proc(ltx: ^Ltx) -> (err: Ltx_Error) {
	ltx.idx = 0
	stack := make([dynamic]Node)
	defer delete(stack)
	// Start offset of the pending, not-yet-flushed run of plain text.
	text_node_start_idx := 0
	for ltx_has_next(ltx) {
		if ltx_current_char(ltx) == TokenArray[.Backslash] {
			// TODO: check if the char after slash is a token
			// Flush any text accumulated before this tag into the innermost
			// open tag (or the top level when the stack is empty).
			if text_node_start_idx < ltx.idx {
				node := Node {
					kind = .Text,
					text = ltx.source[text_node_start_idx:ltx.idx],
				}
				if len(stack) == 0 do append(&ltx.nodes, node)
				else do append(&stack[len(stack) - 1].children, node)
			}
			ltx_seek(ltx) or_return // skip \
			// The tag name is the run of letters right after the backslash.
			tag_start_idx := ltx.idx
			for unicode.is_letter(ltx_current_char(ltx)) do ltx_seek(ltx) or_return
			assert(tag_start_idx < ltx.idx)
			tag_name := ltx.source[tag_start_idx:ltx.idx]
			node := Node {
				kind = .Tag,
				name = tag_name,
			}
			// Records where each syntactic part of the tag ended so the next
			// text run can start right after the last part present.
			end_pos: struct {
				tag_name: int,
				fields: int,
				content: int,
			} = {
				tag_name = ltx.idx,
				fields = 0,
				content = 0,
			}
			ltx_consume_whitespace(ltx) or_return
			// Attribute groups: each "[...]" holds either "key=value" or a
			// bare flag.
			for ltx_current_char(ltx) == TokenArray[.LeftBracket] {
				c := ltx_seek(ltx) or_return
				field_start := ltx.idx
				key: string
				for ltx_has_next(ltx) {
					if c == TokenArray[.Assign] {
						// Text before '=' is the key; validate it and start
						// scanning the value.
						if field_start >= ltx.idx do return .KeyExpected
						key = strings.trim(ltx.source[field_start:ltx.idx], " \t")
						if len(key) <= 0 do return .KeyExpected
						validate_key(key) or_return
						ltx_seek(ltx) or_return
						field_start = ltx.idx
					} else if c == TokenArray[.RightBracket] {
						if field_start >= ltx.idx do return .ValueExpected
						raw_value := strings.trim(ltx.source[field_start:ltx.idx], " \t")
						if len(raw_value) <= 0 do return .ValueExpected
						value := Field{}
						if key != "" {
							value.type = .Attribute
							value.value = raw_value
						} else {
							// No '=' was seen: the bracket held a bare flag,
							// stored under its own name with an empty value.
							key = raw_value
							value.type = .Flag
						}
						_, _, found := map_upsert(&node.attributes, key, value)
						if found do return .KeyAlreadyExists
						ltx_seek(ltx) or_return
						end_pos.fields = ltx.idx
						break
					}
					c = ltx_seek(ltx) or_return
				}
				// NOTE(review): if the input ends inside an unclosed '[', the
				// loop above falls through without reporting
				// .ClosingBracketExpected — confirm whether that should error.
				ltx_consume_whitespace(ltx) or_return
			}
			ltx_consume_whitespace(ltx) or_return
			if ltx_current_char(ltx) == TokenArray[.LeftBrace] {
				ltx_seek(ltx) or_return // consume {
				// Open tag: children accumulate until the matching '}'.
				append(&stack, node)
				end_pos.content = ltx.idx
			} else {
				// NOTE(review): a bodyless tag is always appended to the
				// top-level list, even when a parent tag is open on the stack
				// — compare the text flush above, which appends to the
				// innermost open tag. Confirm this is intended.
				append(&ltx.nodes, node)
			}
			// The next text run starts right after the last tag part present.
			if end_pos.content != 0 do text_node_start_idx = end_pos.content
			else if end_pos.fields != 0 do text_node_start_idx = end_pos.fields
			else do text_node_start_idx = end_pos.tag_name
		} else if ltx_current_char(ltx) == TokenArray[.RightBrace] {
			// Close the innermost open tag.
			if len(stack) <= 0 do return .UnexpectedRightBrace
			node := pop(&stack)
			// Flush trailing text inside the tag being closed.
			if text_node_start_idx < ltx.idx {
				text_node := Node {
					kind = .Text,
					text = ltx.source[text_node_start_idx:ltx.idx],
				}
				append(&node.children, text_node)
			}
			if len(stack) > 0 {
				append(&stack[len(stack) - 1].children, node)
			} else {
				append(&ltx.nodes, node)
			}
			ltx_seek(ltx) or_return // consume }
			text_node_start_idx = ltx.idx
		} else {
			ltx_seek(ltx) or_return
		}
	}
	// Flush whatever text remains at end of input.
	if text_node_start_idx < ltx.idx {
		append(&ltx.nodes, Node{kind = .Text, text = ltx.source[text_node_start_idx:ltx.idx]})
	}
	if len(stack) > 0 do return .ClosingBraceExpected
	return .None
}
// Reads file_path fully into memory, records its absolute path for error
// reporting, and parses it. Returns .CannotReadFile when the file cannot be
// read or its absolute path cannot be resolved. On success the buffer is
// owned by ltx.source (Node strings slice into it).
ltx_parse_file :: proc(ltx: ^Ltx, file_path: string) -> Ltx_Error {
	source, ok := os.read_entire_file(file_path)
	if !ok do return .CannotReadFile
	abs_path, abs_ok := filepath.abs(file_path)
	if !abs_ok {
		// Bug fix: this early-exit path previously leaked the file buffer —
		// it only becomes owned by ltx.source after the assignment below.
		delete(source)
		return .CannotReadFile
	}
	ltx.source_path = abs_path
	ltx.source = string(source)
	return ltx_parse(ltx)
}
// Writes two spaces per indentation level to stdout.
print_indent :: proc(level: int) {
	for _ in 0 ..< level do fmt.print("  ")
}
// Pretty-prints one syntax-tree node and its subtree to stdout, indented
// two spaces per depth level. Text is shown with tabs/newlines escaped so
// each node stays on one line; flags print as (name), attributes as
// {key: value}.
print_node :: proc(node: Node, indent_level := 0) {
	print_indent(indent_level)
	switch node.kind {
	case .Text:
		fmt.printf("TEXT: \"%s\"\n", escape_white_space(node.text))
	case .Tag:
		fmt.print("TAG:", node.name)
		for k, v in node.attributes {
			switch v.type {
			case .Flag:
				fmt.printf(" (%s)", k)
			case .Attribute:
				fmt.printf(" {{%s: %s}}", k, v.value)
			}
		}
		fmt.println()
		for child in node.children {
			print_node(child, indent_level + 1)
		}
	}
}
// Appends the plain-text content of the node tree to sb in document order,
// discarding all tag markup (names, attributes, flags). Always returns true.
strip_ltx :: proc(sb: ^strings.Builder, nodes: [dynamic]Node) -> b32 {
	for node in nodes {
		if node.kind == .Text {
			fmt.sbprint(sb, node.text)
		} else {
			strip_ltx(sb, node.children)
		}
	}
	return true
}
// Renders the node tree as XML into sb, wrapping the top level in <ltx>.
// Tags become elements, attributes and flags become XML attributes (flags
// get an empty value), and childless tags self-close.
// Bug fix: text and attribute values are now XML-escaped — previously raw
// '<', '>', '&', and '"' in the input produced malformed XML output.
// NOTE(review): tag names and flag keys are still emitted verbatim.
ltx_to_xml :: proc(sb: ^strings.Builder, nodes: [dynamic]Node, depth := 0) {
	// Writes s with the characters unsafe in XML text/attribute values
	// replaced by their predefined entities.
	sbprint_xml_escaped :: proc(sb: ^strings.Builder, s: string) {
		for c in s {
			switch c {
			case '&':
				fmt.sbprint(sb, "&amp;")
			case '<':
				fmt.sbprint(sb, "&lt;")
			case '>':
				fmt.sbprint(sb, "&gt;")
			case '"':
				fmt.sbprint(sb, "&quot;")
			case:
				fmt.sbprint(sb, c)
			}
		}
	}
	if len(nodes) <= 0 do return
	if depth == 0 do fmt.sbprintln(sb, "<ltx>")
	for node in nodes {
		switch node.kind {
		case .Text:
			sbprint_xml_escaped(sb, node.text)
		case .Tag:
			fmt.sbprintf(sb, "<%s", node.name)
			for k, v in node.attributes {
				fmt.sbprintf(sb, " %s=\"", k)
				sbprint_xml_escaped(sb, v.value)
				fmt.sbprint(sb, "\"")
			}
			if len(node.children) > 0 {
				fmt.sbprint(sb, ">")
				ltx_to_xml(sb, node.children, depth + 1)
				fmt.sbprintf(sb, "</%s>", node.name)
			} else {
				fmt.sbprint(sb, " />")
			}
		}
	}
	if depth == 0 do fmt.sbprintln(sb, "</ltx>")
}
// Maps an Ltx_Error to its human-readable message. Returns "" for .None.
// Cases follow the enum's declaration order.
ltx_error_to_string :: proc(error: Ltx_Error) -> string {
	switch error {
	case .None:
		return ""
	case .EOF:
		return "unexpected end of file"
	case .KeyExpected:
		return "key expected before '='"
	case .ClosingBracketExpected:
		return "closing bracket expected"
	case .ClosingBraceExpected:
		return "closing brace expected"
	case .UnexpectedRightBrace:
		return "unexpected '}'"
	case .ValueExpected:
		return "value expected after '='"
	case .KeyAlreadyExists:
		return "attribute key/flag already exists in attribute"
	case .InvalidKey:
		return "invalid key"
	case .CannotReadFile:
		return "cannot read file"
	}
	return ""
}
// Formats error as a compiler-style diagnostic: "path(line,col): error: msg".
// Line and column are reported 1-based; "[source]" stands in when no file
// path was recorded. Returns "" for .None. Uses the temp allocator (tprintf).
ltx_get_error :: proc(ltx: ^Ltx, error: Ltx_Error) -> string {
	if error == .None {
		return ""
	}
	file_path := ltx.source_path
	if len(file_path) == 0 {
		file_path = "[source]"
	}
	return fmt.tprintf(
		"%s(%d,%d): error: %s",
		file_path,
		ltx.pos.line + 1,
		ltx.pos.col + 1,
		ltx_error_to_string(error),
	)
}
// Returns a copy of s with tabs and newlines replaced by the two-character
// sequences "\t" and "\n", for single-line display of text nodes.
// The returned string aliases the builder's buffer, so the builder is
// deliberately not destroyed; the allocation is owned by the caller.
escape_white_space :: proc(s: string) -> string {
	sb := strings.builder_make()
	for c in s {
		if c == '\t' {
			fmt.sbprint(&sb, "\\t")
		} else if c == '\n' {
			fmt.sbprint(&sb, "\\n")
		} else {
			fmt.sbprint(&sb, c)
		}
	}
	return strings.to_string(sb)
}
// Normalizes whitespace in s line by line: trailing tabs/spaces are
// trimmed, lines that become empty are dropped, and a run of the same
// repeated whitespace character inside a line is collapsed to a single
// occurrence. Each surviving line is emitted with a trailing '\n'.
// Bug fix: the builder was destroyed via `defer` before the caller could
// use the returned string, which aliases the builder's buffer (use after
// free). The builder is now deliberately leaked, matching
// escape_white_space; the caller owns the allocation. Also frees the
// temporary slice from strings.split, which previously leaked.
process_white_space :: proc(s: string) -> string {
	sb := strings.builder_make()
	lines := strings.split(s, "\n")
	defer delete(lines) // safe: output copies text, it does not alias this slice
	for _line in lines {
		line := strings.trim_right(_line, "\t ")
		if len(line) <= 0 do continue
		last_idx := 0
		last_char: u8 = 0
		for i := 0; i < len(line); i += 1 {
			// NOTE(review): '\r' is skipped for run tracking but is still
			// copied out with its surrounding slice — confirm intended.
			if line[i] == '\r' do continue
			if unicode.is_space(rune(line[i])) {
				if last_char == line[i] {
					// Same whitespace repeated: move the slice start past it.
					last_idx = i
				} else {
					fmt.sbprint(&sb, line[last_idx:i])
					last_idx = i
				}
			}
			last_char = line[i]
		}
		fmt.sbprintln(&sb, line[last_idx:len(line)])
	}
	return strings.to_string(sb)
}
+37
View File
@@ -0,0 +1,37 @@
\note[key=value][key2=123][flag1][flag2]{Note Heading}
\a[link_label]{A link}
\section{
\title{Resources}
\list{
\item{\a[link_flag]{sdf}}
\item{\a[link_flag2]{}}
}
}
\lol
\section{
\title{Arbitrary tags!}
\list{
\item{
\name{yo!}
\desc{yoyo!}
}
\item{
\name{oy!}
\desc{oyoy!}
}
}
}
\link[link_flag]{https://example.com}
\link[link_flag]{https://example.com}
\emphasize{Lorem ipsum} dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et
dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.