package ltx

import "core:fmt"
import "core:os"
import "core:path/filepath"
import "core:strings"
import "core:unicode"

// TODO(Paul): change the key-value format to allow for true arbitrary string values with the use of quotes (maybe?) and make the rules clean and easy to remember.
// Entry point: parses the file named by the first CLI argument, prints the
// resulting syntax tree to stdout, then prints an XML rendering of it.
// Bug fix: the source had been HTML-mangled — every `&ltx` had become `<x`
// (`&lt` decoded as '<'); the address-of expressions are restored here.
main :: proc() {
	if len(os.args) < 2 {
		fmt.eprintf("A naive implementation of TeX-based arbitrary markup. Prints syntax tree and space-preserving XML substitution\nUsage:\n\t%s <filepath>\n", os.args[0])
		return
	}

	ltx: Ltx

	parse_err := ltx_parse_file(&ltx, os.args[1])
	if parse_err != .None {
		// ltx_get_error prefixes the message with file/line/column.
		fmt.eprintln(ltx_get_error(&ltx, parse_err))
		return
	}

	// Dump the syntax tree, one node per line, indented by depth.
	for node in ltx.nodes {
		print_node(node)
	}

	sb := strings.builder_make()
	defer strings.builder_destroy(&sb)
	// NOTE(review): the stripped plain-text output is built and then
	// immediately discarded by the reset below — presumably a leftover
	// debugging step; confirm whether it should be printed.
	strip_ltx(&sb, ltx.nodes)
	strings.builder_reset(&sb)
	ltx_to_xml(&sb, ltx.nodes)
	fmt.println(strings.to_string(sb))
}
// Tokens enumerates the single-rune markers significant to the ltx syntax.
// The concrete rune for each token lives in TokenArray below.
Tokens :: enum {
	Backslash,    // introduces a tag: \name
	LeftBrace,    // opens a tag body
	RightBrace,   // closes a tag body
	LeftBracket,  // opens an attribute/flag field
	RightBracket, // closes an attribute/flag field
	Assign,       // separates key from value inside a field
	Quote,        // reserved; never consulted by ltx_parse yet
}

// TokenArray maps each token to its literal rune in the source text.
// This is an enumerated-array literal, so every Tokens member must appear.
TokenArray := [Tokens]rune {
	.Assign = '=',
	.Backslash = '\\',
	.LeftBrace = '{',
	.RightBrace = '}',
	.LeftBracket = '[',
	.RightBracket = ']',
	.Quote = '"',
}
// Node_Kind distinguishes the two node flavors in the syntax tree.
Node_Kind :: enum {
	Text, // raw text run; only Node.text is meaningful
	Tag,  // \name tag; name/attributes/children are meaningful
}

// Attr_Type distinguishes the two bracketed-field forms of a tag.
Attr_Type :: enum {
	Flag,      // bare [name] field; its Field.value stays empty
	Attribute, // [key = value] field
}

// Field is the parsed payload of one bracketed field on a tag.
Field :: struct {
	value: string, // attribute value; "" for a Flag
	type: Attr_Type,
}

// Node is one element of the parsed syntax tree. Which fields are
// meaningful depends on kind (see Node_Kind).
Node :: struct {
	name: string, // tag name (Tag only)
	kind: Node_Kind,
	text: string, // raw text (Text only); a view into Ltx.source
	attributes: map[string]Field, // fields parsed from [..] groups (Tag only)
	children: [dynamic]Node, // nested nodes inside { .. } (Tag only)
}
// Ltx_Error enumerates every failure the parser can report.
// Messages for each value live in ltx_error_to_string.
Ltx_Error :: enum u32 {
	None = 0,               // success
	EOF,                    // input ended mid-construct
	KeyExpected,            // '=' seen with nothing before it in a field
	ClosingBracketExpected, // currently unreported — see NOTE in ltx_parse
	ClosingBraceExpected,   // input ended with unclosed '{'
	UnexpectedRightBrace,   // '}' with no open tag
	ValueExpected,          // ']' seen with nothing (or only blanks) before it
	KeyAlreadyExists,       // duplicate attribute/flag key on one tag
	InvalidKey,             // key fails validate_key rules
	CannotReadFile,         // file missing/unreadable or abs-path resolution failed
	// InvalidKeyStart,
}
// ltx_seek advances the cursor one byte and returns the new current
// character, or .EOF when no next byte exists (cursor left untouched).
// Line/column tracking is based on the character being LEFT: leaving a
// '\n' starts a new line at column 0; anything else bumps the column.
// NOTE(review): indexes the source by byte and casts to rune, so multi-byte
// UTF-8 input is walked byte-by-byte — the parser effectively assumes ASCII.
ltx_seek :: proc(ltx: ^Ltx) -> (rune, Ltx_Error) {
	next_idx := ltx.idx + 1
	if next_idx >= len(ltx.source) do return 0, .EOF
	// Update pos BEFORE moving idx, while the current char is still the
	// one we are stepping off of.
	if ltx_current_char(ltx) == '\n' {
		ltx.pos = {
			col = 0,
			line = ltx.pos.line + 1,
		}
	} else {
		ltx.pos.col += 1
	}
	ltx.idx = next_idx
	c := ltx_current_char(ltx)
	return c, .None
}
// ltx_has_next reports whether at least one more byte follows the cursor.
ltx_has_next :: proc(ltx: ^Ltx) -> b32 {
	return ltx.idx + 1 < len(ltx.source)
}
// ltx_peek returns the byte after the cursor (as a rune) without advancing,
// or .EOF when the cursor is on the last byte.
ltx_peek :: proc(ltx: ^Ltx) -> (rune, Ltx_Error) {
	if ltx_has_next(ltx) {
		return rune(ltx.source[ltx.idx + 1]), .None
	}
	return 0, .EOF
}
// ltx_current_char returns the byte under the cursor as a rune.
// Panics (assert) if the cursor has run past the end of the source.
ltx_current_char :: proc(ltx: ^Ltx) -> rune {
	assert(ltx.idx < len(ltx.source), "index cannot be greater than string length")
	ch := rune(ltx.source[ltx.idx])
	return ch
}
// ltx_consume_whitespace advances past any run of Unicode whitespace,
// leaving the cursor on the first non-whitespace character.
// Returns .EOF (propagated via or_return) if the input ends inside the run.
ltx_consume_whitespace :: proc(ltx: ^Ltx) -> Ltx_Error {
	for unicode.is_white_space(ltx_current_char(ltx)) do ltx_seek(ltx) or_return // TODO: proper error handling instead of bare EOF propagation
	return .None
}
// is_numeric reports whether c is an ASCII decimal digit ('0'..'9').
is_numeric :: proc(c: rune) -> b32 {
	switch c {
	case '0' ..= '9':
		return true
	}
	return false
}
// is_alpha reports whether c is an ASCII letter (a-z or A-Z).
is_alpha :: proc(c: rune) -> b32 {
	switch c {
	case 'a' ..= 'z', 'A' ..= 'Z':
		return true
	}
	return false
}
// validate_key checks that key is a valid attribute identifier: an ASCII
// letter first, then any mix of ASCII letters, digits, and underscores.
// Returns .InvalidKey on violation, .None otherwise. Asserts on empty input.
// NOTE(review): the first-char check reads key[0] as a byte while the loop
// iterates runes — consistent only for ASCII input.
validate_key :: proc(key: string) -> Ltx_Error {
	assert(len(key) > 0, "expected non-empty key")
	first := rune(key[0])
	if !is_alpha(first) {
		return .InvalidKey
	}
	for r in key {
		ok := is_alpha(r) || is_numeric(r) || r == '_'
		if !ok {
			return .InvalidKey
		}
	}
	return .None
}
// Ltx bundles the parser's input, cursor state, and output tree.
Ltx :: struct {
	source: string, // full input text; all Node strings are views into it
	source_path: string, // absolute path of the input file; "" when unset
	nodes: [dynamic]Node, // top-level parse results
	idx: int, // byte offset of the cursor into source
	pos: struct {
		line: u32, // 0-based; ltx_get_error displays it 1-based
		col: u32, // 0-based column within the current line
	},
}
// ltx_parse parses ltx.source into ltx.nodes.
//
// Grammar as implemented:
//   \name [key = value] [flag] { children... }
// where the bracketed fields and the braced body are both optional, and any
// text between tags becomes .Text nodes. Returns the first error hit.
//
// Bug fix: the source had been HTML-mangled — `append(&ltx.nodes, …)` had
// become `append(<x.nodes, …)` in four places; the `&ltx` is restored here.
ltx_parse :: proc(ltx: ^Ltx) -> (err: Ltx_Error) {
	ltx.idx = 0

	// Stack of currently-open tags (whose '{' was seen but not yet matched
	// by '}'); the top of the stack receives new child nodes.
	stack := make([dynamic]Node)
	defer delete(stack)

	// Start of the pending run of plain text; flushed into a .Text node
	// whenever a tag opens or closes.
	text_node_start_idx := 0
	for ltx_has_next(ltx) {
		if ltx_current_char(ltx) == TokenArray[.Backslash] {
			// TODO: check if the char after the slash is a token
			// Flush any text accumulated before this tag.
			if text_node_start_idx < ltx.idx {
				node := Node {
					kind = .Text,
					text = ltx.source[text_node_start_idx:ltx.idx],
				}
				if len(stack) == 0 do append(&ltx.nodes, node)
				else do append(&stack[len(stack) - 1].children, node)
			}
			ltx_seek(ltx) or_return // skip '\'

			// Tag name: one or more letters right after the backslash.
			tag_start_idx := ltx.idx
			for unicode.is_letter(ltx_current_char(ltx)) do ltx_seek(ltx) or_return
			assert(tag_start_idx < ltx.idx)
			tag_name := ltx.source[tag_start_idx:ltx.idx]
			node := Node {
				kind = .Tag,
				name = tag_name,
			}
			// Record where each syntactic part of the tag ended, so the
			// following text run can start right after the last part parsed.
			end_pos: struct {
				tag_name: int,
				fields: int,
				content: int,
			} = {
				tag_name = ltx.idx,
				fields = 0,
				content = 0,
			}

			ltx_consume_whitespace(ltx) or_return
			// Zero or more bracketed fields: [key = value] or bare [flag].
			for ltx_current_char(ltx) == TokenArray[.LeftBracket] {
				c := ltx_seek(ltx) or_return
				field_start := ltx.idx
				key: string
				for ltx_has_next(ltx) {
					if c == TokenArray[.Assign] {
						if field_start >= ltx.idx do return .KeyExpected
						key = strings.trim(ltx.source[field_start:ltx.idx], " \t")
						if len(key) <= 0 do return .KeyExpected
						validate_key(key) or_return
						ltx_seek(ltx) or_return
						field_start = ltx.idx
					} else if c == TokenArray[.RightBracket] {
						if field_start >= ltx.idx do return .ValueExpected
						raw_value := strings.trim(ltx.source[field_start:ltx.idx], " \t")
						if len(raw_value) <= 0 do return .ValueExpected
						value := Field{}
						if key != "" {
							// [key = value] form.
							value.type = .Attribute
							value.value = raw_value
						} else {
							// Bare [flag] form: the text doubles as the key.
							key = raw_value
							value.type = .Flag
						}
						_, _, found := map_upsert(&node.attributes, key, value)
						if found do return .KeyAlreadyExists
						ltx_seek(ltx) or_return
						end_pos.fields = ltx.idx
						break
					}
					c = ltx_seek(ltx) or_return
				}
				// NOTE(review): if the input ends inside '[...]' the loop
				// above exits without reporting .ClosingBracketExpected —
				// the enum value exists but is never produced.
				ltx_consume_whitespace(ltx) or_return
			}
			ltx_consume_whitespace(ltx) or_return
			if ltx_current_char(ltx) == TokenArray[.LeftBrace] {
				ltx_seek(ltx) or_return // consume {
				append(&stack, node)
				end_pos.content = ltx.idx
			} else {
				// Body-less tag: attach it immediately.
				// NOTE(review): this always appends to the top level, even
				// when a parent tag is open on the stack — confirm intent.
				append(&ltx.nodes, node)
			}
			// Text resumes right after the last tag component present.
			if end_pos.content != 0 do text_node_start_idx = end_pos.content
			else if end_pos.fields != 0 do text_node_start_idx = end_pos.fields
			else do text_node_start_idx = end_pos.tag_name
		} else if ltx_current_char(ltx) == TokenArray[.RightBrace] {
			if len(stack) <= 0 do return .UnexpectedRightBrace
			node := pop(&stack)
			// Flush trailing text inside the tag body before closing it.
			if text_node_start_idx < ltx.idx {
				text_node := Node {
					kind = .Text,
					text = ltx.source[text_node_start_idx:ltx.idx],
				}
				append(&node.children, text_node)
			}
			if len(stack) > 0 {
				append(&stack[len(stack) - 1].children, node)
			} else {
				append(&ltx.nodes, node)
			}
			ltx_seek(ltx) or_return // consume }
			text_node_start_idx = ltx.idx
		} else {
			ltx_seek(ltx) or_return
		}
	}
	// Flush any trailing top-level text.
	if text_node_start_idx < ltx.idx {
		append(&ltx.nodes, Node{kind = .Text, text = ltx.source[text_node_start_idx:ltx.idx]})
	}
	if len(stack) > 0 do return .ClosingBraceExpected
	return .None
}
// ltx_parse_file loads file_path, records its absolute path (used by
// ltx_get_error for diagnostics), and parses the contents into ltx.nodes.
// Returns .CannotReadFile when the file cannot be read or the absolute
// path cannot be resolved.
ltx_parse_file :: proc(ltx: ^Ltx, file_path: string) -> Ltx_Error {
	data, read_ok := os.read_entire_file(file_path)
	if !read_ok {
		return .CannotReadFile
	}
	if resolved, abs_ok := filepath.abs(file_path); abs_ok {
		ltx.source_path = resolved
	} else {
		return .CannotReadFile
	}
	ltx.source = string(data)
	return ltx_parse(ltx)
}
// print_indent writes one space per indentation level to stdout.
print_indent :: proc(level: int) {
	for _ in 0 ..< level do fmt.print(" ")
}
// print_node dumps one syntax-tree node, and recursively its children,
// to stdout. Text nodes show their content with whitespace escaped; tag
// nodes show their name followed by "(flag)" and "{key: value}" fields.
print_node :: proc(node: Node, indent_level := 0) {
	print_indent(indent_level)
	switch node.kind {
	case .Text:
		fmt.printf("TEXT: \"%s\"\n", escape_white_space(node.text))
	case .Tag:
		fmt.print("TAG:", node.name)
		for key, field in node.attributes {
			switch field.type {
			case .Flag:
				fmt.printf(" (%s)", key)
			case .Attribute:
				fmt.printf(" {{%s: %s}}", key, field.value)
			}
		}
		fmt.println()
		for child in node.children do print_node(child, indent_level + 1)
	}
}
// strip_ltx appends the concatenated text of every .Text node (depth-first)
// to sb, discarding all tag markup. Always returns true.
strip_ltx :: proc(sb: ^strings.Builder, nodes: [dynamic]Node) -> b32 {
	for node in nodes {
		if node.kind == .Text {
			fmt.sbprint(sb, node.text)
		} else {
			strip_ltx(sb, node.children)
		}
	}
	return true
}
// ltx_to_xml renders the tree as XML into sb, wrapping the whole document in
// a synthetic <ltx> root element at depth 0. Tags without children become
// self-closing elements; flags are emitted as attributes with empty values.
// NOTE(review): neither text nor attribute values are XML-escaped, so input
// containing '<', '&' or '"' yields malformed XML — confirm intent.
ltx_to_xml :: proc(sb: ^strings.Builder, nodes: [dynamic]Node, depth := 0) {
	if len(nodes) <= 0 do return
	at_root := depth == 0
	if at_root do fmt.sbprintln(sb, "<ltx>")
	for node in nodes {
		switch node.kind {
		case .Text:
			fmt.sbprint(sb, node.text)
		case .Tag:
			fmt.sbprintf(sb, "<%s", node.name)
			for key, field in node.attributes {
				fmt.sbprintf(sb, " %s=\"%s\"", key, field.value)
			}
			if len(node.children) == 0 {
				fmt.sbprint(sb, " />")
			} else {
				fmt.sbprint(sb, ">")
				ltx_to_xml(sb, node.children, depth + 1)
				fmt.sbprintf(sb, "</%s>", node.name)
			}
		}
	}
	if at_root do fmt.sbprintln(sb, "</ltx>")
}
// ltx_error_to_string maps an Ltx_Error to its human-readable message.
// .None yields the empty string; the trailing return is unreachable but
// satisfies exhaustiveness.
ltx_error_to_string :: proc(error: Ltx_Error) -> string {
	switch error {
	case .EOF:
		return "unexpected end of file"
	case .CannotReadFile:
		return "cannot read file"
	case .KeyExpected:
		return "key expected before '='"
	case .ValueExpected:
		return "value expected after '='"
	case .KeyAlreadyExists:
		return "attribute key/flag already exists in attribute"
	case .InvalidKey:
		return "invalid key"
	case .ClosingBraceExpected:
		return "closing brace expected"
	case .ClosingBracketExpected:
		return "closing bracket expected"
	case .UnexpectedRightBrace:
		return "unexpected '}'"
	case .None:
		return ""
	}
	return ""
}
// ltx_get_error formats error with the parser's current position as a
// compiler-style diagnostic: "path(line,col): error: message". Returns ""
// for .None. The result lives in the temporary allocator (fmt.tprintf).
ltx_get_error :: proc(ltx: ^Ltx, error: Ltx_Error) -> string {
	if error == .None do return ""
	// Positions are tracked 0-based internally; report them 1-based.
	line := ltx.pos.line + 1
	col := ltx.pos.col + 1
	path := ltx.source_path if len(ltx.source_path) > 0 else "[source]"
	return fmt.tprintf("%s(%d,%d): error: %s", path, line, col, ltx_error_to_string(error))
}
// escape_white_space returns a copy of s with tab and newline characters
// replaced by the visible two-character sequences "\t" and "\n".
// The builder is deliberately not destroyed: the returned string is a view
// into its buffer (strings.to_string), so the allocation must outlive the
// call — callers own (and currently leak) it.
escape_white_space :: proc(s: string) -> string {
	out := strings.builder_make()
	for r in s {
		switch r {
		case '\t':
			fmt.sbprint(&out, "\\t")
		case '\n':
			fmt.sbprint(&out, "\\n")
		case:
			fmt.sbprint(&out, r)
		}
	}
	return strings.to_string(out)
}
// process_white_space normalizes whitespace line by line: trailing tabs and
// spaces are trimmed, blank lines are dropped, and runs of the SAME
// whitespace character are collapsed to one (e.g. "a  b" -> "a b"); each
// surviving line is re-emitted with a trailing '\n'.
// NOTE(review): '\r' is only exempted from the collapse check, not removed
// from the output — confirm whether stripping was intended.
//
// Bug fix: the original had `defer strings.builder_destroy(&sb)`, which freed
// the builder's buffer on return while strings.to_string(sb) returns a slice
// of that same buffer — callers received a dangling string. The builder is
// now intentionally leaked (as in escape_white_space); the temporary slice
// from strings.split is freed instead.
process_white_space :: proc(s: string) -> string {
	sb: strings.Builder = strings.builder_make()
	lines := strings.split(s, "\n")
	defer delete(lines) // the split slice is scratch; its strings view `s`
	for _line in lines {
		line := strings.trim_right(_line, "\t ")
		if len(line) <= 0 do continue
		last_idx := 0    // start of the next segment to emit
		last_char: u8 = 0 // previous byte, to detect repeated whitespace
		for i := 0; i < len(line); i += 1 {
			if line[i] == '\r' do continue
			if unicode.is_space(rune(line[i])) {
				if last_char == line[i] {
					// Repeated identical whitespace: skip the duplicate by
					// advancing the segment start without emitting.
					last_idx = i
				} else {
					fmt.sbprint(&sb, line[last_idx:i])
					last_idx = i
				}
			}
			last_char = line[i]
		}
		fmt.sbprintln(&sb, line[last_idx:len(line)])
	}
	return strings.to_string(sb)
}