From 512744013eb4043f55b1d7c5640437c064ac3f5a Mon Sep 17 00:00:00 2001
From: Julian Daube
Date: Sat, 7 Oct 2017 23:21:00 +0200
Subject: [PATCH] initial

---
 Makefile |  18 ++++
 main.c   | 267 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 test.tex |   3 +
 3 files changed, 288 insertions(+)
 create mode 100644 Makefile
 create mode 100644 main.c
 create mode 100644 test.tex

Review notes (this section is ignored by git am and is not part of the
commit message):
The commit id in the first line and the blob ids in the index lines were
generated for the patch as first written. Makefile and main.c have been
edited during review, so those ids no longer describe their contents.
The header names in the #include lines of main.c were lost in transit and
have been restored from the functions the file uses; <errno.h> is a guess.

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6926686
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,18 @@
+# builds the preparse binary, "make test" runs a debug build on test.tex
+OBJ:=main.o
+OUTPUT:= preparse
+
+.PHONY: test debug clean
+
+test: debug
+	./$(OUTPUT) test.tex
+
+debug: CFLAGS:= -g
+debug: $(OUTPUT)
+
+clean:
+	$(RM) $(OBJ) $(OUTPUT)
+
+$(OUTPUT): $(OBJ)
+	$(CC) $(OBJ) -o $(OUTPUT)
+
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..9ce007c
--- /dev/null
+++ b/main.c
@@ -0,0 +1,267 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+// half-open character range [start, end) into a buffer owned elsewhere
+typedef struct { char *start, *end; } substr_t;
+
+// number of characters left in the range
+inline static int substr_len(substr_t * str) {
+	return (int)(str->end - str->start);
+}
+
+// true once the range is exhausted or a NUL byte is reached
+inline static int substr_at_end(substr_t * str) {
+	return str->start == str->end || *str->start == '\0';
+}
+
+// make target span the NUL-terminated string src (nothing is copied)
+void substr_conv(char * src, substr_t * target) {
+	target->start = src;
+	target->end = src + strlen(src);
+}
+
+// make new substr on heap using calloc
+// does not copy or take ownership of from, the caller frees the result
+// returns NULL if from is NULL or the allocation fails
+substr_t * substr_new(char * from) {
+	if (from == NULL) { return NULL; }
+
+	substr_t * ptr = calloc(1, sizeof *ptr);
+	if (ptr == NULL) { return ptr; }
+
+	substr_conv(from, ptr);
+
+	return ptr;
+}
+
+typedef struct {
+	substr_t name;
+} macro_t;
+
+// table of known macro names, grown one entry at a time by macro_add
+macro_t * macro_table = NULL;
+size_t macro_table_size = 0;
+
+// append a copy of *m to the table, exits the program when out of memory
+void macro_add(macro_t *m) {
+	// keep the old pointer until realloc is known to have succeeded
+	macro_t * grown = realloc(macro_table, sizeof(macro_t)*(macro_table_size+1));
+	if (grown == NULL) {
+		printf("out of MEM!!\n");
+		exit(1);
+	}
+	macro_table = grown;
+
+	// copy macro
+	macro_table[macro_table_size] = *m;
+	macro_table_size++;
+}
+
+// free the table and reset it so that macro_table_init can fill it again
+void macro_table_drop(void) {
+	free(macro_table);
+	macro_table = NULL;
+	macro_table_size = 0;
+}
+
+// fill the table with the default macros
+// names are stored without the leading backslash
+void macro_table_init(void) {
+	static char * const defaults[] = {
+		"newCommand", "renewCommand", "def", "begin", "end",
+	};
+
+	for (size_t i = 0; i < sizeof defaults / sizeof defaults[0]; i++) {
+		macro_t temp;
+		substr_conv(defaults[i], &temp.name);
+		macro_add(&temp);
+	}
+}
+
+// returns 1 if both names have the same length and characters, 0 otherwise
+int macro_name_cmp(macro_t * one, macro_t * two) {
+	int len = substr_len(&one->name);
+
+	// length mismatch results in failure
+	if (len < 0 || len != substr_len(&two->name)) return 0;
+
+	return memcmp(one->name.start, two->name.start, (size_t)len) == 0;
+}
+
+// returns 1 if the name of m is in the macro table, 0 otherwise
+int macro_lookup(macro_t *m) {
+	if (substr_len(&m->name) == 0) {
+		return 0; // len == 0 is not allowed
+	}
+
+	for (size_t i = 0; i < macro_table_size; i++) {
+		if (macro_name_cmp(macro_table + i, m)) {
+			return 1; // found it
+		}
+	}
+
+	return 0;
+}
+
+// skip the group that starts with opener at current->start, honouring nesting
+// returns 1 with current->start just behind the matching closer,
+// 0 if the input ends before the group is closed
+static int skipGroup(substr_t * current, char opener, char closer) {
+	int depth = 0;
+
+	while (!substr_at_end(current)) {
+		char c = *current->start++;
+		if (c == opener) {
+			depth++;
+		} else if (c == closer && --depth == 0) {
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+// tex macro calls always start with backslash
+// on success target->name holds the name without the backslash and
+// current->start is behind the optional [..] and the required {..} argument
+// returns 0 if current does not start with a macro or an argument is not closed
+int parseMacro(macro_t *target, substr_t * current) {
+	if (substr_at_end(current) || *current->start != '\\') {
+		return 0;
+	}
+
+	// the table stores names without the backslash, so leave it out here too
+	current->start++;
+	target->name.end = target->name.start = current->start;
+
+	// read name
+	while(!substr_at_end(current) &&
+			!isspace((unsigned char)*current->start) &&
+			*current->start != '[' &&
+			*current->start != '{' ) {
+		target->name.end = ++current->start;
+		// check for known
+		if (macro_lookup(target)) {
+			break;
+			// this macros name is known, break for now
+		}
+	}
+
+	// optional arguments
+	if (!substr_at_end(current) && *current->start == '[') {
+		if (!skipGroup(current, '[', ']')) return 0;
+	}
+
+	// required arguments
+	if (!substr_at_end(current) && *current->start == '{') {
+		if (!skipGroup(current, '{', '}')) return 0;
+	}
+
+	// all done
+	return 1;
+}
+
+// print "macro: " followed by the macro name and a newline
+void printMacro(macro_t * macro) {
+	printf("macro: ");
+	fwrite(macro->name.start, 1, (size_t)substr_len(&macro->name), stdout);
+	fputc('\n', stdout);
+}
+
+// print every macro call found in str, consuming str on the way
+// returns 0 if a macro call could not be parsed, 1 otherwise
+int parseText(substr_t * str) {
+	while(substr_len(str) > 0) {
+		// a text can contain macro calls too
+		while(substr_len(str) > 0 && *str->start != '\\') {
+			++str->start;
+		}
+
+		if (substr_len(str) == 0) {
+			break; // only plain text was left, that is not an error
+		}
+
+		// here starts a macro
+		macro_t macro;
+		if (!parseMacro(&macro, str)) {
+			return 0; // strange
+		}
+
+		printMacro(&macro);
+	}
+	return 1;
+}
+
+
+#include <sys/mman.h> // for mmap()
+#include <sys/stat.h> // for fstat()
+#include <fcntl.h> // for open()
+#include <unistd.h> // for close()
+#include <errno.h> // error codes reported by perror()
+
+int main(int argc, char ** args) {
+	// find all the files the given tex files depend on
+
+	int fd = 0;
+	struct stat filestat;
+
+	for(;argc > 1; --argc) {
+		char * filename = args[argc-1];
+		printf("looking at %s\n", filename);
+
+		// try to open file
+		fd = open(filename, O_RDONLY);
+		if (fd == -1) {
+			perror("could not open input file");
+			continue;
+		}
+
+		if (fstat(fd, &filestat) == -1) {
+			perror("could not stat input file");
+			close(fd);
+			continue;
+		}
+		// st_size is an off_t, so widen it explicitly for printf
+		printf("file size: %lld\n", (long long)filestat.st_size);
+
+		if (filestat.st_size <= 0) {
+			// mmap() rejects a zero length and there is nothing to parse
+			printf("parsed %s\n", filename);
+			close(fd);
+			continue;
+		}
+
+		// try to mmap file, failure is reported as MAP_FAILED and not as NULL
+		char * memory_area = mmap(NULL, (size_t)filestat.st_size, PROT_READ, MAP_SHARED, fd, 0);
+		if (memory_area == MAP_FAILED) {
+			perror("could not mmap the input");
+			close(fd);
+			continue;
+		}
+
+		printf("mmap success, parse file\n");
+
+		substr_t file = {
+			.start = memory_area,
+			.end = memory_area + filestat.st_size,
+		};
+
+		macro_table_init();
+
+		if (!parseText(&file)) {
+			printf("error parsing!\n");
+		} else {
+			printf("parsed %s\n", filename);
+		}
+
+		// cleanup
+		munmap(memory_area, (size_t)filestat.st_size);
+		close(fd);
+		macro_table_drop();
+	}
+
+	printf("done\n");
+	return 0;
+}
diff --git a/test.tex b/test.tex
new file mode 100644
index 0000000..a5a9c3a
--- /dev/null
+++ b/test.tex
@@ -0,0 +1,3 @@
+\def{hi}
+
+\include{\hi/test.tex}