diff --git a/Makefile b/Makefile index 6926686..352da1c 100644 --- a/Makefile +++ b/Makefile @@ -3,15 +3,15 @@ OUTPUT:= preparse .PHONY: test debug clean -test: debug +all test: debug ./$(OUTPUT) test.tex -debug: CFLAGS:= -g +debug: CXXFLAGS:= -g debug: $(OUTPUT) clean: $(RM) $(OBJ) $(OUTPUT) $(OUTPUT): $(OBJ) - $(CC) $(OBJ) -o preparse + $(CXX) $(OBJ) -o preparse diff --git a/main.c b/main.c deleted file mode 100644 index 9ce007c..0000000 --- a/main.c +++ /dev/null @@ -1,248 +0,0 @@ -#include -#include -#include -#include - -typedef struct { char *start, *end; } substr_t; - -inline static int substr_len(substr_t * str) { - return (int)(str->end - str->start); -} - -void substr_conv(char * src, substr_t * target) { - target->start = src; - target->end = src + strlen(src); -} - -// make new substr on heap using calloc -// does not copy take ownership of from -substr_t * substr_new(char * from) { - substr_t * ptr = calloc(1, sizeof(substr_t)); - if (ptr == NULL) { return ptr; } - - ptr->start = from; - ptr->end = ptr->start + strlen(ptr->start); - - return ptr; -} - - - -#define testEnd(current) if (*current->start == '\0' || current->start == current->end) return 0 -#define next(current) current->start++; testEnd(current) - -typedef struct { - substr_t name; -} macro_t; - -macro_t * macro_table = NULL; -size_t macro_table_size = 0; - -void macro_add(macro_t *m) { - macro_table = realloc(macro_table, sizeof(macro_t)*(macro_table_size+1)); - if (macro_table== NULL) { - printf("out of MEM!!\n"); - exit(1); - } - - // copy macro - macro_table[macro_table_size] = *m; - macro_table_size++; -} - -void macro_table_drop() { - free(macro_table); -} - -void macro_table_init() { - // add default macros - // /newCommand - // /def - // /begin - // /end - macro_t temp; - substr_conv("newCommand", &temp.name); - macro_add(&temp); - - substr_conv(&temp.name, "renewCommand", &temp.name); - macro_add(&temp); - - substr_conv(&temp.name, "def"); - macro_add(&temp); - - substr_conv(&temp.name, "begin"); - macro_add(&temp); - - substr_conv(&temp.name, "end"); - macro_add(&temp); -} - -int macro_name_cmp(macro_t * one, macro_t * two) { - // length mismatch results in failure - if (substr_len(&one->name) != substr_len(&two->name)) return 0; - - substr_t A = one->name, B = two->name; - - for(; A.start != A.end && *A.start == *B.start; A.start++, B.start++) {} - - if (A.start != A.end) { - return 0; - } - - return 1; -} - -int macro_lookup(macro_t *m) { - if (substr_len(&m->name) == 0) { - return 0; // len == 0 is not allowed - } - - size_t i = 0; - for (; i < macro_table_size; i++) { - if (macro_name_cmp(macro_table + i, m)) { - break; // found it - } - } - - return i; -} - - -// tex macro calls always start with backslash -int parseMacro(macro_t *target, substr_t * current) { - if (*current->start != '\\') { - return 0; - } - - target->name.end = target->name.start = current->start; - - while((*current->start != '\0') && - substr_len(current) > 0 && - !isspace(*current->start) && - *current->start != '[' && - *current->start != '{' ) { - target->name.end = ++current->start; - // check for known - if (macro_lookup(target)) { - break; - // this macros name is known, break for now - } - } - - target->name.end = current->start; - - // read name - next(current); - - int depth = 1; - - // optional arguments - if (*current->start == '[') { - depth = 1; - while(*current->start != '\0' && substr_len(current) > 0 && depth) { - current->start++; - if (*current->start == '[') depth++; - if (*current->start == ']') depth--; - } - - next(current); - } - - // required arguments - if (*current->start == '{') { - depth = 1; - while(*current->start != '\0' && substr_len(current) > 0 && depth) { - current->start++; - if (*current->start == '{') depth++; - if (*current->start == '}') depth--; - } - } - - // all done - return 1; -} - -void printMacro(macro_t * macro) { - printf("macro: "); - char * current = macro->name.start; - while(current != macro->name.end) { - fputc(*current, stdout); - ++current; - } - fputc('\n', stdout); -} - -int parseText(substr_t * str) { - while(substr_len(str) > 0) { - // a text can contain macro calls too - while(substr_len(str) > 0 && *str->start != '\\') { - ++str->start; - } - - // here starts a macro - macro_t macro; - if (!parseMacro(¯o, str)) { - return 0; // strange - } - - printMacro(¯o); - } - return 1; -} - - -#include // for mmap() -#include // for fstat() -#include // for open() -#include // for close() -#include // for perror() - -int main(int argc, char ** args) { - // find all the files the given tex files depend on - - int fd = 0; - struct stat filestat; - - for(;argc > 1; --argc) { - char * filename = args[argc-1]; - printf("looking at %s\n", filename); - - // try to open file - fd = open(filename, O_RDONLY); - if (fd == -1) { - perror("could not open input file"); - continue; - } - fstat(fd, &filestat); - printf("file size: %d\n", filestat.st_size); - - // try to mmap file - char * memory_area = mmap(NULL, filestat.st_size, PROT_READ, MAP_SHARED, fd, 0); - if (memory_area == NULL) { - perror("could not mmap the input"); - continue; - } - - printf("mmap success, parse file\n"); - - substr_t file = { - .start = memory_area, - .end = memory_area + filestat.st_size, - }; - - macro_table_init(); - - if (!parseText(&file)) { - printf("error parsing!"); - } else { - printf("parsed %s\n", filename); - } - - // cleanup - munmap(memory_area, filestat.st_size); - close(fd); - macro_table_drop(); - } - - printf("done\n"); -} diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..7031b9b --- /dev/null +++ b/main.cpp @@ -0,0 +1,427 @@ +/* + * main.cpp + * + * Created on: 07.10.2017 + * Author: julian + */ + +#include // for mmap() +#include // for fstat() +#include // for open() +#include // for close() +#include // for perror() + + +#include +#include +#include +#include +#include + +using namespace std; + +struct Substring { + const char * start, * end; + + Substring(const char * start, const char * end) : + start(start), end(end) {} + + Substring(const char * str): + start(str), end(str + strlen(str)) + {} + + Substring(): + start(nullptr), end(nullptr) + {} + + + std::string toString() const { + std::string temp; + temp.reserve(size()); + + const char * it = start; + while(it != end) { + temp += *it; + it++; + } + + return temp; + } + operator std::string() { + return toString(); + } + + std::string::size_type size() const { + return (std::string::size_type)(end - start); + } + + friend std::ostream &operator<<(std::ostream &out, const Substring &str); + + template + int __compare(IterA a, IterA aend, IterB b, IterB bend) const { + while(a != aend && b != bend) { + if (*a < * b) { + return -1; + } + if (*a > *b) { + return 1; + } + + a++; + b++; + } + + if (a == aend && b == bend) { + return 0; + } + + if (a != aend) { + return 1; + } + + return -1; + } + + + int compare(const Substring &other) const { + return __compare(start, end, other.start, other.end); + } + + int compare(const std::string &other) const { + return __compare(start, end, other.begin(), other.end()); + } + + template + bool operator<(const T &other) const { + return compare(other) < 0; + } + + template + bool operator>(const T &other) const { + return compare(other) > 0; + } + + template + bool operator==(const T &other) const { + return compare(other) == 0; + } + + template + bool operator!=(const T &other) const { + return !(*this == other); + } +}; + + + +std::ostream &operator<<(std::ostream &out, const Substring &str) { + auto temp = str.start; + while(temp != str.end) { + out << *temp; + temp++; + } + + return out; +} + + +class InputExtractor +{ +public: + class Exception : public std::runtime_error { + public: + Exception(const std::string &str) : std::runtime_error(str) {} + }; + + typedef std::vector List; + List operator()(const Substring &input); + + std::string macroExpand(Substring input); + +protected: + Substring file; + std::map macros; +}; + +Substring readBrackets(Substring &input, const char * brackets) { + if (input.size() <= 0 || *input.start != brackets[0]) { + cout << "expected " << brackets[0] << ", got: '" << *input.start << "'"; + return Substring(); + } + + input.start++; + + int depth = 1; + Substring result(input.start, input.start); + + while(depth > 0 && input.size() > 0) { + result.end = ++input.start; + + if (*input.start == brackets[0]) { + depth++; + } + if (*input.start == brackets[1]) { + depth--; + } + } + // advance beyond last bracket + if (input.size()) + input.start++; + + return result; +} + +std::string InputExtractor::macroExpand(Substring input) { + std::string result; + Substring name; + std::map::iterator lookup; + //cout << "expanding: " << input << endl; + + while(input.size() > 0) { + if (*input.start == '\\') { + input.start++; + name.start = name.end = input.start; + + while(input.size() > 0) { + name.end = ++input.start; + + if ((lookup = macros.find(name.toString())) != macros.end()) { + break; + } + } + + if (lookup == macros.end()) { + throw Exception("unknown macro in macro expansion: " + name.toString()); + } + + result += lookup->second.toString(); + } else { + result += *input.start; + } + input.start++; + } + + return result; +} + +#include +typedef std::map> CommandList; + +std::string Extension(std::string str) { + auto it = str.end(); + while(it != str.begin() && *it != '.' && *it != '/' && *it != '\\') { + it--; + } + + return std::string(it, str.end()); +} + +std::string Basedir(std::string path) { + auto it = path.end(); + while(it != path.begin() && *it != '/' && *it != '\\') { + it--; + } + + return std::string(path.begin(), it+1); +} + +std::string Name(std::string path) { + auto it = path.end(); + while(it != path.begin() && *it != '/' && *it != '\\') { + it--; + } + + return std::string(it, path.end()); +} + +std::string Basename(std::string path) { + std::string temp = Name(path); + + auto it = temp.end(); + while(it != temp.begin() && *it != '.') { + it--; + } + + return std::string(temp.begin(), it); +} + +#include + +bool Exists(std::string path) { + std::ifstream file(path); + if (!file) { + return false; + } + + file.close(); + return true; +} + +InputExtractor::List Include(std::string path) { + InputExtractor::List list; + + int fd = open(path.c_str(), O_RDONLY); + if (fd == -1) { + cerr << "cannot open " << path << endl; + return list; + } + + struct stat fileinfo; + if (fstat(fd, &fileinfo) == -1) { + perror("stat"); + close(fd); + return list; + } + + void * memptr = mmap(NULL, fileinfo.st_size, PROT_READ, MAP_SHARED, fd, 0); + + if (memptr == NULL) { + perror("mmap"); + close(fd); + return list; + } + + Substring str((const char *)memptr, (const char*)memptr + fileinfo.st_size); + + std::string basedir = Basedir(path); + list = InputExtractor()(str); + // add basedir to list + for (auto it = list.begin(); it != list.end(); it++) { + *it = basedir + '/' + *it; + } + // cleanup + munmap(memptr, fileinfo.st_size); + close(fd); + return list; +} + +InputExtractor::List InputExtractor::operator()(const Substring &input){ + file = input; + List result; + CommandList IncludeCommands; + + IncludeCommands["input"] = [](List &l, std::string a) { + if (Extension(a) != ".tex") a += ".tex"; + l.push_back(a); + // try to extract all inputs of that file + auto sub = Include(a); + std::copy(sub.begin(), sub.end(), std::inserter(l, l.end())); + }; + + IncludeCommands["include"] = IncludeCommands["input"]; + IncludeCommands["lstinputlisting"] = [](List &l, std::string a){ l.push_back(a); }; + + // skip normie text + while(file.size()) { + + if (*file.start == '%') { + // line commment + while(file.size() > 0 && *file.start != '\n') + file.start++; + + continue; + } + if (*file.start != '\\') { + file.start++; + continue; + } + + // read macro name + file.start++; + + // TODO: throw exception + if (!file.size()) + throw Exception("unexpected EOF"); + + Substring name; + name.start = name.end = file.start; + for(name.end = ++file.start; file.size() > 0 && !isspace(*file.start) && *file.start != '{' && *file.start != '\\'; name.end = (++file.start+1)) { + auto searchHit = IncludeCommands.find(name); + if ((searchHit = IncludeCommands.find(name)) != IncludeCommands.end()) { + file.start++; + cout << searchHit->first; + cout << "[" << readBrackets(file, "[]") << "]"; + auto args = readBrackets(file, "{}"); + cout << ":" << args << endl; + searchHit->second(result, macroExpand(args)); + break; + } else if (name == std::string("def")) { + file.start++; + + if (file.size() <= 0 || *file.start != '\\') { + continue; + } + file.start++; + + Substring name(file.start, file.end); + while (file.size() > 0 && *file.start != '{' && !isspace(*file.start)) { + name.end = ++file.start; + } + + cout << "new macro definition: " << name << endl; + macros.insert(std::pair(name.toString(), readBrackets(file, "{}"))); + break; + } + } + } + + return result; +} + +int main(int argc, char ** args) { + // find all the files the given tex files depend on + + int fd = 0; + struct stat filestat; + + for(;argc > 1; --argc) { + char * filename = args[argc-1]; + cout << "looking at " << filename << std::endl; + + // try to open file + fd = open(filename, O_RDONLY); + if (fd == -1) { + perror("could not open input file"); + continue; + } + + fstat(fd, &filestat); + //cout << "file size: " << filestat.st_size << endl; + + // try to mmap file + void * memory_area = mmap(NULL, filestat.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (memory_area == nullptr) { + perror("could not mmap the input"); + continue; + } + + + //cout << "mmap success, parse file" << endl; + + Substring file((const char*)memory_area, (const char*)memory_area + filestat.st_size); + try { + auto list = InputExtractor()(file); + // write in makefile style + std::ofstream output(Basename(filename) + ".d"); + if (!output) { + std::cout << "could not create output file" << std::endl; + } else { + output << "filename: "; + for (auto it = list.begin(); it != list.end(); it++) { + output << *it << "\\\\\n"; + } + output << endl; + } + + output.close(); + } catch(InputExtractor::Exception &e) { + cout << e.what() << endl; + } + + // cleanup + munmap(memory_area, filestat.st_size); + close(fd); + } + + printf("done\n"); +} diff --git a/test.tex b/test.tex index a5a9c3a..27fa66e 100644 --- a/test.tex +++ b/test.tex @@ -1,3 +1,5 @@ -\def{hi} +\def\hi{hi} \include{\hi/test.tex} + +\lstinputlisting{jkhdfkjlhsdfkjsdhfk} \ No newline at end of file