From f7122b9513a148130cba190e5c67205fa454bb9a Mon Sep 17 00:00:00 2001 From: marrub Date: Fri, 5 Jun 2015 07:06:28 -0400 Subject: [PATCH] Initial commit --- .gitignore | 4 + LICENSE.txt | 20 +++ Makefile | 8 + lt.c | 488 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lt.h | 117 +++++++++++++ parser.lua | 96 +++++++++++ 6 files changed, 733 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE.txt create mode 100644 Makefile create mode 100644 lt.c create mode 100644 lt.h create mode 100644 parser.lua diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d78137b --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +bin/* +test/* +test +bin diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..064bf6c --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,20 @@ +Copyright (c) 2015 Benjamin Moir +Copyright (c) 2015 Marrub + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..912e692
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,8 @@
+# sorry this makefile doesn't include anything cross-platform
+# was hurriedly made to test it
+
+all:
+	mkdir -p bin
+	mingw32-gcc --std=c99 -g -ggdb -c -o bin/lt.o lt.c
+	mingw32-gcc -shared -g -ggdb -o bin/LoveToken.dll bin/lt.o -Wl,--out-implib,bin/libLoveToken.a
+	# cp bin/LoveToken.dll test/LoveToken.dll
diff --git a/lt.c b/lt.c
new file mode 100644
index 0000000..d06ffa3
--- /dev/null
+++ b/lt.c
@@ -0,0 +1,488 @@
+/*
+Copyright (c) 2015 Benjamin Moir
+Copyright (c) 2015 Marrub
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "lt.h"
+
+FILE *LT_ParseFile;
+bool LT_AssertError = false;
+LT_GarbageList *gbHead, *gbRover;
+
+static bool escapeChars = true;
+
+// Returned when no heap string can be produced; never put on the garbage list.
+static char emptyString[] = "";
+
+static char *tokenTypes[] = {
+    // [marrub] So, this was an interesting bug. This was completely misordered from the enum.
+    // As can be guessed, this caused many issues. Seriously, all of them.
+    // The order here must match the TOK_* enum in lt.h entry for entry.
+    "TOK_Colon",  "TOK_Comma",  "TOK_Div",    "TOK_Mod",    "TOK_Mul",    "TOK_Query",
+    "TOK_BraceO", "TOK_BraceC", "TOK_BrackO", "TOK_BrackC", "TOK_ParenO", "TOK_ParenC",
+    "TOK_LnEnd",  "TOK_Add2",   "TOK_Add",    "TOK_And2",   "TOK_And",    "TOK_CmpGE",
+    "TOK_ShR",    "TOK_CmpGT",  "TOK_CmpLE",  "TOK_ShL",    "TOK_CmpNE",  "TOK_CmpLT",
+    "TOK_CmpEQ",  "TOK_Equal",  "TOK_Not",    "TOK_OrI2",   "TOK_OrI",    "TOK_OrX2",
+    "TOK_OrX",    "TOK_Sub2",   "TOK_Sub",    "TOK_String", "TOK_Charac", "TOK_Number",
+    "TOK_Identi", "TOK_EOF",    "TOK_ChrSeq"
+};
+
+// Reads one character from LT_ParseFile into *c. Returns false, leaving *c
+// alone, at end of file, on a read error, or when no file is open.
+static bool LT_ReadChar(char *c)
+{
+    int ch = LT_ParseFile != NULL ? fgetc(LT_ParseFile) : EOF;
+
+    if(ch == EOF)
+    {
+        return false;
+    }
+
+    *c = (char)ch;
+    return true;
+}
+
+// Gives a character obtained from LT_ReadChar back to the stream. ungetc is
+// used because seeking back one byte is not reliable on a text-mode stream.
+static void LT_UnreadChar(char c)
+{
+    ungetc((unsigned char)c, LT_ParseFile);
+}
+
+// Consumes the next character only if it equals expect.
+static bool LT_NextIs(char expect)
+{
+    char c;
+    bool got = LT_ReadChar(&c);
+
+    if(got && c != expect)
+    {
+        LT_UnreadChar(c);
+    }
+
+    return got && c == expect;
+}
+
+// Current stream position, or 0 when no file is open.
+static int LT_Tell(void)
+{
+    return LT_ParseFile != NULL ? (int)ftell(LT_ParseFile) : 0;
+}
+
+// Makes sure *str can hold need bytes, growing it in whole blocks. On failure
+// the old buffer stays valid, the error is reported and false is returned.
+static bool LT_Reserve(char **str, size_t *cap, size_t need)
+{
+    if(need > *cap)
+    {
+        size_t blocks = (need + TOKEN_STR_BLOCK_LENGTH - 1) / TOKEN_STR_BLOCK_LENGTH;
+        size_t newCap = blocks * TOKEN_STR_BLOCK_LENGTH;
+        char *grown = realloc(*str, newCap);
+
+        if(LT_Assert(grown == NULL, "out of memory"))
+        {
+            return false;
+        }
+
+        *str = grown;
+        *cap = newCap;
+    }
+
+    return true;
+}
+
+// Terminates str after len characters (the caller leaves room for that), shrinks
+// it to fit and registers it in the garbage list so that LT_Quit frees it.
+static char *LT_TrackString(char *str, size_t len)
+{
+    LT_GarbageList *node = gbRover != NULL ? malloc(sizeof(LT_GarbageList)) : NULL;
+    char *fitted;
+
+    str[len++] = '\0';
+
+    if(LT_Assert(node == NULL, "cannot track string (out of memory or LT_Init not called)"))
+    {
+        free(str);
+        return emptyString;
+    }
+
+    // If shrinking fails the larger buffer is still valid, so keep using it.
+    fitted = realloc(str, len);
+
+    node->ptr = fitted != NULL ? fitted : str;
+    node->next = NULL;
+
+    gbRover->next = node;
+    gbRover = node;
+
+    return node->ptr;
+}
+
+// Stores the settings and, on first use, creates the (empty) garbage list.
+// A repeated call keeps the existing list so that nothing tracked is leaked.
+void LT_Init(LT_InitInfo initInfo)
+{
+    escapeChars = initInfo.escapeChars;
+
+    if(gbHead != NULL)
+    {
+        return;
+    }
+
+    gbHead = malloc(sizeof(LT_GarbageList));
+
+    if(!LT_Assert(gbHead == NULL, "out of memory"))
+    {
+        gbHead->next = NULL;
+        gbHead->ptr = NULL;
+    }
+
+    gbRover = gbHead;
+}
+
+// Frees every tracked string together with the list nodes that hold them.
+void LT_Quit()
+{
+    LT_GarbageList *node = gbHead;
+
+    while(node != NULL)
+    {
+        LT_GarbageList *next = node->next;
+
+        free(node->ptr); // the head node holds NULL; free(NULL) does nothing
+        free(node);
+
+        node = next;
+    }
+
+    gbRover = NULL;
+    gbHead = NULL;
+}
+
+// Reports str as an error when assertion is TRUE: the argument is the failure
+// condition, not the expected one. Returns assertion so it can be used in an if.
+bool LT_Assert(bool assertion, const char *str)
+{
+    if(assertion)
+    {
+        LT_AssertError = true;
+        fprintf(stderr, "LT_Assert: %s", str);
+    }
+
+    return assertion;
+}
+
+// Opens filePath for parsing, closing any file that was opened earlier.
+// Returns false, after printing the reason, if the file cannot be opened.
+bool LT_OpenFile(const char *filePath)
+{
+    LT_CloseFile();
+
+    LT_ParseFile = fopen(filePath, "r");
+
+    if(LT_ParseFile == NULL)
+    {
+        perror("LT_OpenFile");
+        return false;
+    }
+
+    return true;
+}
+
+// Closes the parse file if one is open. Calling it repeatedly is harmless.
+void LT_CloseFile()
+{
+    if(LT_ParseFile != NULL)
+    {
+        fclose(LT_ParseFile);
+        LT_ParseFile = NULL;
+    }
+}
+
+// Reads a run of letters and digits and returns it as a tracked string.
+// The first character that is neither is left in the stream.
+char *LT_ReadNumber()
+{
+    size_t len = 0, cap = TOKEN_STR_BLOCK_LENGTH;
+    char c, *str = malloc(cap);
+
+    if(LT_Assert(str == NULL, "out of memory"))
+    {
+        return emptyString;
+    }
+
+    while(LT_ReadChar(&c))
+    {
+        if(!isalnum((unsigned char)c))
+        {
+            LT_UnreadChar(c);
+            break;
+        }
+
+        // +2: room for this character and for the terminator added later
+        if(!LT_Reserve(&str, &cap, len + 2))
+        {
+            break;
+        }
+
+        str[len++] = c;
+    }
+
+    return LT_TrackString(str, len);
+}
+
+// Reads up to and including the closing term character (the opening one is
+// already consumed) and returns the text between as a tracked string.
+char *LT_ReadString(char term)
+{
+    size_t len = 0, cap = TOKEN_STR_BLOCK_LENGTH;
+    char c, *str = malloc(cap);
+
+    if(LT_Assert(str == NULL, "out of memory"))
+    {
+        return emptyString;
+    }
+
+    while(true)
+    {
+        bool got = LT_ReadChar(&c);
+
+        if(got && c == term)
+        {
+            break;
+        }
+
+        if(LT_Assert(!got || c == '\n', "unterminated string literal"))
+        {
+            free(str);
+            return emptyString;
+        }
+
+        if(!LT_Reserve(&str, &cap, len + 2))
+        {
+            break;
+        }
+
+        if(c == '\\' && escapeChars)
+        {
+            got = LT_ReadChar(&c);
+
+            // Keep what was read so far, as before, but let it be tracked below.
+            if(LT_Assert(!got || c == '\n', "unterminated string literal"))
+            {
+                break;
+            }
+
+            str = LT_Escaper(str, len++, c);
+        }
+        else
+        {
+            str[len++] = c;
+        }
+    }
+
+    return LT_TrackString(str, len);
+}
+
+// Stores the character named by the backslash escape `escape` in str[pos] and
+// returns str. Hex and octal escapes read their further digits from LT_ParseFile.
+char *LT_Escaper(char *str, size_t pos, char escape)
+{
+    unsigned int value = 0;
+    char c;
+
+    switch(escape)
+    {
+        case '\\': case '\'': case '"': str[pos] = escape; break;
+        case 'C': str[pos] = '\x1C'; break;
+        case 'a': str[pos] = '\a'; break;
+        case 'b': str[pos] = '\b'; break;
+        case 'f': str[pos] = '\f'; break;
+        case 'n': str[pos] = '\n'; break;
+        case 'r': str[pos] = '\r'; break;
+        case 't': str[pos] = '\t'; break;
+        case 'v': str[pos] = '\v'; break;
+        case 'x': // [marrub] THIS ONE IS FUN
+            // Any number of hex digits; the first other character ends the escape.
+            while(LT_ReadChar(&c))
+            {
+                if(!isxdigit((unsigned char)c))
+                {
+                    LT_UnreadChar(c);
+                    break;
+                }
+
+                value = value * 16 + (unsigned int)(isdigit((unsigned char)c) ? c - '0' : tolower((unsigned char)c) - 'a' + 10);
+            }
+
+            str[pos] = (char)value;
+            break;
+
+        case '0': case '1': case '2': case '3':
+        case '4': case '5': case '6': case '7':
+            // escape is the first octal digit; at most two more may follow.
+            value = (unsigned int)(escape - '0');
+
+            for(unsigned int n = 2; n != 0 && LT_ReadChar(&c); n--)
+            {
+                if(c < '0' || c > '7')
+                {
+                    LT_UnreadChar(c);
+                    break;
+                }
+
+                value = value * 8 + (unsigned int)(c - '0');
+            }
+
+            str[pos] = (char)value;
+            break;
+
+        default:
+            // Keep the character itself so that str[pos] is never left unset.
+            LT_Assert(true, "unknown escape character");
+            str[pos] = escape;
+            break;
+    }
+
+    return str;
+}
+
+// Reads the next token from LT_ParseFile. tk.token is a name from tokenTypes, or
+// NULL for a character that starts no token; tk.string is set for strings,
+// characters, numbers and identifiers; tk.pos is the stream position just after
+// the token's first character.
+LT_Token LT_GetToken()
+{
+    char c;
+    LT_Token tk = { 0 };
+
+    // Skip blanks; a newline is a token of its own.
+    do
+    {
+        if(!LT_ReadChar(&c)) // [marrub] This could have caused issues if there was whitespace before EOF.
+        {
+            tk.token = tokenTypes[TOK_EOF];
+            tk.pos = LT_Tell();
+            return tk;
+        }
+    }
+    while(isspace((unsigned char)c) && c != '\n');
+
+    tk.pos = LT_Tell();
+
+    switch(c)
+    {
+        case ':':  tk.token = tokenTypes[TOK_Colon];  return tk;
+        case ',':  tk.token = tokenTypes[TOK_Comma];  return tk;
+        case '/':  tk.token = tokenTypes[TOK_Div];    return tk;
+        case '%':  tk.token = tokenTypes[TOK_Mod];    return tk;
+        case '*':  tk.token = tokenTypes[TOK_Mul];    return tk;
+        case '?':  tk.token = tokenTypes[TOK_Query];  return tk;
+        case '{':  tk.token = tokenTypes[TOK_BraceO]; return tk;
+        case '}':  tk.token = tokenTypes[TOK_BraceC]; return tk;
+        case '[':  tk.token = tokenTypes[TOK_BrackO]; return tk;
+        case ']':  tk.token = tokenTypes[TOK_BrackC]; return tk;
+        case '(':  tk.token = tokenTypes[TOK_ParenO]; return tk;
+        case ')':  tk.token = tokenTypes[TOK_ParenC]; return tk;
+        case '\n': tk.token = tokenTypes[TOK_LnEnd];  return tk;
+
+        // Operators that may be doubled ("+" and "++", "=" and "==", ...).
+        case '+': tk.token = tokenTypes[LT_NextIs('+') ? TOK_Add2  : TOK_Add];   return tk;
+        case '-': tk.token = tokenTypes[LT_NextIs('-') ? TOK_Sub2  : TOK_Sub];   return tk;
+        case '&': tk.token = tokenTypes[LT_NextIs('&') ? TOK_And2  : TOK_And];   return tk;
+        case '=': tk.token = tokenTypes[LT_NextIs('=') ? TOK_CmpEQ : TOK_Equal]; return tk;
+        case '^': tk.token = tokenTypes[LT_NextIs('^') ? TOK_OrX2  : TOK_OrX];   return tk;
+        case '|': tk.token = tokenTypes[LT_NextIs('|') ? TOK_OrI2  : TOK_OrI];   return tk;
+
+        // [marrub] Special god damn snowflakes
+        case '>':
+            tk.token = tokenTypes[LT_NextIs('=') ? TOK_CmpGE :
+                                  LT_NextIs('>') ? TOK_ShR   : TOK_CmpGT];
+            return tk;
+        case '<':
+            tk.token = tokenTypes[LT_NextIs('=') ? TOK_CmpLE :
+                                  LT_NextIs('<') ? TOK_ShL   :
+                                  LT_NextIs('>') ? TOK_CmpNE : TOK_CmpLT];
+            return tk;
+        case '!':
+            tk.token = tokenTypes[LT_NextIs('=') ? TOK_CmpNE : TOK_Not];
+            return tk;
+        case '~':
+            if(LT_NextIs('='))
+            {
+                tk.token = tokenTypes[TOK_CmpNE];
+            }
+            else
+            {
+                LT_Assert(true, "syntax error"); // [marrub] Yet more error checking that was forgotten before.
+            }
+
+            return tk;
+        case '"': case '\'':
+            tk.string = LT_ReadString(c);
+            tk.token = tokenTypes[c == '"' ? TOK_String : TOK_Charac];
+            return tk;
+    }
+
+    if(isdigit((unsigned char)c))
+    {
+        LT_UnreadChar(c);
+
+        tk.string = LT_ReadNumber();
+        tk.token = tokenTypes[TOK_Number];
+        return tk;
+    }
+
+    if(isalpha((unsigned char)c) || c == '_')
+    {
+        size_t len = 0, cap = TOKEN_STR_BLOCK_LENGTH;
+        char *str = malloc(cap);
+        bool more = true;
+
+        if(LT_Assert(str == NULL, "out of memory"))
+        {
+            return tk;
+        }
+
+        while(more && (isalnum((unsigned char)c) || c == '_') && LT_Reserve(&str, &cap, len + 2))
+        {
+            str[len++] = c;
+            more = LT_ReadChar(&c);
+        }
+
+        // At end of file nothing was read, so there is nothing to give back.
+        if(more)
+        {
+            LT_UnreadChar(c);
+        }
+
+        tk.string = LT_TrackString(str, len);
+        tk.token = tokenTypes[TOK_Identi];
+        return tk;
+    }
+
+    return tk;
+}
+
diff --git a/lt.h b/lt.h
new file mode 100644
index 0000000..562016e
--- /dev/null
+++ b/lt.h
@@ -0,0 +1,117 @@
+/*
+Copyright (c) 2015 Benjamin Moir
+Copyright (c) 2015 Marrub
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#ifndef LOVETOKEN_LT_H
+#define LOVETOKEN_LT_H
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define TOKEN_STR_BLOCK_LENGTH 512 // size in bytes of the blocks token strings are allocated in
+
+// [marrub] When using in FFI, remove this from the definitions
+// Also redefine this for cross-platform.
+#define LT_EXPORT __declspec(dllexport)
+
+typedef struct
+{
+    bool escapeChars; // when true, LT_ReadString decodes backslash escapes
+} LT_InitInfo;
+
+typedef struct
+{
+    char *token;  // token type name such as "TOK_Add"; NULL if the character starts no token
+    char *string; // text of string, character, number and identifier tokens, otherwise NULL
+    int pos;      // stream position (ftell) just after the token's first character
+} LT_Token;
+
+extern bool LT_EXPORT LT_AssertError; // set to true once LT_Assert has reported an error
+
+void LT_EXPORT LT_Init(LT_InitInfo initInfo);
+void LT_EXPORT LT_Quit(void);
+bool LT_EXPORT LT_Assert(bool assertion, const char *str); // reports str when assertion is TRUE
+
+bool LT_EXPORT LT_OpenFile(const char *filePath);
+void LT_EXPORT LT_CloseFile(void);
+
+char *LT_EXPORT LT_ReadNumber(void);
+char *LT_EXPORT LT_ReadString(char term);
+char *LT_EXPORT LT_Escaper(char *str, size_t pos, char escape);
+LT_Token LT_EXPORT LT_GetToken(void);
+
+// [marrub] Don't include stuff below here into the FFI definitions
+
+typedef struct LT_GarbageList_s
+{
+    struct LT_GarbageList_s *next;
+    void *ptr; // allocation released by LT_Quit; NULL in the list head
+} LT_GarbageList;
+
+extern FILE *LT_ParseFile;
+
+enum // token kinds; tokenTypes[] in lt.c must list their names in this order
+{
+    TOK_Colon,
+    TOK_Comma,
+    TOK_Div,
+    TOK_Mod,
+    TOK_Mul,
+    TOK_Query,
+    TOK_BraceO,
+    TOK_BraceC,
+    TOK_BrackO,
+    TOK_BrackC,
+    TOK_ParenO,
+    TOK_ParenC,
+    TOK_LnEnd,
+    TOK_Add2,
+    TOK_Add,
+    TOK_And2,
+    TOK_And,
+    TOK_CmpGE,
+    TOK_ShR,
+    TOK_CmpGT,
+    TOK_CmpLE,
+    TOK_ShL,
+    TOK_CmpNE,
+    TOK_CmpLT,
+    TOK_CmpEQ,
+    TOK_Equal,
+    TOK_Not,
+    TOK_OrI2,
+    TOK_OrI,
+    TOK_OrX2,
+    TOK_OrX,
+    TOK_Sub2,
+    TOK_Sub,
+    TOK_String,
+    TOK_Charac,
+    TOK_Number,
+    TOK_Identi,
+    TOK_EOF,
+    TOK_ChrSeq
+};
+
+#endif
+
diff --git a/parser.lua b/parser.lua
new file mode 100644
index 0000000..85b2f68
--- /dev/null
+++ b/parser.lua
@@ -0,0 +1,96 @@
+--[[
+
+Copyright (c) 2015 Benjamin Moir
+Copyright (c) 2015 Marrub
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+--]]
+
+local ffi = require("ffi")
+local parser = {}
+
+local loveToken = ffi.load("LoveToken")
+ffi.cdef([[
+typedef struct { bool escapeChars; } LT_InitInfo;
+typedef struct { char *token; char *string; int pos; } LT_Token;
+extern bool LT_AssertError;
+void LT_Init(LT_InitInfo initInfo);
+void LT_Quit();
+bool LT_Assert(bool assertion, const char *str);
+bool LT_OpenFile(const char *filePath);
+void LT_CloseFile();
+char *LT_ReadNumber();
+char *LT_ReadString(char term);
+char *LT_Escaper(char *str, size_t pos, char escape);
+LT_Token LT_GetToken();
+]])
+
+-- C `char` arguments must be numbers; also accept a one-character Lua string.
+local function toChar(c)
+    return type(c) == "string" and c:byte() or c
+end
+
+-- Returns what LT_OpenFile returns, so a file that fails to open is noticed.
+function parser:init(initInfo, filePath)
+    loveToken.LT_Init(initInfo)
+    return loveToken.LT_OpenFile(filePath)
+end
+
+function parser:assert(assertion, str)
+    return loveToken.LT_Assert(assertion, str)
+end
+
+function parser:openFile(filePath)
+    return loveToken.LT_OpenFile(filePath)
+end
+
+function parser:closeFile()
+    loveToken.LT_CloseFile()
+end
+
+function parser:quit()
+    loveToken.LT_CloseFile()
+    loveToken.LT_Quit()
+end
+
+function parser:readNumber()
+    return ffi.string(loveToken.LT_ReadNumber())
+end
+
+function parser:readString(term)
+    return ffi.string(loveToken.LT_ReadString(toChar(term)))
+end
+
+-- A Lua string is copied into a writable buffer first: LT_Escaper stores into str[pos].
+function parser:escaper(str, pos, escape)
+    if type(str) == "string" then
+        local buf = ffi.new("char[?]", math.max(#str, pos + 1) + 1)
+        ffi.copy(buf, str)
+        str = buf
+    end
+    return ffi.string(loveToken.LT_Escaper(str, pos, toChar(escape)))
+end
+
+-- The token's `token` and `string` fields are C strings; `string` may be NULL.
+function parser:getToken()
+    return loveToken.LT_GetToken()
+end
+
+return parser