omi-eikyo/src/m_token.c

160 lines
4.3 KiB
C

// Copyright © 2017 Project Golan, all rights reserved.
// See COPYING for more information.
#include "m_token.h"
#include "m_darray.h"
#include <ctype.h>
#include <stdlib.h>
// Collects a run of characters into tok->text.
//
// Relies on three names from the expansion site: `ch` (the current, already
// read character), `fp` (the input stream) and `tok` (the token being built).
// For as long as `fn(ch)` is true, `ch` is appended to tok->text and the next
// character is read from `fp`. Afterwards a '\0' is appended and the first
// character that failed the predicate is pushed back onto `fp`.
// M_Vec_grow/M_Vec_next come from m_darray.h; presumably grow reserves room
// and next yields the next free slot -- confirm against that header.
#define M_Tk_getPredStr(fn) \
   do { \
      while(fn(ch)) { \
         M_Vec_grow(tok->text, 1); \
         M_Vec_next(tok->text) = ch; \
         ch = fgetc(fp); \
      } \
      M_Vec_grow(tok->text, 1); \
      M_Vec_next(tok->text) = '\0'; \
      ungetc(ch, fp); \
   } while(0)
// Character-class predicates for use with M_Tk_getPredStr.
// M_Tk_isidenti: alphanumerics and '_'.
// M_Tk_isnum:    everything M_Tk_isidenti accepts, plus '.'.
// Both evaluate their argument more than once, so pass a plain variable.
#define M_Tk_isidenti(ch) (isalnum(ch) || (ch) == '_')
#define M_Tk_isnum(ch)    (M_Tk_isidenti(ch) || (ch) == '.')
// Extern Functions ----------------------------------------------------------|
//
// M_Tk_Parse
//
// Reads one token from `fp` and stores it in `tok`.
//
// Does nothing if `tok` is null. Otherwise tok->text is cleared first and
// tok->type is always set before returning. A null `fp`, a stream already
// at end-of-file, an EOF result from fgetc, or a NUL byte yield tok_eof.
//
// tok->text is only filled in (NUL-terminated) for these token types:
//  - tok_cmtlin: the text after `//` up to, not including, the newline
//  - tok_string / tok_charac: the raw characters between the delimiters
//    (no escape processing; an unterminated literal ends at EOF)
//  - tok_number: a digit, or a '.' or '-' directly followed by a digit,
//    then any run of alphanumerics, '_' and '.'
//  - tok_identi: a run of alphanumerics and '_'
//  - tok_chrseq: any single character not recognized otherwise
//
// Blanks (as per isblank) in front of a token are skipped. Line endings
// are reported as tok_lnend; "\r\n" and a lone '\r' each count as one.
//
void M_Tk_Parse(FILE *fp, M_token *tok)
{
   if(!tok) return;

   M_Vec_clear(tok->text);

   if(!fp || feof(fp)) {
      tok->type = tok_eof;
      return;
   }

begin:;
   int ch;

   switch((ch = fgetc(fp)))
   {
   case EOF:
   case '\0': tok->type = tok_eof; return;

   // Swallow the '\n' of a CRLF pair, then report a single line ending.
   case '\r': if((ch = fgetc(fp)) != '\n') ungetc(ch, fp);
      // fallthrough
   case '\n': tok->type = tok_lnend;  return;

   case ';':  tok->type = tok_semico; return;
   case ',':  tok->type = tok_comma;  return;
   case '[':  tok->type = tok_bracko; return;
   case ']':  tok->type = tok_brackc; return;
   case '{':  tok->type = tok_braceo; return;
   case '}':  tok->type = tok_bracec; return;
   case '(':  tok->type = tok_pareno; return;
   case ')':  tok->type = tok_parenc; return;

   // Operators with one two-character form: `c` alone or `c` followed by `c2`.
   #define TokOp2(c, c2, t, t2) \
      case c: \
         if((ch = fgetc(fp)) == c2) tok->type = t2; \
         else {ungetc(ch, fp); tok->type = t;} \
         return

   TokOp2('=', '=', tok_eq,   tok_eq2);
   TokOp2('?', '=', tok_tern, tok_terneq);
   TokOp2('!', '=', tok_not,  tok_neq);
   TokOp2('~', '=', tok_bnot, tok_bneq);
   TokOp2('*', '=', tok_mul,  tok_muleq);
   TokOp2('@', '@', tok_at,   tok_at2);

   #undef TokOp2

   // Operators with three forms: `c`, doubled `cc`, and `c=`.
   #define TokOp3(c, t, t2, teq) \
      case c: \
         if((ch = fgetc(fp)) == c) tok->type = t2; \
         else if(ch == '=')        tok->type = teq; \
         else {ungetc(ch, fp); tok->type = t;} \
         return

   TokOp3('<', tok_lt,  tok_lt2,  tok_le);
   TokOp3('>', tok_gt,  tok_gt2,  tok_ge);
   TokOp3('|', tok_or,  tok_or2,  tok_oreq);
   TokOp3('&', tok_and, tok_and2, tok_andeq);
   TokOp3('+', tok_add, tok_add2, tok_addeq);
   TokOp3('%', tok_mod, tok_mod2, tok_modeq);
   TokOp3('^', tok_xor, tok_xor2, tok_xoreq);
   TokOp3(':', tok_col, tok_col2, tok_coleq);

   #undef TokOp3

   case '-':
      if((ch = fgetc(fp)) == '-') tok->type = tok_sub2;
      else if(ch == '=')          tok->type = tok_subeq;
      else if(ch == '>')          tok->type = tok_rarrow;
      // "-<digit>": put the digit back and lex the whole thing as a number
      // below, with '-' as its first character.
      else if(isdigit(ch))        {ungetc(ch, fp); ch = '-'; break;}
      else                        {ungetc(ch, fp); tok->type = tok_sub;}
      return;

   case '/':
      if((ch = fgetc(fp)) == '=')
         tok->type = tok_diveq;
      else if(ch == '/')
      {
         // Stop on EOF as returned by fgetc rather than on feof(): after a
         // read error fgetc keeps returning EOF while feof() stays false,
         // which would make the loop append forever.
         #define incmt(ch) ((ch) != '\n' && (ch) != EOF)
         ch = fgetc(fp);
         M_Tk_getPredStr(incmt);
         tok->type = tok_cmtlin;
         #undef incmt
      }
      else
         {ungetc(ch, fp); tok->type = tok_div;}
      return;

   case '.':
      if((ch = fgetc(fp)) == '.')
      {
         if((ch = fgetc(fp)) == '.')
            tok->type = tok_dot3;
         else
            {ungetc(ch, fp); tok->type = tok_dot2;}
      }
      // ".<digit>": put the digit back and lex as a number below. `ch` must
      // be restored to '.', otherwise the digit would be stored once here
      // and then read a second time from the stream.
      else if(isdigit(ch))
         {ungetc(ch, fp); ch = '.'; break;}
      else
         {ungetc(ch, fp); tok->type = tok_dot;}
      return;

   case '\'': tok->type = tok_charac; goto string;
   case '"':  tok->type = tok_string; goto string;
   string: {
      // Copy everything up to the matching delimiter, or to EOF / a read
      // error if the literal is never closed.
      int const beg = ch;

      while((ch = fgetc(fp)) != beg && ch != EOF) {
         M_Vec_grow(tok->text, 1);
         M_Vec_next(tok->text) = ch;
      }

      M_Vec_grow(tok->text, 1);
      M_Vec_next(tok->text) = '\0';
      return;
   }

   default:
      break;
   }

   if(isblank(ch))
   {
      // Skip the whole run of blanks and start over on the next character.
      while(isblank(ch = fgetc(fp)));
      ungetc(ch, fp);
      goto begin;
   }
   else if(isdigit(ch) || ch == '.' || ch == '-')
   {
      // '.' and '-' only get here from the switch above, when a digit
      // follows them. Store the first character unconditionally, then take
      // the rest of the number.
      M_Vec_grow(tok->text, 1);
      M_Vec_next(tok->text) = ch;

      ch = fgetc(fp);
      M_Tk_getPredStr(M_Tk_isnum);

      tok->type = tok_number;
   }
   else if(M_Tk_isidenti(ch))
   {
      M_Tk_getPredStr(M_Tk_isidenti);
      tok->type = tok_identi;
   }
   else
   {
      // Anything else becomes a one-character token.
      M_Vec_grow(tok->text, 2);
      M_Vec_next(tok->text) = ch;
      M_Vec_next(tok->text) = '\0';
      tok->type = tok_chrseq;
   }
}
// EOF