Commits

PioneerAxon committed 1e0c202

Pre-Lexer complete

Working Pre-Lexer code
-written for readability.
-accepts all tokens from the lex file.
-handles EOS (end of stream) last. EOS happens only once per stream, so there is no need to waste CPU power checking for it first on every call.

  • Participants
  • Parent commits 51d6498

Comments (0)

Files changed (3)

 int main()
 {
 	gchar str[MAIN_C_MAX_BUFF];
-	gunichar uc;
+	int token;
 	LexerState * ls;
 	scanf ("%[^\n]",str);
 	ls = pl_create_scanner (str);
 	pl_set_marker (ls);
-	while ((uc = pl_get_next_gunichar (ls)) != 0)
+	while ((token = pl_get_next_token (ls)) != PL_EOS)
 	{
+		printf("Got Token #%3d = ",token);
 		printf ("%s\n", pl_get_marked_substring (ls));
 		pl_set_marker (ls);
 	}

File src/prelexer.c

 #include<glib.h>
 #include<assert.h>
 #include<prelexer.h>
-
+#include<stdio.h>
 //Creates a scanner state which will be useful for accessing the lexer later.
 LexerState *
 pl_create_scanner (const gchar* input)
 	gchar * tmp;
 	tmp = g_utf8_find_prev_char (state->stream, state->stream + state->next_index);
 	if (tmp == NULL)
+		//Already at the beginning of the stream. Reset index.
 		state->next_index = 0;
 	else
 		state->next_index = tmp - state->stream;
 {
 	return g_strndup (state->stream + state->mark_index, state->next_index - state->mark_index);
 }
+
+//Pre-Lexer tokenizer. To be called only by Lexer.
+//
+//Reads characters from state with pl_get_next_gunichar () until one that is not
+//whitespace (' ', '\r', '\t', '\n') turns up, then classifies that one character.
+//Returns the PL_* or T_* token for the character, PL_EOS when the character is 0
+//(end of stream), or T_UNKNOWN when no rule matches.
+LexerToken
+pl_get_next_token (LexerState * state)
+{
+	gunichar ch;
+
+	//Skip whitespace with a loop. Recursing once per blank would grow the stack
+	//with the length of the whitespace run. No whitespace character matches any
+	//rule below, so skipping it up front does not change the classification.
+	do
+	{
+		ch = pl_get_next_gunichar (state);
+	}
+	while (ch == ' ' || ch == '\r' || ch == '\t' || ch == '\n');
+
+	if (ch == g_utf8_get_char (".") || ch == g_utf8_get_char (",")) 	return PL_DECIMAL;
+	if (g_unichar_isdigit (ch)) 						return PL_DIGIT;	//0-9.
+	//Decimal digits were handled above, so only the non-decimal hex digits reach this test.
+	if (g_unichar_isxdigit (ch)) 						return PL_HEX;
+	if (ch == g_utf8_get_char ("⁰") || ch == g_utf8_get_char ("¹")
+	 || ch == g_utf8_get_char ("²") || ch == g_utf8_get_char ("³")
+	 || ch == g_utf8_get_char ("⁴") || ch == g_utf8_get_char ("⁵")
+	 || ch == g_utf8_get_char ("⁶") || ch == g_utf8_get_char ("⁷")
+	 || ch == g_utf8_get_char ("⁸") || ch == g_utf8_get_char ("⁹"))		return PL_SUPER_DIGIT;
+	if (ch == g_utf8_get_char ("⁻"))					return PL_SUPER_MINUS;
+	if (ch == g_utf8_get_char ("₀") || ch == g_utf8_get_char ("₁")
+	 || ch == g_utf8_get_char ("₂") || ch == g_utf8_get_char ("₃")
+	 || ch == g_utf8_get_char ("₄") || ch == g_utf8_get_char ("₅")
+	 || ch == g_utf8_get_char ("₆") || ch == g_utf8_get_char ("₇")
+	 || ch == g_utf8_get_char ("₈") || ch == g_utf8_get_char ("₉"))		return PL_SUB_DIGIT;
+	if (ch == g_utf8_get_char ("½") || ch == g_utf8_get_char ("⅓")
+	 || ch == g_utf8_get_char ("⅔") || ch == g_utf8_get_char ("¼")
+	 || ch == g_utf8_get_char ("¾") || ch == g_utf8_get_char ("⅕")
+	 || ch == g_utf8_get_char ("⅖") || ch == g_utf8_get_char ("⅗")
+	 || ch == g_utf8_get_char ("⅘") || ch == g_utf8_get_char ("⅙")
+	 || ch == g_utf8_get_char ("⅚") || ch == g_utf8_get_char ("⅛")
+	 || ch == g_utf8_get_char ("⅜") || ch == g_utf8_get_char ("⅝")
+	 || ch == g_utf8_get_char ("⅞"))					return PL_FRACTION;
+	if (ch == g_utf8_get_char ("°"))					return PL_DEGREE;
+	if (ch == g_utf8_get_char ("'"))					return PL_MINUTE;
+	if (ch == g_utf8_get_char ("\""))					return PL_SECOND;
+	//ℜ (U+211C) and ℑ (U+2111) are uppercase letters (category Lu) in Unicode, so
+	//g_unichar_isalpha () is TRUE for them. They must be tested before the generic
+	//letter rule, otherwise PL_RE and PL_IM can never be returned.
+	if (ch == g_utf8_get_char ("ℜ"))					return PL_RE;
+	if (ch == g_utf8_get_char ("ℑ"))					return PL_IM;
+	if (g_unichar_isalpha (ch))						return PL_LETTER;	//All remaining alphabetic characters.
+	if (ch == g_utf8_get_char ("∧"))					return T_AND;
+	if (ch == g_utf8_get_char ("∨"))					return T_OR;
+	if (ch == g_utf8_get_char ("⊻") || ch == g_utf8_get_char ("⊕"))		return T_XOR;
+	if (ch == g_utf8_get_char ("¬") || ch == g_utf8_get_char ("~"))		return T_NOT;
+	if (ch == g_utf8_get_char ("+"))					return T_ADD;
+	if (ch == g_utf8_get_char ("-") || ch == g_utf8_get_char ("−"))		return T_SUBTRACT;
+	if (ch == g_utf8_get_char ("*") || ch == g_utf8_get_char ("×"))		return T_MULTIPLY;
+	if (ch == g_utf8_get_char ("/") || ch == g_utf8_get_char ("∕")
+	 || ch == g_utf8_get_char ("÷"))					return T_DIVIDE;
+	if (ch == g_utf8_get_char ("⌊"))					return T_L_FLOOR;
+	if (ch == g_utf8_get_char ("⌋"))					return T_R_FLOOR;
+	if (ch == g_utf8_get_char ("⌈"))					return T_L_CEILING;
+	if (ch == g_utf8_get_char ("⌉"))					return T_R_CEILING;
+	if (ch == g_utf8_get_char ("√"))					return T_ROOT;
+	if (ch == g_utf8_get_char ("∛"))					return T_ROOT_3;
+	if (ch == g_utf8_get_char ("∜"))					return T_ROOT_4;
+	//End of stream is seen at most once per scan, so it is tested last.
+	if (ch == 0)								return PL_EOS;
+	return T_UNKNOWN;							//No rule matched this character.
+}

File src/prelexer.h

 
 
 #include<glib.h>
+
 //Structure to store lexer state.
 typedef struct {
         gchar* stream;		//Pointer to the local copy of input string.
 	guint mark_index;	//Location, last marked. Useful for getting substrings as part of highlighting
 } LexerState;
 
+//Token kinds produced by the pre-lexer (PL_*) and the lexer (T_*).
+typedef enum {
+	T_UNKNOWN=0,		//Unrecognised character; fallback return of pl_get_next_token
+	//Pre-Lexer tokens: single-character classes returned by pl_get_next_token.
+	PL_DECIMAL,		//Decimal separator ('.' or ',')
+	PL_DIGIT,		//Decimal digit
+	PL_HEX,			//Hex digit that is not a decimal digit (A-F)
+	PL_SUPER_DIGIT,		//Superscript digit (⁰-⁹)
+	PL_SUPER_MINUS,		//Superscript minus (⁻)
+	PL_SUB_DIGIT,		//Subscript digit (₀-₉)
+	PL_FRACTION,		//Fraction character (½, ⅓, ¾, ...)
+	PL_DEGREE,		//Degree sign (°)
+	PL_MINUTE,		//Minutes mark (')
+	PL_SECOND,		//Seconds mark (")
+	PL_LETTER,		//Alphabetic character
+	PL_RE,			//Real part (ℜ)
+	PL_IM,			//Imaginary part (ℑ)
+	PL_EOS,			//End of stream
+	//Lexer tokens. pl_get_next_token also returns the single-character operator tokens of this group directly.
+	T_ADD,			//Plus
+	T_SUBTRACT,		//Minus
+	T_MULTIPLY,		//Multiply
+	T_DIVIDE,		//Divide
+	T_MOD,			//Modulus
+	T_L_FLOOR,		//Floor ( Left, ⌊ )
+	T_R_FLOOR,		//Floor ( Right, ⌋ )
+	T_L_CEILING,		//Ceiling ( Left, ⌈ )
+	T_R_CEILING,		//Ceiling ( Right, ⌉ )
+	T_ROOT,			//Square root (√)
+	T_ROOT_3,		//Cube root (∛)
+	T_ROOT_4,		//Fourth root (∜)
+	T_NOT,			//Bitwise NOT
+	T_AND,			//Bitwise AND
+	T_OR,			//Bitwise OR
+	T_XOR,			//Bitwise XOR
+	T_IN,			//IN ( for converter )
+	T_NUMBER,		//Number
+	T_SUP_NUMBER,		//Super Number
+	T_NSUP_NUMBER,		//Negative Super Number
+	T_SUB_NUMBER,		//Sub Number
+	T_FUNCTION,		//Function
+	T_VARIABLE,		//Variable name
+}LexerToken;
+
 //Creates a scanner state. Useful when multiple scanners are in action.
 LexerState * pl_create_scanner (const gchar *);
 
 //Get marked substring. To be used for error reporting.
 gchar * pl_get_marked_substring (LexerState *);
 
+//Get the next Pre-Lexer token from the stream: whitespace is skipped, PL_EOS marks the end of the stream, T_UNKNOWN an unrecognised character.
+LexerToken pl_get_next_token (LexerState *);
+
 
 #endif