95 lines
3.2 KiB
C
95 lines
3.2 KiB
C
/*
This code is part of the EIPA Platform

This code contains the implementations of all functions related to the lexical analysis of the EIPA Assembly file
*/
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include "../header/common.h"
|
|
|
|
void lexer(FILE *input_file, char tokens[][MAX_TOKEN_SIZE])
|
|
{
|
|
// Stores the current character we are examining
|
|
char current_char = 0;
|
|
// Stores at which token current_char is
|
|
unsigned int token_index = 0;
|
|
// Stores wether current_char is the beginning of a line, so that we can skip any indentation at that point
|
|
short unsigned int is_pos_line_start = 1;
|
|
|
|
// Loop trough all characters in the file
|
|
while ((current_char = fgetc(input_file)) != EOF)
|
|
{
|
|
if (is_pos_line_start)
|
|
{
|
|
// Loop trough Spaces and Tabs, to make empty lines and Indentation work
|
|
is_pos_line_start = 0;
|
|
while (current_char != EOF && (current_char == ASCII_SPACE || current_char == ASCII_TAB || current_char == ASCII_NEWLINE))
|
|
{
|
|
current_char = fgetc(input_file);
|
|
}
|
|
}
|
|
|
|
switch (current_char)
|
|
{
|
|
case EOF:
|
|
break;
|
|
case ASCII_TAB:
|
|
case ASCII_SPACE:
|
|
// This is an indice of a new token begining, so we probably need to increase token_index
|
|
|
|
// Loop to the characters until the next character fgetc() would read is not space or tab
|
|
while (PREFGETC(input_file) == ASCII_SPACE || PREFGETC(input_file) == ASCII_TAB)
|
|
{
|
|
current_char = fgetc(input_file);
|
|
}
|
|
/*
|
|
Between the Adress and the newline in an Instruction is usually no space.
|
|
Therefore the token_index gets increased, whenever a newline is found.
|
|
However, there can also be a space between the Adress and the newline.
|
|
To not increase the token_index 2 times, we need to not increase it here if the next character is a \n
|
|
*/
|
|
if (PREFGETC(input_file) != ';' && PREFGETC(input_file) != ASCII_NEWLINE)
|
|
{
|
|
token_index++;
|
|
}
|
|
break;
|
|
case ASCII_NEWLINE:
|
|
// This is a indice of a new token -> increase token_index
|
|
token_index++;
|
|
|
|
// This also is a indice of a new instruction beginning
|
|
// in the tokens array, instructions are seperated by semicolons
|
|
tokens[token_index][0] = ';';
|
|
|
|
// Since the Instruction seperator (';') is also a token, we need to increase token_index again
|
|
token_index++;
|
|
|
|
is_pos_line_start = 1;
|
|
break;
|
|
case ';':
|
|
// Loop over the comment
|
|
while (PREFGETC(input_file) != ASCII_NEWLINE && PREFGETC(input_file) != '\0')
|
|
{
|
|
current_char = fgetc(input_file);
|
|
}
|
|
break;
|
|
default:
|
|
strncat(tokens[token_index], ¤t_char, 1);
|
|
break;
|
|
}
|
|
}
|
|
tokens[token_index + 1][0] = ';';
|
|
tokens[token_index + 2][0] = EOF;
|
|
}
|
|
|
|
/*
    Prints the tokens stored in the tokens array to stdout, one token per line.

    tokens: array of tokens; printing stops at the first row whose first
            character is the end marker (char)EOF, which is what lexer()
            writes after the last token. That row itself is not printed.
*/
void print_tokens(char tokens[][MAX_TOKEN_SIZE])
{
    // The end marker was stored in a char, so it has to be compared as a char:
    // on platforms where plain char is unsigned the stored value is 255 and
    // would never compare equal to the int EOF.
    for (size_t token_index = 0; tokens[token_index][0] != (char)EOF; token_index++)
    {
        printf("%s\n", tokens[token_index]);
    }
}
|