Added support for empty lines and indentation to the Parser

This commit is contained in:
XOR 2023-01-03 08:44:37 +01:00
parent 048b8513cf
commit 6a623e8952

42
main.c
View file

@ -21,6 +21,8 @@
#define ASCII_SPACE 32
#define ASCII_NEWLINE 10
#define PREFGETC(file) *file->_IO_read_ptr
int saveinstructionpart(unsigned short int part, char content[])
{
// strcat(content,"\0"); //REMOVE
@ -39,23 +41,34 @@ void tokenize(FILE *input_file, char tokens[][MAX_TOKEN_SIZE])
{
char current_char = 0;
unsigned int token_index = 0;
short unsigned int is_pos_line_start = 1;
while ((current_char = fgetc(input_file)) != EOF)
{
switch (current_char)
if (is_pos_line_start)
{
case ASCII_TAB:
case ASCII_SPACE:
// Loop to the characters until the next character fgetc() would read is not space or tab
while (*input_file->_IO_read_ptr == ASCII_SPACE || *input_file->_IO_read_ptr == ASCII_TAB) // The character which fgetc will read the next time when its called
is_pos_line_start = 0;
// Loop through spaces, tabs and newlines so that empty lines and indentation work
while (current_char != EOF && (current_char == ASCII_SPACE || current_char == ASCII_TAB || current_char == ASCII_NEWLINE))
{
current_char = fgetc(input_file);
}
//Between the address and the newline in an instruction there is usually no space.
//Therefore the token_index gets increased whenever a newline is found.
//However, there can also be a space between the address and the newline.
//To avoid increasing the token_index twice, we must not increase it here if the next character is a \n
if(*input_file->_IO_read_ptr != ';' && *input_file->_IO_read_ptr != ASCII_NEWLINE)
}
switch (current_char)
{
case EOF:
break;
case ASCII_TAB:
case ASCII_SPACE:
// Consume characters until the next character fgetc() would read is not a space or tab
while (PREFGETC(input_file) == ASCII_SPACE || PREFGETC(input_file) == ASCII_TAB) // The character which fgetc will read the next time when its called
{
current_char = fgetc(input_file);
}
// Between the address and the newline in an instruction there is usually no space.
// Therefore the token_index gets increased whenever a newline is found.
// However, there can also be a space between the address and the newline.
// To avoid increasing the token_index twice, we must not increase it here if the next character is a \n or a ';'
if (PREFGETC(input_file) != ';' && PREFGETC(input_file) != ASCII_NEWLINE)
{
token_index++;
}
@ -64,9 +77,10 @@ void tokenize(FILE *input_file, char tokens[][MAX_TOKEN_SIZE])
token_index++;
tokens[token_index][0] = ';';
token_index++;
is_pos_line_start = 1;
break;
case ';':
while (*input_file->_IO_read_ptr != ASCII_NEWLINE) // The character which fgetc will read the next time when its called
while (PREFGETC(input_file) != ASCII_NEWLINE) // The character which fgetc will read the next time when its called
{
current_char = fgetc(input_file);
}
@ -98,9 +112,9 @@ int main(int argc, char const *argv[])
char(*tokens)[MAX_TOKEN_SIZE] = malloc(sizeof(*tokens) * MAX_MEMORY);
tokenize(assembly_file, tokens);
for (int i = 0; i<30;i++)
for (int i = 0; i < 30; i++)
{
printf("%s\n",tokens[i]);
printf("%s\n", tokens[i]);
}
fclose(assembly_file);
return 0;