Added support for empty lines and indentation to the Parser

2023-01-03 08:44:37 +01:00 · 2023-01-03 08:44:37 +01:00 · 6a623e8952
commit 6a623e8952
parent 048b8513cf
1 changed files with 28 additions and 14 deletions
--- a/main.c
+++ b/main.c
@@ -21,6 +21,8 @@
 #define ASCII_SPACE 32
 #define ASCII_NEWLINE 10

+#define PREFGETC(file) *file->_IO_read_ptr
+
 int saveinstructionpart(unsigned short int part, char content[])
 {
    // strcat(content,"\0"); //REMOVE
@@ -39,23 +41,34 @@ void tokenize(FILE *input_file, char tokens[][MAX_TOKEN_SIZE])
 {
    char current_char = 0;
    unsigned int token_index = 0;
-
+    short unsigned int is_pos_line_start = 1;
    while ((current_char = fgetc(input_file)) != EOF)
    {
-        switch (current_char)
+        if (is_pos_line_start)
        {
-        case ASCII_TAB:
-        case ASCII_SPACE:
-            // Loop to the characters until the next character fgetc() would read is not space or tab
-            while (*input_file->_IO_read_ptr == ASCII_SPACE || *input_file->_IO_read_ptr == ASCII_TAB) // The character which fgetc will read the next time when its called
+            is_pos_line_start = 0;
+            // Loop through spaces, tabs and newlines so that empty lines and indentation work
+            while (current_char != EOF && (current_char == ASCII_SPACE || current_char == ASCII_TAB || current_char == ASCII_NEWLINE))
            {
                current_char = fgetc(input_file);
            }
-            //Between the Adress and the newline in an Instruction is usually no space. 
-            //Therefore the token_index gets increased, whenever a newline is found.
-            //However, there can also be a space between the Adress and the newline.
-            //To not increase the token_index 2 times, we need to not increase it here if the next character is a \n
-            if(*input_file->_IO_read_ptr != ';' && *input_file->_IO_read_ptr != ASCII_NEWLINE)
+        }
+        switch (current_char)
+        {
+        case EOF:
+            break;
+        case ASCII_TAB:
+        case ASCII_SPACE:
+            // Loop through the characters until the next character fgetc() would read is not a space or tab
+            while (PREFGETC(input_file) == ASCII_SPACE || PREFGETC(input_file) == ASCII_TAB) // The character which fgetc will read the next time when its called
+            {
+                current_char = fgetc(input_file);
+            }
+            // There is usually no space between the address and the newline in an instruction.
+            // Therefore the token_index is increased whenever a newline is found.
+            // However, there can also be a space between the address and the newline.
+            // To avoid increasing the token_index twice, we must not increase it here if the next character is a \n
+            if (PREFGETC(input_file) != ';' && PREFGETC(input_file) != ASCII_NEWLINE)
            {
                token_index++;
            }
@@ -64,9 +77,10 @@ void tokenize(FILE *input_file, char tokens[][MAX_TOKEN_SIZE])
            token_index++;
            tokens[token_index][0] = ';';
            token_index++;
+            is_pos_line_start = 1;
            break;
        case ';':
-            while (*input_file->_IO_read_ptr != ASCII_NEWLINE) // The character which fgetc will read the next time when its called
+            while (PREFGETC(input_file) != ASCII_NEWLINE) // The character which fgetc will read the next time when its called
            {
                current_char = fgetc(input_file);
            }
@@ -98,9 +112,9 @@ int main(int argc, char const *argv[])

    char(*tokens)[MAX_TOKEN_SIZE] = malloc(sizeof(*tokens) * MAX_MEMORY);
    tokenize(assembly_file, tokens);
-    for (int i = 0; i<30;i++)
+    for (int i = 0; i < 30; i++)
    {
-        printf("%s\n",tokens[i]);
+        printf("%s\n", tokens[i]);
    }
    fclose(assembly_file);
    return 0;