// bwinf_41_1/Aufgabe 1/main.c

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../libs/utf8.h"

/* Number of elements in a true array (NOT a pointer or an array parameter).
   The argument is parenthesized so that any expression can be passed safely. */
#define SIZEOFARR(a) (sizeof(a) / sizeof((a)[0]))

/* Characters the text tokenizer in main() keeps; every input char that is not
   found in this array is skipped. ' ' and '\n' are listed because main() uses
   them as word separators.
   NOTE(review): the umlauts and 'ß' are presumably stored as multi-byte UTF-8
   sequences, and the lookup compares single chars, so membership is effectively
   tested per byte - confirm the encoding of this source file and of the input. */
char allowedChars[] = "aäbcdefghijklmnoöpqrstuüvwxyzAÄBCDEFGHIJKLMNOÖPQRSTUÜVWXYZß1234567890 \n";

/*
 * Linear membership test: reports whether checkForVal occurs among the first
 * arrSize elements of checkInArr.
 *
 * Returns 1 as soon as a matching element is found and 0 when none matches
 * (which includes the case of arrSize being zero or negative).
 */
unsigned short int arrContains(char checkForVal, char *checkInArr, int arrSize)
{
    char *cursor = checkInArr;
    int remaining = arrSize;

    while (remaining > 0) {
        if (*cursor == checkForVal) {
            return 1;
        }
        cursor++;
        remaining--;
    }
    return 0;
}

/*
 * A sentence pattern with gaps, as filled in by main() from the gap file.
 *
 * The pattern is a sequence of `length` entries. For entry i:
 *   - gapsizes[i] == 0: the entry is a literal word stored in words[i];
 *   - gapsizes[i]  > 0: the entry is a gap; the value counts the '_'
 *                       placeholders that were read for it.
 */
struct Gap_sentence
{
    char words[100][50];         /* up to 100 entries, 49 bytes + NUL each */
    uint8_t gapsizes[100];       /* standard fixed-width type instead of glibc's internal __uint8_t */
    short unsigned int length;   /* number of entries in use */
};


int main(int argc, char const *argv[])
{
    char words[30000][20];
    FILE *fp;
    FILE *fpp;
    fpp = fopen("/home/xor/coding/BwInf_41_1/Aufgabe 1/debug.txt","w");
    fp = fopen("/home/xor/coding/BwInf_41_1/Aufgabe 1/Alice_im_Wunderland.txt", "r");
    //fp = fopen("/home/xor/Dokumente/Coding/BwInf_41_1/Aufgabe 1/test.txt", "r");
    if(fp == NULL||fpp==NULL)
    {
	    printf("Datei konnte nicht geoeffnet werden.\n");
        return -1;
    }


    int word_index = 0;
    int character_index = 0;
    int word_found = 0; //checks if multiple spaces are behind each other, we don't want to add a new word
    while(1){
        char curr_char = fgetc(fp);
        if(curr_char==EOF)
        {
            break;
        }
        if (!arrContains(curr_char,allowedChars,SIZEOFARR(allowedChars)))
        {
            continue;
        }
        else if (curr_char==' '|| curr_char=='\n')
        {
            if (word_found)
            {
                word_index++;
                character_index = 0;
                word_found = 0;
            }

        }
        else{
            word_found = 1;
            words[word_index][character_index]=curr_char;
            character_index++;
        }

    }

    //Get sentence with gap
    FILE *gap_file;
    struct Gap_sentence gap_sentence;
    gap_file = fopen("/home/xor/coding/BwInf_41_1/Aufgabe 1/stoerung0.txt","r");

    character_index = 0;
    word_found = 0;
    short int file_read = 0;
    while(!file_read){
        char curr_char = fgetc(gap_file);
        switch (curr_char)
        {
        case EOF:
            file_read = 1;
            break;
        case ' ':
            /*only begin a new word, if the last characters were part of a word
            or if the next character is part of a word as we dont want to save the next word in the index where
            the last underscores were counted (and therefore word_index was not increased)*/
            if(word_found || *gap_file->_IO_read_ptr != '_')
            {
                gap_sentence.length++;
                word_found=0;
                character_index = 0;
            }
            break;
        case '_':
            gap_sentence.gapsizes[gap_sentence.length]++;
            break;
        default:
            word_found = 1;
            gap_sentence.words[gap_sentence.length][character_index] = curr_char;
            character_index++;
            break;
        }

    }
    //After the last word, there is no space, so weed need to add one length
    gap_sentence.length++;
    fclose(gap_file);

    unsigned int occurance_index = 0;
    int sentence_occurances[100];
    int sentence_occurance_index = 0;

    while(occurance_index<sizeof(words)/sizeof(words[0]))
    {
        //find next occurance of first word
        while(1){
            if(utf8casecmp(gap_sentence.words[0],words[occurance_index])==0){
                break;
            }
            if(occurance_index>sizeof(words)/sizeof(words[0])){
                break;
            }
            occurance_index++;
        }

        for(int i = 1; i<gap_sentence.length;i++)
        {
            if(gap_sentence.gapsizes[i]==0)
            {
                if(utf8casecmp(gap_sentence.words[i],words[i+occurance_index])!=0)
                {
                    goto nomatch;
                }
            }
            else{
                // -1 because i gets increased by 1 after 'continue'
                i += gap_sentence.gapsizes[i]-1;
                continue;
            }
        }
        sentence_occurances[sentence_occurance_index] = occurance_index;
        sentence_occurance_index++;
        nomatch:
        //We know the current first word to be matching, so no need to check it again
        occurance_index++;

    }

    for(int j = 0; j<=sentence_occurance_index;j++)
    {
        for (int i=0;i<gap_sentence.length;i++){
            printf("%s ",words[sentence_occurances[j]+i]);
        }
        printf("\n");
    }


    return 0;
}