bwinf_41_1/Aufgabe 1/main.c
2022-11-14 21:18:37 +01:00

167 lines
No EOL
4.5 KiB
C

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../libs/utf8.h"
/*
 * Number of elements in a true array (not a pointer). The argument is
 * parenthesised everywhere so that expression arguments expand correctly.
 */
#define SIZEOFARR(a) (sizeof(a) / sizeof((a)[0]))

/*
 * Bytes that may appear in the text: letters (including the UTF-8 encoded
 * German umlauts and sharp s), digits, space and newline. Because this is a
 * string literal, SIZEOFARR(allowedChars) also counts the terminating NUL.
 */
char allowedChars[] = "aäbcdefghijklmnoöpqrstuüvwxyzAÄBCDEFGHIJKLMNOÖPQRSTUÜVWXYZß1234567890 \n";
/*
 * Linear search for a byte in a buffer.
 *
 * checkForVal: byte to look for.
 * checkInArr:  buffer to scan.
 * arrSize:     number of bytes of checkInArr to examine; values <= 0 scan nothing.
 *
 * Returns 1 if checkForVal occurs within the first arrSize bytes, otherwise 0.
 */
unsigned short int arrContains(char checkForVal, char *checkInArr, int arrSize)
{
    const char *cursor = checkInArr;
    int remaining = arrSize;

    while (remaining > 0) {
        if (*cursor == checkForVal) {
            return 1;
        }
        cursor++;
        remaining--;
    }
    return 0;
}
/*
 * A sentence with gaps, stored as a sequence of `length` entries.
 * Entry i is either a literal word (words[i], with gapsizes[i] == 0) or a
 * run of gaps (gapsizes[i] > 0 counts the underscores read for that entry).
 */
struct Gap_sentence
{
    char words[100][50];          /* up to 100 entries of up to 50 bytes each */
    uint8_t gapsizes[100];        /* standard fixed-width type instead of the reserved __uint8_t */
    short unsigned int length;    /* number of entries in use */
};
int main(int argc, char const *argv[])
{
char words[30000][20];
FILE *fp;
FILE *fpp;
fpp = fopen("/home/xor/coding/BwInf_41_1/Aufgabe 1/debug.txt","w");
fp = fopen("/home/xor/coding/BwInf_41_1/Aufgabe 1/Alice_im_Wunderland.txt", "r");
//fp = fopen("/home/xor/Dokumente/Coding/BwInf_41_1/Aufgabe 1/test.txt", "r");
if(fp == NULL||fpp==NULL)
{
printf("Datei konnte nicht geoeffnet werden.\n");
return -1;
}
int word_index = 0;
int character_index = 0;
int word_found = 0; //checks if multiple spaces are behind each other, we don't want to add a new word
while(1){
char curr_char = fgetc(fp);
if(curr_char==EOF)
{
break;
}
if (!arrContains(curr_char,allowedChars,SIZEOFARR(allowedChars)))
{
continue;
}
else if (curr_char==' '|| curr_char=='\n')
{
if (word_found)
{
word_index++;
character_index = 0;
word_found = 0;
}
}
else{
word_found = 1;
words[word_index][character_index]=curr_char;
character_index++;
}
}
//Get sentence with gap
FILE *gap_file;
struct Gap_sentence gap_sentence;
gap_file = fopen("/home/xor/coding/BwInf_41_1/Aufgabe 1/stoerung0.txt","r");
character_index = 0;
word_found = 0;
short int file_read = 0;
while(!file_read){
char curr_char = fgetc(gap_file);
switch (curr_char)
{
case EOF:
file_read = 1;
break;
case ' ':
/*only begin a new word, if the last characters were part of a word
or if the next character is part of a word as we dont want to save the next word in the index where
the last underscores were counted (and therefore word_index was not increased)*/
if(word_found || *gap_file->_IO_read_ptr != '_')
{
gap_sentence.length++;
word_found=0;
character_index = 0;
}
break;
case '_':
gap_sentence.gapsizes[gap_sentence.length]++;
break;
default:
word_found = 1;
gap_sentence.words[gap_sentence.length][character_index] = curr_char;
character_index++;
break;
}
}
//After the last word, there is no space, so weed need to add one length
gap_sentence.length++;
fclose(gap_file);
unsigned int occurance_index = 0;
int sentence_occurances[100];
int sentence_occurance_index = 0;
while(occurance_index<sizeof(words)/sizeof(words[0]))
{
//find next occurance of first word
while(1){
if(utf8casecmp(gap_sentence.words[0],words[occurance_index])==0){
break;
}
if(occurance_index>sizeof(words)/sizeof(words[0])){
break;
}
occurance_index++;
}
for(int i = 1; i<gap_sentence.length;i++)
{
if(gap_sentence.gapsizes[i]==0)
{
if(utf8casecmp(gap_sentence.words[i],words[i+occurance_index])!=0)
{
goto nomatch;
}
}
else{
// -1 because i gets increased by 1 after 'continue'
i += gap_sentence.gapsizes[i]-1;
continue;
}
}
sentence_occurances[sentence_occurance_index] = occurance_index;
sentence_occurance_index++;
nomatch:
//We know the current first word to be matching, so no need to check it again
occurance_index++;
}
for(int j = 0; j<=sentence_occurance_index;j++)
{
for (int i=0;i<gap_sentence.length;i++){
printf("%s ",words[sentence_occurances[j]+i]);
}
printf("\n");
}
return 0;
}