/*
 * Reads a text file into a list of words and prints every passage of it
 * that matches a sentence containing '_' gap placeholders.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../libs/utf8.h"
|
|
|
|
/*
 * Number of elements in the array `a`.
 * Only meaningful for real array objects: for a pointer (including an array
 * function parameter, which decays to a pointer) the result is not the
 * element count. The argument is parenthesized so that arbitrary array
 * expressions expand correctly.
 */
#define SIZEOFARR(a) (sizeof(a) / sizeof((a)[0]))
|
|
|
|
/*
 * Bytes the word reader keeps from the input text; every byte not listed
 * here is skipped. Space and '\n' are listed because they are the word
 * separators. Membership is tested one byte at a time, so a multi-byte
 * character such as 'ä' is accepted through its individual encoded bytes
 * (assumes this source file is saved as UTF-8 - TODO confirm).
 * Note: sizeof/SIZEOFARR of this array also counts the terminating '\0'.
 */
char allowedChars[] = "aäbcdefghijklmnoöpqrstuüvwxyzAÄBCDEFGHIJKLMNOÖPQRSTUÜVWXYZß1234567890 \n";
|
|
|
|
/**
 * Report whether a byte value occurs in a buffer.
 *
 * @param checkForVal byte to look for
 * @param checkInArr  buffer to search (not modified); NULL is treated as empty
 * @param arrSize     number of bytes of checkInArr to inspect; values <= 0
 *                    mean nothing is searched
 * @return 1 if checkForVal is among the first arrSize bytes, otherwise 0
 */
unsigned short int arrContains(char checkForVal, const char *checkInArr, int arrSize)
{
    /* Guard first: a negative size converted to size_t would be huge. */
    if (checkInArr == NULL || arrSize <= 0) {
        return 0;
    }

    /* memchr compares bytes as unsigned char, same as comparing two chars for equality. */
    return memchr(checkInArr, (unsigned char)checkForVal, (size_t)arrSize) != NULL;
}
|
|
|
|
/*
 * A sentence with gaps, stored as a sequence of slots.
 *
 * Slot i is either a word or a gap:
 *   - gapsizes[i] == 0: words[i] holds the characters of a word.
 *   - gapsizes[i]  > 0: the slot is a gap; the value is the number of '_'
 *     placeholders that were counted for it.
 */
struct Gap_sentence
{
    char words[100][50];       /* word text per slot, up to 50 bytes each */
    uint8_t gapsizes[100];     /* '_' count per slot; standard type instead of glibc's internal __uint8_t */
    short unsigned int length; /* number of slots in use */
};
|
|
|
|
|
|
|
|
int main(int argc, char const *argv[])
|
|
{
|
|
char words[30000][20];
|
|
FILE *fp;
|
|
FILE *fpp;
|
|
fpp = fopen("/home/xor/coding/BwInf_41_1/Aufgabe 1/debug.txt","w");
|
|
fp = fopen("/home/xor/coding/BwInf_41_1/Aufgabe 1/Alice_im_Wunderland.txt", "r");
|
|
//fp = fopen("/home/xor/Dokumente/Coding/BwInf_41_1/Aufgabe 1/test.txt", "r");
|
|
if(fp == NULL||fpp==NULL)
|
|
{
|
|
printf("Datei konnte nicht geoeffnet werden.\n");
|
|
return -1;
|
|
}
|
|
|
|
|
|
int word_index = 0;
|
|
int character_index = 0;
|
|
int word_found = 0; //checks if multiple spaces are behind each other, we don't want to add a new word
|
|
while(1){
|
|
char curr_char = fgetc(fp);
|
|
if(curr_char==EOF)
|
|
{
|
|
break;
|
|
}
|
|
if (!arrContains(curr_char,allowedChars,SIZEOFARR(allowedChars)))
|
|
{
|
|
continue;
|
|
}
|
|
else if (curr_char==' '|| curr_char=='\n')
|
|
{
|
|
if (word_found)
|
|
{
|
|
word_index++;
|
|
character_index = 0;
|
|
word_found = 0;
|
|
}
|
|
|
|
}
|
|
else{
|
|
word_found = 1;
|
|
words[word_index][character_index]=curr_char;
|
|
character_index++;
|
|
}
|
|
|
|
}
|
|
|
|
//Get sentence with gap
|
|
FILE *gap_file;
|
|
struct Gap_sentence gap_sentence;
|
|
gap_file = fopen("/home/xor/coding/BwInf_41_1/Aufgabe 1/stoerung0.txt","r");
|
|
|
|
character_index = 0;
|
|
word_found = 0;
|
|
short int file_read = 0;
|
|
while(!file_read){
|
|
char curr_char = fgetc(gap_file);
|
|
switch (curr_char)
|
|
{
|
|
case EOF:
|
|
file_read = 1;
|
|
break;
|
|
case ' ':
|
|
/*only begin a new word, if the last characters were part of a word
|
|
or if the next character is part of a word as we dont want to save the next word in the index where
|
|
the last underscores were counted (and therefore word_index was not increased)*/
|
|
if(word_found || *gap_file->_IO_read_ptr != '_')
|
|
{
|
|
gap_sentence.length++;
|
|
word_found=0;
|
|
character_index = 0;
|
|
}
|
|
break;
|
|
case '_':
|
|
gap_sentence.gapsizes[gap_sentence.length]++;
|
|
break;
|
|
default:
|
|
word_found = 1;
|
|
gap_sentence.words[gap_sentence.length][character_index] = curr_char;
|
|
character_index++;
|
|
break;
|
|
}
|
|
|
|
}
|
|
//After the last word, there is no space, so weed need to add one length
|
|
gap_sentence.length++;
|
|
fclose(gap_file);
|
|
|
|
unsigned int occurance_index = 0;
|
|
int sentence_occurances[100];
|
|
int sentence_occurance_index = 0;
|
|
|
|
while(occurance_index<sizeof(words)/sizeof(words[0]))
|
|
{
|
|
//find next occurance of first word
|
|
while(1){
|
|
if(utf8casecmp(gap_sentence.words[0],words[occurance_index])==0){
|
|
break;
|
|
}
|
|
if(occurance_index>sizeof(words)/sizeof(words[0])){
|
|
break;
|
|
}
|
|
occurance_index++;
|
|
}
|
|
|
|
for(int i = 1; i<gap_sentence.length;i++)
|
|
{
|
|
if(gap_sentence.gapsizes[i]==0)
|
|
{
|
|
if(utf8casecmp(gap_sentence.words[i],words[i+occurance_index])!=0)
|
|
{
|
|
goto nomatch;
|
|
}
|
|
}
|
|
else{
|
|
// -1 because i gets increased by 1 after 'continue'
|
|
i += gap_sentence.gapsizes[i]-1;
|
|
continue;
|
|
}
|
|
}
|
|
sentence_occurances[sentence_occurance_index] = occurance_index;
|
|
sentence_occurance_index++;
|
|
nomatch:
|
|
//We know the current first word to be matching, so no need to check it again
|
|
occurance_index++;
|
|
|
|
}
|
|
|
|
for(int j = 0; j<=sentence_occurance_index;j++)
|
|
{
|
|
for (int i=0;i<gap_sentence.length;i++){
|
|
printf("%s ",words[sentence_occurances[j]+i]);
|
|
}
|
|
printf("\n");
|
|
}
|
|
|
|
|
|
return 0;
|
|
} |