Querying the Document

  • + 0 comments

    Splitting the string step by step, first by '\n' (for paragraphs), then by '.' (for sentences), and finally by ' ' (for words), makes this easy to implement.

    /*
     * Look up a single word in the document.
     *
     * All three positions are 1-based: the function selects paragraph n,
     * then sentence m inside it, then word k inside that sentence.
     * No bounds checking is performed; the indices must be valid for
     * `document`.
     */
    char *kth_word_in_mth_sentence_of_nth_paragraph(char ****document, int k, int m, int n) {
        char ***paragraph = document[n - 1];
        char **sentence = paragraph[m - 1];
        char *word = sentence[k - 1];

        return word;
    }
    
    /*
     * Look up a sentence (an array of word pointers) in the document.
     *
     * Both positions are 1-based: the function selects paragraph m and
     * returns sentence k inside it. No bounds checking is performed.
     */
    char **kth_sentence_in_mth_paragraph(char ****document, int k, int m) {
        char ***paragraph = document[m - 1];

        return paragraph[k - 1];
    }
    
    /*
     * Look up a paragraph (an array of sentences) in the document.
     *
     * `k` is 1-based. No bounds checking is performed.
     */
    char ***kth_paragraph(char ****document, int k) {
        char ***paragraph = *(document + (k - 1));

        return paragraph;
    }
    
    /*
     * Split `text` into tokens separated by any character found in `dilim`.
     *
     * Runs of consecutive delimiters are treated as a single separator, and
     * leading/trailing delimiters are ignored, so no empty tokens are ever
     * produced (the same tokenisation rule as strtok).
     *
     * text   NUL-terminated input string; it is only read, never modified.
     *        NULL is treated as an empty string.
     * dilim  set of delimiter characters; NULL is treated as an empty set.
     * max    upper bound on the number of tokens returned; any further
     *        tokens in `text` are ignored instead of overflowing the array.
     * len    out-parameter receiving the number of tokens returned.
     *
     * Returns a heap-allocated array of `*len` heap-allocated strings, each
     * sized exactly to its token. The caller owns the result and must free
     * every element and then the array itself. When there are no tokens the
     * result is still a valid pointer that can be passed to free(). On
     * allocation failure everything allocated so far is released, `*len` is
     * set to 0 and NULL is returned.
     */
    char **strsplit(char *text, const char *dilim, const int max, int *len) {
        const char *src = (text != NULL) ? text : "";
        const char *seps = (dilim != NULL) ? dilim : "";

        *len = 0;

        /* Pass 1: count tokens (capped at max) so the array is sized exactly. */
        int count = 0;
        const char *cursor = src;
        while (count < max) {
            cursor += strspn(cursor, seps);
            if (*cursor == '\0') {
                break;
            }
            cursor += strcspn(cursor, seps);
            count++;
        }

        /* Always request at least one slot: malloc(0) may legally return NULL,
         * which would be indistinguishable from an allocation failure. */
        char **arr = malloc((size_t)(count > 0 ? count : 1) * sizeof *arr);
        if (arr == NULL) {
            return NULL;
        }

        /* Pass 2: copy each token into its own exactly-sized buffer. */
        cursor = src;
        for (int i = 0; i < count; i++) {
            cursor += strspn(cursor, seps);
            size_t token_len = strcspn(cursor, seps);

            arr[i] = malloc(token_len + 1);
            if (arr[i] == NULL) {
                /* Roll back the tokens copied so far. */
                while (i-- > 0) {
                    free(arr[i]);
                }
                free(arr);
                return NULL;
            }
            memcpy(arr[i], cursor, token_len);
            arr[i][token_len] = '\0';
            cursor += token_len;
        }

        *len = count;
        return arr;
    }
    
    /*
     * Parse `text` into a nested document structure:
     * document[paragraph][sentence][word].
     *
     * The text is split on '\n' into at most MAX_PARAGRAPHS paragraphs, each
     * paragraph on '.' into sentences, and each sentence on ' ' into words
     * (the sentence and word splits are each called with a limit of 100).
     * The intermediate paragraph and sentence strings are freed once they
     * have been split further; only the word strings and the pointer arrays
     * that lead to them remain in the returned structure.
     *
     * The paragraph, sentence and word counts are not stored or returned, so
     * callers must know the valid index ranges by other means.
     */
    char ****get_document(char *text) {
        int n_paragraphs;
        char **paragraphs = strsplit(text, "\n", MAX_PARAGRAPHS, &n_paragraphs);
        char ****document = (char ****)malloc(n_paragraphs * sizeof *document);

        for (int p = 0; p < n_paragraphs; p++) {
            int n_sentences;
            char **sentences = strsplit(paragraphs[p], ".", 100, &n_sentences);
            char ***parsed = (char ***)malloc(n_sentences * sizeof *parsed);

            for (int s = 0; s < n_sentences; s++) {
                int n_words; /* required by strsplit, otherwise unused */
                parsed[s] = strsplit(sentences[s], " ", 100, &n_words);
                free(sentences[s]);
            }

            document[p] = parsed;
            free(sentences);
            free(paragraphs[p]);
        }

        free(paragraphs);
        return document;
    }