Querying the Document

Sort by

recency

|

155 Discussions

|

  • + 0 comments

    We can use strtok_r, which keeps its position in a caller-supplied save pointer; giving each level of the nested loops its own save pointer lets them tokenize independently.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include<assert.h>
    #define MAX_CHARACTERS 1005
    #define MAX_PARAGRAPHS 5
    
    /*
     * Count how many times the character c occurs in the NUL-terminated
     * string s.
     *
     * The string is walked once with a pointer, so strlen() is not
     * re-evaluated on every iteration of the loop.
     */
    int char_count(char *s, char c) {
        int count = 0;
        for (const char *p = s; *p != '\0'; p++) {
            if (*p == c)
                count++;
        }
        return count;
    }
    
    /*
     * Return word k of sentence m of paragraph n.
     * All three positions are 1-based; the underlying arrays are 0-based.
     */
    char* kth_word_in_mth_sentence_of_nth_paragraph(char**** document, int k, int m, int n) {
        char ***paragraph = document[n - 1];
        char **sentence = paragraph[m - 1];
        return sentence[k - 1];
    }
    
    /*
     * Return sentence k of paragraph m (both 1-based) as an array of words.
     */
    char** kth_sentence_in_mth_paragraph(char**** document, int k, int m) {
        char ***paragraph = document[m - 1];
        return paragraph[k - 1];
    }
    
    /*
     * Return paragraph k (1-based) as an array of sentences.
     */
    char*** kth_paragraph(char**** document, int k) {
        const int index = k - 1;
        return document[index];
    }
    
    char**** get_document(char* text) {
        char ****doc;
        int pcnt = char_count(text, '\n') + 1, cp = 0;
        doc = (char ****)malloc((sizeof(char ***)) * pcnt);
        char *ptoken, *psaveptr;
        ptoken = strtok_r(text, "\n", &psaveptr);
        while (ptoken != NULL) {
            char *p = (char *)malloc((sizeof(char)) * (strlen(ptoken) + 1));
            strcpy(p, ptoken);
            int scnt = char_count(p, '.') + 1, cs = 0;
            doc[cp] = (char ***)malloc((sizeof(char **)) * scnt);
            char *stoken, *ssaveptr;
            stoken = strtok_r(p, ".", &ssaveptr);
            while (stoken != NULL) {
                char *s = (char *)malloc((sizeof(char)) * (strlen(stoken) + 1));
                strcpy(s, stoken);
                int wcnt = char_count(s, ' ') + 1, cw = 0;
                doc[cp][cs] = (char **)malloc((sizeof(char *)) * wcnt);
                char *wtoken, *wsaveptr;
                wtoken = strtok_r(s, " ", &wsaveptr);
                while (wtoken != NULL) {
                    char *w = malloc((sizeof(char)) * (strlen(wtoken) + 1));
                    strcpy(w, wtoken);
                    int ccnt = strlen(w);
                    doc[cp][cs][cw] = (char *)malloc((sizeof(char)) * (ccnt + 1));
                    strcpy(doc[cp][cs][cw], w);
                    wtoken = strtok_r(NULL, " ", &wsaveptr);
                    cw++;
                }
                stoken = strtok_r(NULL, ".", &ssaveptr);
                cs++;
            }
            ptoken = strtok_r(NULL, "\n", &psaveptr);
            cp++;
        }
        return doc;
    }
    
    
    
    /*
     * Read the document from standard input and return it as one
     * heap-allocated string (the caller owns it).
     *
     * Input format: an integer paragraph count on the first line,
     * followed by that many lines of text. The paragraphs are joined
     * with a single '\n' between them.
     *
     * Every read is bounded by the buffer it writes to: lines are read
     * with fgets(), and a paragraph that would not fit in the
     * MAX_CHARACTERS-sized result stops the reading. Returns NULL only
     * if the final allocation fails.
     */
    char* get_input_text() {	
        int paragraph_count = 0;
        if (scanf("%d", &paragraph_count) != 1)
            paragraph_count = 0;

        /* Discard the rest of the count line, including its newline. */
        int ch;
        while ((ch = getchar()) != '\n' && ch != EOF)
            ;

        char line[MAX_CHARACTERS], doc[MAX_CHARACTERS];
        size_t used = 0;
        doc[0] = '\0';
        for (int i = 0; i < paragraph_count; i++) {
            if (fgets(line, sizeof line, stdin) == NULL)
                break;
            line[strcspn(line, "\n")] = '\0';   /* drop the trailing newline */

            size_t len = strlen(line);
            size_t sep = (i != 0) ? 1 : 0;      /* '\n' between paragraphs */
            if (used + sep + len >= sizeof doc)
                break;                          /* would overflow doc */
            if (sep)
                doc[used++] = '\n';
            memcpy(doc + used, line, len + 1);
            used += len;
        }

        char* returnDoc = malloc(used + 1);
        if (returnDoc != NULL)
            memcpy(returnDoc, doc, used + 1);
        return returnDoc;
    }
    
    /* Write word to standard output with no trailing separator. */
    void print_word(char* word) {
        fputs(word, stdout);
    }
    
    /*
     * Print the words of sentence separated by single spaces.
     *
     * The number of words to print is read from standard input. If that
     * read fails nothing is printed, instead of looping on an
     * indeterminate count.
     */
    void print_sentence(char** sentence) {
        int word_count = 0;
        if (scanf("%d", &word_count) != 1)
            return;
        for (int i = 0; i < word_count; i++) {
            if (i != 0)
                printf(" ");
            printf("%s", sentence[i]);
        }
    } 
    
    /*
     * Print every sentence of paragraph, each followed by a '.'.
     *
     * The number of sentences is read from standard input. If that read
     * fails nothing is printed, instead of looping on an indeterminate
     * count.
     */
    void print_paragraph(char*** paragraph) {
        int sentence_count = 0;
        if (scanf("%d", &sentence_count) != 1)
            return;
        for (int i = 0; i < sentence_count; i++) {
            print_sentence(paragraph[i]);
            printf(".");
        }
    }
    
    /*
     * Read the document, then answer q queries from standard input.
     *
     * Query type 3 prints a word (k m n), type 2 prints a sentence (k m),
     * and any other type prints a paragraph (k). Each answer is followed
     * by a newline. Malformed or truncated input ends the program with
     * exit status 1 instead of continuing with indeterminate values.
     */
    int main() 
    {
        char* text = get_input_text();
        if (text == NULL)
            return 1;
        char**** document = get_document(text);

        int q = 0;
        if (scanf("%d", &q) != 1)
            return 1;

        /* "> 0" keeps a negative count from looping. */
        while (q-- > 0) {
            int type;
            if (scanf("%d", &type) != 1)
                return 1;

            if (type == 3){
                int k, m, n;
                if (scanf("%d %d %d", &k, &m, &n) != 3)
                    return 1;
                char* word = kth_word_in_mth_sentence_of_nth_paragraph(document, k, m, n);
                print_word(word);
            }

            else if (type == 2){
                int k, m;
                if (scanf("%d %d", &k, &m) != 2)
                    return 1;
                char** sentence = kth_sentence_in_mth_paragraph(document, k, m);
                print_sentence(sentence);
            }

            else{
                int k;
                if (scanf("%d", &k) != 1)
                    return 1;
                char*** paragraph = kth_paragraph(document, k);
                print_paragraph(paragraph);
            }
            printf("\n");
        }
        return 0;
    }
    
  • + 0 comments
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include<assert.h>
    #define MAX_CHARACTERS 1005
    #define MAX_PARAGRAPHS 5
    
    /*
     * Look up a single word. k, m and n are the 1-based positions of the
     * word, its sentence and its paragraph.
     */
    char* kth_word_in_mth_sentence_of_nth_paragraph(char**** document, int k, int m, int n) {
        return *(*(*(document + (n - 1)) + (m - 1)) + (k - 1));
    }
    
    /*
     * Look up a sentence. k and m are the 1-based positions of the
     * sentence and its paragraph.
     */
    char** kth_sentence_in_mth_paragraph(char**** document, int k, int m) {
        return *(*(document + (m - 1)) + (k - 1));
    }
    
    /* Look up a paragraph by its 1-based position k. */
    char*** kth_paragraph(char**** document, int k) {
        return *(document + (k - 1));
    }
    
    /*
     * Return a freshly allocated copy of word, made with strdup().
     */
    char* get_word(const char* word) 
    {
        return strdup(word);
    }
    
    /*
     * Split sentence on ' ' and return an array holding a copy of each
     * word (made with get_word()).
     *
     * Every space ends a word, so consecutive spaces produce empty
     * words. Text after the last space is added only if it is not
     * empty. Returns NULL when there are no words or when the working
     * copy cannot be allocated; if growing the array fails, the words
     * collected so far are returned.
     *
     * The input is not modified: splitting happens in a private working
     * copy, which is freed before returning.
     */
    char** get_sentence(const char* sentence) 
    {
        char **ans = NULL;
        int count_word = 0;
        char *scratch = strdup(sentence);
        if (scratch == NULL)
            return NULL;

        char *start = scratch;
        for (;;) {
            char *end = strchr(start, ' ');
            if (end != NULL)
                *end = '\0';            /* terminate the current word */
            else if (*start == '\0')
                break;                  /* nothing after the last space */

            char **grown = realloc(ans, sizeof *ans * (count_word + 1));
            if (grown == NULL)
                break;
            ans = grown;
            ans[count_word++] = get_word(start);

            if (end == NULL)
                break;                  /* that was the final word */
            start = end + 1;
        }

        free(scratch);
        return ans;
    }
    
    /*
     * Split paragraph on '.' and return an array with one entry per
     * sentence, each built by get_sentence().
     *
     * Every '.' ends a sentence. Text after the last '.' is added as a
     * final sentence only if it is not empty; that final sentence is
     * built from the trailing text itself rather than from the start of
     * the paragraph. Returns NULL when there are no sentences or when
     * the working copy cannot be allocated; if growing the array fails,
     * the sentences collected so far are returned.
     *
     * The input is not modified: splitting happens in a private working
     * copy, which is freed before returning.
     */
    char*** get_paragraph(const char* paragraph) 
    {
        char ***ans = NULL;
        int count_sentence = 0;
        char *scratch = strdup(paragraph);
        if (scratch == NULL)
            return NULL;

        char *start = scratch;
        for (;;) {
            char *end = strchr(start, '.');
            if (end != NULL)
                *end = '\0';            /* terminate the current sentence */
            else if (*start == '\0')
                break;                  /* nothing after the last '.' */

            char ***grown = realloc(ans, sizeof *ans * (count_sentence + 1));
            if (grown == NULL)
                break;
            ans = grown;
            ans[count_sentence++] = get_sentence(start);

            if (end == NULL)
                break;                  /* that was the final sentence */
            start = end + 1;
        }

        free(scratch);
        return ans;
    }
    
    /*
     * Split document on '\n' and return an array with one entry per
     * paragraph, each built by get_paragraph().
     *
     * Every '\n' ends a paragraph. Text after the last '\n' is added as
     * a final paragraph only if it is not empty. Returns NULL when there
     * are no paragraphs or when the working copy cannot be allocated; if
     * growing the array fails, the paragraphs collected so far are
     * returned.
     *
     * The input is not modified: splitting happens in a private working
     * copy, which is freed before returning.
     */
    char**** get_document(char* document) {
        char ****ans = NULL;
        int count_paragraph = 0;
        char *scratch = strdup(document);
        if (scratch == NULL)
            return NULL;

        char *start = scratch;
        for (;;) {
            char *end = strchr(start, '\n');
            if (end != NULL)
                *end = '\0';            /* terminate the current paragraph */
            else if (*start == '\0')
                break;                  /* nothing after the last newline */

            char ****grown = realloc(ans, sizeof *ans * (count_paragraph + 1));
            if (grown == NULL)
                break;
            ans = grown;
            ans[count_paragraph++] = get_paragraph(start);

            if (end == NULL)
                break;                  /* that was the final paragraph */
            start = end + 1;
        }

        free(scratch);
        return ans;
    }   
    
    
    /*
     * Read the document from standard input and return it as one
     * heap-allocated string (the caller owns it).
     *
     * Input format: an integer paragraph count on the first line,
     * followed by that many lines of text. The paragraphs are joined
     * with a single '\n' between them.
     *
     * Every read is bounded by the buffer it writes to: lines are read
     * with fgets(), and a paragraph that would not fit in the
     * MAX_CHARACTERS-sized result stops the reading. Returns NULL only
     * if the final allocation fails.
     */
    char* get_input_text() {	
        int paragraph_count = 0;
        if (scanf("%d", &paragraph_count) != 1)
            paragraph_count = 0;

        /* Discard the rest of the count line, including its newline. */
        int ch;
        while ((ch = getchar()) != '\n' && ch != EOF)
            ;

        char line[MAX_CHARACTERS], doc[MAX_CHARACTERS];
        size_t used = 0;
        doc[0] = '\0';
        for (int i = 0; i < paragraph_count; i++) {
            if (fgets(line, sizeof line, stdin) == NULL)
                break;
            line[strcspn(line, "\n")] = '\0';   /* drop the trailing newline */

            size_t len = strlen(line);
            size_t sep = (i != 0) ? 1 : 0;      /* '\n' between paragraphs */
            if (used + sep + len >= sizeof doc)
                break;                          /* would overflow doc */
            if (sep)
                doc[used++] = '\n';
            memcpy(doc + used, line, len + 1);
            used += len;
        }

        char* returnDoc = malloc(used + 1);
        if (returnDoc != NULL)
            memcpy(returnDoc, doc, used + 1);
        return returnDoc;
    }
    
    /* Print a single word to standard output, without a newline. */
    void print_word(char* word) {
        const char *text = word;
        printf("%s", text);
    }
    
    /*
     * Print the words of sentence separated by single spaces.
     *
     * The number of words to print is read from standard input. If that
     * read fails nothing is printed, instead of looping on an
     * indeterminate count.
     */
    void print_sentence(char** sentence) {
        int word_count = 0;
        if (scanf("%d", &word_count) != 1)
            return;
        for (int i = 0; i < word_count; i++) {
            if (i != 0)
                printf(" ");
            printf("%s", sentence[i]);
        }
    } 
    
    /*
     * Print every sentence of paragraph, each followed by a '.'.
     *
     * The number of sentences is read from standard input. If that read
     * fails nothing is printed, instead of looping on an indeterminate
     * count.
     */
    void print_paragraph(char*** paragraph) {
        int sentence_count = 0;
        if (scanf("%d", &sentence_count) != 1)
            return;
        for (int i = 0; i < sentence_count; i++) {
            print_sentence(paragraph[i]);
            printf(".");
        }
    }
    
    /*
     * Read the document, then answer q queries from standard input.
     *
     * Query type 3 prints a word (k m n), type 2 prints a sentence (k m),
     * and any other type prints a paragraph (k). Each answer is followed
     * by a newline. Malformed or truncated input ends the program with
     * exit status 1 instead of continuing with indeterminate values.
     */
    int main() 
    {
        char* text = get_input_text();
        if (text == NULL)
            return 1;
        char**** document = get_document(text);

        int q = 0;
        if (scanf("%d", &q) != 1)
            return 1;

        /* "> 0" keeps a negative count from looping. */
        for (; q > 0; q--) {
            int type;
            if (scanf("%d", &type) != 1)
                return 1;

            switch (type) {
            case 3: {
                int k, m, n;
                if (scanf("%d %d %d", &k, &m, &n) != 3)
                    return 1;
                print_word(kth_word_in_mth_sentence_of_nth_paragraph(document, k, m, n));
                break;
            }
            case 2: {
                int k, m;
                if (scanf("%d %d", &k, &m) != 2)
                    return 1;
                print_sentence(kth_sentence_in_mth_paragraph(document, k, m));
                break;
            }
            default: {
                /* Every other type is treated as a paragraph query. */
                int k;
                if (scanf("%d", &k) != 1)
                    return 1;
                print_paragraph(kth_paragraph(document, k));
                break;
            }
            }
            printf("\n");
        }
        return 0;
    }
    
  • + 0 comments
    /*
     * Fetch a word by 1-based positions: word k, sentence m, paragraph n.
     */
    char* kth_word_in_mth_sentence_of_nth_paragraph(char**** document, int k, int m, int n) {
        const int paragraph_index = n - 1;
        const int sentence_index = m - 1;
        const int word_index = k - 1;
        return document[paragraph_index][sentence_index][word_index];
    }
    
    /*
     * Fetch a sentence by 1-based positions: sentence k, paragraph m.
     */
    char** kth_sentence_in_mth_paragraph(char**** document, int k, int m) { 
        const int paragraph_index = m - 1;
        const int sentence_index = k - 1;
        return document[paragraph_index][sentence_index];
    }
    
    /* Fetch paragraph k, where k is 1-based. */
    char*** kth_paragraph(char**** document, int k) {
        char ****slot = document + (k - 1);
        return *slot;
    }
    
    /*
     * Index text in place as paragraphs -> sentences -> words.
     *
     * Three flat arrays are allocated: one of word pointers (into text),
     * one of sentence pointers (into the word array) and one of
     * paragraph pointers (into the sentence array); the paragraph array
     * is returned. text is modified: each ' ', '.' and '\n' that ends a
     * word is overwritten with '\0', and the words keep pointing into
     * text, so text must outlive the result.
     *
     * A '.' starts a new sentence unless it is the last character or is
     * followed by '\n'; a '\n' starts a new paragraph (and therefore a
     * new sentence and word).
     *
     * The arrays are sized from upper bounds (one slot for the first
     * word/sentence/paragraph plus one per delimiter that can start
     * another), so text without any '.' or with a trailing '\n' cannot
     * write past an allocation. Returns NULL if an allocation fails.
     */
    char**** get_document(char* text) {
        size_t len = strlen(text);

        size_t max_words = 1, max_sentences = 1, max_paragraphs = 1;
        for (size_t i = 0; i < len; i++) {
            if (text[i] == ' ') {
                max_words++;
            } else if (text[i] == '.') {
                max_words++;
                max_sentences++;
            } else if (text[i] == '\n') {
                max_words++;
                max_sentences++;
                max_paragraphs++;
            }
        }

        char **words = malloc(max_words * sizeof *words);
        char ***sentences = malloc(max_sentences * sizeof *sentences);
        char ****paragraphs = malloc(max_paragraphs * sizeof *paragraphs);
        if (words == NULL || sentences == NULL || paragraphs == NULL) {
            free(words);
            free(sentences);
            free(paragraphs);
            return NULL;
        }

        /* The first word, sentence and paragraph all start at text[0]. */
        words[0] = text;
        sentences[0] = words;
        paragraphs[0] = sentences;

        size_t i_word = 1, i_sentence = 1, i_paragraph = 1;
        for (size_t i = 1; i < len; i++) {
            if (text[i] == ' ') {
                text[i] = '\0';
                words[i_word++] = text + i + 1;
            } else if (text[i] == '.') {
                text[i] = '\0';
                /* i < len, so text[i + 1] is at worst the terminator. */
                if (text[i + 1] != '\0' && text[i + 1] != '\n') {
                    sentences[i_sentence++] = words + i_word;
                    words[i_word++] = text + i + 1;
                }
            } else if (text[i] == '\n') {
                text[i] = '\0';
                paragraphs[i_paragraph++] = sentences + i_sentence;
                sentences[i_sentence++] = words + i_word;
                words[i_word++] = text + i + 1;
            }
        }
        return paragraphs;
    }
    
  • + 0 comments
    /*
     * Return the k-th word of the m-th sentence of the n-th paragraph;
     * all three positions count from 1.
     */
    char* kth_word_in_mth_sentence_of_nth_paragraph(char**** document, int k, int m, int n) {
        char **sentence = document[n - 1][m - 1];
        return *(sentence + (k - 1));
    }
    
    /*
     * Return the k-th sentence of the m-th paragraph; both positions
     * count from 1.
     */
    char** kth_sentence_in_mth_paragraph(char**** document, int k, int m) {
        char ***paragraph = *(document + (m - 1));
        return paragraph[k - 1];
    }
    
    /* Return the k-th paragraph; k counts from 1. */
    char*** kth_paragraph(char**** document, int k) {
        char ***paragraph = document[k - 1];
        return paragraph;
    }
    
    /*
     * Index text in place as paragraphs -> sentences -> words, growing
     * the arrays with realloc() as delimiters are met.
     *
     * text is modified: every ' ', '.' and '\n' is overwritten with
     * '\0', and the stored words point into text, so text must outlive
     * the result. Each word array always has one spare slot beyond the
     * words stored so far.
     *
     * A word is recorded only if at least one character precedes the
     * delimiter, so consecutive delimiters do not store NULL words. A
     * word still pending at a '\n' or at the end of text is recorded as
     * well. A '.' opens a new sentence unless it is followed by '\n' or
     * by the end of text.
     *
     * NOTE: allocation results are not checked here.
     */
    char**** get_document(char* text)
    {
        int i_paragraph = 0;
        int i_sentence = 0;
        int i_word = 0;

        char ****doc = malloc(sizeof *doc);
        doc[0] = malloc(sizeof *doc[0]);
        doc[0][0] = malloc(sizeof *doc[0][0]);

        char *word = NULL;      /* start of the word being scanned, if any */

        for (char *s = text; *s; ++s)
        {
            if (*s == ' ' || *s == '.')
            {
                if (word != NULL)
                {
                    doc[i_paragraph][i_sentence][i_word] = word;
                    i_word++;
                    /* keep one free slot for the next word */
                    doc[i_paragraph][i_sentence] = realloc(doc[i_paragraph][i_sentence], sizeof(char *) * (i_word + 1));
                }

                if (*s == '.' && s[1] != '\n' && s[1] != '\0')
                {
                    i_word = 0;
                    i_sentence++;

                    doc[i_paragraph] = realloc(doc[i_paragraph], sizeof(char **) * (i_sentence + 1));
                    doc[i_paragraph][i_sentence] = malloc(sizeof(char *));
                }

                *s = 0;
                word = NULL;
            }
            else if (*s == '\n')
            {
                /* a word not closed by ' ' or '.' still belongs here */
                if (word != NULL)
                    doc[i_paragraph][i_sentence][i_word] = word;

                *s = 0;
                word = NULL;

                i_word = 0;
                i_sentence = 0;
                i_paragraph++;

                doc = realloc(doc, sizeof(char ***) * (i_paragraph + 1));
                doc[i_paragraph] = malloc(sizeof(char **));
                doc[i_paragraph][0] = malloc(sizeof(char *));
            }
            else if (word == NULL)
            {
                word = s;
            }
        }

        /* text ended in the middle of a word: the spare slot takes it */
        if (word != NULL)
            doc[i_paragraph][i_sentence][i_word] = word;

        return doc;
    }
    
  • + 0 comments

    Splitting the string step by step, by '\n' (for paragraphs), '.' (for sentences), and ' ' (for words), is easy to implement.

    /*
     * Word lookup: paragraph n, then sentence m, then word k, each
     * converted from a 1-based position to a 0-based index.
     */
    char *kth_word_in_mth_sentence_of_nth_paragraph(char ****document, int k, int m, int n) {
        char ***paragraph = *(document + (n - 1));
        char **sentence = *(paragraph + (m - 1));
        char *word = *(sentence + (k - 1));
        return word;
    }
    
    /*
     * Sentence lookup: paragraph m, then sentence k, each converted from
     * a 1-based position to a 0-based index.
     */
    char **kth_sentence_in_mth_paragraph(char ****document, int k, int m) {
        char ***paragraph = *(document + (m - 1));
        char **sentence = *(paragraph + (k - 1));
        return sentence;
    }
    
    /* Paragraph lookup by 1-based position k. */
    char ***kth_paragraph(char ****document, int k) {
        int index = k;
        index -= 1;
        return document[index];
    }
    
    /*
     * Split text into tokens separated by any character in dilim and
     * return them as a heap-allocated array of heap-allocated strings.
     *
     * text   string to split; strtok() overwrites its delimiters.
     * dilim  set of delimiter characters.
     * max    expected number of tokens. It is used as the initial
     *        capacity only: the array grows when there are more tokens,
     *        so a larger input no longer writes past the array.
     * len    receives the number of tokens returned.
     *
     * Each token gets a buffer of exactly its own size, so there are no
     * pre-allocated spare buffers left behind. The caller frees every
     * string and then the array. Returns NULL with *len set to 0 if an
     * allocation fails; with no tokens a valid array is returned and
     * *len is 0.
     *
     * strtok() keeps hidden global state, so calls to this function must
     * not be interleaved with another strtok() sequence.
     */
    char **strsplit(char *text, const char *dilim, const int max, int *len) {
        size_t capacity = max > 0 ? (size_t)max : 1;
        size_t count = 0;
        char **arr = malloc(capacity * sizeof *arr);

        *len = 0;
        if (arr == NULL)
            return NULL;

        for (char *token = strtok(text, dilim); token != NULL; token = strtok(NULL, dilim)) {
            if (count == capacity) {
                char **grown = realloc(arr, 2 * capacity * sizeof *arr);
                if (grown == NULL)
                    goto fail;
                arr = grown;
                capacity *= 2;
            }

            size_t size = strlen(token) + 1;
            arr[count] = malloc(size);
            if (arr[count] == NULL)
                goto fail;
            memcpy(arr[count], token, size);
            count++;
        }

        /* Trim unused slots; never ask realloc() for a zero-sized block. */
        if (count > 0 && count < capacity) {
            char **trimmed = realloc(arr, count * sizeof *arr);
            if (trimmed != NULL)
                arr = trimmed;
        }

        *len = (int)count;
        return arr;

    fail:
        while (count > 0)
            free(arr[--count]);
        free(arr);
        return NULL;
    }
    
    /*
     * Build the document table by splitting in three stages with
     * strsplit(): text into paragraphs on "\n", each paragraph into
     * sentences on ".", and each sentence into words on " ".
     *
     * The intermediate paragraph and sentence strings are freed as soon
     * as they have been split; only the word arrays and the two levels
     * of arrays above them are kept in the result.
     */
    char ****get_document(char *text) {
        int paragraph_total;
        char **paragraph_strs = strsplit(text, "\n", MAX_PARAGRAPHS, &paragraph_total);
        char ****doc = (char ****)malloc(paragraph_total * sizeof(char ***));

        int p = 0;
        while (p < paragraph_total) {
            int sentence_total;
            char **sentence_strs = strsplit(paragraph_strs[p], ".", 100, &sentence_total);
            char ***sentences = (char ***)malloc(sentence_total * sizeof(char **));

            for (int s = 0; s != sentence_total; ++s) {
                int word_total;
                sentences[s] = strsplit(sentence_strs[s], " ", 100, &word_total);
                free(sentence_strs[s]);     /* the sentence text is no longer needed */
            }

            doc[p] = sentences;
            free(paragraph_strs[p]);
            free(sentence_strs);
            ++p;
        }

        free(paragraph_strs);
        return doc;
    }