Structuring the Document

  • + 0 comments

    for (int i = 0; i < len; i++) { free(*(arr + i)); } free(arr); }

    /*
     * Split `s` into newly allocated tokens separated by `delim`.
     *
     * The delimiter character is never stored in a token, and empty tokens are
     * skipped: leading, trailing and consecutive delimiters produce nothing.
     *
     * s      NUL-terminated input string; it is only read. NULL is treated
     *        like an empty string.
     * delim  Separator character.
     * count  Out-parameter that receives the number of tokens returned.
     *
     * Returns a malloc'd array of `*count` malloc'd, NUL-terminated strings.
     * The caller owns the array and every string in it. Returns NULL with
     * `*count == 0` when there are no tokens or when an allocation fails.
     */
    char ** split(char * s, const char delim, int * count) {
        *count = 0;
        if (s == NULL) {
            return NULL;
        }

        /* First pass: count the tokens so the pointer array is sized once. */
        size_t total = 0;
        const char *p = s;
        while (*p != '\0') {
            if (*p == delim) {
                p++;
                continue;
            }
            total++;
            while (*p != '\0' && *p != delim) {
                p++;
            }
        }
        if (total == 0) {
            return NULL;
        }

        char **tokens = malloc(total * sizeof *tokens);
        if (tokens == NULL) {
            return NULL;
        }

        /* Second pass: copy every token into its own exactly-sized buffer. */
        size_t filled = 0;
        p = s;
        while (*p != '\0') {
            if (*p == delim) {
                p++;
                continue;
            }

            const char *start = p;
            while (*p != '\0' && *p != delim) {
                p++;
            }
            size_t len = (size_t)(p - start);

            char *token = malloc(len + 1);
            if (token == NULL) {
                /* Undo the partial result so nothing leaks on failure. */
                while (filled > 0) {
                    free(tokens[--filled]);
                }
                free(tokens);
                return NULL;
            }
            for (size_t i = 0; i < len; i++) {
                token[i] = start[i];
            }
            token[len] = '\0';

            tokens[filled++] = token;
        }

        *count = (int)filled;
        return tokens;
    }

    /*
     * Build a document from `text` by splitting it three times with split():
     * on '\n' into paragraphs, on '.' into sentences, and on ' ' into words.
     *
     * text  NUL-terminated input text.
     *
     * Returns the document by value. document.data, each paragraph.data and
     * each sentence.data are malloc'd arrays, and each word.data is a string
     * returned by split(). This function frees none of them, so the caller
     * owns them. The temporary paragraph and sentence strings are released
     * with free_ptr_ptr() before returning.
     *
     * If one of the arrays cannot be allocated, the corresponding count is
     * set to 0 so that no reader indexes through a NULL pointer.
     */
    struct document get_document(char* text) {
        /* Plain locals: split() only needs somewhere to write each count. */
        int paragraph_count = 0;
        int sentence_count = 0;
        int word_count = 0;

        /* Split into paragraphs and store temporarily. */
        char ** paragraphs_temp = split(text, '\n', &paragraph_count);

        struct document document;
        document.paragraph_count = paragraph_count;
        /* Allocate space for the paragraphs in the document. */
        document.data = malloc((size_t)paragraph_count * sizeof *document.data);
        if (document.data == NULL) {
            /* Allocation failed (or was zero-sized): expose no paragraphs. */
            document.paragraph_count = 0;
        }

        for (int i = 0; i < document.paragraph_count; i++) {
            /* Split the current paragraph into sentences, stored temporarily. */
            char ** sentences_temp = split(*(paragraphs_temp + i), '.', &sentence_count);

            struct paragraph paragraph;
            paragraph.sentence_count = sentence_count;
            /* Allocate space for the sentences in this paragraph. */
            paragraph.data = malloc((size_t)sentence_count * sizeof *paragraph.data);
            if (paragraph.data == NULL) {
                paragraph.sentence_count = 0;
            }

            for (int j = 0; j < paragraph.sentence_count; j++) {
                /* Split the current sentence into words, stored temporarily. */
                char ** words_temp = split(*(sentences_temp + j), ' ', &word_count);

                struct sentence sentence;
                sentence.word_count = word_count;
                /* Allocate space for the words in this sentence. */
                sentence.data = malloc((size_t)word_count * sizeof *sentence.data);

                if (sentence.data == NULL) {
                    /* Nowhere to store the words: release them and the array. */
                    sentence.word_count = 0;
                    free_ptr_ptr(words_temp, word_count);
                } else {
                    for (int k = 0; k < word_count; k++) {
                        struct word word;
                        /* The word takes over the string made by split(). */
                        word.data = *(words_temp + k);
                        *(sentence.data + k) = word;
                    }
                    /* Only the pointer array goes; the strings live on. */
                    free(words_temp);
                }

                *(paragraph.data + j) = sentence;
            }

            *(document.data + i) = paragraph;

            /* Use the count from split(), not paragraph.sentence_count,
             * so every temporary sentence string is freed. */
            free_ptr_ptr(sentences_temp, sentence_count);
        }

        free_ptr_ptr(paragraphs_temp, paragraph_count);

        return document;
    }

    /*
     * Return a copy of the k-th word of the m-th sentence of the n-th
     * paragraph of Doc. k, m and n are 1-based positions (each is reduced by
     * one before indexing); no range checking is performed.
     */
    struct word kth_word_in_mth_sentence_of_nth_paragraph(struct document Doc, int k, int m, int n) {
        return Doc.data[n - 1].data[m - 1].data[k - 1];
    }

    /*
     * Return a copy of the k-th sentence of the m-th paragraph of Doc.
     * k and m are 1-based positions (each is reduced by one before indexing);
     * no range checking is performed.
     */
    struct sentence kth_sentence_in_mth_paragraph(struct document Doc, int k, int m) {
        return Doc.data[m - 1].data[k - 1];
    }

    /*
     * Return a copy of the k-th paragraph of Doc. k is a 1-based position
     * (it is reduced by one before indexing); no range checking is performed.
     */
    struct paragraph kth_paragraph(struct document Doc, int k) {
        return Doc.data[k - 1];
    }