Structuring the Document

Sort by

recency

|

123 Discussions

|

  • + 0 comments
    /*
     * Split `text` in place into a document of paragraphs, sentences and words.
     *
     * '\n' ends a paragraph, '.' ends a sentence and ' ' ends a word. Every
     * ' ' and '.' is overwritten with '\0', so each stored word points directly
     * into `text`; no word is copied.
     *
     * The paragraph, sentence and word arrays are grown with realloc as tokens
     * are found. Allocation failures are not checked.
     */
    struct document get_document(char* text)
    {
        struct document doc;
        struct paragraph *cur_paragraph = NULL;
        struct sentence *cur_sentence = NULL;
        char *new_word = NULL;      // start of the word being scanned, NULL between words

        doc.data = NULL;
        doc.paragraph_count = 0;

        for (char *s = text; *s; ++s)
        {
            if (*s == ' ' || *s == '.')
            {
                const int ends_sentence = (*s == '.');

                *s = 0;     // terminate the pending word (if any) in place

                // Only record a word when one was actually scanned: repeated
                // delimiters such as ". " or "  " must not store a NULL word
                // or open an empty sentence/paragraph.
                if (new_word != NULL)
                {
                    // new paragraph
                    if (cur_paragraph == NULL)
                    {
                        doc.paragraph_count++;
                        doc.data = realloc(doc.data, sizeof *doc.data * doc.paragraph_count);

                        cur_paragraph = doc.data + doc.paragraph_count - 1;
                        cur_paragraph->data = NULL;
                        cur_paragraph->sentence_count = 0;

                        cur_sentence = NULL;        // a new paragraph always starts a new sentence
                    }

                    // new sentence
                    if (cur_sentence == NULL)
                    {
                        cur_paragraph->sentence_count++;
                        cur_paragraph->data = realloc(cur_paragraph->data,
                                                      sizeof *cur_paragraph->data * cur_paragraph->sentence_count);

                        cur_sentence = cur_paragraph->data + cur_paragraph->sentence_count - 1;
                        cur_sentence->data = NULL;
                        cur_sentence->word_count = 0;
                    }

                    // new word
                    cur_sentence->word_count++;
                    cur_sentence->data = realloc(cur_sentence->data,
                                                 sizeof *cur_sentence->data * cur_sentence->word_count);
                    cur_sentence->data[cur_sentence->word_count - 1].data = new_word;
                    new_word = NULL;
                }

                if (ends_sentence)
                    cur_sentence = NULL;        // the next word starts a new sentence
            }
            else if (*s == '\n')
            {
                // the next word starts a new paragraph (and therefore a new sentence)
                cur_sentence = NULL;
                cur_paragraph = NULL;
            }
            else if (new_word == NULL)
            {
                new_word = s;
            }
        }

        return doc;
    }
    
    struct word kth_word_in_mth_sentence_of_nth_paragraph(struct document Doc, int k, int m, int n)
    {
        return Doc.data[n - 1].data[m - 1].data[k - 1];
    }
    
    struct sentence kth_sentence_in_mth_paragraph(struct document Doc, int k, int m)
    {
        return Doc.data[m - 1].data[k - 1];
    }
    
    /* Return paragraph k; k is 1-based and not range-checked. */
    struct paragraph kth_paragraph(struct document Doc, int k)
    {
        const int index = k - 1;

        return *(Doc.data + index);
    }
    
  • + 0 comments
    /*
     * Shorthand for the "current" slot of a document held in a variable named
     * `d`; the macros only expand to something meaningful where such a variable
     * is in scope.
     *
     * Each *_COUNT macro is used as an array index by the macros below it, so
     * the count of finished items doubles as the index of the item currently
     * being filled.
     */
    #define PARAGRAPH_COUNT     d.paragraph_count
    #define SENTENCE_COUNT      d.data[PARAGRAPH_COUNT].sentence_count
    #define WORD_COUNT          d.data[PARAGRAPH_COUNT].data[SENTENCE_COUNT].word_count
    
    /* Storage of the current paragraph (its sentence array), sentence (its word array) and word (its text). */
    #define PARAGRAPH_DATA      d.data[PARAGRAPH_COUNT].data
    #define SENTENCE_DATA       PARAGRAPH_DATA[SENTENCE_COUNT].data
    #define WORD_DATA           SENTENCE_DATA[WORD_COUNT].data
    
    /*
     * Build a document from `text`, splitting the buffer in place.
     *
     * ' ' ends a word, '.' ends a word and a sentence, '\n' ends a paragraph.
     * Each of those characters is overwritten with '\0' and every stored word
     * points directly into `text`; no word is copied.
     *
     * Arrays are pre-sized from MAX_PARAGRAPHS / MAX_CHARACTERS instead of being
     * grown on demand, and allocation failures are not checked.
     */
    struct document get_document(char* text) {

        struct document d = {0};
        d.data = calloc(MAX_PARAGRAPHS , sizeof(struct paragraph));
        d.data[0].data = calloc(((MAX_CHARACTERS/4)/2) , sizeof(struct sentence));
        d.data[0].data[0].data = calloc((MAX_CHARACTERS/4) , sizeof(struct word));

        char * ptr = text;
        int s_index = 0 , current_index = 0;    /* start of pending token / position of ptr */

        while (*ptr != '\0')
        {
            if (*ptr == ' ')
            {
                *ptr = '\0';
                /* The token is non-empty iff its first byte is not the terminator just written. */
                if (text[s_index] != '\0')
                {
                    /* Point at the token inside `text`; allocating a buffer here would only leak it. */
                    WORD_DATA = (text+s_index);
                    WORD_COUNT++;
                }
                s_index = current_index+1;
            }

            else if (*ptr == '.') {
                *ptr = '\0';
                if (text[s_index] != '\0')
                {
                    WORD_DATA = (text+s_index);
                    WORD_COUNT++;

                    /* Close the sentence and prepare the word array of the next one. */
                    SENTENCE_COUNT++;
                    SENTENCE_DATA = calloc(((MAX_CHARACTERS/4)/2) , sizeof(struct word));
                }
                s_index = current_index+1;
            }
            else if (*ptr == '\n') {
                *ptr = '\0';

                PARAGRAPH_COUNT++;
                /* Same sentence capacity as the first paragraph; MAX_PARAGRAPHS is unrelated to it. */
                PARAGRAPH_DATA = calloc(((MAX_CHARACTERS/4)/2) , sizeof(struct sentence));
                SENTENCE_DATA = calloc(((MAX_CHARACTERS/4)/2) , sizeof(struct word));
                s_index = current_index+1;
            }

            ptr++;
            current_index++;
        }

        /* The paragraph in progress is never closed by a '\n', so count it here. */
        PARAGRAPH_COUNT++;
        return d;
    }
    
    struct word kth_word_in_mth_sentence_of_nth_paragraph(struct document Doc, int k, int m, int n) {
        return Doc.data[n-1].data[m-1].data[k-1];
    }
    
    struct sentence kth_sentence_in_mth_paragraph(struct document Doc, int k, int m) { 
        return Doc.data[m-1].data[k-1];
    }
    
    /* Return paragraph k; k is 1-based and unchecked. */
    struct paragraph kth_paragraph(struct document Doc, int k) {
        return *(Doc.data + (k-1));
    }
    
  • + 0 comments

    .c

    /* Element count requested by the first malloc of each word, sentence,
     * paragraph and document buffer; the parsers resize those buffers with
     * realloc afterwards. */
    #define DEFAULT_LEN 8
    
    /// @brief Append one character to a heap buffer, growing the buffer by exactly one byte
    /// @param word address of the buffer pointer; receives the pointer returned by realloc
    /// @param word_len address of the current length; incremented by one
    /// @param ch the character stored in the new last slot
    void insert_char(char** word, int* word_len, char ch) {
        const int grown_len = *word_len + 1;
        char* grown = realloc(*word, sizeof(char) * grown_len);

        grown[grown_len - 1] = ch;

        /* Publish the new buffer and length back through the out-parameters. */
        *word = grown;
        *word_len = grown_len;
    }
    
    /// @brief Tell whether a character is the space character (tabs and newlines do not count)
    /// @param ch the character
    /// @return 1 for ' ', 0 for anything else
    int is_whitespace(char ch) {
        if (ch == ' ') {
            return 1;
        }
        return 0;
    }
    
    /// @brief Advance the index past any run of whitespace starting at text[*character]
    /// @param text pointer to char
    /// @param character pointer to the index; left on the first non-whitespace character
    void trim_whitespace(char* text, int* character) {
        int pos = *character;

        for (; is_whitespace(text[pos]); pos++) {
            /* nothing to do: the loop header does the skipping */
        }

        *character = pos;
    }
    
    /// @brief Tell whether a character is an ASCII letter
    /// @param ch the character
    /// @return 1 for 'a'..'z' or 'A'..'Z', 0 otherwise
    int is_text(char ch) {
        const int is_lower = ('a' <= ch && ch <= 'z');
        const int is_upper = ('A' <= ch && ch <= 'Z');

        return is_lower || is_upper;
    }
    
    /// @brief Tell whether a character ends a sentence (only the period does)
    /// @param ch the character
    /// @return 1 for '.', 0 otherwise
    int is_sentence_terminator(char ch) {
        switch (ch) {
        case '.':
            return 1;
        default:
            return 0;
        }
    }
    
    /// @brief Tell whether a character ends a paragraph (newline or the string's null terminator)
    /// @param ch the character
    /// @return 1 for '\n' or '\0', 0 otherwise
    int is_paragraph_terminator(char ch) {
        switch (ch) {
        case '\n':
        case '\0':
            return 1;
        default:
            return 0;
        }
    }
    
    /// @brief Read the character at the current index, then advance the index by one
    /// @param text pointer to char
    /// @param character pointer to the index; incremented by one
    /// @return **char** - the character that was at the index before it moved
    char next_character(char* text, int* character) {
        const char current = text[*character];

        *character += 1;
        return current;
    }
    
    /// @brief Function to parse the next word from the text
    /// @param text pointer to char
    /// @param character pointer to int
    /// @return word struct - the word
    struct word next_word(char* text, int* character) {
        char* word = malloc(DEFAULT_LEN * sizeof(char));    // Allocate initial memory for the word
        int word_size = 0;                                  // Initialize word size
    
        // Loop through characters until a non-text character is encountered
        while (is_text(text[*character])) {
            char ch = next_character(text, character);      // Get the next character
            insert_char(&word, &word_size, ch);             // Insert the character into the word
        }
        
        insert_char(&word, &word_size, '\0');               // Null-terminate the word
    
        // Move word array to the data array of the word structure
        struct word W;
        W.data = word;
    
        return W;
    }
    
    /// @brief Parse one sentence starting at text[*character]
    ///
    /// Words are collected until a period is found. The period is consumed.
    /// Parsing also stops, without consuming anything, at a newline or at the
    /// end of the string, so an unterminated last sentence cannot loop forever.
    /// Characters that are neither letters, spaces nor terminators are skipped,
    /// and no empty word is stored for a space that directly precedes a period.
    /// @param text pointer to char
    /// @param character pointer to the index; left just after the period, or on the newline / null terminator
    /// @return sentence struct - the sentence (word_count may be 0)
    struct sentence next_sentence(char* text, int* character) {
        struct sentence S;
        S.data = malloc(DEFAULT_LEN * sizeof(struct word));                     // Initial storage, resized per word below
        S.word_count = 0;

        for (;;) {
            trim_whitespace(text, character);                                   // Skip spaces between words
            char ch = text[*character];

            if (is_sentence_terminator(ch)) {
                next_character(text, character);                                // Move past the period
                break;
            }
            if (is_paragraph_terminator(ch)) {
                break;                                                          // Leave '\n' / '\0' for the caller
            }
            if (!is_text(ch)) {
                next_character(text, character);                                // Unsupported character: skip it so the loop always advances
                continue;
            }

            struct word W = next_word(text, character);                         // ch is a letter, so W is never empty

            S.word_count++;
            S.data = realloc(S.data, S.word_count * sizeof(struct word));       // Resize the sentence array
            S.data[S.word_count - 1] = W;                                       // Add the word to the sentence
        }

        return S;
    }
    
    /// @brief Parse sentences starting at text[*character] until a paragraph terminator is reached
    /// @param text pointer to char
    /// @param character pointer to the index; left on the terminating newline or null terminator
    /// @return paragraph struct - the paragraph
    struct paragraph next_paragraph(char* text, int* character) {
        struct paragraph P;
        P.sentence_count = 0;
        P.data = malloc(DEFAULT_LEN * sizeof(struct sentence));     // Initial storage, resized per sentence below

        for (;;) {
            if (is_paragraph_terminator(text[*character])) {
                break;                                              // The terminator itself is not consumed here
            }

            struct sentence S = next_sentence(text, character);
            const int slot = P.sentence_count;                      // Index the new sentence will occupy

            P.data = realloc(P.data, (slot + 1) * sizeof(struct sentence));
            P.data[slot] = S;
            P.sentence_count = slot + 1;
        }

        return P;
    }
    
    /// @brief Parse the whole text into a document, one paragraph per pass
    /// @param text pointer to char
    /// @return document struct - the document (always holds at least one paragraph)
    struct document get_document(char* text) {
        struct document D;
        int character = 0;      // Read position shared by all the parsing helpers
        int reached_end;        // Set once the character after a paragraph is the null terminator

        D.paragraph_count = 0;
        D.data = malloc(DEFAULT_LEN * sizeof(struct paragraph));    // Initial storage, resized per paragraph below

        do {
            struct paragraph P = next_paragraph(text, &character);

            // Consume the paragraph terminator and remember whether it was the end of the text
            reached_end = (next_character(text, &character) == '\0');

            D.data = realloc(D.data, (D.paragraph_count + 1) * sizeof(struct paragraph));
            D.data[D.paragraph_count] = P;
            D.paragraph_count++;
        } while (!reached_end);

        return D;
    }
    
    /// @brief Return the k-th word in the m-th sentence of the n-th paragraph
    /// @param document the document
    /// @param k the words
    /// @param m the sentences
    /// @param n the paragraphs
    /// @return word struct
    struct word kth_word_in_mth_sentence_of_nth_paragraph(struct document Doc, int k, int m, int n) {
        return Doc.data[n - 1].data[m - 1].data[k - 1];
    }
    
    /// @brief Return the k-th sentence in the m-th paragraph
    /// @param document the document
    /// @param k the sentences
    /// @param m the paragraphs
    /// @return sentence struct
    struct sentence kth_sentence_in_mth_paragraph(struct document Doc, int k, int m) { 
        return Doc.data[m - 1].data[k - 1];
    }
    
    /// @brief Look up the k-th paragraph (1-based, unchecked)
    /// @param Doc the document
    /// @param k paragraph number within the document
    /// @return paragraph struct
    struct paragraph kth_paragraph(struct document Doc, int k) {
        return *(Doc.data + (k - 1));
    }
    
  • + 0 comments

    Not the prettiest code I've ever written.

    /*
     * Append `ch` to the word under construction and bump *letter_count.
     *
     * The buffer is kept one byte larger than the letter count and that spare
     * byte is set to '\0'. The word is therefore always a valid C string, and a
     * caller that stores its own terminator at data[*letter_count] stays inside
     * the allocation. Allocation failure is not checked.
     */
    void append_word(struct word *tmp_word, int *letter_count, char ch){
        tmp_word->data = realloc(tmp_word->data,
            sizeof(char)*(*letter_count+2));    /* +1 for ch, +1 for the terminator */
        tmp_word->data[*letter_count] = ch;
        *letter_count += 1;
        tmp_word->data[*letter_count] = '\0';
    }
    
    /*
     * Start a new word: zero the letter count and give the word a fresh
     * one-byte buffer. The previous buffer is not freed here.
     */
    void reset_word(struct word *tmp_word, int *letter_count){
        *letter_count = 0;
        tmp_word->data = malloc(1 * sizeof(char));
    }
    
    /*
     * Grow the sentence's word array by one slot and copy *tmp_word into it.
     * The copy is shallow: the stored word shares tmp_word's data pointer.
     */
    void append_sentence(struct sentence *tmp_sentence, struct word *tmp_word){
        const int slot = tmp_sentence->word_count;
        struct word *grown = realloc(tmp_sentence->data, sizeof(struct word)*(slot+1));

        grown[slot] = *tmp_word;
        tmp_sentence->data = grown;
        tmp_sentence->word_count = slot + 1;
    }
    
    /*
     * Start a new sentence: zero the word count and give the sentence a fresh
     * one-element word array. The previous array is not freed here.
     */
    void reset_sentence(struct sentence *tmp_sentence){
        tmp_sentence->word_count = 0;
        tmp_sentence->data = malloc(1 * sizeof(struct word));
    }
    
    /*
     * Grow the paragraph's sentence array by one slot and copy *tmp_sentence
     * into it. The copy is shallow: the stored sentence shares tmp_sentence's
     * word array.
     */
    void append_paragraph(struct paragraph *tmp_paragraph, struct sentence *tmp_sentence){
        const int slot = tmp_paragraph->sentence_count;
        struct sentence *grown = realloc(tmp_paragraph->data, sizeof(struct sentence)*(slot+1));

        grown[slot] = *tmp_sentence;
        tmp_paragraph->data = grown;
        tmp_paragraph->sentence_count = slot + 1;
    }
    
    /*
     * Start a new paragraph: zero the sentence count and give the paragraph a
     * fresh one-element sentence array. The previous array is not freed here.
     */
    void reset_paragraph(struct paragraph *tmp_paragraph){
        tmp_paragraph->sentence_count = 0;
        tmp_paragraph->data = malloc(1 * sizeof(struct sentence));
    }
    
    /*
     * Grow the document's paragraph array by one slot and copy *tmp_paragraph
     * into it. The copy is shallow: the stored paragraph shares tmp_paragraph's
     * sentence array.
     */
    void append_document(struct document *tmp_document, struct paragraph *tmp_paragraph){
        const int slot = tmp_document->paragraph_count;
        struct paragraph *grown = realloc(tmp_document->data, sizeof(struct paragraph)*(slot+1));

        grown[slot] = *tmp_paragraph;
        tmp_document->data = grown;
        tmp_document->paragraph_count = slot + 1;
    }
    
    struct document get_document(char* text) {
        //get length of text
        int text_length = strlen(text);
        
        struct word *tmp_word;
        int letters;
        tmp_word = malloc(sizeof(struct word));
        reset_word(tmp_word, &letters);
        
        struct sentence *tmp_sentence;
        tmp_sentence = malloc(sizeof(struct sentence));
        reset_sentence(tmp_sentence);
        
        struct paragraph *tmp_paragraph;
        tmp_paragraph = malloc(sizeof(struct paragraph));
        reset_paragraph(tmp_paragraph);
        
        struct document *tmp_document;
        tmp_document = malloc(sizeof(struct document));
        tmp_document->paragraph_count = 0;
        tmp_document->data = malloc(sizeof(struct sentence)); //paragraph[0]
        
        
        //iterate over the text
        for(int i=0; i<text_length; i++){ //condense by i<strlen(text)
            char ch = text[i];
            if((ch >= 'a' && ch <= 'z') || 
                (ch >= 'A' && ch <= 'Z')){
                //found letter
                
                //put into word
                append_word(tmp_word, &letters, ch);
            } else {
                //found end of word   
                if(letters>0){     
                    //terminate word string
                    tmp_word->data[letters] = '\0'; 
                    //put word into current sentence     
                    append_sentence(tmp_sentence, tmp_word);           
                    //clean tmp word
                    reset_word(tmp_word, &letters);
                }
    
                //check for end of sentence or paragraph
                if (ch == ' '){ //is word
                
                } else if (ch == '\n'){ 
                    //found end of paragraph
                    //put into document
                    append_document(tmp_document, tmp_paragraph);
                    reset_paragraph(tmp_paragraph);
                }
                else { 
                    //found end of sentence
                    //put into current paragraph
                    append_paragraph(tmp_paragraph, tmp_sentence);
                    reset_sentence(tmp_sentence);
                }
            }
        }
        //end of document
        append_document(tmp_document, tmp_paragraph);
        
        return *tmp_document;
    }
    
    /* Return paragraph k; k is 1-based and unchecked. */
    struct paragraph kth_paragraph(struct document Doc, int k) {
        return *(Doc.data + (k-1));
    }
    
    struct sentence kth_sentence_in_mth_paragraph(struct document Doc, int k, int m) { 
        return Doc.data[m-1].data[k-1];
    }
    
    struct word kth_word_in_mth_sentence_of_nth_paragraph(struct document Doc, int k, int m, int n) {
        return Doc.data[n-1].data[m-1].data[k-1];
    }
    
  • + 0 comments

    for (int i = 0; i < len; i++) { free(*(arr + i)); } free(arr); }

    /*
     * Split `s` on `delim` into newly allocated, null-terminated copies.
     *
     * Leading, trailing and repeated delimiters produce no token: a delimiter
     * is never copied into a token and empty tokens are never returned.
     *
     * s     - string to split; it is only read
     * delim - separator character
     * count - receives the number of tokens returned
     *
     * Returns a heap array of `*count` heap strings, or NULL when there are no
     * tokens or an allocation fails (in which case `*count` is 0 and nothing is
     * leaked). The caller frees each string and then the array.
     */
    char ** split(char * s, const char delim, int * count) {
        char ** tokens = NULL;
        int token_count = 0;
        const char * cursor = s;

        *count = 0;

        while (*cursor != '\0') {
            /* skip any run of delimiters before the next token */
            if (*cursor == delim) {
                cursor++;
                continue;
            }

            /* measure the token: it runs up to the next delimiter or the end of the string */
            const char * start = cursor;
            while (*cursor != '\0' && *cursor != delim) {
                cursor++;
            }
            size_t length = (size_t)(cursor - start);

            char * token = malloc(length + 1);
            char ** grown = realloc(tokens, sizeof *tokens * ((size_t)token_count + 1));
            if (token == NULL || grown == NULL) {
                /* release everything built so far; `grown` replaces `tokens` when it succeeded */
                if (grown != NULL) {
                    tokens = grown;
                }
                free(token);
                for (int i = 0; i < token_count; i++) {
                    free(tokens[i]);
                }
                free(tokens);
                return NULL;
            }
            tokens = grown;

            for (size_t i = 0; i < length; i++) {
                token[i] = start[i];
            }
            token[length] = '\0';

            tokens[token_count] = token;
            token_count++;
        }

        *count = token_count;

        return tokens;
    }

    struct document get_document(char* text) { int * paragraph_count = calloc(1, sizeof(int)); int * sentence_count = calloc(1, sizeof(int)); int * word_count = calloc(1, sizeof(int));

    // split into paragraphs and store temporarily char ** paragraphs_temp = split(text, '\n', paragraph_count);

    struct document document; document.paragraph_count = *paragraph_count; // allocate space for paragraphs in document document.data = malloc(*paragraph_count * sizeof(struct paragraph));

    char * s = NULL; for (int i = 0; i < *paragraph_count; i++) { s = *(paragraphs_temp + i); // get current paragraph

    // split into sentences and store temporarily
    char ** sentences_temp = split(s, '.', sentence_count);
    
    struct paragraph paragraph;
    paragraph.sentence_count = *sentence_count;
    // allocate space for sentences in each paragraph
    paragraph.data = malloc(*sentence_count * sizeof(struct sentence));
    
    for (int j = 0; j < *sentence_count; j++) {
      s = *(sentences_temp + j);  // get current sentence
    
      // split into words and store temporarily
      char ** words_temp = split(s, ' ', word_count);
    
      // *(*(document + i) + j) = malloc(*word_count * sizeof(char*));
      struct sentence sentence;
      sentence.word_count = *word_count;
      // allocate space for words in each sentence
      sentence.data = malloc(*word_count * sizeof(struct word));
    
      for (int k = 0; k < *word_count; k++) {
        s = *(words_temp + k);  // get current word
    
        struct word word;
        word.data = s;
    
        *(sentence.data + k) = word;
      } // end of word_count loop
    
      *(paragraph.data + j) = sentence;
    
      free(words_temp);
    } // end of sentence_count loop
    
    *(document.data + i) = paragraph;
    
    free_ptr_ptr(sentences_temp, *sentence_count);
    

    } // end of paragraph_count loop

    free_ptr_ptr(paragraphs_temp, *paragraph_count);

    free(word_count); free(sentence_count); free(paragraph_count);

    return document; }

    /* Return word k of sentence m of paragraph n; all three indices are 1-based and unchecked. */
    struct word kth_word_in_mth_sentence_of_nth_paragraph(struct document Doc, int k, int m, int n) {
        return Doc.data[n - 1].data[m - 1].data[k - 1];
    }

    /* Return sentence k of paragraph m; both indices are 1-based and unchecked. */
    struct sentence kth_sentence_in_mth_paragraph(struct document Doc, int k, int m) {
        return Doc.data[m - 1].data[k - 1];
    }

    /* Return paragraph k; k is 1-based and unchecked. */
    struct paragraph kth_paragraph(struct document Doc, int k) {
        return Doc.data[k - 1];
    }