We use cookies to ensure you have the best browsing experience on our website. Please read our cookie policy for more information about how we use cookies.
Works similarly to a normal tokenizer. Nothing crazy. Realloc is used instead of assuming the number of words/sentences/paragraphs in the document.
/*
 * Document tokenizer.
 *
 * A document is split into paragraphs (separated by '\n'), sentences
 * (terminated by '.') and words (maximal runs of ASCII letters).
 *
 * Data layout returned by get_document():
 *   document[p]        -> paragraph p   (char ***)
 *   document[p][s]     -> sentence s    (char **)
 *   document[p][s][w]  -> word w        (NUL-terminated char *)
 * Every array level is additionally terminated by a NULL pointer so callers
 * can iterate over (and free) the structure without knowing its lengths.
 * All memory is heap-allocated and owned by the caller.
 *
 * Allocation policy: running out of memory is treated as fatal (abort()).
 */

/* Initial number of slots reserved for each pointer array. */
#define DEFAULT_LEN 8

/* Resize `block` to `bytes` bytes; abort the program if that fails. */
static void *checked_realloc(void *block, size_t bytes)
{
    void *resized = realloc(block, bytes);
    if (resized == NULL) {
        abort();
    }
    return resized;
}

/*
 * Make sure `array` has room for at least `needed` slots of `slot_size`
 * bytes, doubling the capacity until it fits. Returns the (possibly moved)
 * array and updates `*capacity`.
 */
static void *reserve_slots(void *array, size_t *capacity, size_t needed, size_t slot_size)
{
    if (needed <= *capacity) {
        return array;
    }
    size_t grown = *capacity;
    while (grown < needed) {
        grown = grown ? grown * 2 : 1;
    }
    *capacity = grown;
    return checked_realloc(array, grown * slot_size);
}

/* Queries: k, m and n are 1-based indices. */

/* Return the k-th word of the m-th sentence of the n-th paragraph. */
char *kth_word_in_mth_sentence_of_nth_paragraph(char ****document, int k, int m, int n)
{
    return document[n - 1][m - 1][k - 1];
}

/* Return the k-th sentence of the m-th paragraph. */
char **kth_sentence_in_mth_paragraph(char ****document, int k, int m)
{
    return document[m - 1][k - 1];
}

/* Return the k-th paragraph. */
char ***kth_paragraph(char ****document, int k)
{
    return document[k - 1];
}

// Parsing:

/*
 * Append `ch` to the heap buffer `*word`, which currently holds `*word_len`
 * bytes (or is NULL when `*word_len` is 0). The buffer is resized to exactly
 * the new length and `*word_len` is incremented.
 */
void insert_char(char **word, int *word_len, char ch)
{
    (*word_len)++;
    *word = checked_realloc(*word, (size_t)(*word_len) * sizeof **word);
    (*word)[*word_len - 1] = ch;
}

/* Non-zero for the blanks that separate words (space and tab). */
int is_whitespace(char ch)
{
    return ch == ' ' || ch == '\t';
}

/* Advance `*character` past any blanks at the current position of `text`. */
void trim_whitespace(char *text, int *character)
{
    while (is_whitespace(text[*character])) {
        (*character)++;
    }
}

/* Non-zero for ASCII letters, the only characters that can form a word. */
int is_text(char ch)
{
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}

/* Non-zero for the character that ends a sentence. */
int is_sentence_terminator(char ch)
{
    return ch == '.';
}

/* Non-zero for the characters that end a paragraph (newline or end of text). */
int is_paragraph_terminator(char ch)
{
    return ch == '\n' || ch == '\0';
}

/* Return the character at `*character` and advance the position by one. */
char next_character(char *text, int *character)
{
    return text[(*character)++];
}

/*
 * Read the run of letters starting at `*character` and return it as a newly
 * allocated NUL-terminated string (empty if the current character is not a
 * letter). `*character` is left on the first non-letter.
 */
char *next_word(char *text, int *character)
{
    char *word = NULL;
    int word_size = 0;

    while (is_text(text[*character])) {
        char ch = next_character(text, character);
        insert_char(&word, &word_size, ch);
    }
    // Insert a zero to end the word.
    insert_char(&word, &word_size, 0);
    return word;
}

/*
 * Read one sentence starting at `*character` and return its words as a
 * NULL-terminated array.
 *
 * The sentence ends at '.', which is consumed, or at a paragraph terminator,
 * which is left for the caller. Stopping at the paragraph terminator keeps a
 * sentence with a missing period from scanning past the end of the text.
 * Characters that are neither letters, blanks nor terminators (digits,
 * commas, ...) are skipped and act as word separators, so the scan always
 * makes progress and never records an empty word.
 */
char **next_sentence(char *text, int *character)
{
    size_t capacity = DEFAULT_LEN;
    size_t sentence_len = 0;
    char **sentence = checked_realloc(NULL, capacity * sizeof *sentence);

    for (;;) {
        trim_whitespace(text, character);
        char ch = text[*character];

        if (is_sentence_terminator(ch)) {
            // Move past the period.
            next_character(text, character);
            break;
        }
        if (is_paragraph_terminator(ch)) {
            break;
        }
        if (!is_text(ch)) {
            next_character(text, character);
            continue;
        }

        char *word = next_word(text, character);
        /* +2: one slot for the new word, one for the NULL sentinel. */
        sentence = reserve_slots(sentence, &capacity, sentence_len + 2, sizeof *sentence);
        sentence[sentence_len++] = word;
    }

    sentence[sentence_len] = NULL;
    return sentence;
}

/*
 * Read sentences until a paragraph terminator is reached and return them as
 * a NULL-terminated array. The terminator itself is not consumed. Blanks
 * between sentences (including trailing blanks before the newline) are
 * skipped so they do not produce an empty sentence.
 */
char ***next_paragraph(char *text, int *character)
{
    size_t capacity = DEFAULT_LEN;
    size_t paragraph_len = 0;
    char ***paragraph = checked_realloc(NULL, capacity * sizeof *paragraph);

    for (;;) {
        trim_whitespace(text, character);
        if (is_paragraph_terminator(text[*character])) {
            break;
        }

        char **sentence = next_sentence(text, character);
        paragraph = reserve_slots(paragraph, &capacity, paragraph_len + 2, sizeof *paragraph);
        paragraph[paragraph_len++] = sentence;
    }

    paragraph[paragraph_len] = NULL;
    return paragraph;
}

/*
 * Parse `text` into a NULL-terminated array of paragraphs.
 *
 * Returns NULL when `text` is NULL. Otherwise at least one paragraph is
 * always returned; an empty line (or a trailing newline) yields a paragraph
 * with no sentences, so paragraph indices match line numbers.
 */
char ****get_document(char *text)
{
    if (text == NULL) {
        return NULL;
    }

    size_t capacity = DEFAULT_LEN;
    size_t doc_len = 0;
    char ****document = checked_realloc(NULL, capacity * sizeof *document);
    int character = 0;
    int last_paragraph = 0;

    while (!last_paragraph) {
        char ***paragraph = next_paragraph(text, &character);
        /* Consume the paragraph terminator; '\0' means this was the last one. */
        last_paragraph = next_character(text, &character) == '\0';

        document = reserve_slots(document, &capacity, doc_len + 2, sizeof *document);
        document[doc_len++] = paragraph;
    }

    document[doc_len] = NULL;
    return document;
}
Cookie support is required to access HackerRank
It seems that cookies are disabled in this browser. Please enable them to open this website.
Querying the Document
You are viewing a single comment's thread. Return to all comments →
Works similarly to a normal tokenizer. Nothing crazy. Realloc is used instead of assuming the number of words/sentences/paragraphs in the document.