Bear and Steady Gene

  • + 0 comments

    O(n) cpp solution, using two indices to keep track of min substrings, and pushing both indices until we find a new valid substring.

    bool isSatisfied(std::map<char, int> reqs, std::map<char, int> vals) {
        for (auto e : reqs) {
            if (vals[e.first] < e.second) {
                return false;
            }
        }
        return true;
    }
    
    int steadyGene(string gene) {
        // each gene occurs exactly n/4 times, so we know how many subs we need for each letter.
        // once a gene is subbed, it becomes a "wildcard" that can match any gene.
        // because of this we only need to change the genes are overrepresented, and they can fill
        // any of the underrepresented genes, we don't need to calculate that.
        std::map<char, int> gene_cnt = {};
        gene_cnt['A'] = 0;
        gene_cnt['C'] = 0;
        gene_cnt['G'] = 0;
        gene_cnt['T'] = 0;
        int target_cnt = gene.length()/4;
        // iterate through the input once to build the excess map
        for (int i = 0; i < gene.length(); i++) {
            char c = gene[i];
            if (gene_cnt.find(c) != gene_cnt.end()) {
                gene_cnt[c]++;
            }
        }
        // Now the gene_cnt contains the excess chars required subs to make a gene steady.
        std::map<char, int> rolling_cnt = {}, excess = {};
        for (auto entry : gene_cnt) {
            if (entry.second > target_cnt) {
                rolling_cnt[entry.first] = 0;
                excess[entry.first] = entry.second - target_cnt;
            }
        }
        // check base case of gene is already steady
        if (excess.empty()) {
            return 0;
        }
        int start_it = 0, end_it = 0;
        // find first instances, moves both iterators to starting positions at the first char we need to remove.
        while (start_it < gene.length()) {
            if (excess.find(gene[start_it]) != excess.end()) {
                rolling_cnt[gene[start_it]]++;
                end_it++;
                break;
            }
            start_it++;
            end_it++;
        }
        int min_cut = gene.length();
        // now move the end iterator through the string until we find a satisfactory cut, then push
        // the end iterator until the substring is unsatisfactory, etc until both iterators move through the string.
        while (end_it <= gene.length() && start_it < end_it) {
            if (!isSatisfied(excess, rolling_cnt)) {
                // push end_it if the substring is not satisfactory.
                end_it++;
                fflush(stdout);
                if (rolling_cnt.find(gene[end_it - 1]) != rolling_cnt.end()) {
                    rolling_cnt[gene[end_it - 1]]++;
                }
            } else {
                // record a satisfactory string if it is a new min.
                if (min_cut > end_it - start_it) {
                    min_cut = end_it - start_it;
                }
                // push the start_it to the next character we should remove and continue. The substr may
                // still be satisfactory if the initial char is overrepresented at the beginning of the
                // string.
                rolling_cnt[gene[start_it]]--;
                start_it++;
                while(rolling_cnt.find(gene[start_it]) == rolling_cnt.end() && start_it < end_it) {
                    start_it++;
                }
            }
        }
        return min_cut;
    }