Bear and Steady Gene

Sort by

recency

|

179 Discussions

|

  • + 1 comment

    If you imagine us initially having every character in the gene, we can 'reverse' the problem to try and remove the fewest characters possible, while still having the other characters be below the required limit of . This transforms the problem into a classic sliding window, where we need to find the smallest window to 'remove', such that the characters outside of the window are still valid.

    def steadyGene(gene):
        N = len(gene)
        goal = N // 4
        curr = Counter(gene)
        ans = N
        l = 0
        for r in range(N):
            curr[gene[r]] -= 1
            while l < N and max(curr.values()) <= goal:
                ans = min(ans, r - l + 1)
                curr[gene[l]] += 1
                l += 1
        return ans
    
  • + 0 comments

    include

    using namespace std;

    int test(int n, string s) { int required_count = n / 4; map mp; for (char it : s) { mp[it]++; } bool ok = true; for (auto tam : mp) { if (tam.second > required_count) { ok = false; break; } } if (ok) return 0; int min_val = n; int start = 0; for (int end = 0; end < n; end++) { mp[s[end]]--; while (mp['A'] <= required_count && mp['C'] <= required_count && mp['G'] <= required_count && mp['T'] <= required_count) { min_val = min(min_val, end - start + 1); mp[s[start]]++; start++; } } return min_val; }

    int main() { int n; cin >> n; string s; cin >> s; cout << test(n, s) << endl; return 0; }

  • + 0 comments

    O(n) cpp solution, using two indices to keep track of min substrings, and pushing both indices until we find a new valid substring.

    bool isSatisfied(std::map<char, int> reqs, std::map<char, int> vals) {
        for (auto e : reqs) {
            if (vals[e.first] < e.second) {
                return false;
            }
        }
        return true;
    }
    
    int steadyGene(string gene) {
        // each gene occurs exactly n/4 times, so we know how many subs we need for each letter.
        // once a gene is subbed, it becomes a "wildcard" that can match any gene.
        // because of this we only need to change the genes are overrepresented, and they can fill
        // any of the underrepresented genes, we don't need to calculate that.
        std::map<char, int> gene_cnt = {};
        gene_cnt['A'] = 0;
        gene_cnt['C'] = 0;
        gene_cnt['G'] = 0;
        gene_cnt['T'] = 0;
        int target_cnt = gene.length()/4;
        // iterate through the input once to build the excess map
        for (int i = 0; i < gene.length(); i++) {
            char c = gene[i];
            if (gene_cnt.find(c) != gene_cnt.end()) {
                gene_cnt[c]++;
            }
        }
        // Now the gene_cnt contains the excess chars required subs to make a gene steady.
        std::map<char, int> rolling_cnt = {}, excess = {};
        for (auto entry : gene_cnt) {
            if (entry.second > target_cnt) {
                rolling_cnt[entry.first] = 0;
                excess[entry.first] = entry.second - target_cnt;
            }
        }
        // check base case of gene is already steady
        if (excess.empty()) {
            return 0;
        }
        int start_it = 0, end_it = 0;
        // find first instances, moves both iterators to starting positions at the first char we need to remove.
        while (start_it < gene.length()) {
            if (excess.find(gene[start_it]) != excess.end()) {
                rolling_cnt[gene[start_it]]++;
                end_it++;
                break;
            }
            start_it++;
            end_it++;
        }
        int min_cut = gene.length();
        // now move the end iterator through the string until we find a satisfactory cut, then push
        // the end iterator until the substring is unsatisfactory, etc until both iterators move through the string.
        while (end_it <= gene.length() && start_it < end_it) {
            if (!isSatisfied(excess, rolling_cnt)) {
                // push end_it if the substring is not satisfactory.
                end_it++;
                fflush(stdout);
                if (rolling_cnt.find(gene[end_it - 1]) != rolling_cnt.end()) {
                    rolling_cnt[gene[end_it - 1]]++;
                }
            } else {
                // record a satisfactory string if it is a new min.
                if (min_cut > end_it - start_it) {
                    min_cut = end_it - start_it;
                }
                // push the start_it to the next character we should remove and continue. The substr may
                // still be satisfactory if the initial char is overrepresented at the beginning of the
                // string.
                rolling_cnt[gene[start_it]]--;
                start_it++;
                while(rolling_cnt.find(gene[start_it]) == rolling_cnt.end() && start_it < end_it) {
                    start_it++;
                }
            }
        }
        return min_cut;
    }
    
  • + 0 comments

    I find the optimal length of the substring to be replaced using binary search and for a given substring length I swipe through all possible substrings. The length of the substring to be replaced must be larger than the number of letters which I need to add to the rest of the string to compensate for the unequal number of letters in the latter. The solution scales as N*log(N)

     
    const int N_letters = 4;
    
    int my_map(char c)
    {   
            if (c == 'A')
                return 0;
            if (c == 'C')
                return 1;
            if (c == 'T')
                return 2;
            if (c == 'G')
                return 3;
        
            return -1;
    }
    
    
    // Obtain array with the number of each kind of letters in the string
    
    array<int , N_letters>  Get_letters_count(string gene)
    {
        array <int, N_letters> arr = {};
        
        for (char c : gene)
        {
            arr[my_map(c)]++;
           
        }
        return arr;
    }
    
    // How many letters need to be replaced from looking at the rest of the string
    int Get_num_of_letters_to_replace( array<int , N_letters> letters_count )
    {
        int ans = 0;
        
        int max_lett = *max_element(letters_count.begin() , letters_count.end());
        
        for (auto a : letters_count)
        {
            ans += max_lett - a;    
        }
        
        return ans;
    }
    
    /////////////////////////////////////////////////////////
    
    
    int steadyGene(string gene) 
    {   
        int N  = gene.length();
        
        array<int , N_letters> letters_count = Get_letters_count( gene );
        int lett_to_replace =  Get_num_of_letters_to_replace (letters_count);
        if (lett_to_replace == 0)
            return 0;
        
        
        // Start binary search (analogous to bisection method)
        int ans = N;
        int l_min = 0;
        int l_max = N;
        int l = int(N/2);
       
        while(l >= 1)
        {        
    				// Look at the substring of length l starting at the beginning of the gene
            string s1 = ""; 
            string s2 = gene.substr(l, N-l);
                
            letters_count = Get_letters_count( s2 );
            lett_to_replace =  Get_num_of_letters_to_replace (letters_count);
                
            
            for (int i = 0 ; i < N - l ; i++)
            {
    				    // Shift the substring by 1 element and update it
                if (i > 0)
                {
                    letters_count[my_map(gene[i-1])] ++;
                    letters_count[my_map(gene[i+l-1])] --;
                    lett_to_replace =  Get_num_of_letters_to_replace (letters_count);
                }
                
    						// If I found the solution for the string of length l then try the solution for a shorter length
                if (l >= lett_to_replace)
                {
                    ans = l;
                    break;
                }
            }
            
            if (ans == l)
            {
                l_max = l;
                l = int( (l_min + l_max)/2 );
            }
    				// If I did not find a solution of length l, try longer substring
            else
            {
                l_min = l;
                l = int(l_min + l_max)/2;
            }
            if (l_max - l_min <= 1)
                break;
    
            
        }
    
        return ans;
    }
    
  • + 0 comments

    thought this was a dynamic programming question, but turns out its just a simple log time search at each iteration algo.

    O(n*log(n)) using lower bound

    int steadyGene(int n, const string& gene) {
        int result = n;
        vector<int> A = {0}, C = {0}, T = {0}, G = {0};
        for (int i=0; i < n; i++) {
            A.push_back(A.back() + (gene[i] == 'A' ? 1 : 0));
            C.push_back(C.back() + (gene[i] == 'C' ? 1 : 0));
            T.push_back(T.back() + (gene[i] == 'T' ? 1 : 0));
            G.push_back(G.back() + (gene[i] == 'G' ? 1 : 0));
        }
        if (A.back() == n/4 and C.back() == n/4 and T.back() == n/4 and G.back() == n/4) return 0;
        for (int i=1; i <= n; i++) {
            auto itA = lower_bound(A.begin()+i, A.end(), A.back()+A[i-1]-n/4);
            auto itC = lower_bound(C.begin()+i, C.end(), C.back()+C[i-1]-n/4);
            auto itT = lower_bound(T.begin()+i, T.end(), T.back()+T[i-1]-n/4);
            auto itG = lower_bound(G.begin()+i, G.end(), G.back()+G[i-1]-n/4);
            if (itA == A.end() or itC == C.end() or itT == T.end() or itG == G.end()) break;
            int j = max({distance(A.begin(), itA), distance(C.begin(), itC), distance(T.begin(), itT), distance(G.begin(), itG)});
            result = min(result, j-i+1);
        }
        return result;
    }