Bear and Steady Gene

  • + 0 comments

    thought this was a dynamic programming question, but turns out its just a simple log time search at each iteration algo.

    O(n*log(n)) using lower bound

    int steadyGene(int n, const string& gene) {
        int result = n;
        vector<int> A = {0}, C = {0}, T = {0}, G = {0};
        for (int i=0; i < n; i++) {
            A.push_back(A.back() + (gene[i] == 'A' ? 1 : 0));
            C.push_back(C.back() + (gene[i] == 'C' ? 1 : 0));
            T.push_back(T.back() + (gene[i] == 'T' ? 1 : 0));
            G.push_back(G.back() + (gene[i] == 'G' ? 1 : 0));
        }
        if (A.back() == n/4 and C.back() == n/4 and T.back() == n/4 and G.back() == n/4) return 0;
        for (int i=1; i <= n; i++) {
            auto itA = lower_bound(A.begin()+i, A.end(), A.back()+A[i-1]-n/4);
            auto itC = lower_bound(C.begin()+i, C.end(), C.back()+C[i-1]-n/4);
            auto itT = lower_bound(T.begin()+i, T.end(), T.back()+T[i-1]-n/4);
            auto itG = lower_bound(G.begin()+i, G.end(), G.back()+G[i-1]-n/4);
            if (itA == A.end() or itC == C.end() or itT == T.end() or itG == G.end()) break;
            int j = max({distance(A.begin(), itA), distance(C.begin(), itC), distance(T.begin(), itT), distance(G.begin(), itG)});
            result = min(result, j-i+1);
        }
        return result;
    }