Bear and Steady Gene

Sort by

recency

|

178 Discussions

|

  • + 0 comments

    include

    using namespace std;

    int test(int n, string s) { int required_count = n / 4; map mp; for (char it : s) { mp[it]++; } bool ok = true; for (auto tam : mp) { if (tam.second > required_count) { ok = false; break; } } if (ok) return 0; int min_val = n; int start = 0; for (int end = 0; end < n; end++) { mp[s[end]]--; while (mp['A'] <= required_count && mp['C'] <= required_count && mp['G'] <= required_count && mp['T'] <= required_count) { min_val = min(min_val, end - start + 1); mp[s[start]]++; start++; } } return min_val; }

    int main() { int n; cin >> n; string s; cin >> s; cout << test(n, s) << endl; return 0; }

  • + 0 comments

    O(n) cpp solution, using two indices to keep track of min substrings, and pushing both indices until we find a new valid substring.

    bool isSatisfied(std::map<char, int> reqs, std::map<char, int> vals) {
        for (auto e : reqs) {
            if (vals[e.first] < e.second) {
                return false;
            }
        }
        return true;
    }
    
    int steadyGene(string gene) {
        // each gene occurs exactly n/4 times, so we know how many subs we need for each letter.
        // once a gene is subbed, it becomes a "wildcard" that can match any gene.
        // because of this we only need to change the genes are overrepresented, and they can fill
        // any of the underrepresented genes, we don't need to calculate that.
        std::map<char, int> gene_cnt = {};
        gene_cnt['A'] = 0;
        gene_cnt['C'] = 0;
        gene_cnt['G'] = 0;
        gene_cnt['T'] = 0;
        int target_cnt = gene.length()/4;
        // iterate through the input once to build the excess map
        for (int i = 0; i < gene.length(); i++) {
            char c = gene[i];
            if (gene_cnt.find(c) != gene_cnt.end()) {
                gene_cnt[c]++;
            }
        }
        // Now the gene_cnt contains the excess chars required subs to make a gene steady.
        std::map<char, int> rolling_cnt = {}, excess = {};
        for (auto entry : gene_cnt) {
            if (entry.second > target_cnt) {
                rolling_cnt[entry.first] = 0;
                excess[entry.first] = entry.second - target_cnt;
            }
        }
        // check base case of gene is already steady
        if (excess.empty()) {
            return 0;
        }
        int start_it = 0, end_it = 0;
        // find first instances, moves both iterators to starting positions at the first char we need to remove.
        while (start_it < gene.length()) {
            if (excess.find(gene[start_it]) != excess.end()) {
                rolling_cnt[gene[start_it]]++;
                end_it++;
                break;
            }
            start_it++;
            end_it++;
        }
        int min_cut = gene.length();
        // now move the end iterator through the string until we find a satisfactory cut, then push
        // the end iterator until the substring is unsatisfactory, etc until both iterators move through the string.
        while (end_it <= gene.length() && start_it < end_it) {
            if (!isSatisfied(excess, rolling_cnt)) {
                // push end_it if the substring is not satisfactory.
                end_it++;
                fflush(stdout);
                if (rolling_cnt.find(gene[end_it - 1]) != rolling_cnt.end()) {
                    rolling_cnt[gene[end_it - 1]]++;
                }
            } else {
                // record a satisfactory string if it is a new min.
                if (min_cut > end_it - start_it) {
                    min_cut = end_it - start_it;
                }
                // push the start_it to the next character we should remove and continue. The substr may
                // still be satisfactory if the initial char is overrepresented at the beginning of the
                // string.
                rolling_cnt[gene[start_it]]--;
                start_it++;
                while(rolling_cnt.find(gene[start_it]) == rolling_cnt.end() && start_it < end_it) {
                    start_it++;
                }
            }
        }
        return min_cut;
    }
    
  • + 0 comments

    I find the optimal length of the substring to be replaced using binary search and for a given substring length I swipe through all possible substrings. The length of the substring to be replaced must be larger than the number of letters which I need to add to the rest of the string to compensate for the unequal number of letters in the latter. The solution scales as N*log(N)

     
    const int N_letters = 4;
    
    int my_map(char c)
    {   
            if (c == 'A')
                return 0;
            if (c == 'C')
                return 1;
            if (c == 'T')
                return 2;
            if (c == 'G')
                return 3;
        
            return -1;
    }
    
    
    // Obtain array with the number of each kind of letters in the string
    
    array<int , N_letters>  Get_letters_count(string gene)
    {
        array <int, N_letters> arr = {};
        
        for (char c : gene)
        {
            arr[my_map(c)]++;
           
        }
        return arr;
    }
    
    // How many letters need to be replaced from looking at the rest of the string
    int Get_num_of_letters_to_replace( array<int , N_letters> letters_count )
    {
        int ans = 0;
        
        int max_lett = *max_element(letters_count.begin() , letters_count.end());
        
        for (auto a : letters_count)
        {
            ans += max_lett - a;    
        }
        
        return ans;
    }
    
    /////////////////////////////////////////////////////////
    
    
    int steadyGene(string gene) 
    {   
        int N  = gene.length();
        
        array<int , N_letters> letters_count = Get_letters_count( gene );
        int lett_to_replace =  Get_num_of_letters_to_replace (letters_count);
        if (lett_to_replace == 0)
            return 0;
        
        
        // Start binary search (analogous to bisection method)
        int ans = N;
        int l_min = 0;
        int l_max = N;
        int l = int(N/2);
       
        while(l >= 1)
        {        
    				// Look at the substring of length l starting at the beginning of the gene
            string s1 = ""; 
            string s2 = gene.substr(l, N-l);
                
            letters_count = Get_letters_count( s2 );
            lett_to_replace =  Get_num_of_letters_to_replace (letters_count);
                
            
            for (int i = 0 ; i < N - l ; i++)
            {
    				    // Shift the substring by 1 element and update it
                if (i > 0)
                {
                    letters_count[my_map(gene[i-1])] ++;
                    letters_count[my_map(gene[i+l-1])] --;
                    lett_to_replace =  Get_num_of_letters_to_replace (letters_count);
                }
                
    						// If I found the solution for the string of length l then try the solution for a shorter length
                if (l >= lett_to_replace)
                {
                    ans = l;
                    break;
                }
            }
            
            if (ans == l)
            {
                l_max = l;
                l = int( (l_min + l_max)/2 );
            }
    				// If I did not find a solution of length l, try longer substring
            else
            {
                l_min = l;
                l = int(l_min + l_max)/2;
            }
            if (l_max - l_min <= 1)
                break;
    
            
        }
    
        return ans;
    }
    
  • + 0 comments

    thought this was a dynamic programming question, but turns out its just a simple log time search at each iteration algo.

    O(n*log(n)) using lower bound

    int steadyGene(int n, const string& gene) {
        int result = n;
        vector<int> A = {0}, C = {0}, T = {0}, G = {0};
        for (int i=0; i < n; i++) {
            A.push_back(A.back() + (gene[i] == 'A' ? 1 : 0));
            C.push_back(C.back() + (gene[i] == 'C' ? 1 : 0));
            T.push_back(T.back() + (gene[i] == 'T' ? 1 : 0));
            G.push_back(G.back() + (gene[i] == 'G' ? 1 : 0));
        }
        if (A.back() == n/4 and C.back() == n/4 and T.back() == n/4 and G.back() == n/4) return 0;
        for (int i=1; i <= n; i++) {
            auto itA = lower_bound(A.begin()+i, A.end(), A.back()+A[i-1]-n/4);
            auto itC = lower_bound(C.begin()+i, C.end(), C.back()+C[i-1]-n/4);
            auto itT = lower_bound(T.begin()+i, T.end(), T.back()+T[i-1]-n/4);
            auto itG = lower_bound(G.begin()+i, G.end(), G.back()+G[i-1]-n/4);
            if (itA == A.end() or itC == C.end() or itT == T.end() or itG == G.end()) break;
            int j = max({distance(A.begin(), itA), distance(C.begin(), itC), distance(T.begin(), itT), distance(G.begin(), itG)});
            result = min(result, j-i+1);
        }
        return result;
    }
    
  • + 0 comments

    Haskell:

    module Main where
    
    import qualified Data.Map as M
    import Data.List (find)
    import qualified Data.Vector  as V
    
    type Counts = M.Map Char Int
    type Window = (Int, Int, Counts)
    
    symbols :: [Char]
    symbols = ['A', 'C', 'G', 'T']
    
    initCounts :: Counts
    initCounts = M.fromList [(c, 0) | c <- symbols]
    
    getCounts :: String -> Counts
    getCounts = foldl (\m c -> M.insertWith (+) c 1 m) initCounts
    
    extras :: String -> Maybe Counts
    extras [] = Nothing
    extras s =
        let
            n = length s `div` 4  -- we're asured it's a multiple of 4
            counts = getCounts s
            adjusted = M.map (\x -> x - n) counts
            es = M.filter (> 0) adjusted
         in
            case length es of
                0 -> Nothing  -- already balanced
                _ -> Just es  -- work to do
    
    satisfied :: Counts -> Counts -> Bool
    satisfied required actual = all (\c -> actual M.! c >= required M.! c) (M.keys required)
    
    soln0 :: Counts -> String -> Maybe (Int, Counts)
    soln0 es s = do
        let dists = drop 1 $ scanl (\m c -> M.insertWith (+) c 1 m) initCounts s
        let indexedDists = zip [0..] dists
        find (\(j, d) -> satisfied es d) indexedDists
    
    slideUntilSuccess :: V.Vector Char -> Counts -> Window -> Maybe Window
    slideUntilSuccess v required (i, j, counts) = do
        addC <- v V.!? (j + 1)  -- return Nothing if j + 1 is out of bounds
        let cminus = M.insertWith (+) (v V.! i) (-1) counts
        let counts' = M.insertWith (+) addC 1 cminus
        (if satisfied required counts' then Just (i + 1, j + 1, counts') else slideUntilSuccess v required (i + 1, j + 1, counts'))
    
    shrinkUntilFail :: V.Vector Char -> Counts -> Window -> Window
    shrinkUntilFail v required (i, j, counts) =
        if satisfied required counts
            then let counts' = M.insertWith (+) (v V.! i) (-1) counts
                in shrinkUntilFail v required (i + 1, j, counts')
            else (i, j, counts)
    
    runner :: V.Vector Char -> Counts -> Window -> Window
    runner v required w = do
        let w' = shrinkUntilFail v required w
        let w'' = slideUntilSuccess v required w'
        case w'' of
            Nothing -> w'
            Just ws -> runner v required ws
    
    solve :: String -> Maybe Window
    solve s = do
        es <- extras s
        (j0, dist0) <- soln0 es s
        let v = V.fromList s
        return $ runner v es (0, j0, dist0)
    
    main :: IO ()
    main = do
        n <- readLn :: IO Int
        s <- getLine
        case solve s of
            Nothing -> putStrLn "0"
            Just (i, j, _) -> print (j-i+2)