Bear and Steady Gene

  • + 0 comments

    I find the optimal length of the substring to be replaced using binary search and for a given substring length I swipe through all possible substrings. The length of the substring to be replaced must be larger than the number of letters which I need to add to the rest of the string to compensate for the unequal number of letters in the latter. The solution scales as N*log(N)

     
    const int N_letters = 4;
    
    int my_map(char c)
    {   
            if (c == 'A')
                return 0;
            if (c == 'C')
                return 1;
            if (c == 'T')
                return 2;
            if (c == 'G')
                return 3;
        
            return -1;
    }
    
    
    // Obtain array with the number of each kind of letters in the string
    
    array<int , N_letters>  Get_letters_count(string gene)
    {
        array <int, N_letters> arr = {};
        
        for (char c : gene)
        {
            arr[my_map(c)]++;
           
        }
        return arr;
    }
    
    // How many letters need to be replaced from looking at the rest of the string
    int Get_num_of_letters_to_replace( array<int , N_letters> letters_count )
    {
        int ans = 0;
        
        int max_lett = *max_element(letters_count.begin() , letters_count.end());
        
        for (auto a : letters_count)
        {
            ans += max_lett - a;    
        }
        
        return ans;
    }
    
    /////////////////////////////////////////////////////////
    
    
    int steadyGene(string gene) 
    {   
        int N  = gene.length();
        
        array<int , N_letters> letters_count = Get_letters_count( gene );
        int lett_to_replace =  Get_num_of_letters_to_replace (letters_count);
        if (lett_to_replace == 0)
            return 0;
        
        
        // Start binary search (analogous to bisection method)
        int ans = N;
        int l_min = 0;
        int l_max = N;
        int l = int(N/2);
       
        while(l >= 1)
        {        
    				// Look at the substring of length l starting at the beginning of the gene
            string s1 = ""; 
            string s2 = gene.substr(l, N-l);
                
            letters_count = Get_letters_count( s2 );
            lett_to_replace =  Get_num_of_letters_to_replace (letters_count);
                
            
            for (int i = 0 ; i < N - l ; i++)
            {
    				    // Shift the substring by 1 element and update it
                if (i > 0)
                {
                    letters_count[my_map(gene[i-1])] ++;
                    letters_count[my_map(gene[i+l-1])] --;
                    lett_to_replace =  Get_num_of_letters_to_replace (letters_count);
                }
                
    						// If I found the solution for the string of length l then try the solution for a shorter length
                if (l >= lett_to_replace)
                {
                    ans = l;
                    break;
                }
            }
            
            if (ans == l)
            {
                l_max = l;
                l = int( (l_min + l_max)/2 );
            }
    				// If I did not find a solution of length l, try longer substring
            else
            {
                l_min = l;
                l = int(l_min + l_max)/2;
            }
            if (l_max - l_min <= 1)
                break;
    
            
        }
    
        return ans;
    }