Sort by

recency

|

180 Discussions

|

3 months ago+ 0 comments

My initial solution of simply searching through from the minimum possible length to the three-quarters of the length of gene (the maximum possible length of a sub-string requiring replace which would occur all the letters where the same; like a string of all 'A's) did not work; it was too slow for the later tests cases. It seems that other people have created sufficiently performant solutions using this approach in C++ but, I couldn't get it to work in Python. So, I binary-searched for the answer instead

`def works (o, L, l, dd): c = { 'A': 0, 'C': 0, 'G': 0, 'T': 0, }

c['A'] = o['A']
c['C'] = o['C']
c['G'] = o['G']
c['T'] = o['T']

i = 0 
while i < L - l:
    if c['A'] >= dd['A'] and c['C'] >= dd['C'] and c['G'] >= dd['G'] and c['T'] >= dd['T']:
        return True

    c[gene[i]] -= 1
    c[gene[i+l]] += 1
    i += 1

return False

def steadyGene(gene): L = len(gene) d = { 'A': 0, 'C': 0, 'G': 0, 'T': 0 }

for g in gene:
    d[g] += 1

dd = {
    'A': 0,
    'C': 0,
    'G': 0,
    'T': 0
}

ml = 0
for c, v in d.items():
    ddd = max(0, v - L // 4)
    dd[c] = ddd
    ml += ddd 

low, high = ml, 3*L//4 
while low < high:
    mid = (low + high) // 2

    o = {
        'A': 0,
        'C': 0,
        'G': 0,
        'T': 0
    }

    for s in range(0, mid):
        o[gene[s]] += 1

    if works(o, L, mid, dd):
        high = mid
    else:
        low = mid + 1

return low`

4 months ago+ 1 comment

If you imagine us initially having every character in the gene, we can 'reverse' the problem to try and remove the fewest characters possible, while still having the other characters be below the required limit of . This transforms the problem into a classic sliding window, where we need to find the smallest window to 'remove', such that the characters outside of the window are still valid.

def steadyGene(gene):
    N = len(gene)
    goal = N // 4
    curr = Counter(gene)
    ans = N
    l = 0
    for r in range(N):
        curr[gene[r]] -= 1
        while l < N and max(curr.values()) <= goal:
            ans = min(ans, r - l + 1)
            curr[gene[l]] += 1
            l += 1
    return ans

8 months ago+ 0 comments

include

using namespace std;

int test(int n, string s) { int required_count = n / 4; map mp; for (char it : s) { mp[it]++; } bool ok = true; for (auto tam : mp) { if (tam.second > required_count) { ok = false; break; } } if (ok) return 0; int min_val = n; int start = 0; for (int end = 0; end < n; end++) { mp[s[end]]--; while (mp['A'] <= required_count && mp['C'] <= required_count && mp['G'] <= required_count && mp['T'] <= required_count) { min_val = min(min_val, end - start + 1); mp[s[start]]++; start++; } } return min_val; }

int main() { int n; cin >> n; string s; cin >> s; cout << test(n, s) << endl; return 0; }

9 months ago+ 0 comments

O(n) cpp solution, using two indices to keep track of min substrings, and pushing both indices until we find a new valid substring.

bool isSatisfied(std::map<char, int> reqs, std::map<char, int> vals) {
    for (auto e : reqs) {
        if (vals[e.first] < e.second) {
            return false;
        }
    }
    return true;
}

int steadyGene(string gene) {
    // each gene occurs exactly n/4 times, so we know how many subs we need for each letter.
    // once a gene is subbed, it becomes a "wildcard" that can match any gene.
    // because of this we only need to change the genes are overrepresented, and they can fill
    // any of the underrepresented genes, we don't need to calculate that.
    std::map<char, int> gene_cnt = {};
    gene_cnt['A'] = 0;
    gene_cnt['C'] = 0;
    gene_cnt['G'] = 0;
    gene_cnt['T'] = 0;
    int target_cnt = gene.length()/4;
    // iterate through the input once to build the excess map
    for (int i = 0; i < gene.length(); i++) {
        char c = gene[i];
        if (gene_cnt.find(c) != gene_cnt.end()) {
            gene_cnt[c]++;
        }
    }
    // Now the gene_cnt contains the excess chars required subs to make a gene steady.
    std::map<char, int> rolling_cnt = {}, excess = {};
    for (auto entry : gene_cnt) {
        if (entry.second > target_cnt) {
            rolling_cnt[entry.first] = 0;
            excess[entry.first] = entry.second - target_cnt;
        }
    }
    // check base case of gene is already steady
    if (excess.empty()) {
        return 0;
    }
    int start_it = 0, end_it = 0;
    // find first instances, moves both iterators to starting positions at the first char we need to remove.
    while (start_it < gene.length()) {
        if (excess.find(gene[start_it]) != excess.end()) {
            rolling_cnt[gene[start_it]]++;
            end_it++;
            break;
        }
        start_it++;
        end_it++;
    }
    int min_cut = gene.length();
    // now move the end iterator through the string until we find a satisfactory cut, then push
    // the end iterator until the substring is unsatisfactory, etc until both iterators move through the string.
    while (end_it <= gene.length() && start_it < end_it) {
        if (!isSatisfied(excess, rolling_cnt)) {
            // push end_it if the substring is not satisfactory.
            end_it++;
            fflush(stdout);
            if (rolling_cnt.find(gene[end_it - 1]) != rolling_cnt.end()) {
                rolling_cnt[gene[end_it - 1]]++;
            }
        } else {
            // record a satisfactory string if it is a new min.
            if (min_cut > end_it - start_it) {
                min_cut = end_it - start_it;
            }
            // push the start_it to the next character we should remove and continue. The substr may
            // still be satisfactory if the initial char is overrepresented at the beginning of the
            // string.
            rolling_cnt[gene[start_it]]--;
            start_it++;
            while(rolling_cnt.find(gene[start_it]) == rolling_cnt.end() && start_it < end_it) {
                start_it++;
            }
        }
    }
    return min_cut;
}

9 months ago+ 0 comments

I find the optimal length of the substring to be replaced using binary search and for a given substring length I swipe through all possible substrings. The length of the substring to be replaced must be larger than the number of letters which I need to add to the rest of the string to compensate for the unequal number of letters in the latter. The solution scales as N*log(N)

 
const int N_letters = 4;

int my_map(char c)
{   
        if (c == 'A')
            return 0;
        if (c == 'C')
            return 1;
        if (c == 'T')
            return 2;
        if (c == 'G')
            return 3;
    
        return -1;
}


// Obtain array with the number of each kind of letters in the string

array<int , N_letters>  Get_letters_count(string gene)
{
    array <int, N_letters> arr = {};
    
    for (char c : gene)
    {
        arr[my_map(c)]++;
       
    }
    return arr;
}

// How many letters need to be replaced from looking at the rest of the string
int Get_num_of_letters_to_replace( array<int , N_letters> letters_count )
{
    int ans = 0;
    
    int max_lett = *max_element(letters_count.begin() , letters_count.end());
    
    for (auto a : letters_count)
    {
        ans += max_lett - a;    
    }
    
    return ans;
}

/////////////////////////////////////////////////////////


int steadyGene(string gene) 
{   
    int N  = gene.length();
    
    array<int , N_letters> letters_count = Get_letters_count( gene );
    int lett_to_replace =  Get_num_of_letters_to_replace (letters_count);
    if (lett_to_replace == 0)
        return 0;
    
    
    // Start binary search (analogous to bisection method)
    int ans = N;
    int l_min = 0;
    int l_max = N;
    int l = int(N/2);
   
    while(l >= 1)
    {        
				// Look at the substring of length l starting at the beginning of the gene
        string s1 = ""; 
        string s2 = gene.substr(l, N-l);
            
        letters_count = Get_letters_count( s2 );
        lett_to_replace =  Get_num_of_letters_to_replace (letters_count);
            
        
        for (int i = 0 ; i < N - l ; i++)
        {
				    // Shift the substring by 1 element and update it
            if (i > 0)
            {
                letters_count[my_map(gene[i-1])] ++;
                letters_count[my_map(gene[i+l-1])] --;
                lett_to_replace =  Get_num_of_letters_to_replace (letters_count);
            }
            
						// If I found the solution for the string of length l then try the solution for a shorter length
            if (l >= lett_to_replace)
            {
                ans = l;
                break;
            }
        }
        
        if (ans == l)
        {
            l_max = l;
            l = int( (l_min + l_max)/2 );
        }
				// If I did not find a solution of length l, try longer substring
        else
        {
            l_min = l;
            l = int(l_min + l_max)/2;
        }
        if (l_max - l_min <= 1)
            break;

        
    }

    return ans;
}

Sort by

|

180 Discussions

|

include

Cookie support is required to access HackerRank