Day 7: Spearman's Rank Correlation Coefficient

Sort by

recency

|

96 Discussions

|

  • + 0 comments

    To solve this in C++ without ending up with spaghetti code, I defined quite a few helper functions.

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <iomanip>
    #include <iostream>
    #include <iterator>
    #include <set>
    #include <vector>
    using namespace std;
    
    // Arithmetic mean of `data`.
    //
    // The input is taken by const reference so that a call does not copy the
    // whole vector. The sum is accumulated in float, in element order.
    // For an empty vector the result is 0/0, i.e. NaN.
    float mean(const std::vector<float>& data){
        float total{0};
        for(const float value : data){
            total += value;
        }
        return total / data.size();
    }
    
    float covariance(vector<float>x,vector<float>y){
        int n = x.size();
        float mx{mean(x)}, my{mean(y)};
        float res{0};
        for(int i=0;i<n;i++){
            res+=(x[i]-mx)*(y[i]-my);
        }
        return res / n;
    }
    
    float sdev(vector<float>d){
        int n = d.size();
        float md{mean(d)};
        float res{0};
        for(int i=0;i<n;i++){
            res+=pow((d[i]-md),2);
        }
        return sqrt(res/n);
    }
    
    float pearson(vector<float>x, vector<float>y){
        return covariance(x, y) / (sdev(x)*sdev(y));
    }
    
    // Returns the 1-based position of `val` within the ascending order of `s`,
    // or -1 when `val` is not an element of `s`.
    //
    // The set is taken by const reference so that a call does not copy it, and
    // the element is located with the set's own lookup.
    int set_index(const std::set<float>& s, float val)
    {
        const auto found = s.find(val);
        if (found == s.end()) {
            return -1;
        }
        // Number of elements before `found`, shifted to a 1-based position.
        return static_cast<int>(std::distance(s.begin(), found)) + 1;
    }
    
    // Replaces every element of `d` by its 1-based rank among the distinct
    // values of `d` (smallest value -> 1). Equal values share one rank and the
    // ranks stay consecutive. Ranks are returned as floats, in input order.
    //
    // The distinct values are sorted once and each rank is found by binary
    // search, so the whole pass costs O(n log n).
    std::vector<float> rank_vector(const std::vector<float>& d){
        std::vector<float> distinct(d);
        std::sort(distinct.begin(), distinct.end());
        distinct.erase(std::unique(distinct.begin(), distinct.end()), distinct.end());

        std::vector<float> ranks;
        ranks.reserve(d.size());
        for(const float value : d){
            // Every value of `d` is present in `distinct`, so lower_bound lands on it.
            const auto pos = std::lower_bound(distinct.begin(), distinct.end(), value);
            ranks.push_back(static_cast<float>(pos - distinct.begin()) + 1.0f);
        }
        return ranks;
    }
    
    
    // Reads n, then n values of X followed by n values of Y from standard
    // input, and prints the Pearson correlation of the two rank vectors with
    // three digits after the decimal point.
    int main() {
        int count{};
        std::cin >> count;

        std::vector<float> x(count);
        std::vector<float> y(count);
        for(float& value : x){
            std::cin >> value;
        }
        for(float& value : y){
            std::cin >> value;
        }

        const float coefficient = pearson(rank_vector(x), rank_vector(y));
        std::cout << std::fixed << std::setprecision(3) << coefficient << std::endl;
        return 0;
    }
    
  • + 1 comment
    #python
    def spearman(n, X, Y):
        """Return Spearman's rank correlation coefficient of X and Y.

        Evaluates 1 - 6 * sum(d_i ** 2) / (n * (n ** 2 - 1)), where d_i is the
        difference between the sorted positions of X[i] and Y[i].

        n is the number of observations; X and Y are sequences holding at least
        n values each. Raises ZeroDivisionError when n is 0 or 1.
        """
        # Sort each series once and remember the first position of every value.
        # This yields the same positions as sorted(seq).index(value) without
        # re-sorting the list for every element.
        pos_x = {}
        for pos, value in enumerate(sorted(X)):
            pos_x.setdefault(value, pos)
        pos_y = {}
        for pos, value in enumerate(sorted(Y)):
            pos_y.setdefault(value, pos)

        res = [(pos_x[X[i]] - pos_y[Y[i]]) ** 2 for i in range(n)]
        return 1 - (6 * sum(res) / (n * (n ** 2 - 1)))


    if __name__ == "__main__":
        n = int(input())
        X = list(map(float, input().split()))
        Y = list(map(float, input().split()))
        print(round(spearman(n, X, Y), 3))
    
  • + 0 comments

    C++

    #include <cmath>
    #include <cstdio>
    #include <vector>
    #include <iostream>
    #include <algorithm>
    #include <iomanip>
    using namespace std;
    
    // Returns the 1-based position of the first element of `y` that equals
    // `a`, or y.size() + 1 when no element matches.
    //
    // `y` is taken by const reference so that a call does not copy the vector.
    int getIndexOf(double a, const std::vector<double>& y)
    {
        const auto match = std::find(y.begin(), y.end(), a);
        // When nothing matches, `match` is y.end() and the result is size + 1.
        return static_cast<int>(match - y.begin()) + 1;
    }
    
    // Returns, for each element of `x` in input order, its 1-based position in
    // the ascending sort of `x`. Equal values all receive the position of
    // their first occurrence in the sorted order.
    //
    // The values are sorted once and each position is found by binary search,
    // so the whole pass costs O(n log n).
    std::vector<int> getRank(const std::vector<double>& x)
    {
        std::vector<double> ordered(x);
        std::sort(ordered.begin(), ordered.end());

        std::vector<int> result;
        result.reserve(x.size());
        for (const double value : x)
        {
            // lower_bound yields the first slot holding `value` in the sorted copy.
            const auto pos = std::lower_bound(ordered.begin(), ordered.end(), value);
            result.push_back(static_cast<int>(pos - ordered.begin()) + 1);
        }
        return result;
    }
    
    int main() {
        /* Enter your code here. Read input from STDIN. Print output to STDOUT */   
        int n;
        cin>>n;
        
        vector<double> x,y;
        double num=0;
        for(int i=0; i<n; ++i){
            cin>>num;
            x.push_back(num);
        }
        
        for(int i=0; i<n; ++i){
            cin>>num;
            y.push_back(num);
        }
        vector<int> rankX=getRank(x);
        vector<int> rankY=getRank(y);
        
        double r=0;
        for (int i=0; i<n; ++i)
        {
            r+=pow((rankX[i]-rankY[i]),2.);
        }
        cout<<fixed<<setprecision(3);
        cout<<1-6*r/(n*n*n-n);
        return 0;
    }
    
  • + 0 comments
    def rank_positions(values):
        """Return the 1-based sorted position of each element, in input order.

        Exactly one rank is produced per element. Equal values all receive the
        position of their first occurrence in the sorted order, so the result
        always has the same length as `values`.
        """
        first_pos = {}
        for pos, value in enumerate(sorted(values), start=1):
            first_pos.setdefault(value, pos)
        return [first_pos[value] for value in values]


    def spearman_rank_coef(n, X, Y):
        """Return 1 - 6 * sum(d_i ** 2) / (n * (n * n - 1)).

        d_i is the difference between the ranks of X[i] and Y[i]; n is the
        number of observations. Raises ZeroDivisionError when n is 0 or 1.
        """
        rank_X = rank_positions(X)
        rank_Y = rank_positions(Y)

        d = 0
        for i in range(n):
            d += (rank_X[i] - rank_Y[i]) ** 2

        return 1 - 6 * d / (n * (n * n - 1))


    if __name__ == "__main__":
        n = int(input())
        X = list(map(float, input().split()))
        Y = list(map(float, input().split()))
        print(round(spearman_rank_coef(n, X, Y), 3))
    
  • + 0 comments
    def ranks(x):
        """Return the 1-based sorted position of each element of x, in input order.

        Equal values all receive the position of their first occurrence in the
        sorted order. The list is sorted once rather than once per element.
        """
        first_rank = {}
        for position, value in enumerate(sorted(x), start=1):
            first_rank.setdefault(value, position)
        return [first_rank[value] for value in x]


    def spear(n, a, b):
        """Return 1 - 6 * sum(d_i ** 2) / (n * (n ** 2 - 1)).

        d_i is the difference between the ranks of a[i] and b[i]; n is the
        number of observations. Raises ZeroDivisionError when n is 0 or 1.
        """
        squared = sum((rank_a - rank_b) ** 2 for rank_a, rank_b in zip(ranks(a), ranks(b)))
        return 1 - 6 * squared / (n * (n ** 2 - 1))


    if __name__ == "__main__":
        n = int(input())
        a = list(map(float, input().split()))
        b = list(map(float, input().split()))
        print(round(spear(n, a, b), 3))