Day 7: Spearman's Rank Correlation Coefficient

  • + 0 comments

    To solve this in C++ without ending up with spaghetti code, I defined several small helper functions.

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <iomanip>
    #include <iostream>
    #include <iterator>
    #include <set>
    #include <stdexcept>
    #include <vector>
    using namespace std;
    
    /// Arithmetic mean of a sample.
    ///
    /// The running sum is kept in double precision so that adding many
    /// values (or values of very different magnitude) does not lose the
    /// low-order bits that a float accumulator would drop.
    ///
    /// @param data samples to average (taken by reference; not modified).
    /// @return the mean, narrowed to float. For an empty `data` the result
    ///         is the floating-point value of 0/0 (NaN).
    float mean(const std::vector<float>& data){
        double sum{0.0};
        for(const float value : data){
            sum += value;
        }
        return static_cast<float>(sum / static_cast<double>(data.size()));
    }
    
    float covariance(vector<float>x,vector<float>y){
        int n = x.size();
        float mx{mean(x)}, my{mean(y)};
        float res{0};
        for(int i=0;i<n;i++){
            res+=(x[i]-mx)*(y[i]-my);
        }
        return res / n;
    }
    
    float sdev(vector<float>d){
        int n = d.size();
        float md{mean(d)};
        float res{0};
        for(int i=0;i<n;i++){
            res+=pow((d[i]-md),2);
        }
        return sqrt(res/n);
    }
    
    float pearson(vector<float>x, vector<float>y){
        return covariance(x, y) / (sdev(x)*sdev(y));
    }
    
    /// 1-based position of `val` within the ascending order of `s`.
    ///
    /// The set is taken by const reference so that a lookup does not copy
    /// the whole container.
    ///
    /// @param s   ordered set to search.
    /// @param val value to locate.
    /// @return the 1-based position of `val` in `s`, or -1 if `val` is not
    ///         an element of `s`.
    int set_index(const std::set<float>& s, float val)
    {
        const auto it = s.find(val);
        if(it == s.end()){
            return -1;
        }
        // Number of elements ordered before `val`, shifted to start at 1.
        return static_cast<int>(std::distance(s.begin(), it)) + 1;
    }
    
    /// Replaces every value in `d` with its rank among the distinct values
    /// of `d`: the smallest distinct value gets rank 1, the next gets 2,
    /// and so on. Equal values share the same rank (dense ranking).
    ///
    /// Runs in O(n log n): the distinct values are sorted once and each
    /// element's rank is then found by binary search.
    ///
    /// NOTE(review): tied values receive a shared dense rank, not the
    /// average rank usually used for Spearman's coefficient with ties —
    /// confirm that inputs are expected to be tie-free.
    /// NOTE(review): assumes `d` contains no NaN, which cannot be ordered.
    ///
    /// @param d values to rank (taken by reference; not modified).
    /// @return a vector of the same length as `d` holding the ranks.
    std::vector<float> rank_vector(const std::vector<float>& d){
        // Sorted list of the distinct values; a value's index here is rank - 1.
        std::vector<float> distinct(d);
        std::sort(distinct.begin(), distinct.end());
        distinct.erase(std::unique(distinct.begin(), distinct.end()), distinct.end());

        std::vector<float> res;
        res.reserve(d.size());
        for(const float value : d){
            const auto pos = std::lower_bound(distinct.begin(), distinct.end(), value);
            res.push_back(static_cast<float>(pos - distinct.begin() + 1));
        }
        return res;
    }
    
    
    /// Reads a sample count n followed by n values of X and n values of Y
    /// from standard input, then prints the Pearson correlation of their
    /// ranks with three digits after the decimal point.
    ///
    /// @return 0 on success; 1 if the count is missing or negative, or if
    ///         any sample value cannot be read.
    int main() {
        int n{};
        if(!(std::cin >> n) || n < 0){
            std::cerr << "error: expected a non-negative sample count\n";
            return 1;
        }

        std::vector<float> x(static_cast<std::size_t>(n));
        std::vector<float> y(static_cast<std::size_t>(n));
        for(float& value : x){
            if(!(std::cin >> value)){
                std::cerr << "error: failed to read a value of X\n";
                return 1;
            }
        }
        for(float& value : y){
            if(!(std::cin >> value)){
                std::cerr << "error: failed to read a value of Y\n";
                return 1;
            }
        }

        // '\n' instead of endl: the stream is flushed at program exit anyway.
        std::cout << std::fixed << std::setprecision(3)
                  << pearson(rank_vector(x), rank_vector(y)) << '\n';
        return 0;
    }