Discussion on Day 7: Pearson Correlation Coefficient I Challenge

Sort by

recency

|

103 Discussions

|

6 months ago+ 0 comments

#python
from statistics import pstdev, mean


def pearson_correlation(xs, ys):
    """Return the Pearson correlation coefficient of two paired samples.

    The coefficient is the sum of the products of paired deviations from
    the means, divided by ``n * sigma_x * sigma_y``, where ``n`` is the
    number of pairs and the sigmas are population standard deviations.

    Args:
        xs: Sequence of numbers (the X values).
        ys: Sequence of numbers (the Y values), same length as ``xs``.

    Returns:
        The correlation coefficient as a float.

    Raises:
        ValueError: If ``xs`` and ``ys`` differ in length.
        statistics.StatisticsError: If the samples are empty.
        ZeroDivisionError: If either sample has zero standard deviation.
    """
    if len(xs) != len(ys):
        raise ValueError("xs and ys must have the same length")
    # Compute each mean once; evaluating mean() per element made the
    # original loop quadratic in the sample size.
    mean_x = mean(xs)
    mean_y = mean(ys)
    deviation_products = sum(
        (x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)
    )
    return deviation_products / (len(xs) * pstdev(xs) * pstdev(ys))


if __name__ == "__main__":
    # The first line carries the declared sample size; it is consumed so the
    # following reads land on the data rows, whose length is what gets used.
    n = int(input())
    X = list(map(float, input().split()))
    Y = list(map(float, input().split()))
    print(round(pearson_correlation(X, Y), 3))

6 months ago+ 0 comments

I am a bit confused why it's not mentioned that we should take the population standard deviation instead of the sample standard deviation.

# Enter your code here. Read input from STDIN. Print output to STDOUT
import math
import statistics
# First line of input: the declared number of observations.
n = int(input())

# Second and third lines: the whitespace-separated X and Y observations.
X = [float(token) for token in input().split()]
Y = [float(token) for token in input().split()]
    

def calcpearsonCov(dataset1, dataset2):
    """Return the Pearson correlation coefficient of two paired samples.

    The coefficient is the sum of the products of paired deviations from
    the means, divided by ``n * sigma_x * sigma_y``, where ``n`` is the
    number of pairs and the sigmas are population standard deviations.

    Args:
        dataset1: Sequence of numbers (the X values).
        dataset2: Sequence of numbers (the Y values), same length as
            ``dataset1``.

    Returns:
        The correlation coefficient as a float.

    Raises:
        ValueError: If the two datasets differ in length.
        statistics.StatisticsError: If the datasets are empty.
        ZeroDivisionError: If either dataset has zero standard deviation.
    """
    # Take the size from the arguments instead of a module-level ``n`` so the
    # function gives the right answer for whatever data it is handed.
    size = len(dataset1)
    if size != len(dataset2):
        raise ValueError("dataset1 and dataset2 must have the same length")

    averagex = statistics.mean(dataset1)
    averagey = statistics.mean(dataset2)
    deviation_products = sum(
        (x - averagex) * (y - averagey) for x, y in zip(dataset1, dataset2)
    )
    stdevx = statistics.pstdev(dataset1)
    stdevy = statistics.pstdev(dataset2)
    return deviation_products / (size * (stdevx * stdevy))
    
# Compute the coefficient for the two input series and print it rounded to
# three decimal places.
result = calcpearsonCov(X, Y)
print(round(result, 3))

11 months ago+ 0 comments

In R, this could be:

# Open standard input as a connection so it can be read one line at a time.
stdin <- file('stdin')
open(stdin)

# Read a single line from `con` and parse it as space-separated numbers.
read_numeric_line <- function(con) {
  line <- readLines(con, n = 1, warn = FALSE)
  as.numeric(strsplit(trimws(line, which = "right"), " ")[[1]])
}

# The first line holds the sample size; the next two hold the X and Y series.
n <- as.integer(trimws(readLines(stdin, n = 1, warn = FALSE), which = "both"))
dataX <- read_numeric_line(stdin)
dataY <- read_numeric_line(stdin)

# Pearson correlation of the two series, printed to three decimal places.
correlation <- cor(dataX, dataY, method = 'pearson')
cat(round(correlation, 3))

1 year ago+ 0 comments

Easy Solution:

# import statistics as stat
from sys import stdin, stdout
import math

def mean(arr):
    """Return the arithmetic mean of the values in ``arr``.

    Raises ZeroDivisionError when ``arr`` is empty.
    """
    total = sum(arr)
    count = len(arr)
    return total / count

def sd(arr, mean):
    """Return the population standard deviation of ``arr`` about ``mean``.

    Args:
        arr: Sequence of numbers.
        mean: The centre value to measure deviations from (supplied by the
            caller; inside this function the name refers to that value).

    Returns:
        The square root of the average squared deviation.
    """
    # Population variance: squared deviations averaged over the full length.
    variance = sum((value - mean) ** 2 for value in arr) / len(arr)
    return math.sqrt(variance)

def covariance(X, Y):
    """Return the population covariance of the paired samples ``X`` and ``Y``.

    Each ``X`` value is paired with the ``Y`` value at the same index, and
    the summed deviation products are averaged over the length of ``X``.
    """
    center_x = mean(X)
    center_y = mean(Y)
    total = sum(
        (x_val - center_x) * (Y[idx] - center_y) for idx, x_val in enumerate(X)
    )
    return total / len(X)

def pearson_correlation(X, Y):
    """Return the Pearson correlation coefficient of ``X`` and ``Y``.

    This is the covariance of the two samples divided by the product of
    their population standard deviations.
    """
    numerator = covariance(X, Y)
    denominator = sd(X, mean(X)) * sd(Y, mean(Y))
    return numerator / denominator

# Input layout: the sample size on the first line, then the X values and the
# Y values on one line each.
n = int(stdin.readline().strip())
X = [float(token) for token in stdin.readline().split()]
Y = [float(token) for token in stdin.readline().split()]

if len(X) == len(Y):
    t = pearson_correlation(X, Y)
    print(round(t, 3))
else:
    print("Error: Data sets X and Y must have equal lengths")

1 year ago+ 0 comments

def mean(x):
    """Return the arithmetic mean of the sequence ``x``."""
    return sum(x) / len(x)


def std(x):
    """Return the population standard deviation of the sequence ``x``."""
    # Compute the mean once; evaluating it per element is quadratic.
    mu = mean(x)
    return (sum([(i - mu) ** 2 for i in x]) / len(x)) ** 0.5


def prsn(n, a, b):
    """Return the Pearson correlation coefficient of ``a`` and ``b``.

    Args:
        n: Number of observations, used as the divisor alongside the two
            standard deviations.
        a: Sequence of numbers (the X values).
        b: Sequence of numbers (the Y values), paired with ``a`` by position.

    Returns:
        The sum of paired deviation products divided by
        ``n * std(a) * std(b)``.
    """
    # Hoist both means out of the summation so each is computed only once.
    mean_a = mean(a)
    mean_b = mean(b)
    deviation_products = sum(
        (u - mean_a) * (v - mean_b) for u, v in zip(a, b)
    )
    return deviation_products / (n * std(a) * std(b))


if __name__ == "__main__":
    n = int(input())
    a = list(map(float, input().split()))
    b = list(map(float, input().split()))
    print(f'{prsn(n, a, b):.3f}')

Sort by

|

103 Discussions

|

Cookie support is required to access HackerRank