/// Computes Spearman's rank correlation coefficient for two paired samples.
///
/// Each sample is ranked in descending order (the largest value gets rank 1,
/// the smallest gets rank n); equal values share the mean of the ranks they
/// occupy. The coefficient is 1 - 6 * sum(d^2) / (n^3 - n), where d is the
/// difference between the two ranks of a pair.
///
/// setX, setY: the paired observations; both arrays must have the same length.
/// Returns: the coefficient as a float. With fewer than two pairs the
/// denominator n^3 - n is zero, so the result is NaN.
/// Raises System.ArgumentException when the arrays differ in length.
let calcCoeff (setX : int[]) (setY : int[]) =
    // Returns the rank of every element of `set`, in the original element order.
    // n is the number of elements, i.e. the rank given to the smallest value.
    let GetRanks (set : int[]) n =
        // Fold step over (value, frequency) pairs visited in ascending value order.
        // A value occurring freq times occupies the ranks
        // curRank, curRank - 1, ..., curRank - freq + 1, whose mean is
        // curRank - (freq - 1) / 2. Computing that mean directly in floating
        // point avoids summing the ranks in an int accumulator.
        let assign (ranks : Map<int, float>, curRank : int) (value : int, freq : int) =
            let shared = float curRank - float (freq - 1) / 2.0
            (Map.add value shared ranks, curRank - freq)
        let rankByValue =
            set
            |> Seq.countBy id
            |> Seq.sortBy fst
            |> Seq.fold assign (Map.empty, n)
            |> fst
        // One map lookup per element rather than a linear search per element.
        set |> Array.map (fun value -> Map.find value rankByValue)
    if setX.Length <> setY.Length then
        raise (System.ArgumentException("Both collections of data must contain an equal number of elements"))
    let n = setX.Length
    let ranksX = GetRanks setX n
    let ranksY = GetRanks setY n
    // Sum of the squared differences between paired ranks, accumulated left to right.
    let sigmaDiff = Array.fold2 (fun acc rx ry -> acc + (rx - ry) ** 2.0) 0.0 ranksX ranksY
    1.0 - ((6.0 * sigmaDiff) / ((float n ** 3.0) - float n))
// Demo driver: runs calcCoeff on two fixed seven-element samples and prints
// the resulting coefficient to standard output.
let main() =
    let xs = [| 72; 112; 46; 97; 46; 46; 52 |]
    let ys = [| 20; 2; 7; 11; 4; 12; 7 |]
    calcCoeff xs ys
    |> printfn "Spearman's Rank Correlation Coefficient: %f"
// Run the demo when the script is evaluated.
main()
Spearman's Rank Correlation Coefficient: -0.062500

On inspecting the performance of both the C# and F# versions (using System.Diagnostics.Stopwatch), the F# version came in just a few milliseconds faster than the C# version. Note also that I used type annotations on the two array parameters to constrain them to type int[].