import Data.List (scanl')
import Text.Printf (printf)
-- | Running state of the streaming statistics calculation.
--
-- Every field carries a strictness annotation (@!@), so building a 'Stats'
-- value forces each field; a long fold therefore cannot pile up unevaluated
-- thunks inside the record.
data Stats = Stats
  { count :: !Int
    -- ^ Number of values folded in so far.
  , sumV :: !Double
    -- ^ Running sum of the values.
  , mean :: !Double
    -- ^ Running mean of the values.
  , m2 :: !Double
    -- ^ Running sum of squares of differences from the mean.
  , minV :: !Double
    -- ^ Smallest value seen so far.
  , maxV :: !Double
    -- ^ Largest value seen so far.
  }
-- | Human-readable, one-line rendering of a 'Stats' record.
--
-- The variance is not stored; it is derived via 'variance' at display time.
-- While no values have been folded in, 'minV' and 'maxV' still hold their
-- infinite seeds, so they are displayed as NaN instead.
instance Show Stats where
  show s =
    printf
      "Stats {count = %d, sum = %.1f, mean = %.1f, vari = %.1f, min = %.1f, max = %.1f}"
      n
      (sumV s)
      (mean s)
      (variance s)
      (orNaN (minV s))
      (orNaN (maxV s))
    where
      n = count s

      -- Pass the extreme through only once at least one value was seen.
      orNaN :: Double -> Double
      orNaN v
        | n > 0 = v
        | otherwise = 0.0 / 0.0
-- | Sample variance of the values folded into the given state.
--
-- Divides the accumulated 'm2' by @count - 1@. With fewer than two values
-- the result is @0.0@.
variance :: Stats -> Double
variance s
  | n > 1 = m2 s / fromIntegral (n - 1)
  | otherwise = 0.0
  where
    n = count s
-- | The empty state from which every calculation starts.
--
-- The extremes are seeded with the identities of 'min' and 'max': positive
-- infinity for 'minV' and negative infinity for 'maxV', so the first real
-- value replaces both.
initialStats :: Stats
initialStats =
  Stats
    { count = 0
    , sumV = 0.0
    , mean = 0.0
    , m2 = 0.0
    , minV = infinity
    , maxV = negate infinity
    }
  where
    infinity :: Double
    infinity = 1 / 0
-- | Pure reducer: fold one more value into the running statistics.
--
-- Takes the previous state and the new value and returns the next state.
-- Mean and 'm2' are advanced with Welford's online algorithm, which needs
-- the deviation of the value from both the old and the new mean.
updateStats :: Stats -> Double -> Stats
updateStats
  Stats
    { count = prevCount
    , sumV = prevSum
    , mean = prevMean
    , m2 = prevM2
    , minV = lo
    , maxV = hi
    }
  x =
    Stats
      { count = n
      , sumV = prevSum + x
      , mean = nextMean
      , m2 = prevM2 + devOld * devNew
      , minV = min lo x
      , maxV = max hi x
      }
  where
    n = prevCount + 1
    -- Deviation from the mean before this value was included.
    devOld = x - prevMean
    nextMean = prevMean + devOld / fromIntegral n
    -- Deviation from the mean after this value was included.
    devNew = x - nextMean
-- | Demonstrates the streaming calculation: folds a small sample through
-- 'updateStats', prints the state after each value, then the final state.
main :: IO ()
main = do
  putStrLn "Demonstrating streaming statistics calculation in Haskell:"
  -- Our simulated stream of data.
  let dataStream = [2, 4, 4, 4, 5, 5, 7, 9] :: [Double]
  -- `scanl'` is like a fold, but it returns every intermediate accumulator.
  -- Its first element is the seed ('initialStats'), so 'history' holds one
  -- more entry than 'dataStream'.
  let history = scanl' updateStats initialStats dataStream
  -- Print the state after each item is processed. The seed is dropped so that
  -- every value is paired with the state that already includes it; zipping
  -- against 'history' directly would show the state *before* the value and
  -- would never display the final state. Items are numbered from 1, and the
  -- signature on 'report' fixes the index type to `Int` for printf.
  let report :: (Int, Double, Stats) -> IO ()
      report (i, val, stats) =
        printf "After adding %.1f (stream item #%d): %s\n" val i (show stats)
  mapM_ report $ zip3 [1 ..] dataStream (drop 1 history)
  putStrLn "\n--- Final Result ---"
  -- The final result is the last element of the history list. `last` cannot
  -- fail here because `scanl'` always yields at least the seed.
  let finalStats = last history
  printf "Final Stats: %s\n" (show finalStats)