Running Summary Stats

Run Settings
Language: Haskell
Language Version
Run Command
import Data.List (scanl')
import Text.Printf (printf)

-- Accumulator for single-pass summary statistics.
-- Each field carries a strictness annotation (!), so the fields are forced
-- whenever the record itself is evaluated rather than being left as thunks
-- while a long stream is consumed.
data Stats = Stats
  { count :: !Int     -- number of samples folded in so far
  , sumV  :: !Double  -- plain running sum of the samples
  , mean  :: !Double  -- running arithmetic mean
  , m2    :: !Double  -- running sum of squared deviations from the mean
  , minV  :: !Double  -- smallest sample seen so far
  , maxV  :: !Double  -- largest sample seen so far
  }

-- Hand-written rendering with one decimal place per floating-point field.
-- The variance is derived at display time, and min/max are shown as NaN
-- while no sample has been recorded.
instance Show Stats where
  show s =
    printf
      "Stats {count = %d, sum = %.1f, mean = %.1f, vari = %.1f, min = %.1f, max = %.1f}"
      (count s)
      (sumV s)
      (mean s)
      (variance s)
      (orNaN (minV s))
      (orNaN (maxV s))
    where
      -- Pass the extreme through only when at least one sample exists.
      orNaN :: Double -> Double
      orNaN v
        | count s > 0 = v
        | otherwise = 0.0 / 0.0

-- Sample variance (divisor n - 1) of everything accumulated so far.
-- Yields 0.0 when fewer than two samples have been seen.
variance :: Stats -> Double
variance s
  | n > 1 = m2 s / fromIntegral (n - 1)
  | otherwise = 0.0
  where
    n = count s

-- The empty accumulator that a fold or scan starts from.
initialStats :: Stats
initialStats =
  Stats
    { count = 0
    , sumV = 0.0
    , mean = 0.0
    , m2 = 0.0
    , minV = 1 / 0 -- +Infinity: any finite first sample replaces it
    , maxV = -1 / 0 -- -Infinity: any finite first sample replaces it
    }

-- Pure reducer: fold one more sample into the accumulator.
-- Takes the previous state and the new value and returns the next state.
-- Mean and m2 follow Welford's online algorithm: the deviation is measured
-- once against the old mean and once against the new one, and their product
-- is added to m2.
updateStats :: Stats -> Double -> Stats
updateStats prev sample =
  prev
    { count = n
    , sumV = sumV prev + sample
    , mean = nextMean
    , m2 = m2 prev + devOld * devNew
    , minV = min (minV prev) sample
    , maxV = max (maxV prev) sample
    }
  where
    n = count prev + 1
    devOld = sample - mean prev
    nextMean = mean prev + devOld / fromIntegral n
    devNew = sample - nextMean

-- The main function to demonstrate the streaming calculation.
-- Feeds a small fixed sample through updateStats and prints the statistics
-- as they stand after each value has been folded in, then the final result.
main :: IO ()
main = do
  putStrLn "Demonstrating streaming statistics calculation in Haskell:"

  -- Our simulated stream of data.
  let dataStream = [2, 4, 4, 4, 5, 5, 7, 9] :: [Double]

  -- scanl' is like a fold, but it returns every intermediate state. The
  -- result starts with the seed (initialStats), followed by one state per
  -- consumed element, so it is one entry longer than dataStream.
  let history = scanl' updateStats initialStats dataStream

  -- Print the state after each item is processed. The seed is dropped so
  -- that every value is paired with the state that already includes it
  -- (pairing against the full history would show the state from *before*
  -- the value was added). Items are numbered from 1, so the item number
  -- matches the count field of the state printed next to it.
  mapM_
    ( \(i, val, stats) ->
        printf "After adding %.1f (stream item #%d): %s\n" val i (show stats) :: IO ()
    )
    (zip3 [1 :: Int ..] dataStream (drop 1 history))

  putStrLn "\n--- Final Result ---"

  -- The final result is the last element of the history list. last is safe
  -- here because scanl' always returns at least the seed.
  let finalStats = last history
  printf "Final Stats: %s\n" (show finalStats)
Editor Settings
Theme
Key bindings
Full width
Lines