Running Summary Stats

Run Settings
Language: Python
Language Version
Run Command
import math


class StreamingStats:
    """Compute summary statistics over a stream, one value at a time.

    Mean and variance are maintained with Welford's online algorithm, so
    no past values are stored: memory use is constant regardless of how
    many data points have been seen.

    Attributes:
        count: Number of values seen so far.
        sum: Running total of all values.
        mean: Running mean (0 until the first value arrives).
        min: Smallest value seen (``+inf`` until the first value arrives).
        max: Largest value seen (``-inf`` until the first value arrives).
    """

    def __init__(self):
        """Initialize an empty statistics calculator."""
        self.count = 0
        self.sum = 0
        self.mean = 0
        # _M2 is the running sum of squared differences from the current
        # mean; dividing it by (n - 1) yields the sample variance.
        self._M2 = 0
        self.min = math.inf
        self.max = -math.inf

    def update(self, x):
        """Fold one new data point ``x`` into the running statistics.

        Args:
            x: The next numeric value from the stream.
        """
        if self.count == 0:
            # The first value seeds min and max directly.
            self.min = x
            self.max = x
        else:
            self.min = min(self.min, x)
            self.max = max(self.max, x)

        self.count += 1
        self.sum += x

        # Welford's update: `delta` uses the mean *before* it is adjusted,
        # `delta2` uses the mean *after*; their product is the increment
        # to the sum of squared deviations.
        delta = x - self.mean
        self.mean += delta / self.count
        delta2 = x - self.mean
        self._M2 += delta * delta2

    def get_stats(self):
        """Return a snapshot of the current statistics.

        Returns:
            A dict with keys ``"count"``, ``"sum"``, ``"mean"``, ``"vari"``
            (sample variance, i.e. ``_M2 / (count - 1)``), ``"min"`` and
            ``"max"``. The variance is reported as 0 when fewer than two
            values have been seen; ``"min"`` and ``"max"`` are NaN when no
            values have been seen.
        """
        if self.count > 1:
            variance = self._M2 / (self.count - 1)
        else:
            variance = 0

        has_data = self.count > 0
        return {
            "count": self.count,
            "sum": self.sum,
            "mean": self.mean,
            "vari": variance,
            "min": self.min if has_data else math.nan,
            "max": self.max if has_data else math.nan,
        }


def main():
    """Demonstrate StreamingStats on a small simulated data stream."""
    print("Demonstrating streaming statistics calculation in Python:")

    # 1. Create a new stats calculator instance.
    my_stream_stats = StreamingStats()
    print(f"Initial Stats: {my_stream_stats.get_stats()}")

    # 2. Simulate a stream of data arriving one value at a time.
    data_stream = [2, 4, 4, 4, 5, 5, 7, 9]
    for i, value in enumerate(data_stream):
        my_stream_stats.update(value)
        print(f"After adding {value} (stream item #{i + 1}): {my_stream_stats.get_stats()}")

    print("\n--- Final Result ---")
    final_result = my_stream_stats.get_stats()
    print(f"Final Stats: {final_result}")
    # Expected final result (up to floating-point rounding):
    #   count 8, sum 40, mean ~5.0, min 2, max 9,
    #   vari ~4.5714 (sample variance = 32 / 7).
    # Note: 4.0 would be the *population* variance (32 / 8), which this
    # class does not report.


if __name__ == "__main__":
    main()
Editor Settings
Theme
Key bindings
Full width
Lines