import math
class StreamingStats:
    """
    Calculate summary statistics on a stream of data, one value at a time.

    The mean and variance are maintained with Welford's online algorithm, so
    individual data points are never stored: only a fixed set of running
    totals (count, sum, mean, _M2, min, max) is kept on the instance.
    """

    def __init__(self) -> None:
        """Initializes the statistics calculator with no data seen."""
        self.count = 0
        self.sum = 0
        self.mean = 0
        # _M2 is the running sum of squared differences from the current mean.
        # Dividing it by (count - ddof) yields the variance in a single,
        # numerically stable pass.
        self._M2 = 0
        # Sentinels; both are overwritten by the first call to update().
        self.min = float('inf')
        self.max = float('-inf')

    def update(self, x: float) -> None:
        """
        Updates the statistics with a new data point from the stream.

        :param x: The new numeric value to fold into the running statistics.
        """
        # The first value initializes min and max directly.
        if self.count == 0:
            self.min = x
            self.max = x
        else:
            self.min = min(self.min, x)
            self.max = max(self.max, x)

        self.count += 1
        self.sum += x

        # Welford's online algorithm: `delta` is measured against the old
        # mean and `delta2` against the updated mean; their product is the
        # increment to the sum of squared deviations.
        delta = x - self.mean
        self.mean += delta / self.count
        delta2 = x - self.mean
        self._M2 += delta * delta2

    def get_stats(self, *, ddof: int = 1) -> dict:
        """
        Returns the current calculated statistics.

        :param ddof: Delta degrees of freedom; the variance divisor is
            ``count - ddof``. The default of 1 gives the sample variance,
            0 gives the population variance.
        :return: A dictionary with the keys "count", "sum", "mean", "vari"
            (the variance), "min" and "max". "vari" is 0 when there are too
            few data points for the divisor to be positive (e.g. zero or one
            point with the default ddof). "min" and "max" are NaN when no
            data has been seen.
        """
        divisor = self.count - ddof
        variance = self._M2 / divisor if divisor > 0 else 0
        has_data = self.count > 0
        return {
            "count": self.count,
            "sum": self.sum,
            "mean": self.mean,
            "vari": variance,
            "min": self.min if has_data else float('nan'),
            "max": self.max if has_data else float('nan'),
        }
# --- Example Usage: Simulating a Data Stream ---
print("Demonstrating streaming statistics calculation in Python:")

# 1. Create a new stats calculator instance
my_stream_stats = StreamingStats()
print(f"Initial Stats: {my_stream_stats.get_stats()}")

# 2. Simulate a stream of data arriving one value at a time,
#    printing a snapshot of the statistics after each value.
data_stream = [2, 4, 4, 4, 5, 5, 7, 9]
for i, value in enumerate(data_stream):
    my_stream_stats.update(value)
    print(f"After adding {value} (stream item #{i + 1}): {my_stream_stats.get_stats()}")

print("\n--- Final Result ---")
final_result = my_stream_stats.get_stats()
print(f"Final Stats: {final_result}")
# Expected final result (floats are subject to rounding):
#   count 8, sum 40, mean 5.0, min 2, max 9,
#   vari = 32 / 7 ~= 4.5714  (sample variance, divisor n - 1).
# Note: 4.0 would be the population variance (divisor n) of this data.