@@ -147,21 +147,17 @@ class StatisticsError(ValueError):
147147
148148# === Private utilities ===
149149
150- def _sum (data , start = 0 ):
151- """_sum(data [, start] ) -> (type, sum, count)
150+ def _sum (data ):
151+ """_sum(data) -> (type, sum, count)
152152
153153 Return a high-precision sum of the given numeric data as a fraction,
154154 together with the type to be converted to and the count of items.
155155
156- If optional argument ``start`` is given, it is added to the total.
157- If ``data`` is empty, ``start`` (defaulting to 0) is returned.
158-
159-
160156 Examples
161157 --------
162158
163- >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75 )
164- (<class 'float'>, Fraction(11, 1 ), 5)
159+ >>> _sum([3, 2.25, 4.5, -0.5, 0.25] )
160+ (<class 'float'>, Fraction(19, 2 ), 5)
165161
166162 Some sources of round-off error will be avoided:
167163
@@ -184,10 +180,9 @@ def _sum(data, start=0):
184180 allowed.
185181 """
186182 count = 0
187- n , d = _exact_ratio (start )
188- partials = {d : n }
183+ partials = {}
189184 partials_get = partials .get
190- T = _coerce ( int , type ( start ))
185+ T = int
191186 for typ , values in groupby (data , type ):
192187 T = _coerce (T , typ ) # or raise TypeError
193188 for n , d in map (_exact_ratio , values ):
@@ -200,8 +195,7 @@ def _sum(data, start=0):
200195 assert not _isfinite (total )
201196 else :
202197 # Sum all the partial sums using builtin sum.
203- # FIXME is this faster if we sum them in order of the denominator?
204- total = sum (Fraction (n , d ) for d , n in sorted (partials .items ()))
198+ total = sum (Fraction (n , d ) for d , n in partials .items ())
205199 return (T , total , count )
206200
207201
@@ -252,27 +246,19 @@ def _exact_ratio(x):
252246 x is expected to be an int, Fraction, Decimal or float.
253247 """
254248 try :
255- # Optimise the common case of floats. We expect that the most often
256- # used numeric type will be builtin floats, so try to make this as
257- # fast as possible.
258- if type (x ) is float or type (x ) is Decimal :
259- return x .as_integer_ratio ()
260- try :
261- # x may be an int, Fraction, or Integral ABC.
262- return (x .numerator , x .denominator )
263- except AttributeError :
264- try :
265- # x may be a float or Decimal subclass.
266- return x .as_integer_ratio ()
267- except AttributeError :
268- # Just give up?
269- pass
249+ return x .as_integer_ratio ()
250+ except AttributeError :
251+ pass
270252 except (OverflowError , ValueError ):
271253 # float NAN or INF.
272254 assert not _isfinite (x )
273255 return (x , None )
274- msg = "can't convert type '{}' to numerator/denominator"
275- raise TypeError (msg .format (type (x ).__name__ ))
256+ try :
257+ # x may be an Integral ABC.
258+ return (x .numerator , x .denominator )
259+ except AttributeError :
260+ msg = f"can't convert type '{ type (x ).__name__ } ' to numerator/denominator"
261+ raise TypeError (msg )
276262
277263
278264def _convert (value , T ):
@@ -730,18 +716,20 @@ def _ss(data, c=None):
730716 if c is not None :
731717 T , total , count = _sum ((d := x - c ) * d for x in data )
732718 return (T , total )
733- # Compute the mean accurate to within 1/2 ulp
734- c = mean (data )
735- # Initial computation for the sum of square deviations
736- T , total , count = _sum ((d := x - c ) * d for x in data )
737- # Correct any remaining inaccuracy in the mean c.
738- # The following sum should mathematically equal zero,
739- # but due to the final rounding of the mean, it may not.
740- U , error , count2 = _sum ((x - c ) for x in data )
741- assert count == count2
742- correction = error * error / len (data )
743- total -= correction
744- assert not total < 0 , 'negative sum of square deviations: %f' % total
719+ T , total , count = _sum (data )
720+ mean_n , mean_d = (total / count ).as_integer_ratio ()
721+ partials = Counter ()
722+ for n , d in map (_exact_ratio , data ):
723+ diff_n = n * mean_d - d * mean_n
724+ diff_d = d * mean_d
725+ partials [diff_d * diff_d ] += diff_n * diff_n
726+ if None in partials :
727+ # The sum will be a NAN or INF. We can ignore all the finite
728+ # partials, and just look at this special one.
729+ total = partials [None ]
730+ assert not _isfinite (total )
731+ else :
732+ total = sum (Fraction (n , d ) for d , n in partials .items ())
745733 return (T , total )
746734
747735
@@ -845,6 +833,9 @@ def stdev(data, xbar=None):
845833 1.0810874155219827
846834
847835 """
836+ # Fixme: Despite the exact sum of squared deviations, some inaccuracy
837+ # remain because there are two rounding steps. The first occurs in
838+ # the _convert() step for variance(), the second occurs in math.sqrt().
848839 var = variance (data , xbar )
849840 try :
850841 return var .sqrt ()
@@ -861,6 +852,9 @@ def pstdev(data, mu=None):
861852 0.986893273527251
862853
863854 """
855+ # Fixme: Despite the exact sum of squared deviations, some inaccuracy
856+ # remain because there are two rounding steps. The first occurs in
857+ # the _convert() step for pvariance(), the second occurs in math.sqrt().
864858 var = pvariance (data , mu )
865859 try :
866860 return var .sqrt ()
0 commit comments