| author | Jan Gelety <jgelety@cisco.com> | 2019-11-12 05:27:43 +0100 |
|---|---|---|
| committer | Jan Gelety <jgelety@cisco.com> | 2019-11-28 18:26:21 +0100 |
| commit | d68951ac245150eeefa6e0f4156e4c1b5c9e9325 (patch) | |
| tree | 487554a7547218d27f0a61ec02b70502c32cdcb4 /resources/libraries/python/PLRsearch | |
| parent | ed0258a440cfad7023d643f717ab78ac568dc59b (diff) | |
Python3: resources and libraries
Change-Id: I1392c06b1d64f62b141d24c0d42a8e36913b15e2
Signed-off-by: Jan Gelety <jgelety@cisco.com>
Diffstat (limited to 'resources/libraries/python/PLRsearch')
| -rw-r--r-- | resources/libraries/python/PLRsearch/Integrator.py | 59 |
| -rw-r--r-- | resources/libraries/python/PLRsearch/PLRsearch.py | 204 |
| -rw-r--r-- | resources/libraries/python/PLRsearch/log_plus.py | 20 |
| -rw-r--r-- | resources/libraries/python/PLRsearch/stat_trackers.py | 73 |

4 files changed, 190 insertions(+), 166 deletions(-)
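Editor's note on the recurring pattern in the hunks below: the commit replaces Python 2 era %-style logging arguments and `.format()` calls with Python 3.6+ f-strings, adds `u"..."` prefixes to remaining string literals, and drops the explicit `object` base class. A schematic before/after, adapted from the Trial logging call in the search method (an illustrative sketch, not lines copied verbatim from the diff):

```python
import logging

trial_number = 7

# Before (Python 2 compatible): logging defers %-formatting to a dict.
logging.info("Trial %(number)r", {"number": trial_number})

# After (Python 3.6+): an f-string with !r conversion.
logging.info(f"Trial {trial_number!r}")


# Before: class PLRsearch(object):
# After: the object base is implicit in Python 3.
class PLRsearch:
    pass
```

One trade-off worth noting: an f-string is evaluated eagerly even when the log level is disabled, whereas %-style arguments are only formatted if the record is actually emitted.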
```diff
diff --git a/resources/libraries/python/PLRsearch/Integrator.py b/resources/libraries/python/PLRsearch/Integrator.py
index 86181eaa56..331bd8475b 100644
--- a/resources/libraries/python/PLRsearch/Integrator.py
+++ b/resources/libraries/python/PLRsearch/Integrator.py
@@ -23,6 +23,7 @@ import copy
 import traceback
 
 import dill
+
 from numpy import random
 
 # TODO: Teach FD.io CSIT to use multiple dirs in PYTHONPATH,
@@ -58,7 +59,7 @@ def try_estimate_nd(communication_pipe, scale_coeff=8.0, trace_enabled=False):
         # so we have to catch them all.
         traceback_string = traceback.format_exc()
         communication_pipe.send(traceback_string)
-        # After sendig, re-raise, so usages other than "one process per call"
+        # After sending, re-raise, so usages other than "one process per call"
         # keep behaving correctly.
         raise
@@ -86,7 +87,8 @@ def generate_sample(averages, covariance_matrix, dimension, scale_coeff):
             covariance_matrix[first][second] *= scale_coeff
     while 1:
         sample_point = random.multivariate_normal(
-            averages, covariance_matrix, 1)[0].tolist()
+            averages, covariance_matrix, 1
+        )[0].tolist()
         # Multivariate Gauss can fall outside (-1, 1) interval
         for first in range(dimension):
             sample_coordinate = sample_point[first]
@@ -187,14 +189,15 @@ def estimate_nd(communication_pipe, scale_coeff=8.0, trace_enabled=False):
     :raises numpy.linalg.LinAlgError: If the focus shape gets singular
         (due to rounding errors). Try changing scale_coeff.
     """
     debug_list = list()
     trace_list = list()
     # Block until input object appears.
     dimension, dilled_function, param_focus_tracker, max_samples = (
-        communication_pipe.recv())
-    debug_list.append("Called with param_focus_tracker {tracker!r}"
-                      .format(tracker=param_focus_tracker))
+        communication_pipe.recv()
+    )
+    debug_list.append(
+        f"Called with param_focus_tracker {param_focus_tracker!r}"
+    )
 
     def trace(name, value):
         """
@@ -210,7 +213,7 @@ def estimate_nd(communication_pipe, scale_coeff=8.0, trace_enabled=False):
         :type value: object
         """
         if trace_enabled:
-            trace_list.append(name + " " + repr(value))
+            trace_list.append(f"{name} {value!r}")
 
     value_logweight_function = dill.loads(dilled_function)
     samples = 0
@@ -235,33 +238,39 @@ def estimate_nd(communication_pipe, scale_coeff=8.0, trace_enabled=False):
             break
         sample_point = generate_sample(
             param_focus_tracker.averages, param_focus_tracker.covariance_matrix,
-            dimension, scale_coeff)
-        trace("sample_point", sample_point)
+            dimension, scale_coeff
+        )
+        trace(u"sample_point", sample_point)
         samples += 1
-        trace("samples", samples)
+        trace(u"samples", samples)
         value, log_weight = value_logweight_function(trace, *sample_point)
-        trace("value", value)
-        trace("log_weight", log_weight)
-        trace("focus tracker before adding", param_focus_tracker)
+        trace(u"value", value)
+        trace(u"log_weight", log_weight)
+        trace(u"focus tracker before adding", param_focus_tracker)
         # Update focus related statistics.
         param_distance = param_focus_tracker.add_without_dominance_get_distance(
-            sample_point, log_weight)
+            sample_point, log_weight
+        )
        # The code above looked at weight (not importance).
        # The code below looks at importance (not weight).
        log_rarity = param_distance / 2.0
-        trace("log_rarity", log_rarity)
+        trace(u"log_rarity", log_rarity)
        log_importance = log_weight + log_rarity
-        trace("log_importance", log_importance)
+        trace(u"log_importance", log_importance)
        value_tracker.add(value, log_importance)
        # Update sampled statistics.
         param_sampled_tracker.add_get_shift(sample_point, log_importance)
-    debug_list.append("integrator used " + str(samples) + " samples")
-    debug_list.append(" ".join([
-        "value_avg", str(value_tracker.average),
-        "param_sampled_avg", repr(param_sampled_tracker.averages),
-        "param_sampled_cov", repr(param_sampled_tracker.covariance_matrix),
-        "value_log_variance", str(value_tracker.log_variance),
-        "value_log_secondary_variance",
-        str(value_tracker.secondary.log_variance)]))
+    debug_list.append(f"integrator used {samples!s} samples")
+    debug_list.append(
+        u" ".join([
+            u"value_avg", str(value_tracker.average),
+            u"param_sampled_avg", repr(param_sampled_tracker.averages),
+            u"param_sampled_cov", repr(param_sampled_tracker.covariance_matrix),
+            u"value_log_variance", str(value_tracker.log_variance),
+            u"value_log_secondary_variance",
+            str(value_tracker.secondary.log_variance)
+        ])
+    )
     communication_pipe.send(
-        (value_tracker, param_focus_tracker, debug_list, trace_list, samples))
+        (value_tracker, param_focus_tracker, debug_list, trace_list, samples)
+    )
```
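The Integrator changes above keep the worker protocol intact: the boss serializes the objective function with dill, pushes one tuple down a multiprocessing pipe, and reads back trackers plus logs. A minimal sketch of that round trip (simplified and hypothetical: `example_logweight` stands in for the real value_logweight function, and the traceback handling of `try_estimate_nd` is omitted):

```python
import multiprocessing

import dill


def example_logweight(trace, first, second):
    """Hypothetical stand-in for the dilled value_logweight function."""
    return first + second, 0.0


def worker_main(pipe):
    """Receive one work item, send one result.

    Mirrors the shape of the (dimension, dilled_function,
    focus_tracker, max_samples) protocol shown in the diff.
    """
    dimension, dilled_function, tracker, max_samples = pipe.recv()
    function = dill.loads(dilled_function)
    value, log_weight = function(lambda name, value: None, 0.25, 0.25)
    pipe.send((value, tracker, [], [], 1))


if __name__ == "__main__":
    boss_end, worker_end = multiprocessing.Pipe()
    worker = multiprocessing.Process(target=worker_main, args=(worker_end,))
    worker.daemon = True
    worker.start()
    boss_end.send((2, dill.dumps(example_logweight), None, 100))
    print(boss_end.recv())  # (0.5, None, [], [], 1)
```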
```diff
diff --git a/resources/libraries/python/PLRsearch/PLRsearch.py b/resources/libraries/python/PLRsearch/PLRsearch.py
index b7c9344391..e20d293d3c 100644
--- a/resources/libraries/python/PLRsearch/PLRsearch.py
+++ b/resources/libraries/python/PLRsearch/PLRsearch.py
@@ -17,20 +17,22 @@ import logging
 import math
 import multiprocessing
 import time
+
 from collections import namedtuple
 
 import dill
+
 from scipy.special import erfcx, erfc
 
 # TODO: Teach FD.io CSIT to use multiple dirs in PYTHONPATH,
 # then switch to absolute imports within PLRsearch package.
 # Current usage of relative imports is just a short term workaround.
 from . import Integrator
-from .log_plus import log_plus, log_minus
 from . import stat_trackers
+from .log_plus import log_plus, log_minus
 
 
-class PLRsearch(object):
+class PLRsearch:
     """A class to encapsulate data relevant for the search method.
 
     The context is performance testing of packet processing systems.
@@ -41,7 +43,7 @@ class PLRsearch(object):
 
     Two constants are stored as class fields for speed.
 
-    Method othed than search (and than __init__)
+    Method other than search (and than __init__)
     are just internal code structure.
 
     TODO: Those method names should start with underscore then.
@@ -168,20 +170,23 @@ class PLRsearch(object):
         stop_time = time.time() + self.timeout
         min_rate = float(min_rate)
         max_rate = float(max_rate)
-        logging.info("Started search with min_rate %(min)r, max_rate %(max)r",
-                     {"min": min_rate, "max": max_rate})
+        logging.info(
+            f"Started search with min_rate {min_rate!r}, "
+            f"max_rate {max_rate!r}"
+        )
         trial_result_list = list()
         trial_number = self.trial_number_offset
         focus_trackers = (None, None)
         transmit_rate = (min_rate + max_rate) / 2.0
         lossy_loads = [max_rate]
-        zeros = 0  # How many cosecutive zero loss results are happening.
+        zeros = 0  # How many consecutive zero loss results are happening.
         while 1:
             trial_number += 1
-            logging.info("Trial %(number)r", {"number": trial_number})
+            logging.info(f"Trial {trial_number!r}")
             results = self.measure_and_compute(
                 self.trial_duration_per_trial * trial_number, transmit_rate,
-                trial_result_list, min_rate, max_rate, focus_trackers)
+                trial_result_list, min_rate, max_rate, focus_trackers
+            )
             measurement, average, stdev, avg1, avg2, focus_trackers = results
             zeros += 1
             # TODO: Ratio of fill rate to drain rate seems to have
@@ -212,9 +217,10 @@ class PLRsearch(object):
             # in order to get to usable loses at higher loads.
             if len(lossy_loads) > 3:
                 lossy_loads = lossy_loads[3:]
-            logging.debug("Zeros %(z)r orig %(o)r next %(n)r loads %(s)r",
-                          {"z": zeros, "o": (avg1 + avg2) / 2.0,
-                           "n": next_load, "s": lossy_loads})
+            logging.debug(
+                f"Zeros {zeros!r} orig {(avg1 + avg2) / 2.0!r} "
+                f"next {next_load!r} loads {lossy_loads!r}"
+            )
             transmit_rate = min(max_rate, max(min_rate, next_load))
 
     @staticmethod
@@ -255,21 +261,22 @@ class PLRsearch(object):
         # TODO: chi is from https://en.wikipedia.org/wiki/Nondimensionalization
         chi = (load - mrr) / spread
         chi0 = -mrr / spread
-        trace("stretch: load", load)
-        trace("mrr", mrr)
-        trace("spread", spread)
-        trace("chi", chi)
-        trace("chi0", chi0)
+        trace(u"stretch: load", load)
+        trace(u"mrr", mrr)
+        trace(u"spread", spread)
+        trace(u"chi", chi)
+        trace(u"chi0", chi0)
         if chi > 0:
             log_lps = math.log(
-                load - mrr + (log_plus(0, -chi) - log_plus(0, chi0)) * spread)
-            trace("big loss direct log_lps", log_lps)
+                load - mrr + (log_plus(0, -chi) - log_plus(0, chi0)) * spread
+            )
+            trace(u"big loss direct log_lps", log_lps)
         else:
             two_positive = log_plus(chi, 2 * chi0 - log_2)
             two_negative = log_plus(chi0, 2 * chi - log_2)
             if two_positive <= two_negative:
                 log_lps = log_minus(chi, chi0) + log_spread
-                trace("small loss crude log_lps", log_lps)
+                trace(u"small loss crude log_lps", log_lps)
                 return log_lps
             two = log_minus(two_positive, two_negative)
             three_positive = log_plus(two_positive, 3 * chi - log_3)
@@ -277,11 +284,11 @@ class PLRsearch(object):
             three = log_minus(three_positive, three_negative)
             if two == three:
                 log_lps = two + log_spread
-                trace("small loss approx log_lps", log_lps)
+                trace(u"small loss approx log_lps", log_lps)
             else:
                 log_lps = math.log(log_plus(0, chi) - log_plus(0, chi0))
                 log_lps += log_spread
-                trace("small loss direct log_lps", log_lps)
+                trace(u"small loss direct log_lps", log_lps)
             return log_lps
 
     @staticmethod
@@ -320,26 +327,26 @@ class PLRsearch(object):
         # TODO: The stretch sign is just to have less minuses. Worth changing?
         chi = (mrr - load) / spread
         chi0 = mrr / spread
-        trace("Erf: load", load)
-        trace("mrr", mrr)
-        trace("spread", spread)
-        trace("chi", chi)
-        trace("chi0", chi0)
+        trace(u"Erf: load", load)
+        trace(u"mrr", mrr)
+        trace(u"spread", spread)
+        trace(u"chi", chi)
+        trace(u"chi0", chi0)
         if chi >= -1.0:
-            trace("positive, b roughly bigger than m", None)
+            trace(u"positive, b roughly bigger than m", None)
             if chi > math.exp(10):
                 first = PLRsearch.log_xerfcx_10 + 2 * (math.log(chi) - 10)
-                trace("approximated first", first)
+                trace(u"approximated first", first)
             else:
                 first = math.log(PLRsearch.xerfcx_limit - chi * erfcx(chi))
-                trace("exact first", first)
+                trace(u"exact first", first)
             first -= chi * chi
             second = math.log(PLRsearch.xerfcx_limit - chi * erfcx(chi0))
             second -= chi0 * chi0
             intermediate = log_minus(first, second)
-            trace("first", first)
+            trace(u"first", first)
         else:
-            trace("negative, b roughly smaller than m", None)
+            trace(u"negative, b roughly smaller than m", None)
             exp_first = PLRsearch.xerfcx_limit + chi * erfcx(-chi)
             exp_first *= math.exp(-chi * chi)
             exp_first -= 2 * chi
@@ -350,11 +357,11 @@ class PLRsearch(object):
             second = math.log(PLRsearch.xerfcx_limit - chi * erfcx(chi0))
             second -= chi0 * chi0
             intermediate = math.log(exp_first - math.exp(second))
-            trace("exp_first", exp_first)
-            trace("second", second)
-            trace("intermediate", intermediate)
+            trace(u"exp_first", exp_first)
+            trace(u"second", second)
+            trace(u"intermediate", intermediate)
         result = intermediate + math.log(spread) - math.log(erfc(-chi0))
-        trace("result", result)
+        trace(u"result", result)
         return result
 
     @staticmethod
@@ -385,7 +392,7 @@ class PLRsearch(object):
         :type lfit_func: Function from 3 floats to float.
         :type min_rate: float
         :type max_rate: float
-        :type log_lps_target: float
+        :type loss_ratio_target: float
         :type mrr: float
         :type spread: float
         :returns: Load [pps] which achieves the target with given parameters.
@@ -397,17 +404,17 @@ class PLRsearch(object):
         loss_ratio = -1
         while loss_ratio != loss_ratio_target:
             rate = (rate_hi + rate_lo) / 2.0
-            if rate == rate_hi or rate == rate_lo:
+            if rate in (rate_hi, rate_lo):
                 break
             loss_rate = math.exp(lfit_func(trace, rate, mrr, spread))
             loss_ratio = loss_rate / rate
             if loss_ratio > loss_ratio_target:
-                trace("halving down", rate)
+                trace(u"halving down", rate)
                 rate_hi = rate
             elif loss_ratio < loss_ratio_target:
-                trace("halving up", rate)
+                trace(u"halving up", rate)
                 rate_lo = rate
-        trace("found", rate)
+        trace(u"found", rate)
         return rate
 
     @staticmethod
@@ -428,36 +435,39 @@ class PLRsearch(object):
         :param trace: A multiprocessing-friendly logging function (closure).
         :param lfit_func: Fitting function, typically lfit_spread or lfit_erf.
-        :param result_list: List of trial measurement results.
+        :param trial_result_list: List of trial measurement results.
         :param mrr: The mrr parameter for the fitting function.
         :param spread: The spread parameter for the fittinmg function.
         :type trace: function (str, object) -> None
         :type lfit_func: Function from 3 floats to float.
-        :type result_list: list of MLRsearch.ReceiveRateMeasurement
+        :type trial_result_list: list of MLRsearch.ReceiveRateMeasurement
         :type mrr: float
         :type spread: float
         :returns: Logarithm of result weight for given function and parameters.
         :rtype: float
         """
         log_likelihood = 0.0
-        trace("log_weight for mrr", mrr)
-        trace("spread", spread)
+        trace(u"log_weight for mrr", mrr)
+        trace(u"spread", spread)
         for result in trial_result_list:
-            trace("for tr", result.target_tr)
-            trace("lc", result.loss_count)
-            trace("d", result.duration)
+            trace(u"for tr", result.target_tr)
+            trace(u"lc", result.loss_count)
+            trace(u"d", result.duration)
             log_avg_loss_per_second = lfit_func(
-                trace, result.target_tr, mrr, spread)
+                trace, result.target_tr, mrr, spread
+            )
             log_avg_loss_per_trial = (
-                log_avg_loss_per_second + math.log(result.duration))
+                log_avg_loss_per_second + math.log(result.duration)
+            )
             # Poisson probability computation works nice for logarithms.
             log_trial_likelihood = (
                 result.loss_count * log_avg_loss_per_trial
-                - math.exp(log_avg_loss_per_trial))
+                - math.exp(log_avg_loss_per_trial)
+            )
             log_trial_likelihood -= math.lgamma(1 + result.loss_count)
             log_likelihood += log_trial_likelihood
-            trace("avg_loss_per_trial", math.exp(log_avg_loss_per_trial))
-            trace("log_trial_likelihood", log_trial_likelihood)
+            trace(u"avg_loss_per_trial", math.exp(log_avg_loss_per_trial))
+            trace(u"log_trial_likelihood", log_trial_likelihood)
         return log_likelihood
 
     def measure_and_compute(
@@ -512,12 +522,11 @@ class PLRsearch(object):
         :rtype: _ComputeResult
         """
         logging.debug(
-            "measure_and_compute started with self %(self)r, trial_duration "
-            "%(dur)r, transmit_rate %(tr)r, trial_result_list %(trl)r, "
-            "max_rate %(mr)r, focus_trackers %(track)r, max_samples %(ms)r",
-            {"self": self, "dur": trial_duration, "tr": transmit_rate,
-             "trl": trial_result_list, "mr": max_rate, "track": focus_trackers,
-             "ms": max_samples})
+            f"measure_and_compute started with self {self!r}, trial_duration "
+            f"{trial_duration!r}, transmit_rate {transmit_rate!r}, "
+            f"trial_result_list {trial_result_list!r}, max_rate {max_rate!r}, "
+            f"focus_trackers {focus_trackers!r}, max_samples {max_samples!r}"
+        )
         # Preparation phase.
         dimension = 2
         stretch_focus_tracker, erf_focus_tracker = focus_trackers
@@ -536,11 +545,10 @@ class PLRsearch(object):
             start computation, return the boss pipe end.
 
             :param fitting_function: lfit_erf or lfit_stretch.
-            :param bias_avg: Tuple of floats to start searching around.
-            :param bias_cov: Covariance matrix defining initial focus shape.
+            :param focus_tracker: Tracker initialized to speed up the numeric
+                computation.
             :type fitting_function: Function from 3 floats to float.
-            :type bias_avg: 2-tuple of floats
-            :type bias_cov: 2-tuple of 2-tuples of floats
+            :type focus_tracker: None or stat_trackers.VectorStatTracker
            :returns: Boss end of communication pipe.
            :rtype: multiprocessing.Connection
            """
@@ -579,27 +587,31 @@ class PLRsearch(object):
                 mrr = max_rate * (1.0 / (x_mrr + 1.0) - 0.5) + 1.0
                 spread = math.exp((x_spread + 1.0) / 2.0 * math.log(mrr))
                 logweight = self.log_weight(
-                    trace, fitting_function, trial_result_list, mrr, spread)
-                value = math.log(self.find_critical_rate(
-                    trace, fitting_function, min_rate, max_rate,
-                    self.packet_loss_ratio_target, mrr, spread))
+                    trace, fitting_function, trial_result_list, mrr, spread
+                )
+                value = math.log(
+                    self.find_critical_rate(
+                        trace, fitting_function, min_rate, max_rate,
+                        self.packet_loss_ratio_target, mrr, spread
+                    )
+                )
                 return value, logweight
 
             dilled_function = dill.dumps(value_logweight_func)
             boss_pipe_end, worker_pipe_end = multiprocessing.Pipe()
             boss_pipe_end.send(
-                (dimension, dilled_function, focus_tracker, max_samples))
+                (dimension, dilled_function, focus_tracker, max_samples)
+            )
             worker = multiprocessing.Process(
-                target=Integrator.try_estimate_nd, args=(
-                    worker_pipe_end, 10.0, self.trace_enabled))
+                target=Integrator.try_estimate_nd,
+                args=(worker_pipe_end, 10.0, self.trace_enabled)
+            )
             worker.daemon = True
             worker.start()
             return boss_pipe_end
 
-        erf_pipe = start_computing(
-            self.lfit_erf, erf_focus_tracker)
-        stretch_pipe = start_computing(
-            self.lfit_stretch, stretch_focus_tracker)
+        erf_pipe = start_computing(self.lfit_erf, erf_focus_tracker)
+        stretch_pipe = start_computing(self.lfit_stretch, stretch_focus_tracker)
 
         # Measurement phase.
         measurement = self.measurer.measure(trial_duration, transmit_rate)
@@ -623,38 +635,38 @@ class PLRsearch(object):
             """
             pipe.send(None)
             if not pipe.poll(10.0):
-                raise RuntimeError(
-                    "Worker {name} did not finish!".format(name=name))
+                raise RuntimeError(f"Worker {name} did not finish!")
             result_or_traceback = pipe.recv()
             try:
                 value_tracker, focus_tracker, debug_list, trace_list, sampls = (
-                    result_or_traceback)
+                    result_or_traceback
+                )
             except ValueError:
                 raise RuntimeError(
-                    "Worker {name} failed with the following traceback:\n{tr}"
-                    .format(name=name, tr=result_or_traceback))
-            logging.info("Logs from worker %(name)r:", {"name": name})
+                    f"Worker {name} failed with the following traceback:\n"
+                    f"{result_or_traceback}"
+                )
+            logging.info(f"Logs from worker {name!r}:")
             for message in debug_list:
                 logging.info(message)
             for message in trace_list:
                 logging.debug(message)
-            logging.debug("trackers: value %(val)r focus %(foc)r", {
-                "val": value_tracker, "foc": focus_tracker})
+            logging.debug(
+                f"trackers: value {value_tracker!r} focus {focus_tracker!r}"
+            )
             return _PartialResult(value_tracker, focus_tracker, sampls)
 
-        stretch_result = stop_computing("stretch", stretch_pipe)
-        erf_result = stop_computing("erf", erf_pipe)
+        stretch_result = stop_computing(u"stretch", stretch_pipe)
+        erf_result = stop_computing(u"erf", erf_pipe)
 
         result = PLRsearch._get_result(measurement, stretch_result, erf_result)
         logging.info(
-            "measure_and_compute finished with trial result %(res)r "
-            "avg %(avg)r stdev %(stdev)r stretch %(a1)r erf %(a2)r "
-            "new trackers %(nt)r old trackers %(ot)r stretch samples %(ss)r "
-            "erf samples %(es)r",
-            {"res": result.measurement,
-             "avg": result.avg, "stdev": result.stdev,
-             "a1": result.stretch_exp_avg, "a2": result.erf_exp_avg,
-             "nt": result.trackers, "ot": old_trackers,
-             "ss": stretch_result.samples, "es": erf_result.samples})
+            f"measure_and_compute finished with trial result "
+            f"{result.measurement!r} avg {result.avg!r} stdev {result.stdev!r} "
+            f"stretch {result.stretch_exp_avg!r} erf {result.erf_exp_avg!r} "
f"new trackers {result.trackers!r} old trackers {old_trackers!r} " + f"stretch samples {stretch_result.samples!r} erf samples " + f"{erf_result.samples!r}" + ) return result @staticmethod @@ -692,7 +704,8 @@ class PLRsearch(object): # Named tuples, for multiple local variables to be passed as return value. _PartialResult = namedtuple( - "_PartialResult", "value_tracker focus_tracker samples") + u"_PartialResult", u"value_tracker focus_tracker samples" +) """Two stat trackers and sample counter. :param value_tracker: Tracker for the value (critical load) being integrated. @@ -704,8 +717,9 @@ _PartialResult = namedtuple( """ _ComputeResult = namedtuple( - "_ComputeResult", - "measurement avg stdev stretch_exp_avg erf_exp_avg trackers") + u"_ComputeResult", + u"measurement avg stdev stretch_exp_avg erf_exp_avg trackers" +) """Measurement, 4 computation result values, pair of trackers. :param measurement: The trial measurement result obtained during computation. diff --git a/resources/libraries/python/PLRsearch/log_plus.py b/resources/libraries/python/PLRsearch/log_plus.py index 1c802a5599..62378f6f2c 100644 --- a/resources/libraries/python/PLRsearch/log_plus.py +++ b/resources/libraries/python/PLRsearch/log_plus.py @@ -24,7 +24,7 @@ functions of this module use None as -inf. TODO: Figure out a more performant way of handling -inf. -The functions handle the common task of adding or substracting +The functions handle the common task of adding or subtracting two numbers where both operands and the result is given in logarithm form. There are conditionals to make sure overflow does not happen (if possible) during the computation.""" @@ -33,7 +33,7 @@ import math def log_plus(first, second): - """Return logarithm of the sum of two exponentials. + """Return logarithm of the sum of two exponents. Basically math.log(math.exp(first) + math.exp(second)) which avoids overflow and uses None as math.log(0.0). @@ -47,19 +47,19 @@ def log_plus(first, second): :returns: Logarithm of the sum (or None if zero). :rtype: float """ - if first is None: return second if second is None: return first if second > first: - return second + math.log(1.0 + math.exp(first - second)) + retval = second + math.log(1.0 + math.exp(first - second)) else: - return first + math.log(1.0 + math.exp(second - first)) + retval = first + math.log(1.0 + math.exp(second - first)) + return retval def log_minus(first, second): - """Return logarithm of the difference of two exponentials. + """Return logarithm of the difference of two exponents. Basically math.log(math.exp(first) - math.exp(second)) which avoids overflow and uses None as math.log(0.0). @@ -75,18 +75,18 @@ def log_minus(first, second): :rtype: float :raises RuntimeError: If the difference would be non-positive. 
""" - if first is None: - raise RuntimeError("log_minus: does not suport None first") + raise RuntimeError(u"log_minus: does not support None first") if second is None: return first if second >= first: - raise RuntimeError("log_minus: first has to be bigger than second") + raise RuntimeError(u"log_minus: first has to be bigger than second") factor = -math.expm1(second - first) if factor <= 0.0: - raise RuntimeError("log_minus: non-positive number to log") + msg = u"log_minus: non-positive number to log" else: return first + math.log(factor) + raise RuntimeError(msg) def safe_exp(log_value): diff --git a/resources/libraries/python/PLRsearch/stat_trackers.py b/resources/libraries/python/PLRsearch/stat_trackers.py index 58ad98fd2e..2a7a05cae6 100644 --- a/resources/libraries/python/PLRsearch/stat_trackers.py +++ b/resources/libraries/python/PLRsearch/stat_trackers.py @@ -32,7 +32,7 @@ import numpy from .log_plus import log_plus, safe_exp -class ScalarStatTracker(object): +class ScalarStatTracker: """Class for tracking one-dimensional samples. Variance of one-dimensional data cannot be negative, @@ -61,13 +61,11 @@ class ScalarStatTracker(object): def __repr__(self): """Return string, which interpreted constructs state of self. - :returns: Expression contructing an equivalent instance. + :returns: Expression constructing an equivalent instance. :rtype: str """ - return ("ScalarStatTracker(log_sum_weight={lsw!r},average={a!r}," - "log_variance={lv!r})".format( - lsw=self.log_sum_weight, a=self.average, - lv=self.log_variance)) + return f"ScalarStatTracker(log_sum_weight={self.log_sum_weight!r}," \ + f"average={self.average!r},log_variance={self.log_variance!r})" def copy(self): """Return new ScalarStatTracker instance with the same state as self. @@ -79,7 +77,8 @@ class ScalarStatTracker(object): :rtype: ScalarStatTracker """ return ScalarStatTracker( - self.log_sum_weight, self.average, self.log_variance) + self.log_sum_weight, self.average, self.log_variance + ) def add(self, scalar_value, log_weight=0.0): """Return updated stats corresponding to addition of another sample. @@ -134,7 +133,6 @@ class ScalarDualStatTracker(ScalarStatTracker): One typical use is for Monte Carlo integrator to decide whether the partial sums so far are reliable enough. """ - def __init__( self, log_sum_weight=None, average=0.0, log_variance=None, log_sum_secondary_weight=None, secondary_average=0.0, @@ -168,7 +166,8 @@ class ScalarDualStatTracker(ScalarStatTracker): # so in case of diamond inheritance mismatch would be probable. 
```diff
diff --git a/resources/libraries/python/PLRsearch/stat_trackers.py b/resources/libraries/python/PLRsearch/stat_trackers.py
index 58ad98fd2e..2a7a05cae6 100644
--- a/resources/libraries/python/PLRsearch/stat_trackers.py
+++ b/resources/libraries/python/PLRsearch/stat_trackers.py
@@ -32,7 +32,7 @@ import numpy
 
 from .log_plus import log_plus, safe_exp
 
 
-class ScalarStatTracker(object):
+class ScalarStatTracker:
     """Class for tracking one-dimensional samples.
 
     Variance of one-dimensional data cannot be negative,
@@ -61,13 +61,11 @@ class ScalarStatTracker(object):
     def __repr__(self):
         """Return string, which interpreted constructs state of self.
 
-        :returns: Expression contructing an equivalent instance.
+        :returns: Expression constructing an equivalent instance.
         :rtype: str
         """
-        return ("ScalarStatTracker(log_sum_weight={lsw!r},average={a!r},"
-                "log_variance={lv!r})".format(
-                    lsw=self.log_sum_weight, a=self.average,
-                    lv=self.log_variance))
+        return f"ScalarStatTracker(log_sum_weight={self.log_sum_weight!r}," \
+            f"average={self.average!r},log_variance={self.log_variance!r})"
 
     def copy(self):
         """Return new ScalarStatTracker instance with the same state as self.
@@ -79,7 +77,8 @@ class ScalarStatTracker(object):
         :rtype: ScalarStatTracker
         """
         return ScalarStatTracker(
-            self.log_sum_weight, self.average, self.log_variance)
+            self.log_sum_weight, self.average, self.log_variance
+        )
 
     def add(self, scalar_value, log_weight=0.0):
         """Return updated stats corresponding to addition of another sample.
@@ -134,7 +133,6 @@ class ScalarDualStatTracker(ScalarStatTracker):
     One typical use is for Monte Carlo integrator to decide whether
     the partial sums so far are reliable enough.
     """
-
     def __init__(
             self, log_sum_weight=None, average=0.0, log_variance=None,
             log_sum_secondary_weight=None, secondary_average=0.0,
@@ -168,7 +166,8 @@ class ScalarDualStatTracker(ScalarStatTracker):
         # so in case of diamond inheritance mismatch would be probable.
         ScalarStatTracker.__init__(self, log_sum_weight, average, log_variance)
         self.secondary = ScalarStatTracker(
-            log_sum_secondary_weight, secondary_average, log_secondary_variance)
+            log_sum_secondary_weight, secondary_average, log_secondary_variance
+        )
         self.max_log_weight = max_log_weight
 
     def __repr__(self):
@@ -178,14 +177,12 @@ class ScalarDualStatTracker(ScalarStatTracker):
         :rtype: str
         """
         sec = self.secondary
-        return (
-            "ScalarDualStatTracker(log_sum_weight={lsw!r},average={a!r},"
-            "log_variance={lv!r},log_sum_secondary_weight={lssw!r},"
-            "secondary_average={sa!r},log_secondary_variance={lsv!r},"
-            "max_log_weight={mlw!r})".format(
-                lsw=self.log_sum_weight, a=self.average, lv=self.log_variance,
-                lssw=sec.log_sum_weight, sa=sec.average, lsv=sec.log_variance,
-                mlw=self.max_log_weight))
+        return f"ScalarDualStatTracker(log_sum_weight={self.log_sum_weight!r},"\
+            f"average={self.average!r},log_variance={self.log_variance!r}," \
+            f"log_sum_secondary_weight={sec.log_sum_weight!r}," \
+            f"secondary_average={sec.average!r}," \
+            f"log_secondary_variance={sec.log_variance!r}," \
+            f"max_log_weight={self.max_log_weight!r})"
 
     def add(self, scalar_value, log_weight=0.0):
         """Return updated both stats after addition of another sample.
@@ -209,7 +206,6 @@ class ScalarDualStatTracker(ScalarStatTracker):
             primary.add(scalar_value, log_weight)
         return self
 
-
     def get_pessimistic_variance(self):
         """Return estimate of variance reflecting weight effects.
@@ -231,7 +227,7 @@ class ScalarDualStatTracker(ScalarStatTracker):
         return var_combined
 
 
-class VectorStatTracker(object):
+class VectorStatTracker:
     """Class for tracking multi-dimensional samples.
 
     Contrary to one-dimensional data, multi-dimensional covariance matrix
@@ -248,11 +244,11 @@ class VectorStatTracker(object):
     def __init__(
             self, dimension=2, log_sum_weight=None, averages=None,
             covariance_matrix=None):
-        """Initialize new tracker instance, two-dimenstional empty by default.
+        """Initialize new tracker instance, two-dimensional empty by default.
 
         If any of latter two arguments is None,
         it means the tracker state is invalid. Use reset method
-        to create empty tracker of constructed dimentionality.
+        to create empty tracker of constructed dimensionality.
 
         :param dimension: Number of scalar components of samples.
         :param log_sum_weight: Natural logarithm of sum of weights
@@ -273,14 +269,13 @@ class VectorStatTracker(object):
     def __repr__(self):
         """Return string, which interpreted constructs state of self.
 
-        :returns: Expression contructing an equivalent instance.
+        :returns: Expression constructing an equivalent instance.
         :rtype: str
         """
-        return (
-            "VectorStatTracker(dimension={d!r},log_sum_weight={lsw!r},"
-            "averages={a!r},covariance_matrix={cm!r})".format(
-                d=self.dimension, lsw=self.log_sum_weight, a=self.averages,
-                cm=self.covariance_matrix))
+        return f"VectorStatTracker(dimension={self.dimension!r}," \
+            f"log_sum_weight={self.log_sum_weight!r}," \
+            f"averages={self.averages!r}," \
+            f"covariance_matrix={self.covariance_matrix!r})"
 
     def copy(self):
         """Return new instance with the same state as self.
@@ -293,7 +288,8 @@ class VectorStatTracker(object):
         """
         return VectorStatTracker(
             self.dimension, self.log_sum_weight, self.averages[:],
-            copy.deepcopy(self.covariance_matrix))
+            copy.deepcopy(self.covariance_matrix)
+        )
 
     def reset(self):
         """Return state set to empty data of proper dimensionality.
@@ -303,8 +299,9 @@ class VectorStatTracker(object):
         """
         self.averages = [0.0 for _ in range(self.dimension)]
         # TODO: Examine whether we can gain speed by tracking triangle only.
-        self.covariance_matrix = [[0.0 for _ in range(self.dimension)]
-                                  for _ in range(self.dimension)]
+        self.covariance_matrix = [
+            [0.0 for _ in range(self.dimension)] for _ in range(self.dimension)
+        ]
         # TODO: In Python3, list comprehensions are generators,
         # so they are not indexable. Put list() when converting.
         return self
@@ -338,10 +335,12 @@ class VectorStatTracker(object):
         old_log_sum_weight = self.log_sum_weight
         old_averages = self.averages
         if not old_averages:
-            shift = [0.0 for index in range(dimension)]
+            shift = [0.0 for _ in range(dimension)]
         else:
-            shift = [vector_value[index] - old_averages[index]
-                     for index in range(dimension)]
+            shift = [
+                vector_value[index] - old_averages[index]
+                for index in range(dimension)
+            ]
         if old_log_sum_weight is None:
             # First sample.
             self.log_sum_weight = log_weight
@@ -352,8 +351,10 @@ class VectorStatTracker(object):
         new_log_sum_weight = log_plus(old_log_sum_weight, log_weight)
         data_ratio = math.exp(old_log_sum_weight - new_log_sum_weight)
         sample_ratio = math.exp(log_weight - new_log_sum_weight)
-        new_averages = [old_averages[index] + shift[index] * sample_ratio
-                        for index in range(dimension)]
+        new_averages = [
+            old_averages[index] + shift[index] * sample_ratio
+            for index in range(dimension)
+        ]
         # It is easier to update covariance matrix in-place.
         for second in range(dimension):
             for first in range(dimension):
@@ -375,7 +376,7 @@ class VectorStatTracker(object):
 
         If the weight of the incoming sample is far bigger
         than the weight of all the previous data together,
-        convariance matrix would suffer from underflows.
+        covariance matrix would suffer from underflow.
         To avoid that, this method manipulates both weights
         before calling add().
```
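As an aid for tracing the tracker arithmetic these hunks reindent: the add methods implement a weighted incremental (Welford-style) update, with the sum of weights kept as a logarithm in the real code. A linear-weight sketch of the same update rule (simplified: no log-domain weights and no invalid-state handling beyond zero initialization):

```python
class PlainScalarTracker:
    """Weighted running mean and (population) variance, linear weights."""

    def __init__(self):
        self.sum_weight = 0.0
        self.average = 0.0
        self.variance = 0.0

    def add(self, value, weight=1.0):
        new_sum_weight = self.sum_weight + weight
        shift = value - self.average
        sample_ratio = weight / new_sum_weight
        data_ratio = self.sum_weight / new_sum_weight
        self.average += shift * sample_ratio
        # Weighted Welford update: shrink old variance, add shifted term.
        self.variance = data_ratio * (
            self.variance + shift * shift * sample_ratio
        )
        self.sum_weight = new_sum_weight
        return self


tracker = PlainScalarTracker()
for value in [1.0, 2.0, 4.0]:
    tracker.add(value)
print(tracker.average, tracker.variance)  # 2.333..., 1.555...
```

Keeping the weights as logarithms, as the real trackers do, avoids overflow when importance weights span hundreds of orders of magnitude.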