One thing to remember: if a statistic maps onto a negative number in linear space, its logarithm is undefined, so the statistic cannot be expressed in log space. Such operations are invalid, and you have to account for this before trying to retrieve any numbers from this program. I hope someone finds this useful!
package edu.jhu.clsp.ws11.rerank.utils;

import java.util.Arrays;

/**
 * Computes distributional statistics for a list of numbers. The numbers are assumed to be in
 * logarithmic space; all of the computation is done as if on the numbers converted from log to
 * linear space, and the results are returned again in log space.
 *
 * <p>Because a negative linear-space value has no logarithm, every statistic here is built from
 * the values returned by {@code LogMath.linearDifference} (presumably the log of the absolute
 * difference -- confirm against {@code LogMath}), so skew is a magnitude, not a signed quantity.
 *
 * @author Nate Glenn
 */
public class LogDistributionalStats {

    /** Sorted private copy of the input; normalized when requested. */
    private final double[] numbers;
    private final int N; // number of numbers input
    private final double logN; // log(N)
    private final double min;
    private final double median;
    private final double max;
    private final double mean;
    private final double avgAbsDeviation;
    private final double standardDeviation;
    private final double variance;
    private final double skew;
    private final double kurtosis;
    private final double sum;

    /**
     * Compute statistics on nums. If norm is true, then compute statistics after normalizing
     * the array, except for sum, min, mean, and max, which always describe the raw input.
     *
     * <p>Variance divides by {@code N - 1}; skew and kurtosis divide by {@code N}. With a single
     * input value, variance, standard deviation, skew and kurtosis are {@code NaN}.
     *
     * @param nums log-space values; the array is copied and never modified
     * @param norm whether to normalize the copy by the log-space sum before computing the
     *             median and the deviation-based statistics
     * @throws NullPointerException if {@code nums} is null
     * @throws ArrayIndexOutOfBoundsException if {@code nums} is empty
     */
    public LogDistributionalStats(double[] nums, boolean norm) {
        N = nums.length;
        // Must work on a copy so that sorting/normalizing never overwrites the input.
        numbers = Arrays.copyOf(nums, N);
        logN = Math.log(N);

        // Compute sum, mean, min, and max before normalization (if done at all).
        sum = sumPowersAsLinear(numbers, 1);
        mean = sum - logN;
        Arrays.sort(numbers);
        min = numbers[0];
        max = numbers[N - 1];

        // Deviations must be measured from the mean of the values actually being analysed.
        // After normalization that is no longer the raw mean, so it is recomputed from the
        // array itself rather than derived from an assumption about what minusAll does.
        double center = mean;
        if (norm) {
            // presumably subtracts sum from every element in place -- confirm in ArrayUtils
            ArrayUtils.minusAll(numbers, sum);
            center = sumPowersAsLinear(numbers, 1) - logN;
        }

        // Log of each element's linear-space distance from the center.
        double[] deviations = new double[N];
        for (int i = 0; i < N; i++) {
            deviations[i] = LogMath.linearDifference(center, numbers[i]);
        }

        avgAbsDeviation = sumPowersAsLinear(deviations, 1) - logN;

        if (N > 1) {
            // In log space, raising to the k-th power is multiplication by k; the powered
            // terms are then summed in linear space, never added as raw log values.
            variance = sumPowersAsLinear(deviations, 2) - Math.log(N - 1);
            standardDeviation = variance / 2;
            // log(sd^3) = log(var) + log(sd)
            skew = sumPowersAsLinear(deviations, 3) - (logN + variance + standardDeviation);
            // Don't do the "minus 3" (excess kurtosis) calculation here; see getKurtosis().
            kurtosis = sumPowersAsLinear(deviations, 4) - (logN + 2 * variance);
        } else {
            variance = Double.NaN;
            standardDeviation = Double.NaN;
            skew = Double.NaN;
            kurtosis = Double.NaN;
        }

        int mid = N / 2;
        if (N % 2 == 0) {
            // Linear-space average of the two middle elements.
            median = LogMath.addAsLinear(numbers[mid - 1], numbers[mid]) - Math.log(2);
        } else {
            median = numbers[mid];
        }
    }

    /**
     * Sums, in linear space, the given log-space values each raised to {@code power}.
     *
     * <p>The accumulator is seeded with the first term instead of {@code 0}: in log space
     * {@code 0} is log(1), so seeding with it would add a spurious 1 to the linear-space sum.
     *
     * @param logValues non-empty array of log-space values
     * @param power exponent applied to each value in linear space
     * @return log of the linear-space sum of the powered values
     */
    private static double sumPowersAsLinear(double[] logValues, int power) {
        double total = power * logValues[0];
        for (int i = 1; i < logValues.length; i++) {
            total = LogMath.addAsLinear(total, power * logValues[i]);
        }
        return total;
    }

    /** @return the smallest input value (log space, before any normalization) */
    public double getMin() {
        return min;
    }

    /** @return the largest input value (log space, before any normalization) */
    public double getMax() {
        return max;
    }

    /** @return log of the linear-space mean of the input (before any normalization) */
    public double getMean() {
        return mean;
    }

    /** @return log of the mean linear-space deviation from the mean */
    public double getAvgAbsDeviation() {
        return avgAbsDeviation;
    }

    /** @return log of the standard deviation, or {@code NaN} when only one value was given */
    public double getStandardDeviation() {
        return standardDeviation;
    }

    /** @return log of the variance (N - 1 denominator), or {@code NaN} for a single value */
    public double getVariance() {
        return variance;
    }

    /** @return log of the skew computed from deviation magnitudes, or {@code NaN} for a single value */
    public double getSkew() {
        return skew;
    }

    /** @return log of the linear-space sum of the input (before any normalization) */
    public double getSum() {
        return sum;
    }

    /**
     * Kurtosis is not calculated with any linear combinations (subtracting three).
     * This is because it is often impossible to convert this to log space, since
     * the final result is so often negative. If you want the minus three back again, you can
     * try to subtract it yourself and handle any exceptions (use LogMath.minusAsLinear()).
     *
     * @return log of the (non-excess) kurtosis, or {@code NaN} when only one value was given
     */
    public double getKurtosis() {
        return kurtosis;
    }

    /** @return the median in log space; for an even count, the linear-space average of the two middle values */
    public double getMedian() {
        return median;
    }
}
No comments:
Post a Comment