datavec/datavec-api/src/main/java/org/datavec/api/transform/analysis/counter/LongAnalysisCounter.java
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.datavec.api.transform.analysis.counter;
import com.tdunning.math.stats.TDigest;
import lombok.AllArgsConstructor;
import lombok.Data;
import org.datavec.api.transform.analysis.AnalysisCounter;
import org.datavec.api.writable.Writable;
/**
 * Accumulates summary statistics over a stream of {@code long} values.
 *
 * <p>Running statistics (min, max, sum, count, mean, variance) are delegated to a
 * {@link StatCounter}; this class additionally tracks how many values were zero, how many
 * were negative / non-negative, how often the current minimum and maximum were observed,
 * and a {@link TDigest} sketch of the distribution.
 *
 * <p>Values are handed to the underlying counter and digest as {@code double}, and the
 * min / max / sum accessors cast the {@code double} results back to {@code long}.
 * NOTE(review): this presumably loses precision for magnitudes that a double cannot
 * represent exactly - confirm whether that matters for callers.
 *
 * <p>The field order below defines the parameter order of the Lombok-generated
 * all-arguments constructor used by {@link #merge(LongAnalysisCounter)}.
 */
@AllArgsConstructor
@Data
public class LongAnalysisCounter implements AnalysisCounter<LongAnalysisCounter> {

    /** Running statistics over every value passed to {@link #add(Writable)}. */
    private StatCounter counter = new StatCounter();

    /** Number of added values equal to zero. */
    private long countZero = 0;

    /** Number of times the current minimum value has been observed. */
    private long countMinValue = 0;

    /** Number of times the current maximum value has been observed. */
    private long countMaxValue = 0;

    /** Number of added values that were {@code >= 0}; note that zero is included. */
    private long countPositive = 0;

    /** Number of added values that were {@code < 0}. */
    private long countNegative = 0;

    /**
     * A histogram structure that records a sketch of the distribution of added values.
     *
     * The compression argument (100 here) regulates how accuracy is traded for size. A value
     * of N gives quantile errors almost always less than 3/N, with considerably smaller
     * errors expected for extreme quantiles. Conversely, expect to track about 5 N centroids
     * for this accuracy.
     */
    private TDigest digest = TDigest.createDigest(100);

    /** Creates an empty counter using the default field initializers. */
    public LongAnalysisCounter() {}

    /** @return the minimum reported by the underlying counter, cast to {@code long} */
    public long getMinValueSeen() {
        return (long) counter.getMin();
    }

    /** @return the maximum reported by the underlying counter, cast to {@code long} */
    public long getMaxValueSeen() {
        return (long) counter.getMax();
    }

    /** @return the sum reported by the underlying counter, cast to {@code long} */
    public long getSum() {
        return (long) counter.getSum();
    }

    /** @return the number of values recorded by the underlying counter */
    public long getCountTotal() {
        return counter.getCount();
    }

    /**
     * @return the standard deviation from the underlying counter, requested with
     *         {@code false} (presumably selecting the sample rather than the population
     *         estimate - confirm against {@code StatCounter})
     */
    public double getSampleStdev() {
        return counter.getStddev(false);
    }

    /** @return the mean reported by the underlying counter */
    public double getMean() {
        return counter.getMean();
    }

    /**
     * @return the variance from the underlying counter, requested with {@code false}
     *         (presumably selecting the sample rather than the population estimate -
     *         confirm against {@code StatCounter})
     */
    public double getSampleVariance() {
        return counter.getVariance(false);
    }

    /**
     * Records one value in this counter.
     *
     * <p>The min / max occurrence counts are updated by comparing against the extremes seen
     * <em>before</em> this value is handed to the underlying counter.
     *
     * @param writable the value to record; read via {@code toLong()}
     * @return this counter, to allow chaining
     */
    @Override
    public LongAnalysisCounter add(Writable writable) {
        final long value = writable.toLong();

        if (value == 0) {
            countZero++;
        }

        // A new strict minimum restarts the occurrence count; a tie extends it.
        final long minBefore = getMinValueSeen();
        if (value < minBefore) {
            countMinValue = 1;
        } else if (value == minBefore) {
            countMinValue++;
        }

        // Same bookkeeping for the maximum.
        final long maxBefore = getMaxValueSeen();
        if (value > maxBefore) {
            countMaxValue = 1;
        } else if (value == maxBefore) {
            countMaxValue++;
        }

        // NOTE(review): zero lands in the "positive" bucket as well as in countZero -
        // confirm this is the intended meaning of countPositive.
        if (value < 0) {
            countNegative++;
        } else {
            countPositive++;
        }

        // Only now record the value, so the comparisons above saw the previous extremes.
        final double asDouble = (double) value;
        digest.add(asDouble);
        counter.add(asDouble);
        return this;
    }

    /**
     * Combines this counter with another one and returns the result as a new instance.
     *
     * <p>Note that this instance is modified as well: the other counter's digest is added
     * into this counter's digest, and that same digest object is handed to the returned
     * counter. The underlying {@code StatCounter} is combined through its own
     * {@code merge} method (whether that mutates this counter's statistics is not visible
     * here - check {@code StatCounter}).
     *
     * @param other the counter to merge into this one
     * @return a new counter holding the combined statistics
     */
    public LongAnalysisCounter merge(LongAnalysisCounter other) {
        // Occurrences of the minimum: sum on a tie, otherwise take the side holding the smaller min.
        final long otherMin = other.getMinValueSeen();
        final long thisMin = getMinValueSeen();
        final long mergedMinCount;
        if (thisMin < otherMin) {
            mergedMinCount = countMinValue;
        } else if (thisMin > otherMin) {
            mergedMinCount = other.getCountMinValue();
        } else {
            mergedMinCount = countMinValue + other.getCountMinValue();
        }

        // Occurrences of the maximum: sum on a tie, otherwise take the side holding the larger max.
        final long otherMax = other.getMaxValueSeen();
        final long thisMax = getMaxValueSeen();
        final long mergedMaxCount;
        if (thisMax > otherMax) {
            mergedMaxCount = countMaxValue;
        } else if (thisMax < otherMax) {
            mergedMaxCount = other.getCountMaxValue();
        } else {
            mergedMaxCount = countMaxValue + other.getCountMaxValue();
        }

        // Fold the other sketch into ours; the result shares this digest instance.
        digest.add(other.getDigest());

        final StatCounter mergedStats = counter.merge(other.getCounter());
        final long mergedZero = countZero + other.getCountZero();
        final long mergedPositive = countPositive + other.getCountPositive();
        final long mergedNegative = countNegative + other.getCountNegative();

        return new LongAnalysisCounter(mergedStats, mergedZero, mergedMinCount, mergedMaxCount,
                mergedPositive, mergedNegative, digest);
    }
}