// Repository: whylabs/whylogs-python
// File: proto/v0/v0_messages.proto
syntax = "proto3";

import "google/protobuf/wrappers.proto";

option java_package = "com.whylogs.v0.core.message";
option java_outer_classname = "Messages";
option java_multiple_files = true;

// Counters recorded for a single column; embedded as ColumnMessageV0.counters.
message CountersV0 {
  // Plain proto3 scalar: 0 on the wire is indistinguishable from "not set".
  int64 count = 1;

  // Wrapper message rather than a bare int64, so an absent value can be told
  // apart from an explicit 0.
  google.protobuf.Int64Value true_count = 2;
  // Deprecated: new code should neither set nor rely on this field.
  // NOTE(review): no replacement is visible in this file - confirm where
  // null counts are tracked now.
  google.protobuf.Int64Value null_count = 3 [deprecated = true];
}

// A type tag plus a ratio. Used by both SchemaMessageV0.inferred_type and
// ColumnMessageV0.inferred_type.
message InferredType {
  // Type categories. These value names are scoped to InferredType, so UNKNOWN
  // here is distinct from the top-level ModelType UNKNOWN.
  enum Type {
    // Zero value: what a reader sees when `type` was never set.
    UNKNOWN = 0;
    NULL = 1;
    FRACTIONAL = 2;
    INTEGRAL = 3;
    BOOLEAN = 4;
    STRING = 5;
  }

  Type type = 1;
  // NOTE(review): presumably the fraction of observed values that matched
  // `type`; the expected range is not stated here - confirm with the writer.
  double ratio = 2;
}

// Count/min/max/sum summary with double-typed extremes and sum.
// One of the two alternatives of the NumbersMessageV0 `numbers` oneof.
message DoublesMessage {
  int64 count = 1;
  // min, max and sum are plain proto3 doubles: an unset field reads back as
  // 0.0, so a reader cannot tell "no data" from a genuine 0.0 by these fields
  // alone.
  double min = 2;
  double max = 3;
  double sum = 4;
}

// Integer counterpart of DoublesMessage: same field names and numbers, with
// int64 min/max/sum.
// One of the two alternatives of the NumbersMessageV0 `numbers` oneof.
message LongsMessage {
  int64 count = 1;
  // int64 is varint-encoded: negative values always take 10 bytes. Switching
  // to sint64 would not be wire-compatible, so the type must stay as is.
  int64 min = 2;
  int64 max = 3;
  int64 sum = 4;
}

// Variance state: a count, a mean and a scaled-variance accumulator.
// Used by NumbersMessageV0.variance.
message VarianceMessage {
  int64 count = 1;
  // Holds the sample variance multiplied by (n-1), not the variance itself.
  // NOTE(review): assuming n is `count`, sample variance = sum / (count - 1);
  // confirm against the code that fills this message.
  double sum = 2; // sample variance * (n-1)
  double mean = 3;
}

// Carrier for a frequent-numbers sketch; used by
// NumbersMessageV0.frequent_numbers.
message FrequentNumbersSketchMessage {
  // Opaque bytes; the serialization format is not defined by this schema.
  bytes sketch = 1;
  // NOTE(review): presumably log2 of the sketch's maximum map size - confirm
  // against the sketch library in use.
  int32 lg_max_k = 2;
}

// Carrier for a frequent-items sketch; used by ColumnMessageV0.frequent_items.
// Has the same field layout as FrequentNumbersSketchMessage, but is a
// separate message type.
message FrequentItemsSketchMessageV0 {
  // Opaque bytes; the serialization format is not defined by this schema.
  bytes sketch = 1;
  // NOTE(review): presumably log2 of the sketch's maximum map size - confirm
  // against the sketch library in use.
  int32 lg_max_k = 2;
}

// Numeric summary. Used for ColumnMessageV0.numbers, for the `length` and
// `token_length` fields of StringsMessageV0, as the CharPosMessage map value,
// and as the cell type of ScoreMatrixMessage.scores.
message NumbersMessageV0 {
  VarianceMessage variance = 1;
  // At most one of `doubles` / `longs` is populated; setting one clears the
  // other.
  oneof numbers {
    DoublesMessage doubles = 2;
    LongsMessage longs = 3;
  }

  // sketches
  // The three bytes fields below are opaque to this schema: their encoding is
  // decided by the code that writes them.
  // NOTE(review): both `theta` and `compact_theta` exist; which one readers
  // should prefer is not stated here - confirm.
  bytes histogram = 4;
  bytes theta = 5;
  bytes compact_theta = 6;
  FrequentNumbersSketchMessage frequent_numbers = 7;
}

// Character-position data; used by StringsMessageV0.char_pos_tracker.
message CharPosMessage {
  // NOTE(review): presumably the tracked characters packed into one string -
  // confirm with the writer.
  string char_list = 1;
  // String-keyed map of numeric summaries. Map iteration order is undefined,
  // so readers must not depend on it.
  // NOTE(review): keys are presumably individual characters and values the
  // statistics of their positions - confirm with the writer.
  map<string, NumbersMessageV0> char_pos_map = 2;
}

// String summary for a column; embedded as ColumnMessageV0.strings.
message StringsMessageV0 {
  // Plain proto3 scalar: 0 on the wire is indistinguishable from "not set".
  int64 count = 1;

  // sketches
  // `theta`, `items` and `compact_theta` are opaque bytes; their encoding is
  // not defined by this schema.
  bytes theta = 2;
  bytes items = 3;
  bytes compact_theta = 4;
  // Numeric summaries reused for string measurements.
  // NOTE(review): `length` is presumably the string length and `token_length`
  // the number of tokens per string; units and tokenization are not stated
  // here - confirm.
  NumbersMessageV0 length = 5;
  NumbersMessageV0 token_length = 6;
  CharPosMessage char_pos_tracker = 7;

}


// Type information for a column; embedded as ColumnMessageV0.schema.
message SchemaMessageV0 {
  // Integer-keyed counts. Enums cannot be map keys, hence int32.
  // NOTE(review): keys are presumably InferredType.Type numeric values -
  // confirm with the writer.
  // The name is camelCase, unlike the snake_case fields around it; renaming
  // it would change generated accessors, so it is kept.
  map<int32, int64> typeCounts = 1;
  InferredType inferred_type = 2;
}

// Everything recorded for one column. Appears as the map value of
// DatasetProfileMessageV0.columns and as the element of
// ColumnsChunkSegment.columns.
message ColumnMessageV0 {
  string name = 1;
  CountersV0 counters = 2;
  SchemaMessageV0 schema = 3;
  NumbersMessageV0 numbers = 4;
  StringsMessageV0 strings = 5;
  // An InferredType is also reachable through `schema.inferred_type`.
  // NOTE(review): which of the two is authoritative is not stated here -
  // confirm.
  InferredType inferred_type = 6;
  FrequentItemsSketchMessageV0 frequent_items = 7;
  // HllSketchMessageV0 is declared further down in this file.
  HllSketchMessageV0 cardinality_tracker = 8;
}

// Dataset-level properties. Embedded in DatasetProfileMessageV0.properties
// and DatasetMetadataSegment.properties.
message DatasetPropertiesV0 {
  uint32 schema_major_version = 1;
  uint32 schema_minor_version = 2;

  string session_id = 3;
  // Timestamps are raw int64 values, not google.protobuf.Timestamp.
  // NOTE(review): epoch and unit (seconds vs. milliseconds) are not stated
  // here - confirm with the writer before interpreting them.
  int64 session_timestamp = 4;
  int64 data_timestamp = 5;
  // Free-form string maps. Map iteration order is undefined.
  map<string, string> tags = 6;
  map<string, string> metadata = 7;
  // TODO: store other configuration here
}

// Label-by-label score matrix; embedded as ModelMetricsMessage.scoreMatrix.
message ScoreMatrixMessage {
  repeated string labels = 1;
  // NOTE(review): the three *_field strings presumably name the columns that
  // hold predictions, targets and scores - confirm with the writer.
  string prediction_field = 2;
  string target_field = 3;
  string score_field = 4;

  // Field numbers 5-9 are not used; `scores` sits at 10.
  // a flattened NxN matrix (N = len(labels))
  // NOTE(review): the flattening order (row-major vs. column-major) and
  // which axis is the prediction are not stated here - confirm.
  repeated NumbersMessageV0 scores = 10;
}

// Accumulators for regression metrics; embedded as
// ModelMetricsMessage.regressionMetrics.
message RegressionMetricsMessage{
  string prediction_field = 1;
  string target_field = 2;
  // Unsigned, unlike the int64 `count` fields used elsewhere in this file.
  uint64 count = 3;
  // NOTE(review): presumably the sum of absolute differences, the sum of
  // signed differences and the sum of squared differences between prediction
  // and target; the sign convention is not stated here - confirm.
  double sum_abs_diff = 4;
  double sum_diff = 5;
  double sum2_diff = 6;
}

// Model category stored in ModelMetricsMessage.modelType.
// Declared at file level, and no `package` statement appears in the visible
// part of this file, so these value names live in the file-level scope.
// Renaming them (for example adding a MODEL_TYPE_ prefix) would change
// generated code and JSON/text output, so they are left unprefixed.
enum ModelType {
  // Zero value: what a reader sees when the field was never set.
  UNKNOWN = 0;
  CLASSIFICATION = 1;
  REGRESSION = 2;
  EMBEDDINGS = 3;
  NLP= 4;
}

// Model metrics container; embedded as ModelProfileMessage.metrics.
// The fields are not in a oneof, so `scoreMatrix` and `regressionMetrics`
// can both be set regardless of `modelType`.
// Field names are camelCase, unlike most of this file; renaming them would
// change generated accessors, so they are kept.
message ModelMetricsMessage {
  ScoreMatrixMessage scoreMatrix = 1;
  ModelType modelType = 2;
  RegressionMetricsMessage regressionMetrics = 3;
}

// Model-level profile data; embedded as DatasetProfileMessageV0.modeProfile.
message ModelProfileMessage {
  repeated string output_fields = 1;
  // Reserving fields for ModelMessage
  // Field numbers 2-9 are left unused for that purpose; they are not declared
  // with `reserved`, so they remain available for future fields.

  ModelMetricsMessage metrics = 10;
}

// Top-level dataset profile: properties, per-column data and model data.
message DatasetProfileMessageV0 {
  DatasetPropertiesV0 properties = 1;
  // String-keyed map of column messages. Map iteration order is undefined.
  // NOTE(review): keys presumably equal ColumnMessageV0.name - confirm.
  map<string, ColumnMessageV0> columns = 2;
  // reserve other fields for dataset level data
  // Field numbers 3-9 are left unused for that purpose.
  // The field is spelled `modeProfile` although its type is
  // ModelProfileMessage. Renaming it would change generated accessors and the
  // JSON key, so the spelling must stay.
  ModelProfileMessage modeProfile = 10;
}

/**
 * The following section is for transmission and reconstruction of the dataset
 * in WhyLogs backend
 */
// A batch of column messages tagged with a marker; one alternative of the
// MessageSegment `item` oneof.
message ColumnsChunkSegment {
  // UUID is required to aggregate to the original message
  // This should map back to the original dataset
  string marker = 1;
  repeated ColumnMessageV0 columns = 2;
}

// Dataset properties tagged with a marker; one alternative of the
// MessageSegment `item` oneof.
message DatasetMetadataSegment {
  // NOTE(review): presumably the same UUID marker carried by
  // ColumnsChunkSegment.marker, used to group segments - confirm.
  string marker = 1;
  DatasetPropertiesV0 properties = 2;
}

// A segment of a dataset profile. Segments can be used to recompose the
// original object.
message MessageSegment {
  // A `marker` string is also present inside both payload messages below.
  // NOTE(review): presumably all three carry the same value - confirm.
  string marker = 1;
  // Exactly one payload kind per segment: setting one clears the other.
  oneof item {
    DatasetMetadataSegment metadata = 2;
    ColumnsChunkSegment columns = 3;
  }
}

// Carrier for an HLL sketch; used by ColumnMessageV0.cardinality_tracker.
message HllSketchMessageV0 {
  // Opaque bytes; the serialization format is not defined by this schema.
  bytes sketch = 1;
  // NOTE(review): presumably log2 of the sketch's k parameter - confirm
  // against the sketch library in use.
  int32 lg_k = 2;
}

// Carrier for a KLL floats sketch. No other message in the visible part of
// this file references this type.
message KllFloatsSketchMessage {
  // Opaque bytes; the serialization format is not defined by this schema.
  bytes sketch = 1;
  // Same name and number as HllSketchMessageV0.lg_k.
  // NOTE(review): confirm that a log2 parameter is what the writer actually
  // stores here for a KLL sketch.
  int32 lg_k = 2;
}