// Repository: whylabs/whylogs-python
// File: proto/src/whylogs_messages.proto

syntax = "proto3";
import "google/protobuf/any.proto";

import "google/protobuf/struct.proto";

// Java code-generation settings; they do not affect the wire format.
// Generated Java classes are placed in this package.
option java_package = "com.whylogs.core.message";
// Name of the Java wrapper class generated for this file.
option java_outer_classname = "Messages";
// Emit each top-level message as its own .java file instead of nesting all of
// them inside the outer class.
option java_multiple_files = true;

// Wraps the `Type` enum so that a data-type classification can be carried as
// a message.
message DataType {
  // Data-type categories. The value names are unprefixed, so they live
  // directly in the `DataType` scope.
  // NOTE(review): `NULL` is also a C/C++ macro name; confirm C++ code
  // generation is not a target before relying on this value name there.
  enum Type {
    UNKNOWN = 0; // zero value: what an unset `type` field reads as
    NULL = 1;
    FRACTIONAL = 2;
    INTEGRAL = 3;
    BOOLEAN = 4;
    STRING = 5;
  }

  // The data-type category.
  Type type = 1;
}

// Carries an HLL sketch (presumably HyperLogLog — TODO confirm) as opaque
// bytes.
message HllSketchMessage {
  // Serialized sketch payload; this schema does not describe its encoding.
  bytes sketch = 1;
}

// Carries a frequent-items sketch as opaque bytes.
message FrequentItemsSketchMessage {
  // Serialized sketch payload; this schema does not describe its encoding.
  bytes sketch = 1;
}

// Carries a KLL sketch (presumably a quantiles sketch — TODO confirm) as
// opaque bytes.
message KllSketchMessage {
  // Serialized sketch payload; this schema does not describe its encoding.
  bytes sketch = 1;
}

// Carries a CPC sketch (presumably Compressed Probabilistic Counting — TODO
// confirm) as opaque bytes.
message CpcSketchMessage {
  // Serialized sketch payload; this schema does not describe its encoding.
  bytes sketch = 1;
}

// A single metric component: a numeric type ID plus at most one value chosen
// from the `value` oneof.
message MetricComponentMessage {
  // Type ID. It might determine how the value is serialized/deserialized
  // (serde) and which aggregation logic applies.
  uint32 type_id = 1;

  // The component's payload. Only one member can be set at a time; setting
  // one clears any other.
  oneof value {
    // first class citizens: scalar values and the dedicated sketch messages
    int64 n = 2;
    double d = 3;
    FrequentItemsSketchMessage frequent_items = 4;
    HllSketchMessage hll = 5;
    KllSketchMessage kll = 6;
    CpcSketchMessage cpc = 7;

    // extension points: untyped or loosely typed payloads.
    // Field numbers 8 and 9 are not assigned in this message.
    bytes serialized_bytes = 10;
    google.protobuf.Struct dataclass_param = 11;
    google.protobuf.Any msg = 12;
  }
}

// A metric expressed as a set of components keyed by string.
message MetricMessage {
  // Components of the metric. Keys are presumably component names — TODO
  // confirm against the writer. Map iteration order is not defined.
  map<string, MetricComponentMessage> metric_components = 1;
}

// The metric components recorded for a column, keyed by string.
// Structurally identical to `MetricMessage`.
message ColumnMessage {
  // Components for the column. Keys are presumably metric/component paths —
  // TODO confirm against the writer. Map iteration order is not defined.
  map<string, MetricComponentMessage> metric_components = 1;
}

// Dataset-level properties: schema version, timestamps, and free-form
// string key/value annotations.
message DatasetProperties {
  // Major and minor parts of the schema version.
  uint32 schema_major_version = 1;
  uint32 schema_minor_version = 2;

  // NOTE(review): field number 3 is not assigned here. If it was used by a
  // since-removed field, consider declaring it `reserved` — TODO confirm.

  // Timestamps as unsigned integers. The unit and epoch are not specified in
  // this schema (presumably epoch milliseconds — TODO confirm).
  uint64 creation_timestamp = 4;
  uint64 dataset_timestamp = 5;
  // String-to-string tags.
  map<string, string> tags = 6;
  // String-to-string metadata entries.
  map<string, string> metadata = 7;
}

// A list of offsets. Wrapping the list in a message allows it to be used as
// a map value (see `DatasetProfileHeader.column_offsets`).
message ChunkOffsets {
  // Offsets, presumably byte positions of chunks within the serialized
  // profile — TODO confirm the unit and the reference point.
  repeated uint64 offsets = 1;
}

// A chunk of metric components keyed by an unsigned integer index.
message ChunkMessage {
  // Components in this chunk. The uint32 keys presumably correspond to the
  // keys of `DatasetProfileHeader.indexed_metric_paths` — TODO confirm.
  map<uint32, MetricComponentMessage> metric_components = 1;
}

// Header describing a chunk: its kind, a tag, and a length.
message ChunkHeader {
  // Kind of chunk. `DATASET` is the zero value, so an unset `type` reads as
  // `DATASET`. Number 1 is not assigned.
  enum ChunkType {
    DATASET = 0;
    COLUMN = 2;
  }
  // The kind of chunk this header describes.
  ChunkType type = 1;
  // Tag for the chunk (presumably the column name for `COLUMN` chunks — TODO
  // confirm).
  string tag = 2;
  // Length associated with the chunk (presumably the size in bytes of the
  // chunk body — TODO confirm).
  uint32 length = 3;
}

// Header of a dataset profile: dataset properties plus the offset tables and
// metric-path index for the data associated with it.
message DatasetProfileHeader {
  // Dataset-level properties (schema version, timestamps, tags, metadata).
  DatasetProperties properties = 1;
  // Offsets per column; keys are strings (presumably column names — TODO
  // confirm).
  map<string, ChunkOffsets> column_offsets = 2;
  // Offsets for metrics, as a list of offset groups.
  repeated ChunkOffsets metric_offsets = 3;
  // Total length of the rest of the data for this profile.
  uint64 length = 4;
  // Maps a uint32 index to a metric path string.
  map<uint32, string> indexed_metric_paths = 5;
}

// A single key/value pair used to tag a segment.
message SegmentTag {
  // Tag key.
  string key = 1;
  // Tag value.
  string value = 2;
}

// A segment, described by a list of tags and a partition identifier.
message Segment {
  // Key/value tags describing the segment.
  repeated SegmentTag tags = 1;
  // Identifier of a partition (presumably matches `SegmentationPartition.id`
  // — TODO confirm).
  string partition_id = 2;
}

// A segmentation partition, identified by a name and an ID.
message SegmentationPartition {
  // Partition name.
  string name = 1;
  // Partition identifier.
  string id = 2;
}

// Header listing the segments contained in a file and an offset table.
message DatasetSegmentHeader {
  // whether the file contains segmented data or not
  bool has_segments = 1;

  // Field numbers 2 and 3 are not assigned in this message.

  // list of segments
  repeated Segment segments = 4;

  // Offset table with uint32 keys and uint64 values (the keys are presumably
  // indices into `segments` — TODO confirm).
  // NOTE(review): the stated intent was for the keys to have a static size by
  // using fixed32, but the declared key type is uint32, which is
  // varint-encoded and therefore variable-size. Switching to fixed32 would
  // change the wire format, so confirm the intent before altering the type.
  map<uint32, uint64> offsets = 5;
}