micromegas_analytics/lakehouse/partition.rs
1use super::view::ViewMetadata;
2use crate::time::TimeRange;
3use chrono::{DateTime, Utc};
4
5/// Partition metadata (without embedded file_metadata for performance)
6/// Use load_partition_metadata() to load metadata on-demand when needed
7#[derive(Clone, Debug)]
8pub struct Partition {
9 /// Metadata about the view this partition belongs to.
10 pub view_metadata: ViewMetadata,
11 /// The insert time range for this partition.
12 pub insert_time_range: TimeRange,
13 /// The event time range for this partition. None for empty partitions.
14 pub event_time_range: Option<TimeRange>,
15 /// The last time this partition was updated.
16 pub updated: DateTime<Utc>,
17 /// The path to the Parquet file for this partition. None for empty partitions.
18 pub file_path: Option<String>,
19 /// The size of the Parquet file in bytes. 0 for empty partitions.
20 pub file_size: i64,
21 /// A hash of the source data that generated this partition.
22 pub source_data_hash: Vec<u8>,
23 /// The number of rows in this partition. 0 for empty partitions.
24 pub num_rows: i64,
25}
26
27impl Partition {
28 /// Returns true if this partition has no data (num_rows = 0).
29 pub fn is_empty(&self) -> bool {
30 self.num_rows == 0
31 }
32
33 /// Returns the min event time, if this partition has data.
34 pub fn min_event_time(&self) -> Option<DateTime<Utc>> {
35 self.event_time_range.as_ref().map(|r| r.begin)
36 }
37
38 /// Returns the max event time, if this partition has data.
39 pub fn max_event_time(&self) -> Option<DateTime<Utc>> {
40 self.event_time_range.as_ref().map(|r| r.end)
41 }
42
43 /// Returns the beginning of the insert time range.
44 pub fn begin_insert_time(&self) -> DateTime<Utc> {
45 self.insert_time_range.begin
46 }
47
48 /// Returns the end of the insert time range.
49 pub fn end_insert_time(&self) -> DateTime<Utc> {
50 self.insert_time_range.end
51 }
52
53 /// Validates partition invariants. Returns error if partition is inconsistent.
54 ///
55 /// Invariants:
56 /// - Non-empty partitions (num_rows > 0) MUST have both event_time_range and file_path
57 /// - Empty partitions (num_rows = 0) MUST NOT have event_time_range or file_path
58 /// - num_rows must not be negative
59 pub fn validate(&self) -> anyhow::Result<()> {
60 if self.num_rows > 0 {
61 // Non-empty partition must have event_time_range and file_path
62 if self.event_time_range.is_none() {
63 anyhow::bail!(
64 "non-empty partition (num_rows={}) has no event_time_range",
65 self.num_rows
66 );
67 }
68 if self.file_path.is_none() {
69 anyhow::bail!(
70 "non-empty partition (num_rows={}) has no file_path",
71 self.num_rows
72 );
73 }
74 } else if self.num_rows == 0 {
75 // Empty partition must NOT have event_time_range or file_path
76 if self.event_time_range.is_some() {
77 anyhow::bail!("empty partition has event_time_range");
78 }
79 if self.file_path.is_some() {
80 anyhow::bail!("empty partition has file_path");
81 }
82 } else {
83 anyhow::bail!("partition has negative num_rows: {}", self.num_rows);
84 }
85 Ok(())
86 }
87}