micromegas_analytics/lakehouse/
partition.rs

1use super::view::ViewMetadata;
2use crate::time::TimeRange;
3use chrono::{DateTime, Utc};
4
5/// Partition metadata (without embedded file_metadata for performance)
6/// Use load_partition_metadata() to load metadata on-demand when needed
7#[derive(Clone, Debug)]
8pub struct Partition {
9    /// Metadata about the view this partition belongs to.
10    pub view_metadata: ViewMetadata,
11    /// The insert time range for this partition.
12    pub insert_time_range: TimeRange,
13    /// The event time range for this partition. None for empty partitions.
14    pub event_time_range: Option<TimeRange>,
15    /// The last time this partition was updated.
16    pub updated: DateTime<Utc>,
17    /// The path to the Parquet file for this partition. None for empty partitions.
18    pub file_path: Option<String>,
19    /// The size of the Parquet file in bytes. 0 for empty partitions.
20    pub file_size: i64,
21    /// A hash of the source data that generated this partition.
22    pub source_data_hash: Vec<u8>,
23    /// The number of rows in this partition. 0 for empty partitions.
24    pub num_rows: i64,
25}
26
27impl Partition {
28    /// Returns true if this partition has no data (num_rows = 0).
29    pub fn is_empty(&self) -> bool {
30        self.num_rows == 0
31    }
32
33    /// Returns the min event time, if this partition has data.
34    pub fn min_event_time(&self) -> Option<DateTime<Utc>> {
35        self.event_time_range.as_ref().map(|r| r.begin)
36    }
37
38    /// Returns the max event time, if this partition has data.
39    pub fn max_event_time(&self) -> Option<DateTime<Utc>> {
40        self.event_time_range.as_ref().map(|r| r.end)
41    }
42
43    /// Returns the beginning of the insert time range.
44    pub fn begin_insert_time(&self) -> DateTime<Utc> {
45        self.insert_time_range.begin
46    }
47
48    /// Returns the end of the insert time range.
49    pub fn end_insert_time(&self) -> DateTime<Utc> {
50        self.insert_time_range.end
51    }
52
53    /// Validates partition invariants. Returns error if partition is inconsistent.
54    ///
55    /// Invariants:
56    /// - Non-empty partitions (num_rows > 0) MUST have both event_time_range and file_path
57    /// - Empty partitions (num_rows = 0) MUST NOT have event_time_range or file_path
58    /// - num_rows must not be negative
59    pub fn validate(&self) -> anyhow::Result<()> {
60        if self.num_rows > 0 {
61            // Non-empty partition must have event_time_range and file_path
62            if self.event_time_range.is_none() {
63                anyhow::bail!(
64                    "non-empty partition (num_rows={}) has no event_time_range",
65                    self.num_rows
66                );
67            }
68            if self.file_path.is_none() {
69                anyhow::bail!(
70                    "non-empty partition (num_rows={}) has no file_path",
71                    self.num_rows
72                );
73            }
74        } else if self.num_rows == 0 {
75            // Empty partition must NOT have event_time_range or file_path
76            if self.event_time_range.is_some() {
77                anyhow::bail!("empty partition has event_time_range");
78            }
79            if self.file_path.is_some() {
80                anyhow::bail!("empty partition has file_path");
81            }
82        } else {
83            anyhow::bail!("partition has negative num_rows: {}", self.num_rows);
84        }
85        Ok(())
86    }
87}