Skip to content

Commit e18a51b

Browse files
committed
fix c/python statistic
1 parent 6bfe1ef commit e18a51b

File tree

8 files changed

+467
-252
lines changed

8 files changed

+467
-252
lines changed

cpp/src/cwrapper/tsfile_cwrapper.cc

Lines changed: 168 additions & 93 deletions
Large diffs are not rendered by default.

cpp/src/cwrapper/tsfile_cwrapper.h

Lines changed: 67 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -105,49 +105,86 @@ typedef struct device_schema {
105105
} DeviceSchema;
106106

107107
/**
108-
* @brief Aggregated statistic for one timeseries (subset of C++ Statistic).
108+
* @brief Common header for all statistic variants (first member of each
109+
* TsFile*Statistic struct; also aliases the start of TimeseriesStatistic::u).
109110
*
110-
* String pointers str_* are allocated with malloc; freed by
111-
* tsfile_free_device_timeseries_metadata_map (do not free individually).
111+
* When @p has_statistic is false, @p type is undefined. Otherwise @p type
112+
* selects which @ref TimeseriesStatisticUnion member is active (INT32/DATE/
113+
* INT64/TIMESTAMP share @c int_s). @c sum exists only on @c bool_s, @c int_s,
114+
* and @c float_s. Heap strings in string_s/text_s are
115+
* freed by tsfile_free_device_timeseries_metadata_map only; do not free them individually.
112116
*/
113-
typedef struct TimeseriesStatistic {
117+
typedef struct TsFileStatisticBase {
114118
bool has_statistic;
119+
TSDataType type;
115120
int32_t row_count;
116121
int64_t start_time;
117122
int64_t end_time;
118-
/** True when @p sum is meaningful (numeric / boolean aggregate types). */
119-
bool sum_valid;
120-
/** Sum when sum_valid; boolean uses sum of true as int-like aggregate. */
123+
} TsFileStatisticBase;
124+
125+
typedef struct TsFileBoolStatistic {
126+
TsFileStatisticBase base;
121127
double sum;
128+
bool first_bool;
129+
bool last_bool;
130+
} TsFileBoolStatistic;
122131

123-
/** INT32, DATE, INT64, TIMESTAMP: min/max/first/last in int64_t form. */
124-
bool int_range_valid;
132+
typedef struct TsFileIntStatistic {
133+
TsFileStatisticBase base;
134+
double sum;
125135
int64_t min_int64;
126136
int64_t max_int64;
127137
int64_t first_int64;
128138
int64_t last_int64;
139+
} TsFileIntStatistic;
129140

130-
/** FLOAT, DOUBLE: min/max/first/last. */
131-
bool float_range_valid;
141+
typedef struct TsFileFloatStatistic {
142+
TsFileStatisticBase base;
143+
double sum;
132144
double min_float64;
133145
double max_float64;
134146
double first_float64;
135147
double last_float64;
148+
} TsFileFloatStatistic;
136149

137-
/** BOOLEAN: first/last sample values. */
138-
bool bool_ext_valid;
139-
bool first_bool;
140-
bool last_bool;
141-
142-
/** STRING: min/max lexicographic; TEXT: first/last only (min/max unused).
143-
*/
144-
bool str_ext_valid;
150+
typedef struct TsFileStringStatistic {
151+
TsFileStatisticBase base;
145152
char* str_min;
146153
char* str_max;
147154
char* str_first;
148155
char* str_last;
156+
} TsFileStringStatistic;
157+
158+
typedef struct TsFileTextStatistic {
159+
TsFileStatisticBase base;
160+
char* str_first;
161+
char* str_last;
162+
} TsFileTextStatistic;
163+
164+
/**
165+
* @brief One of the typed layouts; active member follows @c base.type.
166+
*/
167+
typedef union TimeseriesStatisticUnion {
168+
TsFileBoolStatistic bool_s;
169+
TsFileIntStatistic int_s;
170+
TsFileFloatStatistic float_s;
171+
TsFileStringStatistic string_s;
172+
TsFileTextStatistic text_s;
173+
} TimeseriesStatisticUnion;
174+
175+
/**
176+
* @brief Aggregated statistic for one timeseries (subset of C++ Statistic).
177+
*
178+
* Read common fields via @c tsfile_statistic_base(s). Type-specific fields
179+
* via @c s->u.int_s, @c s->u.float_s, etc., per @c base.type.
180+
*/
181+
typedef struct TimeseriesStatistic {
182+
TimeseriesStatisticUnion u;
149183
} TimeseriesStatistic;
150184

185+
/** Pointer to the common header at the start of @p s->u (any active arm); @p s must be a TimeseriesStatistic pointer. */
186+
#define tsfile_statistic_base(s) ((TsFileStatisticBase*)&(s)->u)
187+
151188
/**
152189
* @brief One measurement's metadata as exposed to C.
153190
*/
@@ -161,24 +198,24 @@ typedef struct TimeseriesMetadata {
161198
/**
162199
* @brief Device identity from IDeviceID (path, table name, segments).
163200
*
164-
* Heap fields are freed by tsfile_device_details_free_contents or
165-
* tsfile_free_device_details_array, or as part of
201+
* Heap fields are freed by tsfile_device_id_free_contents or
202+
* tsfile_free_device_id_array, or as part of
166203
* tsfile_free_device_timeseries_metadata_map for entries.
167204
*/
168-
typedef struct TsDeviceDetails {
205+
typedef struct DeviceID {
169206
char* path;
170207
char* table_name;
171208
uint32_t segment_count;
172209
char** segments;
173-
} TsDeviceDetails;
210+
} DeviceID;
174211

175212
/**
176-
* @brief One device's timeseries metadata list plus TsDeviceDetails.
213+
* @brief One device's timeseries metadata list plus DeviceID.
177214
*
178215
* @p device heap fields freed by tsfile_free_device_timeseries_metadata_map.
179216
*/
180217
typedef struct DeviceTimeseriesMetadataEntry {
181-
TsDeviceDetails device;
218+
DeviceID device;
182219
TimeseriesMetadata* timeseries;
183220
uint32_t timeseries_count;
184221
} DeviceTimeseriesMetadataEntry;
@@ -193,7 +230,7 @@ typedef struct DeviceTimeseriesMetadataMap {
193230
} DeviceTimeseriesMetadataMap;
194231

195232
/** Frees path, table_name, and segments inside @p d; zeros @p d. */
196-
void tsfile_device_details_free_contents(TsDeviceDetails* d);
233+
void tsfile_device_id_free_contents(DeviceID* d);
197234

198235
typedef struct result_set_meta_data {
199236
char** column_names;
@@ -411,14 +448,13 @@ ERRNO tsfile_reader_close(TsFileReader reader);
411448
* @brief Lists all devices (path, table name, segments from IDeviceID).
412449
*
413450
* @param out_devices [out] Allocated array; caller frees with
414-
* tsfile_free_device_details_array.
451+
* tsfile_free_device_id_array.
415452
*/
416453
ERRNO tsfile_reader_get_all_devices(TsFileReader reader,
417-
TsDeviceDetails** out_devices,
454+
DeviceID** out_devices,
418455
uint32_t* out_length);
419456

420-
void tsfile_free_device_details_array(TsDeviceDetails* details,
421-
uint32_t length);
457+
void tsfile_free_device_id_array(DeviceID* devices, uint32_t length);
422458

423459
/**
424460
* @brief Timeseries metadata for all devices in the file.
@@ -434,7 +470,7 @@ ERRNO tsfile_reader_get_timeseries_metadata_all(
434470
* For each entry, @p path must be non-NULL (canonical device path).
435471
*/
436472
ERRNO tsfile_reader_get_timeseries_metadata_for_devices(
437-
TsFileReader reader, const TsDeviceDetails* devices, uint32_t length,
473+
TsFileReader reader, const DeviceID* devices, uint32_t length,
438474
DeviceTimeseriesMetadataMap* out_map);
439475

440476
void tsfile_free_device_timeseries_metadata_map(

cpp/test/cwrapper/cwrapper_metadata_test.cc

Lines changed: 32 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) {
6565
ASSERT_EQ(RET_OK, code);
6666
ASSERT_NE(nullptr, reader);
6767

68-
TsDeviceDetails* details = nullptr;
68+
DeviceID* details = nullptr;
6969
uint32_t n_det = 0;
7070
ASSERT_EQ(RET_OK, tsfile_reader_get_all_devices(reader, &details, &n_det));
7171
ASSERT_EQ(1u, n_det);
@@ -77,7 +77,7 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) {
7777
ASSERT_NE(nullptr, details[0].segments);
7878
EXPECT_STREQ("root.sg", details[0].segments[0]);
7979
EXPECT_STREQ("d1", details[0].segments[1]);
80-
tsfile_free_device_details_array(details, n_det);
80+
tsfile_free_device_id_array(details, n_det);
8181

8282
DeviceTimeseriesMetadataMap map{};
8383
ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata_all(reader, &map));
@@ -95,17 +95,17 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) {
9595
TimeseriesMetadata& tm = map.entries[0].timeseries[0];
9696
ASSERT_STREQ(m_int, tm.measurement_name);
9797
ASSERT_EQ(TS_DATATYPE_INT32, tm.data_type);
98-
ASSERT_TRUE(tm.statistic.has_statistic);
99-
EXPECT_EQ(3, tm.statistic.row_count);
100-
EXPECT_EQ(1, tm.statistic.start_time);
101-
EXPECT_EQ(3, tm.statistic.end_time);
102-
ASSERT_TRUE(tm.statistic.sum_valid);
103-
EXPECT_DOUBLE_EQ(60.0, tm.statistic.sum);
104-
ASSERT_TRUE(tm.statistic.int_range_valid);
105-
EXPECT_EQ(10, tm.statistic.min_int64);
106-
EXPECT_EQ(30, tm.statistic.max_int64);
107-
EXPECT_EQ(10, tm.statistic.first_int64);
108-
EXPECT_EQ(30, tm.statistic.last_int64);
98+
TsFileStatisticBase* sb = tsfile_statistic_base(&tm.statistic);
99+
ASSERT_TRUE(sb->has_statistic);
100+
EXPECT_EQ(3, sb->row_count);
101+
EXPECT_EQ(1, sb->start_time);
102+
EXPECT_EQ(3, sb->end_time);
103+
EXPECT_DOUBLE_EQ(60.0, tm.statistic.u.int_s.sum);
104+
ASSERT_EQ(TS_DATATYPE_INT32, sb->type);
105+
EXPECT_EQ(10, tm.statistic.u.int_s.min_int64);
106+
EXPECT_EQ(30, tm.statistic.u.int_s.max_int64);
107+
EXPECT_EQ(10, tm.statistic.u.int_s.first_int64);
108+
EXPECT_EQ(30, tm.statistic.u.int_s.last_int64);
109109

110110
tsfile_free_device_timeseries_metadata_map(&map);
111111

@@ -115,7 +115,7 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) {
115115
EXPECT_EQ(0u, empty.device_count);
116116
EXPECT_EQ(nullptr, empty.entries);
117117

118-
TsDeviceDetails q{};
118+
DeviceID q{};
119119
q.path = const_cast<char*>(device);
120120
q.table_name = nullptr;
121121
q.segment_count = 0;
@@ -168,12 +168,12 @@ TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataBooleanStatistic) {
168168
TimeseriesMetadata& tm = map.entries[0].timeseries[0];
169169
ASSERT_STREQ(m_b, tm.measurement_name);
170170
ASSERT_EQ(TS_DATATYPE_BOOLEAN, tm.data_type);
171-
ASSERT_TRUE(tm.statistic.has_statistic);
172-
ASSERT_TRUE(tm.statistic.sum_valid);
173-
EXPECT_DOUBLE_EQ(2.0, tm.statistic.sum);
174-
ASSERT_TRUE(tm.statistic.bool_ext_valid);
175-
EXPECT_TRUE(tm.statistic.first_bool);
176-
EXPECT_TRUE(tm.statistic.last_bool);
171+
TsFileStatisticBase* sb = tsfile_statistic_base(&tm.statistic);
172+
ASSERT_TRUE(sb->has_statistic);
173+
EXPECT_DOUBLE_EQ(2.0, tm.statistic.u.bool_s.sum);
174+
ASSERT_EQ(TS_DATATYPE_BOOLEAN, sb->type);
175+
EXPECT_TRUE(tm.statistic.u.bool_s.first_bool);
176+
EXPECT_TRUE(tm.statistic.u.bool_s.last_bool);
177177

178178
tsfile_free_device_timeseries_metadata_map(&map);
179179
ASSERT_EQ(RET_OK, tsfile_reader_close(reader));
@@ -220,16 +220,17 @@ TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataStringStatistic) {
220220
TimeseriesMetadata& tm = map.entries[0].timeseries[0];
221221
ASSERT_STREQ(m_str, tm.measurement_name);
222222
ASSERT_EQ(TS_DATATYPE_STRING, tm.data_type);
223-
ASSERT_TRUE(tm.statistic.has_statistic);
224-
ASSERT_TRUE(tm.statistic.str_ext_valid);
225-
ASSERT_NE(nullptr, tm.statistic.str_min);
226-
ASSERT_NE(nullptr, tm.statistic.str_max);
227-
ASSERT_NE(nullptr, tm.statistic.str_first);
228-
ASSERT_NE(nullptr, tm.statistic.str_last);
229-
EXPECT_STREQ("aa", tm.statistic.str_min);
230-
EXPECT_STREQ("cc", tm.statistic.str_max);
231-
EXPECT_STREQ("aa", tm.statistic.str_first);
232-
EXPECT_STREQ("bb", tm.statistic.str_last);
223+
TsFileStatisticBase* sb = tsfile_statistic_base(&tm.statistic);
224+
ASSERT_TRUE(sb->has_statistic);
225+
ASSERT_EQ(TS_DATATYPE_STRING, sb->type);
226+
ASSERT_NE(nullptr, tm.statistic.u.string_s.str_min);
227+
ASSERT_NE(nullptr, tm.statistic.u.string_s.str_max);
228+
ASSERT_NE(nullptr, tm.statistic.u.string_s.str_first);
229+
ASSERT_NE(nullptr, tm.statistic.u.string_s.str_last);
230+
EXPECT_STREQ("aa", tm.statistic.u.string_s.str_min);
231+
EXPECT_STREQ("cc", tm.statistic.u.string_s.str_max);
232+
EXPECT_STREQ("aa", tm.statistic.u.string_s.str_first);
233+
EXPECT_STREQ("bb", tm.statistic.u.string_s.str_last);
233234

234235
tsfile_free_device_timeseries_metadata_map(&map);
235236
ASSERT_EQ(RET_OK, tsfile_reader_close(reader));
@@ -250,7 +251,7 @@ TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataNullDevicePath) {
250251
TsFileReader reader = tsfile_reader_new(filename, &code);
251252
ASSERT_EQ(RET_OK, code);
252253

253-
TsDeviceDetails bad{};
254+
DeviceID bad{};
254255
bad.path = nullptr;
255256
bad.table_name = nullptr;
256257
bad.segment_count = 0;

python/tests/test_reader_metadata.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,12 @@
2222

2323
from tsfile import Field, RowRecord, TimeseriesSchema, TsFileReader, TsFileWriter
2424
from tsfile import TSDataType
25-
from tsfile.schema import DeviceDetails
25+
from tsfile.schema import (
26+
BoolTimeseriesStatistic,
27+
DeviceID,
28+
IntTimeseriesStatistic,
29+
StringTimeseriesStatistic,
30+
)
2631

2732

2833
def test_get_all_devices_segments():
@@ -100,21 +105,20 @@ def test_get_all_devices_and_timeseries_metadata_statistic():
100105
assert m.measurement_name == "m_int"
101106
assert m.data_type == TSDataType.INT32
102107
st = m.statistic
108+
assert isinstance(st, IntTimeseriesStatistic)
103109
assert st.has_statistic
104110
assert st.row_count == 3
105111
assert st.start_time == 1
106112
assert st.end_time == 3
107-
assert st.sum_valid
108113
assert st.sum == pytest.approx(60.0)
109-
assert st.int_range_valid
110114
assert st.min_int64 == 10
111115
assert st.max_int64 == 30
112116
assert st.first_int64 == 10
113117
assert st.last_int64 == 30
114118

115119
assert reader.get_timeseries_metadata([]) == {}
116120

117-
sub = reader.get_timeseries_metadata([DeviceDetails(device, "", ())])
121+
sub = reader.get_timeseries_metadata([DeviceID(device, None, ())])
118122
assert device in sub
119123
assert len(sub[device].timeseries) == 1
120124

@@ -153,10 +157,9 @@ def test_get_timeseries_metadata_boolean_statistic():
153157
try:
154158
meta_all = reader.get_timeseries_metadata(None)
155159
st = meta_all[device].timeseries[0].statistic
160+
assert isinstance(st, BoolTimeseriesStatistic)
156161
assert st.has_statistic
157-
assert st.sum_valid
158162
assert st.sum == pytest.approx(2.0)
159-
assert st.bool_ext_valid
160163
assert st.first_bool is True
161164
assert st.last_bool is True
162165
finally:
@@ -195,8 +198,8 @@ def test_get_timeseries_metadata_string_statistic():
195198
assert m.measurement_name == "m_str"
196199
assert m.data_type == TSDataType.STRING
197200
st = m.statistic
201+
assert isinstance(st, StringTimeseriesStatistic)
198202
assert st.has_statistic
199-
assert st.str_ext_valid
200203
assert st.str_min == "aa"
201204
assert st.str_max == "cc"
202205
assert st.str_first == "aa"

0 commit comments

Comments
 (0)