Skip to content

Commit 1604a4d

Browse files
authored
feat: Upgrade clp submodule to y-scope/clp@67276c0; Add support for FormattedFloat and DictionaryFloat. (#37)
1 parent 12521a1 commit 1604a4d

8 files changed

Lines changed: 439 additions & 11 deletions

File tree

CMake/resolve_dependency_modules/clp.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ include_guard(GLOBAL)
1616
FetchContent_Declare(
1717
clp
1818
GIT_REPOSITORY https://github.com/y-scope/clp.git
19-
GIT_TAG bfd4f60ffe9c5d69618cc8416ec6729c76ee9862)
19+
GIT_TAG 67276c09acbd48dd502454288f40072c44628726)
2020

2121
set(CLP_BUILD_CLP_REGEX_UTILS
2222
OFF

CMake/resolve_dependency_modules/ystdlib_cpp.cmake renamed to CMake/resolve_dependency_modules/ystdlib.cmake

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,23 @@
1313
# limitations under the License.
1414
include_guard(GLOBAL)
1515

16+
set(VELOX_YSTDLIB_BUILD_VERSION 9ed78cd)
17+
set(VELOX_YSTDLIB_BUILD_SHA256_CHECKSUM
18+
65990dc2bcc4a355c2181bfe31a7800f492309d1bcd340f52a34e85047e61bc8)
19+
set(VELOX_YSTDLIB_SOURCE_URL
20+
"https://github.com/y-scope/ystdlib-cpp/archive/${VELOX_YSTDLIB_BUILD_VERSION}.tar.gz"
21+
)
22+
23+
velox_resolve_dependency_url(YSTDLIB)
24+
25+
message(STATUS "Building ystdlib from source")
26+
1627
FetchContent_Declare(
17-
ystdlib_cpp
18-
GIT_REPOSITORY https://github.com/y-scope/ystdlib-cpp.git
19-
GIT_TAG 0ae886c6a7ee706a3c6e1950262b63d72f71fe63)
28+
ystdlib
29+
URL ${VELOX_YSTDLIB_SOURCE_URL}
30+
URL_HASH ${VELOX_YSTDLIB_BUILD_SHA256_CHECKSUM}
31+
OVERRIDE_FIND_PACKAGE EXCLUDE_FROM_ALL SYSTEM)
2032

21-
FetchContent_Populate(ystdlib_cpp)
33+
set(ystdlib_BUILD_TESTING OFF)
2234

23-
set(CLP_YSTDLIB_SOURCE_DIRECTORY "${ystdlib_cpp_SOURCE_DIR}")
24-
include_directories(${ystdlib_cpp_SOURCE_DIR}/src)
35+
FetchContent_MakeAvailable(ystdlib)

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -544,8 +544,8 @@ if(${VELOX_ENABLE_CLP_CONNECTOR})
544544
velox_set_source(log_surgeon)
545545
velox_resolve_dependency(log_surgeon)
546546

547-
set(ystdlib_cpp_SOURCE BUNDLED)
548-
velox_resolve_dependency(ystdlib_cpp)
547+
velox_set_source(ystdlib)
548+
velox_resolve_dependency(ystdlib)
549549

550550
set(clp_SOURCE BUNDLED)
551551
velox_resolve_dependency(clp)

velox/connectors/clp/search_lib/ClpTimestampsUtils.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,13 @@ inline auto convertToVeloxTimestamp(double timestamp) -> Timestamp {
7979
break;
8080
}
8181
double seconds{std::floor(timestamp)};
82-
double nanoseconds{(timestamp - seconds) * Timestamp::kNanosInSecond};
82+
// Due to IEEE 754 rounding, we drop nanosecond precision to ensure
83+
// correctness
84+
double microseconds{(timestamp - seconds) * Timestamp::kMicrosecondsInSecond};
8385
return Timestamp(
84-
static_cast<int64_t>(seconds), static_cast<uint64_t>(nanoseconds));
86+
static_cast<int64_t>(seconds),
87+
static_cast<int64_t>(std::round(microseconds)) *
88+
Timestamp::kNanosecondsInMicrosecond);
8589
}
8690

8791
/// Converts an integer value into a Velox timestamp.

velox/connectors/clp/search_lib/archive/ClpArchiveVectorLoader.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ void ClpArchiveVectorLoader::populateTimestampData(
6969
bool supportedNodeType{false};
7070
switch (Type) {
7171
case clp_s::NodeType::Float:
72+
case clp_s::NodeType::FormattedFloat:
73+
case clp_s::NodeType::DictionaryFloat:
7274
case clp_s::NodeType::Integer:
7375
case clp_s::NodeType::DateString:
7476
supportedNodeType = true;
@@ -92,6 +94,20 @@ void ClpArchiveVectorLoader::populateTimestampData(
9294
vectorIndex,
9395
convertToVeloxTimestamp(
9496
std::get<double>(reader->extract_value(messageIndex))));
97+
} else if (clp_s::NodeType::FormattedFloat == Type) {
98+
auto reader =
99+
static_cast<clp_s::FormattedFloatColumnReader*>(columnReader_);
100+
vector->set(
101+
vectorIndex,
102+
convertToVeloxTimestamp(
103+
std::get<double>(reader->extract_value(messageIndex))));
104+
} else if (clp_s::NodeType::DictionaryFloat == Type) {
105+
auto reader =
106+
static_cast<clp_s::DictionaryFloatColumnReader*>(columnReader_);
107+
vector->set(
108+
vectorIndex,
109+
convertToVeloxTimestamp(
110+
std::get<double>(reader->extract_value(messageIndex))));
95111
} else if (clp_s::NodeType::Integer == Type) {
96112
auto reader = static_cast<clp_s::Int64ColumnReader*>(columnReader_);
97113
vector->set(
@@ -204,6 +220,16 @@ void ClpArchiveVectorLoader::loadInternal(
204220
} else if (
205221
nullptr != dynamic_cast<clp_s::FloatColumnReader*>(columnReader_)) {
206222
populateTimestampData<clp_s::NodeType::Float>(rows, timestampVector);
223+
} else if (
224+
nullptr !=
225+
dynamic_cast<clp_s::FormattedFloatColumnReader*>(columnReader_)) {
226+
populateTimestampData<clp_s::NodeType::FormattedFloat>(
227+
rows, timestampVector);
228+
} else if (
229+
nullptr !=
230+
dynamic_cast<clp_s::DictionaryFloatColumnReader*>(columnReader_)) {
231+
populateTimestampData<clp_s::NodeType::DictionaryFloat>(
232+
rows, timestampVector);
207233
} else {
208234
populateTimestampData<clp_s::NodeType::Unknown>(rows, timestampVector);
209235
}

0 commit comments

Comments
 (0)