-
-
Save paleolimbot/ec2a2067198f0de1901c107c783d3b26 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Licensed to the Apache Software Foundation (ASF) under one | |
// or more contributor license agreements. See the NOTICE file | |
// distributed with this work for additional information | |
// regarding copyright ownership. The ASF licenses this file | |
// to you under the Apache License, Version 2.0 (the | |
// "License"); you may not use this file except in compliance | |
// with the License. You may obtain a copy of the License at | |
// | |
// http://www.apache.org/licenses/LICENSE-2.0 | |
// | |
// Unless required by applicable law or agreed to in writing, | |
// software distributed under the License is distributed on an | |
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
// KIND, either express or implied. See the License for the | |
// specific language governing permissions and limitations | |
// under the License. | |
#include <cerrno> | |
#include <deque> | |
#include <iostream> | |
#include <string> | |
#include <unordered_map> | |
#include <utility> | |
#include "nanoarrow/nanoarrow_ipc.hpp" | |
#include "nanoarrow/nanoarrow_testing.hpp" | |
void print_help() { | |
std::cerr << "nanoarrow version " << ArrowNanoarrowVersion() << "\n"; | |
std::cerr << " Usage: integration_test_util convert\n"; | |
std::cerr << " --from [json|ipc] [file or -]\n"; | |
std::cerr << " [[--to [json] [-]] OR [--check [json|ipc] [file or -]]]\n"; | |
} | |
class ArgumentParser { | |
public: | |
ArrowErrorCode parse(int argc, char* argv[]) { | |
std::deque<std::string> args; | |
for (int i = 0; i < argc; i++) { | |
args.push_back(argv[i]); | |
} | |
// executable name is first | |
if (!args.empty()) { | |
args.pop_front(); | |
} | |
while (!args.empty()) { | |
std::string item = args.front(); | |
args.pop_front(); | |
if (item.substr(0, 2) == "--") { | |
if (item != "--from" && item != "--to" && item != "--check") { | |
std::cerr << "Unknown kwarg: '" << item << "'\n"; | |
} | |
if (args.size() < 2) { | |
std::cerr << "kwarg " << item << ": expected following [format] [file or -]\n"; | |
return EINVAL; | |
} | |
std::string format = args.front(); | |
args.pop_front(); | |
std::string ref = args.front(); | |
args.pop_front(); | |
kwargs_[item.substr(2)] = {format, ref}; | |
} else { | |
std::cerr << "Unexpected arg: '" << item << "'\n"; | |
return EINVAL; | |
} | |
} | |
if (!has_kwarg("from")) { | |
std::cerr << "--from is a required argument\n"; | |
return EINVAL; | |
} | |
if (has_kwarg("to") && has_kwarg("check")) { | |
std::cerr << "--to with --check is not supported"; | |
} | |
return NANOARROW_OK; | |
} | |
bool has_kwarg(const std::string& key) const { | |
return kwargs_.find(key) != kwargs_.end(); | |
} | |
const std::pair<std::string, std::string>& kwarg(const std::string& key) const { | |
return kwargs_.find(key)->second; | |
} | |
private: | |
std::unordered_map<std::string, std::pair<std::string, std::string>> kwargs_; | |
}; | |
ArrowErrorCode Open(const std::string& ref, ArrowIpcInputStream* out, ArrowError* error) { | |
FILE* file_ptr; | |
if (ref == "-") { | |
file_ptr = freopen(NULL, "rb", stdin); | |
} else { | |
file_ptr = fopen(ref.c_str(), "rb"); | |
} | |
NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowIpcInputStreamInitFile(out, file_ptr, true), | |
error); | |
return NANOARROW_OK; | |
} | |
ArrowErrorCode GetArrayStream(const std::string& format, ArrowIpcInputStream* input, | |
ArrowArrayStream* out, ArrowError* error) { | |
if (format == "ipc") { | |
NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowIpcArrayStreamReaderInit(out, input, nullptr), | |
error); | |
return NANOARROW_OK; | |
} else if (format == "json") { | |
// Read input | |
std::stringstream ss; | |
int64_t bytes_read = 0; | |
uint8_t buf[1024]; | |
do { | |
ss << std::string(reinterpret_cast<char*>(buf), bytes_read); | |
NANOARROW_RETURN_NOT_OK(input->read(input, buf, sizeof(buf), &bytes_read, error)); | |
} while (bytes_read > 0); | |
// Parse it | |
nanoarrow::testing::TestingJSONReader json_reader; | |
NANOARROW_RETURN_NOT_OK(json_reader.ReadDataFile(ss.str(), out, error)); | |
return NANOARROW_OK; | |
} else { | |
std::cerr << "Unknown or unsupported format --from " << format << "\n"; | |
print_help(); | |
return EINVAL; | |
} | |
} | |
ArrowErrorCode WriteArrayStream(const std::string& format, ArrowArrayStream* stream, | |
ArrowError* error) { | |
if (format == "json") { | |
nanoarrow::testing::TestingJSONWriter writer; | |
NANOARROW_RETURN_NOT_OK_WITH_ERROR(writer.WriteDataFile(std::cout, stream), error); | |
return NANOARROW_OK; | |
} else { | |
std::cerr << "Unknown or unsupported format --to " << format << "\n"; | |
print_help(); | |
return EINVAL; | |
} | |
} | |
ArrowErrorCode CheckArrayStream(const std::string& format, const std::string& ref, | |
ArrowArrayStream* actual, ArrowError* error) { | |
nanoarrow::ipc::UniqueInputStream check; | |
NANOARROW_RETURN_NOT_OK(Open(ref, check.get(), error)); | |
nanoarrow::UniqueArrayStream expected; | |
NANOARROW_RETURN_NOT_OK(GetArrayStream(format, check.get(), expected.get(), error)); | |
nanoarrow::testing::TestingJSONComparison comparison; | |
NANOARROW_RETURN_NOT_OK(comparison.CompareArrayStream(actual, expected.get(), error)); | |
if (comparison.num_differences() > 0) { | |
std::cerr << comparison.num_differences() | |
<< " Difference(s) found between --from and --check:\n"; | |
comparison.WriteDifferences(std::cerr); | |
return EINVAL; | |
} | |
return NANOARROW_OK; | |
} | |
int DoMain(const ArgumentParser& args, ArrowError* error) { | |
nanoarrow::ipc::UniqueInputStream from; | |
NANOARROW_RETURN_NOT_OK(Open(args.kwarg("from").second, from.get(), error)); | |
nanoarrow::UniqueArrayStream stream; | |
NANOARROW_RETURN_NOT_OK( | |
GetArrayStream(args.kwarg("from").first, from.get(), stream.get(), error)); | |
if (args.has_kwarg("to")) { | |
if (args.kwarg("to").second != "-") { | |
std::cerr << "--to output is only supported to stdout ('-')\n"; | |
print_help(); | |
return EINVAL; | |
} | |
NANOARROW_RETURN_NOT_OK( | |
WriteArrayStream(args.kwarg("to").first, stream.get(), error)); | |
} else if (args.has_kwarg("check")) { | |
NANOARROW_RETURN_NOT_OK(CheckArrayStream( | |
args.kwarg("check").first, args.kwarg("check").second, stream.get(), error)); | |
} else { | |
std::cerr << "One of --check or --to must be specified"; | |
print_help(); | |
return EINVAL; | |
} | |
return NANOARROW_OK; | |
} | |
int main(int argc, char* argv[]) { | |
ArrowError error; | |
error.message[0] = '\0'; | |
ArgumentParser args; | |
int result = args.parse(argc, argv); | |
if (result != NANOARROW_OK) { | |
print_help(); | |
return result; | |
} | |
result = DoMain(args, &error); | |
if (result != NANOARROW_OK) { | |
std::cerr << error.message << "\n"; | |
return result; | |
} | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Licensed to the Apache Software Foundation (ASF) under one | |
# or more contributor license agreements. See the NOTICE file | |
# distributed with this work for additional information | |
# regarding copyright ownership. The ASF licenses this file | |
# to you under the Apache License, Version 2.0 (the | |
# "License"); you may not use this file except in compliance | |
# with the License. You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, | |
# software distributed under the License is distributed on an | |
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
# KIND, either express or implied. See the License for the | |
# specific language governing permissions and limitations | |
# under the License. | |
if [ ${VERBOSE:-0} -gt 0 ]; then | |
set -x | |
fi | |
if [ -z "$NANOARROW_ARROW_TESTING_DIR" ]; then | |
echo "NANOARROW_ARROW_TESTING_DIR env is not set" | |
exit 1 | |
fi | |
INTEGRATION_1_0_0="${NANOARROW_ARROW_TESTING_DIR}/data/arrow-ipc-stream/integration/1.0.0-littleendian" | |
JSON_GZ_FILES=$(find "${INTEGRATION_1_0_0}" -name "*.json.gz") | |
N_FAIL=0 | |
for json_gz_file in ${JSON_GZ_FILES} ; do | |
json_file=$(echo "${json_gz_file}" | sed -e s/.json.gz/.json/) | |
ipc_file=$(echo "${json_gz_file}" | sed -e s/.json.gz/.stream/) | |
json_gz_label=$(basename ${json_gz_file}) | |
ipc_label=$(basename ${ipc_file}) | |
# Unzip the .json.gz file | |
gzip --decompress -c "${json_gz_file}" > "${json_file}" | |
# Skip dictionary test files for now to keep the noise down | |
if echo "${json_gz_file}" | grep -e "dictionary" >/dev/null; then | |
echo "[SKIP] ${json_gz_label}" | |
continue | |
fi | |
# Read IPC, check against IPC | |
./integration_test_util \ | |
--from ipc "${ipc_file}" \ | |
--check ipc "${ipc_file}" | |
if [ $? -eq 0 ]; then | |
echo "[PASS] ${ipc_label} --check ${ipc_label}" | |
else | |
echo "[FAIL] ${ipc_label} --check ${ipc_label}" | |
N_FAIL=$((N_FAIL+1)) | |
fi | |
# Read JSON, check against JSON | |
./integration_test_util \ | |
--from json "${json_file}" \ | |
--check json "${json_file}" | |
if [ $? -eq 0 ]; then | |
echo "[PASS] ${json_gz_label} --check ${json_gz_label}" | |
else | |
echo "[FAIL] ${json_gz_label} --check ${json_gz_label}" | |
N_FAIL=$((N_FAIL+1)) | |
fi | |
# Read JSON, check against IPC | |
./integration_test_util \ | |
--from json "${json_file}" \ | |
--check ipc "${ipc_file}" | |
if [ $? -eq 0 ]; then | |
echo "[PASS] ${json_gz_label} --check ${ipc_label}" | |
else | |
echo "[FAIL] ${json_gz_label} --check ${ipc_label}" | |
N_FAIL=$((N_FAIL+1)) | |
fi | |
# Clean up the json file | |
rm "${json_file}" | |
done | |
exit $N_FAIL |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment