diff --git a/src/fdb5/rules/Schema.cc b/src/fdb5/rules/Schema.cc index c61bcf913..f9de512be 100644 --- a/src/fdb5/rules/Schema.cc +++ b/src/fdb5/rules/Schema.cc @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -197,7 +196,14 @@ void Schema::load(const eckit::PathName& path, const bool replace) { throw ex; } - load(in, replace); + try { + load(in, replace); + } + catch (SchemaParser::Error& spe) { + std::stringstream buf; + buf << "Error loading FDB schema file: " << path << ". Underlying issue: " << spe.what(); + throw SchemaParser::Error(buf.str()); + } } void Schema::load(std::istream& s, const bool replace) { diff --git a/src/fdb5/rules/SchemaParser.cc b/src/fdb5/rules/SchemaParser.cc index ae71d71ed..74de81719 100644 --- a/src/fdb5/rules/SchemaParser.cc +++ b/src/fdb5/rules/SchemaParser.cc @@ -14,6 +14,7 @@ /// @date April 2016 #include "fdb5/rules/SchemaParser.h" +#include "eckit/parser/StreamParser.h" #include "fdb5/rules/ExcludeAll.h" #include "fdb5/rules/MatchAlways.h" #include "fdb5/rules/MatchAny.h" @@ -24,12 +25,30 @@ #include "fdb5/rules/Rule.h" #include +#include +#include #include namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- +bool SchemaParser::isAscii(char c) { + return static_cast(c) < 128; +} + +char SchemaParser::peek(bool spaces) { + const char peeked = parser_.peek(spaces); + + if (peeked != 0 && !isAscii(peeked)) { + std::stringstream buf; + buf << "Schema file contained non-ASCII character which are not supported." << std::endl; + throw eckit::StreamParser::Error(buf.str(), parser_.line() + 1); + } + + return peeked; +} + std::string SchemaParser::parseIdent(bool value, bool emptyOK) { std::string s; for (;;) { @@ -45,19 +64,20 @@ std::string SchemaParser::parseIdent(bool value, bool emptyOK) { case ']': case '?': if (s.empty() && !emptyOK) { - throw StreamParser::Error("Syntax error: found '" + std::to_string(c) + "'", line_ + 1); + throw eckit::StreamParser::Error("Syntax error: found '" + std::string{c} + "'", + parser_.line() + 1); } return s; case '-': if (s.empty() && !emptyOK) { - throw StreamParser::Error("Syntax error: found '-'", line_ + 1); + throw eckit::StreamParser::Error("Syntax error: found '-'", parser_.line() + 1); } if (!value) { return s; } [[fallthrough]]; default: - consume(c); + parser_.consume(c); s += c; break; } @@ -73,19 +93,19 @@ std::unique_ptr SchemaParser::parsePredicate(eckit::StringDict& types char c = peek(); if (c == ':') { - consume(c); + parser_.consume(c); ASSERT(types.find(k) == types.end()); types[k] = parseIdent(false, false); c = peek(); } if (c == '?') { - consume(c); + parser_.consume(c); return std::make_unique(k, new MatchOptional(parseIdent(true, true))); } if (c == '-') { - consume(c); + parser_.consume(c); if (types.find(k) == types.end()) { // Register ignore type types[k] = "Ignore"; @@ -94,7 +114,7 @@ std::unique_ptr SchemaParser::parsePredicate(eckit::StringDict& types } if (c != ',' && c != '[' && c != ']') { - consume("="); + parser_.consume("="); std::string val = parseIdent(true, false); exclude = val[0] == '!'; @@ -107,7 +127,7 @@ std::unique_ptr SchemaParser::parsePredicate(eckit::StringDict& types } while ((c = peek()) == '/') { - consume(c); + parser_.consume(c); values.insert(parseIdent(true, false)); } } @@ -126,16 +146,27 @@ std::unique_ptr SchemaParser::parsePredicate(eckit::StringDict& types } void SchemaParser::parseTypes(eckit::StringDict& types) { - for (;;) { - const auto name = parseIdent(false, true); - if (name.empty()) { - break; + + try { + for (;;) { + const auto name = parseIdent(false, true); + if (name.empty()) { + break; + } + parser_.consume(':'); + const auto type = parseIdent(false, false); + parser_.consume(';'); + ASSERT(types.find(name) == types.end()); + types[name] = type; } - consume(':'); - const auto type = parseIdent(false, false); - consume(';'); - ASSERT(types.find(name) == types.end()); - types[name] = type; + } + catch (eckit::StreamParser::Error& spe) { + std::stringstream buf; + buf << "SchemaParser::parseTypes: Error during parsing of types in schema, check the definitions: ': " + ";'." + << " Underlying issue: " << spe.what(); + + throw Error(buf.str(), parser_.line() + 1); } } @@ -143,13 +174,13 @@ std::unique_ptr SchemaParser::parseDatum() { Rule::Predicates predicates; eckit::StringDict types; - consume('['); + parser_.consume('['); - const std::size_t line = line_ + 1; + const std::size_t line = parser_.line() + 1; char c = peek(); if (c == ']') { - consume(c); + parser_.consume(c); return std::make_unique(line, predicates, types); } @@ -159,13 +190,13 @@ std::unique_ptr SchemaParser::parseDatum() { predicates.emplace_back(parsePredicate(types)); while ((c = peek()) == ',') { - consume(c); + parser_.consume(c); predicates.emplace_back(parsePredicate(types)); } c = peek(); if (c == ']') { - consume(c); + parser_.consume(c); return std::make_unique(line, predicates, types); } } @@ -176,13 +207,13 @@ std::unique_ptr SchemaParser::parseIndex() { eckit::StringDict types; RuleIndex::Child rule; - consume('['); + parser_.consume('['); - const std::size_t line = line_ + 1; + const std::size_t line = parser_.line() + 1; char c = peek(); if (c == ']') { - consume(c); + parser_.consume(c); return std::make_unique(line, predicates, types, std::move(rule)); } @@ -196,14 +227,14 @@ std::unique_ptr SchemaParser::parseIndex() { else { predicates.emplace_back(parsePredicate(types)); while ((c = peek()) == ',') { - consume(c); + parser_.consume(c); predicates.emplace_back(parsePredicate(types)); } } c = peek(); if (c == ']') { - consume(c); + parser_.consume(c); return std::make_unique(line, predicates, types, std::move(rule)); } } @@ -214,37 +245,47 @@ std::unique_ptr SchemaParser::parseDatabase() { eckit::StringDict types; RuleDatabase::Children rules; - consume('['); + try { + parser_.consume('['); - const std::size_t line = line_ + 1; + const std::size_t line = parser_.line() + 1; - char c = peek(); - if (c == ']') { - consume(c); - return std::make_unique(line, predicates, types, rules); - } + char c = peek(); + if (c == ']') { + parser_.consume(c); + return std::make_unique(line, predicates, types, rules); + } - for (;;) { + for (;;) { - c = peek(); + c = peek(); - if (c == '[') { - rules.emplace_back(parseIndex()); - } - else { - predicates.emplace_back(parsePredicate(types)); - while ((c = peek()) == ',') { - consume(c); + if (c == '[') { + rules.emplace_back(parseIndex()); + } + else { predicates.emplace_back(parsePredicate(types)); + while ((c = peek()) == ',') { + parser_.consume(c); + predicates.emplace_back(parsePredicate(types)); + } } - } - c = peek(); - if (c == ']') { - consume(c); - return std::make_unique(line, predicates, types, rules); + c = peek(); + if (c == ']') { + parser_.consume(c); + return std::make_unique(line, predicates, types, rules); + } } } + catch (eckit::StreamParser::Error& spe) { + std::stringstream buf; + buf << "SchemaParser::parseDatabase: Error during parsing of rules in schema, check for closing brackets and " + "definitions." + << " Underlying issue: " << spe.what(); + + throw Error(buf.str(), parser_.line() + 1); + } } void SchemaParser::parse(RuleList& result, TypesRegistry& registry) { @@ -261,10 +302,25 @@ void SchemaParser::parse(RuleList& result, TypesRegistry& registry) { } if (c) { - throw StreamParser::Error(std::string("Error parsing rules: remaining char: ") + c); + throw Error(std::string("SchemaParser::parse: Error parsing rules: remaining char: ") + c, parser_.line()); + } + + + if (result.size() == 0) { + std::stringstream buf; + buf << "SchemaParser::parse: Empty rule list. Didn't find any rule in the provided schema file." << std::endl; + throw Error(buf.str(), parser_.line()); } } //---------------------------------------------------------------------------------------------------------------------- +SchemaParser::Error::Error(const std::string& what, size_t line) : eckit::StreamParser::Error(what, line) { + if (line) { + std::ostringstream oss; + oss << "Line: " << line << " " << what; + reason(oss.str()); + } +} + } // namespace fdb5 diff --git a/src/fdb5/rules/SchemaParser.h b/src/fdb5/rules/SchemaParser.h index 034f37dae..6c91d2061 100644 --- a/src/fdb5/rules/SchemaParser.h +++ b/src/fdb5/rules/SchemaParser.h @@ -14,6 +14,7 @@ #ifndef fdb5_SchemaParser_h #define fdb5_SchemaParser_h +#include #include #include #include @@ -27,16 +28,26 @@ namespace fdb5 { //---------------------------------------------------------------------------------------------------------------------- -class SchemaParser : public eckit::StreamParser { +class SchemaParser { public: // methods - SchemaParser(std::istream& in) : StreamParser(in, true) {} + class Error : public eckit::StreamParser::Error { + public: + + Error(const std::string& what, size_t line = 0); + }; + + SchemaParser(std::istream& in) : parser_(eckit::StreamParser(in)) {} void parse(RuleList& result, TypesRegistry& registry); private: // methods + bool isAscii(char c); + + char peek(bool spaces = false); + std::string parseIdent(bool value, bool emptyOK); std::unique_ptr parseDatum(); @@ -48,6 +59,10 @@ class SchemaParser : public eckit::StreamParser { std::unique_ptr parsePredicate(eckit::StringDict& types); void parseTypes(eckit::StringDict& types); + +private: // members + + eckit::StreamParser parser_; }; //---------------------------------------------------------------------------------------------------------------------- diff --git a/tests/fdb/CMakeLists.txt b/tests/fdb/CMakeLists.txt index e045ed513..bed0e5e68 100644 --- a/tests/fdb/CMakeLists.txt +++ b/tests/fdb/CMakeLists.txt @@ -70,6 +70,7 @@ add_subdirectory( api ) add_subdirectory( database ) add_subdirectory( type ) add_subdirectory( daos ) +add_subdirectory( parsing ) if (HAVE_FDB_BUILD_TOOLS) add_subdirectory( timespan ) diff --git a/tests/fdb/parsing/CMakeLists.txt b/tests/fdb/parsing/CMakeLists.txt new file mode 100644 index 000000000..67b8bf13f --- /dev/null +++ b/tests/fdb/parsing/CMakeLists.txt @@ -0,0 +1,13 @@ +set( _test_environment ${test_environment}) + +list( APPEND _test_environment + FDB_HOME=${PROJECT_BINARY_DIR} ) + +file(COPY data DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + +ecbuild_add_test( TARGET fdb_test_schema_parsing + SOURCES + test_schema_parsing.cc + LIBS + fdb5 + ENVIRONMENT "${_test_environment}") diff --git a/tests/fdb/parsing/data/broken_schema_no_rule b/tests/fdb/parsing/data/broken_schema_no_rule new file mode 100644 index 000000000..ee9e605b3 --- /dev/null +++ b/tests/fdb/parsing/data/broken_schema_no_rule @@ -0,0 +1,6 @@ +param: Param; +step: Step; +date: Date; +levelist: Double; +expver: Expver; +time: Time; diff --git a/tests/fdb/parsing/data/broken_types_missing_semicolon b/tests/fdb/parsing/data/broken_types_missing_semicolon new file mode 100644 index 000000000..067c64a02 --- /dev/null +++ b/tests/fdb/parsing/data/broken_types_missing_semicolon @@ -0,0 +1,6 @@ +param: Param; +step: Step; +date: Date; +levelist: Double; +expver: Expver +time: Time; diff --git a/tests/fdb/parsing/data/broken_types_no_name b/tests/fdb/parsing/data/broken_types_no_name new file mode 100644 index 000000000..d6c07bdca --- /dev/null +++ b/tests/fdb/parsing/data/broken_types_no_name @@ -0,0 +1,6 @@ +param: Param; +: Step; +date: Date; +levelist: Double; +expver: Expver +time: Time; diff --git a/tests/fdb/parsing/data/broken_types_no_type b/tests/fdb/parsing/data/broken_types_no_type new file mode 100644 index 000000000..836d1972a --- /dev/null +++ b/tests/fdb/parsing/data/broken_types_no_type @@ -0,0 +1,6 @@ +param: Param; +step: ; +date: Date; +levelist: Double; +expver: Expver +time: Time; diff --git a/tests/fdb/parsing/data/non_ascii_chars b/tests/fdb/parsing/data/non_ascii_chars new file mode 100644 index 000000000..6ccef48f3 --- /dev/null +++ b/tests/fdb/parsing/data/non_ascii_chars @@ -0,0 +1,11 @@ +param: Param; +step: Step; +date: Date; +levelist: Double; +expver: Expver; +time: Time; + +[ class=ce, expver, stream=efas/wfas, date, time, model, domain +   [ type, levtype, origin, anoffset? +     [ step, number?, levelist?, param ]] +] diff --git a/tests/fdb/parsing/data/schema b/tests/fdb/parsing/data/schema new file mode 100644 index 000000000..d2dcc812b --- /dev/null +++ b/tests/fdb/parsing/data/schema @@ -0,0 +1,11 @@ +param: Param; +step: Step; +date: Date; +levelist: Double; +expver: Expver; +time: Time; + +[ class, expver=xxxx, stream=oper, date, time, domain? + [ type, levtype + [ step, levelist?, param ]] +] \ No newline at end of file diff --git a/tests/fdb/parsing/data/schema_incomplete_rule b/tests/fdb/parsing/data/schema_incomplete_rule new file mode 100644 index 000000000..087e188b3 --- /dev/null +++ b/tests/fdb/parsing/data/schema_incomplete_rule @@ -0,0 +1,10 @@ +param: Param; +step: Step; +date: Date; +levelist: Double; +expver: Expver; +time: Time; + +[ class, expver=xxxx, stream=oper, date, time, domain? + [ type, levtype + [ step, levelist?, param ]] diff --git a/tests/fdb/parsing/test_schema_parsing.cc b/tests/fdb/parsing/test_schema_parsing.cc new file mode 100644 index 000000000..0cf2f88f0 --- /dev/null +++ b/tests/fdb/parsing/test_schema_parsing.cc @@ -0,0 +1,106 @@ + +#include "eckit/testing/Test.h" +#include "fdb5/rules/Rule.h" +#include "fdb5/rules/Schema.h" +#include "fdb5/rules/SchemaParser.h" + +namespace { + +//---------------------------------------------------------------------------------------------------------------------- + +CASE("Broken schema - Non-existing schema file") { + EXPECT_THROWS_AS(fdb5::Schema("./data/non-existing"), eckit::CantOpenFile); + try { + fdb5::Schema schema("./data/non-existing"); + } + catch (eckit::Exception& ex) { + std::cout << ex.what() << std::endl; + EXPECT(std::string(ex.what()).find("Cannot open") != std::string::npos); + } +} + +CASE("Broken schema - No Rule") { + EXPECT_THROWS(fdb5::Schema schema("./data/broken_schema_no_rule")); + try { + fdb5::Schema("./data/broken_schema_no_rule"); + } + catch (fdb5::SchemaParser::Error& ex) { + std::cout << ex.what() << std::endl; + EXPECT(std::string(ex.what()).find("SchemaParser::parse: Empty rule list") != std::string::npos); + } +} + +CASE("Broken schema - Missing semicolon types") { + EXPECT_THROWS_AS(fdb5::Schema("./data/broken_types_missing_semicolon"), fdb5::SchemaParser::Error); + try { + fdb5::Schema("./data/broken_types_missing_semicolon"); + } + catch (fdb5::SchemaParser::Error& ex) { + std::cout << ex.what() << std::endl; + EXPECT(std::string(ex.what()).find("SchemaParser::parseTypes") != std::string::npos); + } +} + +CASE("Broken schema - No type name") { + EXPECT_THROWS_AS(fdb5::Schema("./data/broken_types_no_name"), fdb5::SchemaParser::Error); + try { + fdb5::Schema("./data/broken_types_no_name"); + } + catch (fdb5::SchemaParser::Error& ex) { + std::cout << ex.what() << std::endl; + EXPECT(std::string(ex.what()).find("Error parsing rules") != std::string::npos); + } +} + +CASE("Broken schema - No type type") { + EXPECT_THROWS_AS(fdb5::Schema("./data/broken_types_no_type"), fdb5::SchemaParser::Error); + try { + fdb5::Schema("./data/broken_types_no_type"); + } + catch (fdb5::SchemaParser::Error& ex) { + std::cout << ex.what() << std::endl; + EXPECT(std::string(ex.what()).find("SchemaParser::parseTypes") != std::string::npos); + } +} + +CASE("Broken schema - Missing closing bracket") { + + EXPECT_THROWS_AS(fdb5::Schema("./data/schema_incomplete_rule"), fdb5::SchemaParser::Error); + try { + fdb5::Schema("./data/schema_incomplete_rule"); + } + catch (fdb5::SchemaParser::Error& ex) { + std::cout << ex.what() << std::endl; + EXPECT(std::string(ex.what()).find("SchemaParser::parseDatabase") != std::string::npos); + } +} + +CASE("Broken schema - Non-ASCII chars") { + + EXPECT_THROWS_AS(fdb5::Schema("./data/non_ascii_chars"), fdb5::SchemaParser::Error); + try { + fdb5::Schema("./data/non_ascii_chars"); + } + catch (fdb5::SchemaParser::Error& ex) { + std::cout << ex.what() << std::endl; + EXPECT(std::string(ex.what()).find("non-ASCII") != std::string::npos); + } +} + + +CASE("Correct schema - Production Schema") { + + EXPECT_NO_THROW(fdb5::Schema("./data/schema")); +} + + +//---------------------------------------------------------------------------------------------------------------------- + +} // anonymous namespace + +int main(int argc, char** argv) { + + eckit::Log::info() << ::getenv("FDB_HOME") << std::endl; + + return ::eckit::testing::run_tests(argc, argv); +}