Fix #11236 - CsvParser and ScheduleFile handling of edge cases to avoid crash#11249
Fix #11236 - CsvParser and ScheduleFile handling of edge cases to avoid crash#11249
Conversation
| std::vector<std::pair<std::string, bool>> const &CsvParser::warnings() | ||
| { | ||
| return warnings_; | ||
| } | ||
|
|
||
| bool CsvParser::hasWarnings() | ||
| { | ||
| return !warnings_.empty(); | ||
| } |
There was a problem hiding this comment.
Putting back warnings in CsvParser
| if (column_num < num_columns) { | ||
| columns.at(column_num).push_back(parse_value(csv, index)); | ||
| } else { | ||
| // Just parse and ignore the value | ||
| parse_value(csv, index); | ||
| has_extra_columns = true; | ||
| } |
There was a problem hiding this comment.
Avoid crashing here if you end up finding more values on the row than the number of columns we've determined by parsing the first data row (after header if present)
We setup the has_extra_columns to true here, so we can register a warning
| if (has_extra_columns) { | ||
| warnings_.emplace_back( | ||
| fmt::format("CsvParser - Line {} - Expected {} columns, got {}. Ignored extra columns. Error in following line.", | ||
| this_cur_line_num, | ||
| num_columns, | ||
| parsed_values), | ||
| false); | ||
| warnings_.emplace_back(getCurrentLine(), true); | ||
| } else if (parsed_values != num_columns) { | ||
| success = false; | ||
|
|
||
| size_t found_index = csv.find_first_of("\r\n", this_beginning_of_line_index); | ||
| std::string line; | ||
| if (found_index != std::string::npos) { | ||
| line = csv.substr(this_beginning_of_line_index, found_index - this_beginning_of_line_index); | ||
| } | ||
| errors_.emplace_back( | ||
| fmt::format( | ||
| "CsvParser - Line {} - Expected {} columns, got {}. Error in following line.", this_cur_line_num, num_columns, parsed_values), | ||
| false); | ||
| errors_.emplace_back(line, true); | ||
| errors_.emplace_back(getCurrentLine(), true); |
There was a problem hiding this comment.
When we reach the end of the line, we issue a warning if has_extra_columns, otherwise an error if the resulting number of parsed values is not the expected one.
| } else if (token == Token::DELIMITER) { | ||
| next_token(csv, index); | ||
| token = look_ahead(csv, index); | ||
| if (token == Token::DELIMITER) { | ||
| // Two delimiters in a row means a blank value | ||
| // This is not yet an error, in case the user is not using this column... It will crash later if they do try to cast it to a number | ||
| size_t const next_col = column_num + 1; | ||
| if (next_col < num_columns) { | ||
| // Push a nan for blank value | ||
| columns.at(next_col).push_back(json::value_t::null); | ||
| warnings_.emplace_back(fmt::format("CsvParser - Line {} Column {} - Blank value found, setting to null. Error in following line.", | ||
| this_cur_line_num, | ||
| next_col + 1), | ||
| false); | ||
| warnings_.emplace_back(getCurrentLine(), true); | ||
| } else { | ||
| has_extra_columns = true; | ||
| } | ||
| ++parsed_values; | ||
| } |
There was a problem hiding this comment.
In the Delimiter case, we scan ahead to check if another delimiter is coming up, in which case we emplace a null.
This is a warning, because unless you actually try to use that column, it's fine.
| for (const auto &[warning, isContinued] : csvParser.warnings()) { | ||
| if (isContinued) { | ||
| ShowContinueError(state, warning); | ||
| } else { | ||
| ShowWarningError(state, warning); | ||
| } | ||
| } |
There was a problem hiding this comment.
Schedule:File:Shading, print the new warnings if any.
| TEST_F(EnergyPlusFixture, ScheduleFile_Blanks_OnlyWarnIfNotUsingThatColumn) | ||
| { | ||
| // On the third line (second data record after header), there is a blank in the second column | ||
| // Hour,Value1,Value2 | ||
| // 0,0.01,0.01 | ||
| // 1,,0.33 | ||
| // 2,0.37,0.37 | ||
| fs::path scheduleFile = FileSystem::makeNativePath(configured_source_directory() / "tst/EnergyPlus/unit/Resources/schedule_file_with_blank.csv"); | ||
|
|
||
| // Here I am requested a column that is properly filled, and it should work fine |
There was a problem hiding this comment.
New test. it works if I have a blank value but I don't try to use that column specifically.
| TEST_F(EnergyPlusFixture, ScheduleFile_MissingValue) | ||
| { | ||
| // On the third line (second data record after header), there is a blank in the second column, no extra delimiter. | ||
| // Hour,Value1 | ||
| // 0,0.01 | ||
| // 1, | ||
| // 2,0.37 | ||
| fs::path scheduleFile = | ||
| FileSystem::makeNativePath(configured_source_directory() / "tst/EnergyPlus/unit/Resources/schedule_file_with_missing_value.csv"); | ||
|
|
||
| // In this case, the csvParser registers an error and that one is thrown |
There was a problem hiding this comment.
I have a completely missing value here, so I throw
| TEST_F(EnergyPlusFixture, ScheduleFile_ExtraColumn) | ||
| { | ||
| // On the third line (second data record after header), there is an extra column | ||
|
|
||
| // Hour,Value1, | ||
| // 0,0.01, | ||
| // 1,0.04,0.33 | ||
| // 2,0.37, | ||
| fs::path scheduleFile = | ||
| FileSystem::makeNativePath(configured_source_directory() / "tst/EnergyPlus/unit/Resources/schedule_file_with_extra_column.csv"); | ||
|
|
||
| // I am requesting column 2, so it should warn but work |
There was a problem hiding this comment.
I have an extra column, I warn and move on since it's not being used.
| TEST_F(EnergyPlusFixture, ScheduleFile_RequestNonExistingColumn) | ||
| { | ||
|
|
||
| // This a properly formed CSV file with two columns | ||
| // Datetime,Value | ||
| // 1/1 01:00:00,1.0 | ||
| // 1/1 02:00:00,1.0 | ||
| // [...] | ||
| // 12/31 23:00:00,0.0 | ||
| // 12/31 24:00:00,0.0 | ||
|
|
||
| fs::path scheduleFile = FileSystem::makeNativePath(configured_source_directory() / "tst/EnergyPlus/unit/Resources/schedule_file1.csv"); | ||
|
|
||
| // I am requesting column 100, so it should NOT work | ||
|
|
||
| std::string const idf_objects = delimited_string({ | ||
| "ScheduleTypeLimits,", | ||
| " Any Number; !- Name", | ||
|
|
||
| "Schedule:File,", | ||
| " Test1, !- Name", | ||
| " Any Number, !- Schedule Type Limits Name", | ||
| " " + scheduleFile.string() + ", !- File Name", | ||
| " 100, !- Column Number", | ||
| " 1, !- Rows to Skip at Top", | ||
| " 8760, !- Number of Hours of Data", | ||
| " Comma, !- Column Separator", | ||
| " No, !- Interpolate to Timestep", | ||
| " 60, !- Minutes per item", | ||
| " Yes; !- Adjust Schedule for Daylight Savings", | ||
| }); | ||
| ASSERT_TRUE(process_idf(idf_objects)); | ||
|
|
||
| auto &s_glob = state->dataGlobal; | ||
|
|
||
| s_glob->TimeStepsInHour = 4; // must initialize this to get schedules initialized | ||
| s_glob->MinutesInTimeStep = 15; // must initialize this to get schedules initialized | ||
| s_glob->TimeStepZone = 0.25; | ||
| s_glob->TimeStepZoneSec = s_glob->TimeStepZone * Constant::rSecsInHour; | ||
| state->dataEnvrn->CurrentYearIsLeapYear = false; | ||
|
|
||
| EXPECT_THROW(state->init_state(*state), EnergyPlus::FatalError); // read schedules | ||
|
|
||
| const std::string expected_error = delimited_string({ | ||
| " ** Severe ** ProcessScheduleInput: Schedule:File = TEST1", | ||
| " ** ~~~ ** Requested column number 100, but found only 2 columns.", |
There was a problem hiding this comment.
Requesting a column that doesn't exist: fatal
| TEST_F(EnergyPlusFixture, ShadowCalculation_CSV_broken) | ||
| { | ||
| // This file has one more header than data columns | ||
| // Surface Name,EAST SIDE TREE,WEST SIDE TREE | ||
| // 01/01 00:15,0, | ||
| // 01/01 00:30,0, | ||
|
|
||
| // a CSV exported with the extra '()' at the end (22.2.0 and below) should still be importable in E+ without crashing | ||
| const fs::path scheduleFile = configured_source_directory() / "tst/EnergyPlus/unit/Resources/shading_data_2220_broken.csv"; | ||
|
|
||
| std::string const idf_objects = delimited_string({ | ||
| "Schedule:File:Shading,", | ||
| " " + scheduleFile.string() + "; !- Name of File", | ||
| }); | ||
| ASSERT_TRUE(process_idf(idf_objects)); | ||
|
|
||
| auto &s_glob = state->dataGlobal; | ||
|
|
||
| s_glob->TimeStepsInHour = 4; // must initialize this to get schedules initialized | ||
| s_glob->MinutesInTimeStep = 15; // must initialize this to get schedules initialized | ||
| s_glob->TimeStepZone = 0.25; | ||
| s_glob->TimeStepZoneSec = s_glob->TimeStepZone * Constant::rSecsInHour; | ||
| state->dataEnvrn->CurrentYearIsLeapYear = false; | ||
|
|
||
| EXPECT_THROW(state->init_state(*state), EnergyPlus::FatalError); // read schedules | ||
|
|
||
| const std::string expected_error = delimited_string({ | ||
| " ** Severe ** ProcessScheduleInput: Schedule:File:Shading = shading_data_2220_broken.csv", | ||
| " ** ~~~ ** For header 'WEST SIDE TREE', Requested column number 3, but found only 2 columns.", | ||
| " ** ~~~ ** Error Occurred in " + scheduleFile.string(), | ||
| " ** Fatal ** Program terminates due to previous condition.", | ||
| " ...Summary of Errors that led to program termination:", | ||
| " ..... Reference severe error count=1", | ||
| " ..... Last severe error=ProcessScheduleInput: Schedule:File:Shading = shading_data_2220_broken.csv", | ||
| }); | ||
| compare_err_stream(expected_error); | ||
| } |
There was a problem hiding this comment.
Test edge case for Schedule:File:Shading where you have more headers than data columns
|
Hum, this is conflicted. Going to rebase, hopefully I don't botch it |
aee1647 to
4668403
Compare
…ull values (two consecutive delimiters) These situations are not yet fatal, until you try to use a column with a null value or a column that is past the parsed one.
…ber of columns of values
4668403 to
fe3fe57
Compare
Pull request overview
Description of the purpose of this PR
Pull Request Author
Reviewer