@@ -91,6 +91,7 @@ final class XmlSuite extends AnyFunSuite with BeforeAndAfterAll {
// Paths of XML fixture files, each built by appending a file name to `resDir`.
private val whitespaceError = resDir + "whitespace_error.xml"
private val mapAttribute = resDir + "map-attribute.xml"
private val structWithOptChild = resDir + "struct_with_optional_child.xml"
// Fixture read by the "Manual schema with corrupt record field ..." test (issue #517).
private val manualSchemaCorruptRecord = resDir + "manual_schema_corrupt_record.xml"

// Element names for the books fixtures.
// NOTE(review): presumably passed as the rowTag / rootTag options elsewhere in the suite - confirm.
private val booksTag = "book"
private val booksRootTag = "books"
@@ -1316,6 +1317,57 @@ final class XmlSuite extends AnyFunSuite with BeforeAndAfterAll {
13161317 assert(df.selectExpr(" SIZE(Bar)" ).collect().head.getInt(0 ) === 2 )
13171318 }
13181319
test("Manual schema with corrupt record field works on permissive mode failure") {
  // See issue #517
  //
  // Reads the `manualSchemaCorruptRecord` fixture with a hand-written schema that
  // includes a `_corrupt_record` string column, then checks that the read succeeds and
  // that this column is non-null in the first collected row.
  // No `mode` option is set, so the reader's default parse mode applies
  // (NOTE(review): presumably PERMISSIVE, as the test name implies - confirm).

  // Column type shared by c20, c99 and c100: an array of structs, each holding a string
  // `_VALUE` field and an integer `_m` field.
  val valueWithM = ArrayType(StructType(List(
    StructField("_VALUE", StringType),
    StructField("_m", IntegerType))))

  val schema = StructType(List(
    StructField("_id", StringType),
    StructField("_space", StringType),
    StructField("c2", DoubleType),
    StructField("c3", StringType),
    StructField("c4", StringType),
    StructField("c5", StringType),
    StructField("c6", StringType),
    StructField("c7", StringType),
    StructField("c8", StringType),
    StructField("c9", DoubleType),
    StructField("c11", DoubleType),
    StructField("c20", valueWithM),
    StructField("c46", StringType),
    StructField("c76", StringType),
    StructField("c78", StringType),
    StructField("c85", DoubleType),
    StructField("c93", StringType),
    StructField("c95", StringType),
    StructField("c99", valueWithM),
    StructField("c100", valueWithM),
    StructField("c108", StringType),
    StructField("c192", DoubleType),
    StructField("c193", StringType),
    StructField("c194", StringType),
    StructField("c195", StringType),
    StructField("c196", StringType),
    StructField("c197", DoubleType),
    StructField("_corrupt_record", StringType)))

  // The schema is supplied explicitly, so inference is switched off.
  val df = spark.read
    .option("inferSchema", false)
    .option("rowTag", "row")
    .schema(schema)
    .xml(manualSchemaCorruptRecord)

  // Assert it works at all: collecting must not throw, and the first row must carry
  // a value in the corrupt-record column.
  val firstRow = df.collect().head
  assert(firstRow.getAs[String]("_corrupt_record") !== null)
}
1370+
13191371 private def getLines (path : Path ): Seq [String ] = {
13201372 val source = Source .fromFile(path.toFile)
13211373 try {
0 commit comments