Skip to content

Commit f37f467

Browse files
committed
control chars
1 parent 76639b7 commit f37f467

5 files changed

Lines changed: 53 additions & 41 deletions

File tree

common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,7 +1063,7 @@ public static class IntWrapper implements Serializable {
10631063
}
10641064

10651065
/**
1066-
* Parses this UTF8String to long.
1066+
* Parses this UTF8String (trimmed if needed) to long.
10671067
*
10681068
* Note that, in this method we accumulate the result in negative format, and convert it to
10691069
* positive format at the end, if this string is not started with '-'. This is because min value
@@ -1077,20 +1077,17 @@ public static class IntWrapper implements Serializable {
10771077
* @return true if the parsing was successful else false
10781078
*/
10791079
public boolean toLong(LongWrapper toLongResult) {
1080-
if (this.numBytes == 0) return false;
10811080
int offset = 0;
1082-
while (offset < this.numBytes && getByte(offset) == ' ') offset++;
1081+
while (offset < this.numBytes && getByte(offset) <= ' ') offset++;
10831082
if (offset == this.numBytes) return false;
10841083

10851084
int end = this.numBytes - 1;
1086-
while (end > offset && getByte(end) == ' ') end--;
1087-
1088-
int numBytes = end - offset + 1;
1085+
while (end > offset && getByte(end) <= ' ') end--;
10891086

10901087
byte b = getByte(offset);
10911088
final boolean negative = b == '-';
10921089
if (negative || b == '+') {
1093-
if (numBytes == 1) {
1090+
if (end - offset == 0) {
10941091
return false;
10951092
}
10961093
offset++;
@@ -1156,7 +1153,7 @@ public boolean toLong(LongWrapper toLongResult) {
11561153
}
11571154

11581155
/**
1159-
* Parses this UTF8String to int.
1156+
* Parses this UTF8String (trimmed if needed) to int.
11601157
*
11611158
* Note that, in this method we accumulate the result in negative format, and convert it to
11621159
* positive format at the end, if this string is not started with '-'. This is because min value
@@ -1173,20 +1170,17 @@ public boolean toLong(LongWrapper toLongResult) {
11731170
* @return true if the parsing was successful else false
11741171
*/
11751172
public boolean toInt(IntWrapper intWrapper) {
1176-
if (this.numBytes == 0) return false;
11771173
int offset = 0;
1178-
while (offset < this.numBytes && getByte(offset) == ' ') offset++;
1174+
while (offset < this.numBytes && getByte(offset) <= ' ') offset++;
11791175
if (offset == this.numBytes) return false;
11801176

11811177
int end = this.numBytes - 1;
1182-
while (end > offset && getByte(end) == ' ') end--;
1183-
1184-
int numBytes = end - offset + 1;
1178+
while (end > offset && getByte(end) <= ' ') end--;
11851179

11861180
byte b = getByte(offset);
11871181
final boolean negative = b == '-';
11881182
if (negative || b == '+') {
1189-
if (numBytes == 1) {
1183+
if (end - offset == 0) {
11901184
return false;
11911185
}
11921186
offset++;

sql/core/src/test/resources/sql-tests/inputs/cast.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ SELECT CAST(interval 3 month 1 hour AS string);
6363

6464
-- trim string before cast to numeric
6565
select cast(' 1' as tinyint);
66+
select cast(' 1\t' as tinyint);
6667
select cast(' 1' as smallint);
6768
select cast(' 1' as INT);
6869
select cast(' 1' as bigint);

sql/core/src/test/resources/sql-tests/inputs/comparator.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ select x'00' < x'ff';
44

55
-- trim string to numeric
66
select '1 ' = 1Y;
7+
select '\t1 ' = 1Y;
78
select '1 ' = 1S;
89
select '1 ' = 1;
910
select ' 1' = 1L;

sql/core/src/test/resources/sql-tests/results/cast.sql.out

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 42
2+
-- Number of queries: 43
33

44

55
-- !query 0
@@ -298,48 +298,56 @@ struct<CAST( 1 AS TINYINT):tinyint>
298298

299299

300300
-- !query 36
301-
select cast(' 1' as smallint)
301+
select cast(' 1\t' as tinyint)
302302
-- !query 36 schema
303-
struct<CAST( 1 AS SMALLINT):smallint>
303+
struct<CAST( 1 AS TINYINT):tinyint>
304304
-- !query 36 output
305305
1
306306

307307

308308
-- !query 37
309-
select cast(' 1' as INT)
309+
select cast(' 1' as smallint)
310310
-- !query 37 schema
311-
struct<CAST( 1 AS INT):int>
311+
struct<CAST( 1 AS SMALLINT):smallint>
312312
-- !query 37 output
313313
1
314314

315315

316316
-- !query 38
317-
select cast(' 1' as bigint)
317+
select cast(' 1' as INT)
318318
-- !query 38 schema
319-
struct<CAST( 1 AS BIGINT):bigint>
319+
struct<CAST( 1 AS INT):int>
320320
-- !query 38 output
321321
1
322322

323323

324324
-- !query 39
325-
select cast(' 1' as float)
325+
select cast(' 1' as bigint)
326326
-- !query 39 schema
327-
struct<CAST( 1 AS FLOAT):float>
327+
struct<CAST( 1 AS BIGINT):bigint>
328328
-- !query 39 output
329-
1.0
329+
1
330330

331331

332332
-- !query 40
333-
select cast(' 1 ' as DOUBLE)
333+
select cast(' 1' as float)
334334
-- !query 40 schema
335-
struct<CAST( 1 AS DOUBLE):double>
335+
struct<CAST( 1 AS FLOAT):float>
336336
-- !query 40 output
337337
1.0
338338

339339

340340
-- !query 41
341-
select cast('1.0 ' as DEC)
341+
select cast(' 1 ' as DOUBLE)
342342
-- !query 41 schema
343-
struct<CAST(1.0 AS DECIMAL(10,0)):decimal(10,0)>
343+
struct<CAST( 1 AS DOUBLE):double>
344344
-- !query 41 output
345+
1.0
346+
347+
348+
-- !query 42
349+
select cast('1.0 ' as DEC)
350+
-- !query 42 schema
351+
struct<CAST(1.0 AS DECIMAL(10,0)):decimal(10,0)>
352+
-- !query 42 output
345353
NULL

sql/core/src/test/resources/sql-tests/results/comparator.sql.out

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 9
2+
-- Number of queries: 10
33

44

55
-- !query 0
@@ -27,48 +27,56 @@ true
2727

2828

2929
-- !query 3
30-
select '1 ' = 1S
30+
select '\t1 ' = 1Y
3131
-- !query 3 schema
32-
struct<(CAST(1 AS SMALLINT) = 1):boolean>
32+
struct<(CAST( 1 AS TINYINT) = 1):boolean>
3333
-- !query 3 output
3434
true
3535

3636

3737
-- !query 4
38-
select '1 ' = 1
38+
select '1 ' = 1S
3939
-- !query 4 schema
40-
struct<(CAST(1 AS INT) = 1):boolean>
40+
struct<(CAST(1 AS SMALLINT) = 1):boolean>
4141
-- !query 4 output
4242
true
4343

4444

4545
-- !query 5
46-
select ' 1' = 1L
46+
select '1 ' = 1
4747
-- !query 5 schema
48-
struct<(CAST( 1 AS BIGINT) = 1):boolean>
48+
struct<(CAST(1 AS INT) = 1):boolean>
4949
-- !query 5 output
5050
true
5151

5252

5353
-- !query 6
54-
select ' 1' = cast(1.0 as float)
54+
select ' 1' = 1L
5555
-- !query 6 schema
56-
struct<(CAST( 1 AS FLOAT) = CAST(1.0 AS FLOAT)):boolean>
56+
struct<(CAST( 1 AS BIGINT) = 1):boolean>
5757
-- !query 6 output
5858
true
5959

6060

6161
-- !query 7
62-
select ' 1.0 ' = 1.0D
62+
select ' 1' = cast(1.0 as float)
6363
-- !query 7 schema
64-
struct<(CAST( 1.0 AS DOUBLE) = 1.0):boolean>
64+
struct<(CAST( 1 AS FLOAT) = CAST(1.0 AS FLOAT)):boolean>
6565
-- !query 7 output
6666
true
6767

6868

6969
-- !query 8
70-
select ' 1.0 ' = 1.0BD
70+
select ' 1.0 ' = 1.0D
7171
-- !query 8 schema
72-
struct<(CAST( 1.0 AS DOUBLE) = CAST(1.0 AS DOUBLE)):boolean>
72+
struct<(CAST( 1.0 AS DOUBLE) = 1.0):boolean>
7373
-- !query 8 output
7474
true
75+
76+
77+
-- !query 9
78+
select ' 1.0 ' = 1.0BD
79+
-- !query 9 schema
80+
struct<(CAST( 1.0 AS DOUBLE) = CAST(1.0 AS DOUBLE)):boolean>
81+
-- !query 9 output
82+
true

0 commit comments

Comments
 (0)