Skip to content

Commit 89f7f45

Browse files
committed
Optimize trimLeft, trimRight too
1 parent 392b7b3 commit 89f7f45

1 file changed

Lines changed: 26 additions & 15 deletions

File tree

common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java

Lines changed: 26 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -534,13 +534,14 @@ public UTF8String trim() {
534534
// skip all of the space (0x20) in the left side
535535
while (s < this.numBytes && getByte(s) == 0x20) s++;
536536
if (s == this.numBytes) {
537-
// empty string
537+
// Everything trimmed
538538
return EMPTY_UTF8;
539539
}
540540
// skip all of the space (0x20) in the right side
541541
int e = this.numBytes - 1;
542542
while (e > s && getByte(e) == 0x20) e--;
543543
if (s == 0 && e == numBytes - 1) {
544+
// Nothing trimmed
544545
return this;
545546
}
546547
return copyUTF8String(s, e);
@@ -565,12 +566,15 @@ public UTF8String trimLeft() {
565566
int s = 0;
566567
// skip all of the space (0x20) in the left side
567568
while (s < this.numBytes && getByte(s) == 0x20) s++;
569+
if (s == 0) {
570+
// Nothing trimmed
571+
return this;
572+
}
568573
if (s == this.numBytes) {
569-
// empty string
574+
// Everything trimmed
570575
return EMPTY_UTF8;
571-
} else {
572-
return copyUTF8String(s, this.numBytes - 1);
573576
}
577+
return copyUTF8String(s, this.numBytes - 1);
574578
}
575579

576580
/**
@@ -600,26 +604,30 @@ public UTF8String trimLeft(UTF8String trimString) {
600604
}
601605
srchIdx += searchCharBytes;
602606
}
603-
607+
if (srchIdx == 0) {
608+
// Nothing trimmed
609+
return this;
610+
}
604611
if (trimIdx >= numBytes) {
605-
// empty string
612+
// Everything trimmed
606613
return EMPTY_UTF8;
607-
} else {
608-
return copyUTF8String(trimIdx, numBytes - 1);
609614
}
615+
return copyUTF8String(trimIdx, numBytes - 1);
610616
}
611617

612618
public UTF8String trimRight() {
613619
int e = numBytes - 1;
614620
// skip all of the space (0x20) in the right side
615621
while (e >= 0 && getByte(e) == 0x20) e--;
616-
622+
if (e == numBytes - 1) {
623+
// Nothing trimmed
624+
return this;
625+
}
617626
if (e < 0) {
618-
// empty string
627+
// Everything trimmed
619628
return EMPTY_UTF8;
620-
} else {
621-
return copyUTF8String(0, e);
622629
}
630+
return copyUTF8String(0, e);
623631
}
624632

625633
/**
@@ -661,12 +669,15 @@ public UTF8String trimRight(UTF8String trimString) {
661669
numChars --;
662670
}
663671

672+
if (trimEnd == numBytes - 1) {
673+
// Nothing trimmed
674+
return this;
675+
}
664676
if (trimEnd < 0) {
665-
// empty string
677+
// Everything trimmed
666678
return EMPTY_UTF8;
667-
} else {
668-
return copyUTF8String(0, trimEnd);
669679
}
680+
return copyUTF8String(0, trimEnd);
670681
}
671682

672683
public UTF8String reverse() {

0 commit comments

Comments
 (0)