Skip to content

Commit a8409fb

Browse files
committed
* make CharSetUtility.CheckCharset methods public
* fix GetBom BOM check by adjusting list order
* Add tests for CheckCharset with byte array input - indirectly tests above
* Xml.cs supports ignoring segment groups but not the nested segments
1 parent 09f85a4 commit a8409fb

File tree

6 files changed

+144
-30
lines changed

6 files changed

+144
-30
lines changed

src/NHapi.Base/Llp/CharSetUtility.cs

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,53 @@ public static byte[] WithoutBom(byte[] messageBytes)
4343
return messageBytes.Skip(bom.BomBytes.Length).ToArray();
4444
}
4545

46-
internal static Encoding CheckCharset(byte[] message, Encoding defaultEncoding = null)
46+
/// <summary>
47+
/// Attempt to determine the HL7 character set (<see cref="Encoding"/>) of the HL7 message.
48+
/// </summary>
49+
/// <param name="message">HL7 message as bytes.</param>
50+
/// <returns>The detected Hl7 character set, if none detected defaults to ASCII (us-ascii).</returns>
51+
public static Encoding CheckCharset(byte[] message)
4752
{
53+
return CheckCharset(message, Encoding.ASCII);
54+
}
55+
56+
/// <summary>
57+
/// Attempt to determine the HL7 character set (<see cref="Encoding"/>) of the HL7 message.
58+
/// </summary>
59+
/// <param name="message">HL7 message as <see cref="T:byte[]"/>.</param>
60+
/// <param name="encoding">HL7 character set to be used should one not be detected.</param>
61+
/// <returns>The detected Hl7 character set, if none detected defaults to the one provided by the
62+
/// <paramref name="encoding"/> parameter.
63+
/// </returns>
64+
public static Encoding CheckCharset(byte[] message, Encoding encoding)
65+
{
66+
encoding ??= Encoding.ASCII;
4867
var messageFromBytes = Bom.SkipBom(message);
4968

50-
return CheckCharset(messageFromBytes, defaultEncoding);
69+
return CheckCharset(messageFromBytes, encoding);
70+
}
71+
72+
/// <summary>
73+
/// Attempt to determine the HL7 character set (<see cref="Encoding"/>) of the HL7 message.
74+
/// </summary>
75+
/// <param name="message">HL7 message as a <see cref="string"/>.</param>
76+
/// <returns>The detected Hl7 character set, if none detected defaults to ASCII (us-ascii).</returns>
77+
public static Encoding CheckCharset(string message)
78+
{
79+
return CheckCharset(message, Encoding.ASCII);
5180
}
5281

53-
internal static Encoding CheckCharset(string message, Encoding defaultEncoding = null)
82+
/// <summary>
83+
/// Attempt to determine the HL7 character set (<see cref="Encoding"/>) of the HL7 message.
84+
/// </summary>
85+
/// <param name="message">HL7 message as a <see cref="string"/>.</param>
86+
/// <param name="encoding">HL7 character set to be used should one not be detected.</param>
87+
/// <returns>The detected Hl7 character set, if none detected defaults to the one provided by the
88+
/// <paramref name="encoding"/> parameter.
89+
/// </returns>
90+
public static Encoding CheckCharset(string message, Encoding encoding)
5491
{
55-
var encoding = defaultEncoding ?? Encoding.ASCII;
92+
encoding ??= Encoding.ASCII;
5693

5794
try
5895
{
@@ -101,14 +138,15 @@ private class Bom
101138
{
102139
private static readonly IList<Bom> KnownBoms = new List<Bom>
103140
{
104-
new Bom(new byte[] { 0xEF, 0xBB, 0xBF }, Encoding.UTF8),
105-
new Bom(new byte[] { 0xFF, 0xFE }, Encoding.Unicode), // UTF-16LE
106-
new Bom(new byte[] { 0xFE, 0xFF, 0xBF }, Encoding.BigEndianUnicode), // UTF-16BE
107141
new Bom(new byte[] { 0xFF, 0xFE, 0x00, 0x00 }, Encoding.UTF32), // UTF-32LE
108142
new Bom(new byte[] { 0x00, 0x00, 0xFE, 0xFF }, new UTF32Encoding(true, true)), // UTF-32BE
109-
new Bom(new byte[] { }, Encoding.ASCII),
143+
new Bom(new byte[] { 0xEF, 0xBB, 0xBF }, Encoding.UTF8), // Unicode (UTF-8)
144+
new Bom(new byte[] { 0xFE, 0xFF, 0xBF }, Encoding.BigEndianUnicode), // UTF-16BE
145+
new Bom(new byte[] { 0xFF, 0xFE }, Encoding.Unicode), // UTF-16LE
110146
};
111147

148+
private static readonly Bom DefaultBom = new Bom(new byte[] { }, Encoding.ASCII); // ASCII (us-ascii)
149+
112150
public Bom(byte[] bomBytes, Encoding encoding)
113151
{
114152
BomBytes = bomBytes;
@@ -122,9 +160,8 @@ public Bom(byte[] bomBytes, Encoding encoding)
122160
public static string SkipBom(byte[] messageBytes)
123161
{
124162
var bom = GetBom(messageBytes);
125-
var encoding = bom.Encoding;
126163
var messageBytesWithoutBom = messageBytes.Skip(bom.BomBytes.Length).ToArray();
127-
return encoding.GetString(messageBytesWithoutBom);
164+
return bom.Encoding.GetString(messageBytesWithoutBom);
128165
}
129166

130167
public static Bom GetBom(byte[] messageBytes)
@@ -143,7 +180,7 @@ public static Bom GetBom(byte[] messageBytes)
143180
}
144181
}
145182

146-
return KnownBoms[0];
183+
return DefaultBom;
147184
}
148185
}
149186
}

src/NHapi.Base/Llp/Hl7CharSets.cs

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -39,29 +39,29 @@ internal static class Hl7CharSets
3939
{
4040
private static readonly Dictionary<string, string> EncodingMap = new ()
4141
{
42-
{ "ASCII", Encoding.ASCII.BodyName }, // ASCII
43-
{ "8859/1", "iso-8859-1" }, // Western European (ISO)
44-
{ "8859/2", "iso-8859-2" }, // Central European (ISO)
45-
{ "8859/3", "iso-8859-3" }, // Latin 3 (ISO)
46-
{ "8859/4", "iso-8859-4" }, // Baltic (ISO)
47-
{ "8859/5", "iso-8859-5" }, // Cyrillic (ISO)
48-
{ "8859/6", "iso-8859-6" }, // Arabic (ISO)
49-
{ "8859/7", "iso-8859-7" }, // Greek (ISO)
50-
{ "8859/8", "iso-8859-8" }, // Hebrew (ISO-Visual)
51-
{ "8859/9", "iso-8859-9" }, // Turkish (ISO)
52-
{ "8859/15", "iso-8859-15" }, // Latin 9 (ISO)
42+
{ "ASCII", Encoding.ASCII.BodyName }, // ASCII (us-ascii)
43+
{ "8859/1", "iso-8859-1" }, // Western European (ISO)
44+
{ "8859/2", "iso-8859-2" }, // Central European (ISO)
45+
{ "8859/3", "iso-8859-3" }, // Latin 3 (ISO)
46+
{ "8859/4", "iso-8859-4" }, // Baltic (ISO)
47+
{ "8859/5", "iso-8859-5" }, // Cyrillic (ISO)
48+
{ "8859/6", "iso-8859-6" }, // Arabic (ISO)
49+
{ "8859/7", "iso-8859-7" }, // Greek (ISO)
50+
{ "8859/8", "iso-8859-8" }, // Hebrew (ISO-Visual)
51+
{ "8859/9", "iso-8859-9" }, // Turkish (ISO)
52+
{ "8859/15", "iso-8859-15" }, // Latin 9 (ISO)
5353
{ "ISO IR6", "ISO IR6" },
5454
{ "ISO IR14", "ISO IR14" },
5555
{ "ISO IR87", "ISO IR87" },
5656
{ "ISO IR159", "ISO IR159" },
57-
{ "GB 18030-2000", "gb18030" }, // Chinese Simplified (GB18030)
58-
{ "KS X 1001", "euc-kr" }, // Korean (EUC)
57+
{ "GB 18030-2000", "gb18030" }, // Chinese Simplified (GB18030)
58+
{ "KS X 1001", "euc-kr" }, // Korean (EUC)
5959
{ "CNS 11643-1992", "CNS 11643-1992" },
60-
{ "BIG-5", "big5" }, // Chinese Traditional (Big5)
61-
{ "UNICODE", Encoding.UTF8.BodyName },
62-
{ "UNICODE UTF-8", Encoding.UTF8.BodyName }, // Unicode (UTF-8)
63-
{ "UNICODE UTF-16", "utf-16" }, // Unicode
64-
{ "UNICODE UTF-32", Encoding.UTF32.BodyName }, // Unicode (UTF-32)
60+
{ "BIG-5", "big5" }, // Chinese Traditional (Big5)
61+
{ "UNICODE", Encoding.UTF8.BodyName }, // Unicode (UTF-8)
62+
{ "UNICODE UTF-8", Encoding.UTF8.BodyName }, // Unicode (UTF-8)
63+
{ "UNICODE UTF-16", Encoding.Unicode.BodyName }, // Unicode (UTF-16LE)
64+
{ "UNICODE UTF-32", Encoding.UTF32.BodyName }, // Unicode (UTF-32LE)
6565
};
6666

6767
/// <summary>

src/NHapi.Base/PreParser/Xml.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ private static string SearchForDatum(XDocument message, DatumPath datumPath)
151151

152152
var nameSpace = message.Root?.Name.Namespace;
153153
var search =
154-
message.Root?.Elements(nameSpace + segment)
154+
message.Root?.Descendants(nameSpace + segment)
155155
.ElementAt((int)datumPath.Get(1));
156156

157157
if (search?.HasElements == true)

tests/NHapi.Base.NUnit/Llp/CharSetUtilityTests.cs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
namespace NHapi.Base.NUnit.Llp
22
{
33
using System;
4+
using System.Collections.Generic;
5+
using System.Linq;
46
using System.Text;
57

68
using global::NUnit.Framework;
@@ -96,5 +98,61 @@ public void CheckCharset_WhenEncodingSupportByPlatform_ReturnsExpectedEncoding(s
9698
// Assert
9799
Assert.AreEqual(expectedDotnetEncoding, result.BodyName);
98100
}
101+
102+
[TestCase("ASCII", "us-ascii")] // ASCII
103+
[TestCase("8859/1", "iso-8859-1")] // Western European (ISO)
104+
[TestCase("8859/2", "iso-8859-2")] // Central European (ISO)
105+
[TestCase("8859/3", "iso-8859-3")] // Latin 3 (ISO)
106+
[TestCase("8859/4", "iso-8859-4")] // Baltic (ISO)
107+
[TestCase("8859/5", "iso-8859-5")] // Cyrillic (ISO)
108+
[TestCase("8859/6", "iso-8859-6")] // Arabic (ISO)
109+
[TestCase("8859/7", "iso-8859-7")] // Greek (ISO)
110+
[TestCase("8859/8", "iso-8859-8")] // Hebrew (ISO-Visual)
111+
[TestCase("8859/9", "iso-8859-9")] // Turkish (ISO)
112+
[TestCase("8859/15", "iso-8859-15")] // Latin 9 (ISO)
113+
#if NET6_0_OR_GREATER
114+
[TestCase("GB 18030-2000", "gb18030")] // Chinese Simplified (GB18030)
115+
#elif NETFRAMEWORK
116+
[TestCase("GB 18030-2000", "GB18030")] // Chinese Simplified (GB18030)
117+
#endif
118+
[TestCase("KS X 1001", "euc-kr")] // Korean (EUC)
119+
[TestCase("BIG-5", "big5")] // Chinese Traditional (Big5)
120+
[TestCase("UNICODE", "utf-8")]
121+
[TestCase("UNICODE UTF-8", "utf-8")] // Unicode (UTF-8)
122+
[TestCase("UNICODE UTF-16", "utf-16")] // Unicode
123+
[TestCase("UNICODE UTF-32", "utf-32")] // Unicode (UTF-32)
124+
public void CheckCharset_MessageIsBytes_WhenEncodingSupportByPlatform_ReturnsExpectedEncoding(string hl7CharSet, string expectedDotnetEncoding)
125+
{
126+
// Arrange
127+
var encoding = Encoding.GetEncoding(expectedDotnetEncoding);
128+
var bomBytes = GetBom(expectedDotnetEncoding);
129+
130+
var messageBytes = encoding.GetBytes(
131+
$"MSH|^~\\&|XPress Arrival||||200610120839||ORU^R01|EBzH1711114101206|P|2.3|||AL|||{hl7CharSet}\r"
132+
+ "PID|1||1711114||Appt^Test||19720501||||||||||||001020006\r"
133+
+ "ORC|||||F\r"
134+
+ "OBR|1|||ehipack^eHippa Acknowlegment|||200610120839|||||||||00002^eProvider^Electronic|||||||||F\r"
135+
+ "OBX|1||||STValue||||||F\r");
136+
137+
var messageBytesWithBom = bomBytes.Concat(messageBytes).ToArray();
138+
139+
// Act
140+
var result = CharSetUtility.CheckCharset(messageBytesWithBom);
141+
142+
// Assert
143+
Assert.AreEqual(expectedDotnetEncoding, result.BodyName);
144+
}
145+
146+
#pragma warning disable SA1201
147+
private static byte[] GetBom(string encoding) => encoding switch
148+
{
149+
"utf-8" => new byte[] { 0xEF, 0xBB, 0xBF },
150+
"utf-16" => new byte[] { 0xFF, 0xFE }, // UTF-16LE
151+
"utf-16BE" => new byte[] { 0xFE, 0xFF, 0xBF }, // UTF-16BE
152+
"utf-32" => new byte[] { 0xFF, 0xFE, 0x00, 0x00 }, // UTF-32LE
153+
"utf-32BE" => new byte[] { 0x00, 0x00, 0xFE, 0xFF }, // UTF-32BE
154+
_ => Array.Empty<byte>()
155+
};
99156
}
157+
#pragma warning restore SA1201
100158
}

tests/NHapi.Base.NUnit/NHapi.Base.NUnit.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
<PropertyGroup>
44
<TargetFrameworks>net461;net6.0</TargetFrameworks>
55
<IsPackable>false</IsPackable>
6+
<LangVersion>latest</LangVersion>
67
</PropertyGroup>
78

89
<PropertyGroup Condition=" '$(Configuration)' == 'Release' ">

tests/NHapi.Base.NUnit/PreParser/XmlTests.cs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,24 @@ public void TryParseMessage_ValidXML_NullPathSpec_ReturnsExpectedResult()
5858
Assert.Contains(expectedValue, actualResults);
5959
}
6060

61+
[TestCase("PID-1", "grouped")]
62+
[TestCase("PID(1)-1", "not grouped")]
63+
public void TryParseMessage_MessageContainsSegmentGroupElements_ReturnsExpectedResult(string pathSpec, string expectedValue)
64+
{
65+
// Arrange
66+
const string message = "<?xml version=\"1.0\" standalone=\"no\"?><root><root.group><PID>grouped</PID></root.group><PID>not grouped</PID></root>";
67+
68+
var pathSpecs = new List<DatumPath> { pathSpec.FromPathSpec() };
69+
70+
// Act
71+
var parsed = Xml.TryParseMessage(message, pathSpecs, out var results);
72+
var actualResults = results.Select(r => r.Value).ToArray();
73+
74+
// Assert
75+
Assert.True(parsed);
76+
Assert.Contains(expectedValue, actualResults);
77+
}
78+
6179
[TestCase("MSH-9", "QBP")]
6280
[TestCase("MSH-9-2", "Q22")]
6381
[TestCase("QPD-8-4-2", "TTH")]

0 commit comments

Comments
 (0)