Skip to content

Commit acead92

Browse files
committed
Handle data sgroups in CXSMILES.
1 parent 6888bc5 commit acead92

File tree

9 files changed

+320
-79
lines changed

9 files changed

+320
-79
lines changed

storage/smiles/src/main/java/org/openscience/cdk/smiles/CxSmilesGenerator.java

Lines changed: 98 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,24 @@
2323

2424
package org.openscience.cdk.smiles;
2525

26-
import org.openscience.cdk.smiles.CxSmilesState.PolymerSgroup;
26+
import org.openscience.cdk.sgroup.Sgroup;
27+
import org.openscience.cdk.smiles.CxSmilesState.CxDataSgroup;
28+
import org.openscience.cdk.smiles.CxSmilesState.CxPolymerSgroup;
29+
import org.openscience.cdk.smiles.CxSmilesState.CxSgroup;
2730

2831
import java.text.DecimalFormat;
2932
import java.text.DecimalFormatSymbols;
3033
import java.util.ArrayList;
3134
import java.util.Arrays;
3235
import java.util.Collections;
3336
import java.util.Comparator;
37+
import java.util.HashMap;
38+
import java.util.HashSet;
3439
import java.util.Iterator;
3540
import java.util.List;
3641
import java.util.Locale;
3742
import java.util.Map;
43+
import java.util.Set;
3844
import java.util.TreeMap;
3945

4046
public class CxSmilesGenerator {
@@ -254,38 +260,113 @@ public int compare(List<Integer> a, List<Integer> b) {
254260
}
255261

256262

263+
int numSgroups = 0;
264+
257265
// *CCO* |$_AP1;;;;_AP2$,Sg:n:1,2,3::ht|
258266
if (SmiFlavor.isSet(opts, SmiFlavor.CxPolymer) &&
259-
state.sgroups != null && !state.sgroups.isEmpty()) {
260-
List<PolymerSgroup> sgroups = new ArrayList<>(state.sgroups);
261-
262-
for (PolymerSgroup psgroup : sgroups)
263-
Collections.sort(psgroup.atomset, comp);
267+
state.mysgroups != null && !state.mysgroups.isEmpty()) {
268+
List<CxPolymerSgroup> polysgroups = new ArrayList<>();
269+
for (CxSgroup polysgroup : state.mysgroups) {
270+
if (polysgroup instanceof CxPolymerSgroup) {
271+
polysgroups.add((CxPolymerSgroup) polysgroup);
272+
Collections.sort(polysgroup.atoms, comp);
273+
}
274+
}
264275

265-
Collections.sort(sgroups, new Comparator<PolymerSgroup>() {
276+
Collections.sort(polysgroups, new Comparator<CxPolymerSgroup>() {
266277
@Override
267-
public int compare(PolymerSgroup a, PolymerSgroup b) {
278+
public int compare(CxPolymerSgroup a, CxPolymerSgroup b) {
268279
int cmp = 0;
269280
cmp = a.type.compareTo(b.type);
270281
if (cmp != 0) return cmp;
271-
cmp = CxSmilesGenerator.compare(comp, a.atomset, b.atomset);
282+
cmp = CxSmilesGenerator.compare(comp, a.atoms, b.atoms);
272283
return cmp;
273284
}
274285
});
275286

276-
for (int i = 0; i < sgroups.size(); i++) {
287+
for (CxPolymerSgroup cxPolymerSgroup : polysgroups) {
288+
cxPolymerSgroup.id = numSgroups++;
277289
if (sb.length() > 2) sb.append(',');
278290
sb.append("Sg:");
279-
PolymerSgroup sgroup = sgroups.get(i);
280-
sb.append(sgroup.type);
291+
sb.append(cxPolymerSgroup.type);
281292
sb.append(':');
282-
appendIntegers(ordering, ',', sb, sgroup.atomset);
293+
appendIntegers(ordering, ',', sb, cxPolymerSgroup.atoms);
283294
sb.append(':');
284-
if (sgroup.subscript != null)
285-
sb.append(sgroup.subscript);
295+
if (cxPolymerSgroup.subscript != null)
296+
sb.append(cxPolymerSgroup.subscript);
286297
sb.append(':');
287-
if (sgroup.supscript != null)
288-
sb.append(sgroup.supscript.toLowerCase(Locale.ROOT));
298+
if (cxPolymerSgroup.supscript != null)
299+
sb.append(cxPolymerSgroup.supscript.toLowerCase(Locale.ROOT));
300+
}
301+
}
302+
303+
if (SmiFlavor.isSet(opts, SmiFlavor.CxDataSgroups) &&
304+
state.mysgroups != null && !state.mysgroups.isEmpty()) {
305+
List<CxDataSgroup> datasgroups = new ArrayList<>();
306+
for (CxSgroup datasgroup : state.mysgroups) {
307+
if (datasgroup instanceof CxDataSgroup) {
308+
datasgroups.add((CxDataSgroup)datasgroup);
309+
Collections.sort(datasgroup.atoms, comp);
310+
}
311+
}
312+
313+
Collections.sort(datasgroups, new Comparator<CxDataSgroup>() {
314+
@Override
315+
public int compare(CxDataSgroup a, CxDataSgroup b) {
316+
int cmp = 0;
317+
cmp = a.field.compareTo(b.field);
318+
if (cmp != 0) return cmp;
319+
cmp = a.value.compareTo(b.value);
320+
if (cmp != 0) return cmp;
321+
cmp = CxSmilesGenerator.compare(comp, a.atoms, b.atoms);
322+
return cmp;
323+
}
324+
});
325+
326+
for (CxDataSgroup cxDataSgroup : datasgroups) {
327+
cxDataSgroup.id = numSgroups++;
328+
if (sb.length() > 2) sb.append(',');
329+
sb.append("SgD:");
330+
appendIntegers(ordering, ',', sb, cxDataSgroup.atoms);
331+
sb.append(':');
332+
if (cxDataSgroup.field != null)
333+
sb.append(cxDataSgroup.field);
334+
sb.append(':');
335+
if (cxDataSgroup.value != null)
336+
sb.append(cxDataSgroup.value);
337+
sb.append(':');
338+
if (cxDataSgroup.operator != null)
339+
sb.append(cxDataSgroup.operator);
340+
sb.append(':');
341+
if (cxDataSgroup.unit != null)
342+
sb.append(cxDataSgroup.unit);
343+
// fmt (t/f/n) + coords?
344+
}
345+
}
346+
347+
// hierarchy information
348+
if (numSgroups > 0) {
349+
boolean firstSgH = true;
350+
if (state.mysgroups != null) {
351+
for (CxSgroup sgroup : state.mysgroups) {
352+
if (sgroup.children.isEmpty())
353+
continue;
354+
if (sb.length() > 2) sb.append(',');
355+
if (firstSgH) {
356+
sb.append("SgH:");
357+
firstSgH = false;
358+
}
359+
sb.append(sgroup.id).append(':');
360+
boolean first = true;
361+
for (CxSgroup child : sgroup.children) {
362+
if (child.id < 0)
363+
continue;
364+
if (!first)
365+
sb.append('.');
366+
first = false;
367+
sb.append(child.id);
368+
}
369+
}
289370
}
290371
}
291372

storage/smiles/src/main/java/org/openscience/cdk/smiles/CxSmilesParser.java

Lines changed: 55 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,8 @@ private static boolean isSgroupDelim(char c) {
215215

216216
private static boolean processDataSgroups(CharIter iter, CxSmilesState state) {
217217

218-
if (state.dataSgroups == null)
219-
state.dataSgroups = new ArrayList<>(4);
218+
if (state.mysgroups == null)
219+
state.mysgroups = new ArrayList<>(4);
220220

221221
final List<Integer> atomset = new ArrayList<>();
222222
if (!processIntList(iter, COMMA_SEPARATOR, atomset))
@@ -237,7 +237,7 @@ private static boolean processDataSgroups(CharIter iter, CxSmilesState state) {
237237
final String value = unescape(iter.substr(beg, iter.pos));
238238

239239
if (!iter.nextIf(':')) {
240-
state.dataSgroups.add(new CxSmilesState.DataSgroup(atomset, field, value, "", "", ""));
240+
state.mysgroups.add(new CxSmilesState.CxDataSgroup(atomset, field, value, "", "", ""));
241241
return true;
242242
}
243243

@@ -247,7 +247,7 @@ private static boolean processDataSgroups(CharIter iter, CxSmilesState state) {
247247
final String operator = unescape(iter.substr(beg, iter.pos));
248248

249249
if (!iter.nextIf(':')) {
250-
state.dataSgroups.add(new CxSmilesState.DataSgroup(atomset, field, value, operator, "", ""));
250+
state.mysgroups.add(new CxSmilesState.CxDataSgroup(atomset, field, value, operator, "", ""));
251251
return true;
252252
}
253253

@@ -257,7 +257,7 @@ private static boolean processDataSgroups(CharIter iter, CxSmilesState state) {
257257
final String unit = unescape(iter.substr(beg, iter.pos));
258258

259259
if (!iter.nextIf(':')) {
260-
state.dataSgroups.add(new CxSmilesState.DataSgroup(atomset, field, value, operator, unit, ""));
260+
state.mysgroups.add(new CxSmilesState.CxDataSgroup(atomset, field, value, operator, unit, ""));
261261
return true;
262262
}
263263

@@ -266,7 +266,7 @@ private static boolean processDataSgroups(CharIter iter, CxSmilesState state) {
266266
iter.next();
267267
final String tag = unescape(iter.substr(beg, iter.pos));
268268

269-
state.dataSgroups.add(new CxSmilesState.DataSgroup(atomset, field, value, operator, unit, tag));
269+
state.mysgroups.add(new CxSmilesState.CxDataSgroup(atomset, field, value, operator, unit, tag));
270270

271271
return true;
272272
}
@@ -279,8 +279,8 @@ private static boolean processDataSgroups(CharIter iter, CxSmilesState state) {
279279
* @return parse was a success (or not)
280280
*/
281281
private static boolean processPolymerSgroups(CharIter iter, CxSmilesState state) {
282-
if (state.sgroups == null)
283-
state.sgroups = new ArrayList<>();
282+
if (state.mysgroups == null)
283+
state.mysgroups = new ArrayList<>();
284284
int beg = iter.pos;
285285
while (iter.hasNext() && !isSgroupDelim(iter.curr()))
286286
iter.next();
@@ -308,18 +308,20 @@ private static boolean processPolymerSgroups(CharIter iter, CxSmilesState state)
308308
subscript = keyword;
309309

310310
// "In the superscript only connectivity and flip information is allowed.", default
311-
// appears to be "eu" either/unspecified
311+
// appears to be "eu" either/unspecified for SRU
312312
if (!iter.nextIf(':'))
313313
return false;
314314
beg = iter.pos;
315315
while (iter.hasNext() && !isSgroupDelim(iter.curr()))
316316
iter.next();
317317
supscript = unescape(iter.substr(beg, iter.pos));
318-
if (supscript.isEmpty())
318+
if (supscript.isEmpty() &&
319+
!keyword.equals("c")&&!keyword.equals("mix")&&
320+
!keyword.equals("f")&&!keyword.equals("mod"))
319321
supscript = "eu";
320322

321323
if (iter.nextIf(',') || iter.curr() == '|') {
322-
state.sgroups.add(new CxSmilesState.PolymerSgroup(keyword, atomset, subscript, supscript));
324+
state.mysgroups.add(new CxSmilesState.CxPolymerSgroup(keyword, atomset, subscript, supscript));
323325
return true;
324326
}
325327
// not supported: crossing bond info (difficult to work out from doc) and bracket orientation
@@ -493,6 +495,12 @@ else if (iter.nextIf("tu:")) {
493495
else if (iter.nextIf("gD:")) {
494496
if (!processDataSgroups(iter, state))
495497
return -1;
498+
if (iter.nextIf(','))
499+
break;
500+
}
501+
else if (iter.nextIf("gH:")) {
502+
if (!processSgroupsHierarchy(iter, state))
503+
return -1;
496504
}
497505
else {
498506
return -1;
@@ -543,6 +551,42 @@ else if (iter.nextIf("gD:")) {
543551
return -1;
544552
}
545553

554+
555+
private static boolean processSgroupsHierarchy(CharIter iter, CxSmilesState state) {
556+
int nsgroups = 0;
557+
if (state.mysgroups != null)
558+
nsgroups += state.mysgroups.size();
559+
if (nsgroups == 0)
560+
return false; // may not be written yet
561+
for (;;) {
562+
int parent = processUnsignedInt(iter);
563+
if (parent < 0)
564+
return false;
565+
if (!iter.nextIf(':'))
566+
return false;
567+
List<Integer> children = new ArrayList<>();
568+
processIntList(iter, '.', children);
569+
if (parent < state.mysgroups.size()) {
570+
for (Integer child : children) {
571+
if (child < nsgroups) {
572+
state.mysgroups.get(parent).children
573+
.add(state.mysgroups.get(child));
574+
} else
575+
return false; // missing Sgroup
576+
}
577+
} else {
578+
return false; // missing Sgroup
579+
}
580+
if (iter.curr() == '|')
581+
return true;
582+
if (!iter.nextIf(','))
583+
return false;
584+
if (!isDigit(iter.curr()))
585+
return true;
586+
}
587+
}
588+
589+
546590
private static boolean isDigit(char c) {
547591
return c >= '0' && c <= '9';
548592
}

0 commit comments

Comments
 (0)