Skip to content

Commit d038430

Browse files
committed
Round tripping ligand order in CXSMILES.
1 parent a2c8039 commit d038430

File tree

6 files changed

+135
-29
lines changed

6 files changed

+135
-29
lines changed

storage/smiles/src/main/java/org/openscience/cdk/smiles/CxSmilesGenerator.java

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -217,27 +217,41 @@ public int compare(List<Integer> a, List<Integer> b) {
217217
List<Map.Entry<Integer, List<Integer>>> multicenters = new ArrayList<>(state.positionVar.entrySet());
218218

219219
// consistent output order
220-
Collections.sort(multicenters,
221-
new Comparator<Map.Entry<Integer, List<Integer>>>() {
222-
@Override
223-
public int compare(Map.Entry<Integer, List<Integer>> a,
224-
Map.Entry<Integer, List<Integer>> b) {
225-
return comp.compare(a.getKey(), b.getKey());
226-
}
227-
});
220+
multicenters.sort((a, b) -> comp.compare(a.getKey(), b.getKey()));
228221

229222
for (int i = 0; i < multicenters.size(); i++) {
230223
if (i != 0) sb.append(',');
231224
Map.Entry<Integer, List<Integer>> e = multicenters.get(i);
232225
sb.append(ordering[e.getKey()]);
233226
sb.append(':');
234227
List<Integer> vals = new ArrayList<>(e.getValue());
235-
Collections.sort(vals, comp);
228+
vals.sort(comp);
236229
appendIntegers(ordering, '.', sb, vals);
237230
}
238231

239232
}
240233

234+
if (state.ligandOrdering != null && !state.ligandOrdering.isEmpty()) {
235+
236+
if (sb.length() > 2) sb.append(',');
237+
sb.append("LO");
238+
sb.append(':');
239+
240+
List<Map.Entry<Integer, List<Integer>>> ligandorderings = new ArrayList<>(state.ligandOrdering.entrySet());
241+
242+
// consistent output order
243+
ligandorderings.sort((a, b) -> comp.compare(a.getKey(), b.getKey()));
244+
245+
for (int i = 0; i < ligandorderings.size(); i++) {
246+
if (i != 0) sb.append(',');
247+
Map.Entry<Integer, List<Integer>> e = ligandorderings.get(i);
248+
sb.append(ordering[e.getKey()]);
249+
sb.append(':');
250+
appendIntegers(ordering, '.', sb, e.getValue());
251+
}
252+
253+
}
254+
241255

242256
// *CCO* |$_AP1;;;;_AP2$,Sg:n:1,2,3::ht|
243257
if (SmiFlavor.isSet(opts, SmiFlavor.CxPolymer) &&

storage/smiles/src/main/java/org/openscience/cdk/smiles/CxSmilesParser.java

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -327,16 +327,7 @@ private static boolean processPolymerSgroups(CharIter iter, CxSmilesState state)
327327
return false;
328328
}
329329

330-
/**
331-
* Positional variation/multi centre bonding. Describe as a begin atom and one or more end points.
332-
*
333-
* @param iter input characters, iterator is progressed by this method
334-
* @param state output CXSMILES state
335-
* @return parse was a success (or not)
336-
*/
337-
private static boolean processPositionalVariation(CharIter iter, CxSmilesState state) {
338-
if (state.positionVar == null)
339-
state.positionVar = new TreeMap<>();
330+
private static boolean processIntListMap(Map<Integer,List<Integer>> map, CharIter iter) {
340331
while (iter.hasNext()) {
341332
if (isDigit(iter.curr())) {
342333
final int beg = processUnsignedInt(iter);
@@ -346,14 +337,40 @@ private static boolean processPositionalVariation(CharIter iter, CxSmilesState s
346337
if (!processIntList(iter, DOT_SEPARATOR, endpoints))
347338
return false;
348339
iter.nextIf(',');
349-
state.positionVar.put(beg, endpoints);
340+
map.put(beg, endpoints);
350341
} else {
351342
return true;
352343
}
353344
}
354345
return false;
355346
}
356347

348+
/**
349+
* Positional variation/multi centre bonding. Describe as a begin atom and one or more end points.
350+
*
351+
* @param iter input characters, iterator is progressed by this method
352+
* @param state output CXSMILES state
353+
* @return parse was a success (or not)
354+
*/
355+
private static boolean processPositionalVariation(CharIter iter, CxSmilesState state) {
356+
if (state.positionVar == null)
357+
state.positionVar = new TreeMap<>();
358+
return processIntListMap(state.positionVar, iter);
359+
}
360+
361+
362+
/**
363+
* Ligand ordering indicates the order of attachments around R groups.
364+
* @param iter the character iterator
365+
* @param state the CX state
366+
* @return parse was a success (or not)
367+
*/
368+
private static boolean processLigandOrdering(CharIter iter, CxSmilesState state) {
369+
if (state.ligandOrdering == null)
370+
state.ligandOrdering = new TreeMap<>();
371+
return processIntListMap(state.ligandOrdering, iter);
372+
}
373+
357374
/**
358375
* CXSMILES radicals.
359376
*
@@ -505,6 +522,19 @@ else if (iter.nextIf("gD:")) {
505522
// consume optional separators
506523
if (!iter.nextIf(' ')) iter.nextIf('\t');
507524
return iter.pos;
525+
case 'L':
526+
// LO, Ligand Ordering
527+
if (iter.nextIf('O')) {
528+
if (!iter.nextIf(':'))
529+
return -1;
530+
if (!processLigandOrdering(iter, state))
531+
return -1;
532+
}
533+
else {
534+
// LP, bond connected lone pair?
535+
return -1;
536+
}
537+
break;
508538
default:
509539
return -1;
510540
}

storage/smiles/src/main/java/org/openscience/cdk/smiles/CxSmilesState.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ final class CxSmilesState {
3838
List<double[]> atomCoords = null;
3939
List<List<Integer>> fragGroups = null;
4040
Map<Integer, Radical> atomRads = null;
41+
Map<Integer, List<Integer>> ligandOrdering = null;
4142
Map<Integer, List<Integer>> positionVar = null;
4243
List<PolymerSgroup> sgroups = null;
4344
List<DataSgroup> dataSgroups = null;

storage/smiles/src/main/java/org/openscience/cdk/smiles/SmilesGenerator.java

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import java.util.Comparator;
5353
import java.util.HashMap;
5454
import java.util.HashSet;
55+
import java.util.LinkedHashMap;
5556
import java.util.List;
5657
import java.util.Map;
5758
import java.util.Set;
@@ -829,6 +830,7 @@ else if (val == CxSmilesState.Radical.Trivalent)
829830
if (sgroups != null) {
830831
state.sgroups = new ArrayList<>();
831832
state.positionVar = new HashMap<>();
833+
state.ligandOrdering = new HashMap<>();
832834
for (Sgroup sgroup : sgroups) {
833835
switch (sgroup.getType()) {
834836
// polymer SRU
@@ -851,10 +853,10 @@ else if (val == CxSmilesState.Radical.Trivalent)
851853
supscript));
852854
break;
853855

854-
case ExtMulticenter:
855-
IAtom beg = null;
856-
List<IAtom> ends = new ArrayList<>();
857-
Set<IBond> bonds = sgroup.getBonds();
856+
case ExtMulticenter: {
857+
IAtom beg = null;
858+
List<IAtom> ends = new ArrayList<>();
859+
Set<IBond> bonds = sgroup.getBonds();
858860
if (bonds.size() != 1)
859861
throw new IllegalArgumentException("Multicenter Sgroup in inconsistent state!");
860862
IBond bond = bonds.iterator().next();
@@ -869,6 +871,23 @@ else if (val == CxSmilesState.Radical.Trivalent)
869871
}
870872
state.positionVar.put(ensureNotNull(atomidx.get(beg)),
871873
toAtomIdxs(ends, atomidx));
874+
}
875+
break;
876+
case ExtAttachOrdering: {
877+
IAtom beg = null;
878+
List<IAtom> ends = new ArrayList<>();
879+
if (sgroup.getAtoms().size() != 1)
880+
throw new IllegalArgumentException("Attach ordering in inconsistent state!");
881+
beg = sgroup.getAtoms().iterator().next();
882+
for (IBond bond : sgroup.getBonds()) {
883+
IAtom nbr = bond.getOther(beg);
884+
if (nbr == null)
885+
throw new IllegalArgumentException("Attach ordering in inconsistent state!");
886+
ends.add(nbr);
887+
}
888+
state.ligandOrdering.put(ensureNotNull(atomidx.get(beg)),
889+
toAtomIdxs(ends, atomidx));
890+
}
872891
break;
873892
case CtabAbbreviation:
874893
case CtabMultipleGroup:

storage/smiles/src/main/java/org/openscience/cdk/smiles/SmilesParser.java

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@
2626
import com.google.common.collect.HashMultimap;
2727
import com.google.common.collect.Maps;
2828
import com.google.common.collect.Multimap;
29+
import org.openscience.cdk.CDK;
2930
import org.openscience.cdk.CDKConstants;
31+
import org.openscience.cdk.exception.CDKException;
3032
import org.openscience.cdk.exception.InvalidSmilesException;
3133
import org.openscience.cdk.graph.ConnectivityChecker;
3234
import org.openscience.cdk.interfaces.IAtom;
@@ -307,7 +309,7 @@ private int parseIntSafe(String val) {
307309
* @param title SMILES title field
308310
* @param mol molecule
309311
*/
310-
private void parseMolCXSMILES(String title, IAtomContainer mol) {
312+
private void parseMolCXSMILES(String title, IAtomContainer mol) throws InvalidSmilesException {
311313
CxSmilesState cxstate;
312314
int pos;
313315
if (title != null && title.startsWith("|")) {
@@ -335,7 +337,7 @@ private void parseMolCXSMILES(String title, IAtomContainer mol) {
335337
* @param title SMILES title field
336338
* @param rxn parsed reaction
337339
*/
338-
private void parseRxnCXSMILES(String title, IReaction rxn) {
340+
private void parseRxnCXSMILES(String title, IReaction rxn) throws InvalidSmilesException {
339341
CxSmilesState cxstate;
340342
int pos;
341343
if (title != null && title.startsWith("|")) {
@@ -460,7 +462,7 @@ private void assignCxSmilesInfo(IChemObjectBuilder bldr,
460462
IChemObject chemObj,
461463
List<IAtom> atoms,
462464
Map<IAtom, IAtomContainer> atomToMol,
463-
CxSmilesState cxstate) {
465+
CxSmilesState cxstate) throws InvalidSmilesException {
464466

465467
// atom-labels - must be done first as we replace atoms
466468
if (cxstate.atomLabels != null) {
@@ -486,7 +488,7 @@ else if (val.startsWith("_AP")) // attachment point
486488
IAtomContainer mol = atomToMol.get(old);
487489
AtomContainerManipulator.replaceAtomByAtom(mol, old, pseudo);
488490
atomToMol.put(pseudo, mol);
489-
atoms.set(e.getKey(), pseudo);
491+
atoms.set(e.getKey(), mol.getAtom(old.getIndex()));
490492
}
491493
}
492494

@@ -553,7 +555,7 @@ else if (val.startsWith("_AP")) // attachment point
553555
IAtomContainer mol = atomToMol.get(beg);
554556
List<IBond> bonds = mol.getConnectedBondsList(beg);
555557
if (bonds.isEmpty())
556-
continue; // bad
558+
continue; // possibly okay
557559
sgroup.addAtom(beg);
558560
sgroup.addBond(bonds.get(0));
559561
for (Integer endpt : e.getValue())
@@ -562,6 +564,29 @@ else if (val.startsWith("_AP")) // attachment point
562564
}
563565
}
564566

567+
// ligand ordering
568+
if (cxstate.ligandOrdering != null) {
569+
for (Map.Entry<Integer, List<Integer>> e : cxstate.ligandOrdering.entrySet()) {
570+
Sgroup sgroup = new Sgroup();
571+
sgroup.setType(SgroupType.ExtAttachOrdering);
572+
IAtom beg = atoms.get(e.getKey());
573+
IAtomContainer mol = atomToMol.get(beg);
574+
List<IBond> bonds = mol.getConnectedBondsList(beg);
575+
if (bonds.isEmpty())
576+
throw new InvalidSmilesException("CXSMILES LO: no bonds to order");
577+
if (bonds.size() != e.getValue().size())
578+
throw new InvalidSmilesException("CXSMILES LO: bond count and ordering count was different");
579+
sgroup.addAtom(beg);
580+
for (Integer endpt : e.getValue()) {
581+
IBond bond = beg.getBond(atoms.get(endpt));
582+
if (bond == null)
583+
throw new InvalidSmilesException("CXSMILES LO: defined ordering to non-existant bond");
584+
sgroup.addBond(bond);
585+
}
586+
sgroupMap.put(mol, sgroup);
587+
}
588+
}
589+
565590
// data sgroups
566591
if (cxstate.dataSgroups != null) {
567592
for (DataSgroup dsgroup : cxstate.dataSgroups) {

storage/smiles/src/test/java/org/openscience/cdk/smiles/CxSmilesTest.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.openscience.cdk.CDKConstants;
2929
import org.openscience.cdk.exception.CDKException;
3030
import org.openscience.cdk.exception.InvalidSmilesException;
31+
import org.openscience.cdk.interfaces.IAtom;
3132
import org.openscience.cdk.interfaces.IAtomContainer;
3233
import org.openscience.cdk.interfaces.IBond;
3334
import org.openscience.cdk.interfaces.IChemObjectBuilder;
@@ -378,4 +379,20 @@ public void atomValues() throws InvalidSmilesException {
378379
assertThat(smigen.create(mol), is("OC=1C=CC=CC1 |$_AV:6;5;0;1;2;3;4$|"));
379380
}
380381

382+
@Test public void roundTripLigandOrdering() throws CDKException {
383+
IChemObjectBuilder bldr = SilentChemObjectBuilder.getInstance();
384+
SmilesParser smipar = new SmilesParser(bldr);
385+
IAtomContainer mol = smipar.parseSmiles("Cl[*](Br)I |$;_R1;;$,LO:1:0.2.3|");
386+
SmilesGenerator smigen = new SmilesGenerator(SmiFlavor.CxSmiles);
387+
assertThat(smigen.create(mol), is("Cl*(Br)I |$;R1$,LO:1:0.2.3|"));
388+
}
389+
390+
@Test public void canonLigandOrdering() throws CDKException {
391+
IChemObjectBuilder bldr = SilentChemObjectBuilder.getInstance();
392+
SmilesParser smipar = new SmilesParser(bldr);
393+
IAtomContainer mol = smipar.parseSmiles("Cl[*](I)Br |$;_R1;;$,LO:1:0.2.3|");
394+
SmilesGenerator smigen = new SmilesGenerator(SmiFlavor.Canonical|SmiFlavor.CxSmiles);
395+
assertThat(smigen.create(mol), is("Cl*(Br)I |$;R1$,LO:1:0.3.2|"));
396+
}
397+
381398
}

0 commit comments

Comments
 (0)