Skip to content

Commit fc938d2

Browse files
committed
Write support for stereogroups in CXSMILES, V2000/V3000 MOLfiles.
1 parent 49257e2 commit fc938d2

File tree

11 files changed

+791
-12
lines changed

11 files changed

+791
-12
lines changed

storage/ctab/src/main/java/org/openscience/cdk/io/MDLV2000Writer.java

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -430,8 +430,7 @@ public void writeMolecule(IAtomContainer container) throws Exception {
430430
//write number of atom lists
431431
line.append(formatMDLInt(atomLists.size(), 3));
432432
line.append(" 0");
433-
// we mark all stereochemistry to absolute for now
434-
line.append(atomstereo.isEmpty() ? " 0" : " 1");
433+
line.append(getChiralFlag(atomstereo.values()) ? " 1" : " 0");
435434
line.append(" 0 0 0 0 0999 V2000");
436435
writer.write(line.toString());
437436
writer.write('\n');
@@ -618,7 +617,7 @@ else if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 2).or(new Expr(Expr.Type.I
618617
case UNSET:
619618
if (bond.isAromatic()) {
620619
if (!writeAromaticBondTypes.isSet())
621-
throw new CDKException("Bond at idx " + container.indexOf(bond) + " was an unspecific aromatic bond which should only be used for querie in Molfiles. These can be written if desired by enabling the option 'WriteAromaticBondTypes'.");
620+
throw new CDKException("Bond at idx " + container.indexOf(bond) + " was an unspecific aromatic bond which should only be used for queries in Molfiles. These can be written if desired by enabling the option 'WriteAromaticBondTypes'.");
622621
bondType = 4;
623622
}
624623
break;
@@ -803,6 +802,40 @@ else if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 2).or(new Expr(Expr.Type.I
803802
writer.flush();
804803
}
805804

805+
/**
806+
* Determines the chiral flag, a molecule is chiral if all it's tetrahedral stereocenters are marked as absolute.
807+
* This function also checks if there is enhanced stereochemistry that cannot be emitted (without information loss)
808+
* in V2000.
809+
*
810+
* @param stereo tetrahedral stereo
811+
* @return the chiral status
812+
*/
813+
static boolean getChiralFlag(Iterable<? extends IStereoElement> stereo) {
814+
boolean chiral = true;
815+
int seenGrpInfo = 0;
816+
int numTetrahedral = 0;
817+
for (IStereoElement tc : stereo) {
818+
if (tc.getConfigClass() != IStereoElement.TH)
819+
continue;
820+
numTetrahedral++;
821+
if (tc.getGroupInfo() != IStereoElement.GRP_ABS) {
822+
if (seenGrpInfo == 0) {
823+
seenGrpInfo = tc.getGroupInfo();
824+
} else if (seenGrpInfo != tc.getGroupInfo()) {
825+
// we could check for racemic only but V2000 originally didn't differentiate between relative
826+
// or racemic so providing they're all the same it's okay. But we should warn if there is something
827+
// more complicated
828+
logger.warn("Molecule has enhanced stereochemistry that cannot be represented in V2000");
829+
}
830+
chiral = false;
831+
}
832+
}
833+
if (numTetrahedral == 0)
834+
chiral = false;
835+
return chiral;
836+
}
837+
838+
806839
private static void writeAtomLists(Map<Integer, IAtom> atomLists, BufferedWriter writer) throws IOException {
807840
//write out first as the legacy atom list way and then as the M ALS way
808841
//since there should only be a few lines to write each way

storage/ctab/src/main/java/org/openscience/cdk/io/MDLV3000Writer.java

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,11 @@
6060
import java.util.Collections;
6161
import java.util.Comparator;
6262
import java.util.HashMap;
63-
import java.util.Iterator;
6463
import java.util.List;
6564
import java.util.Locale;
6665
import java.util.Map;
6766
import java.util.Set;
67+
import java.util.TreeMap;
6868
import java.util.regex.Matcher;
6969
import java.util.regex.Pattern;
7070

@@ -674,14 +674,18 @@ private void writeMol(IAtomContainer mol) throws IOException, CDKException {
674674
if (sgroup.getType().isCtabStandard())
675675
numSgroups++;
676676

677+
int chiralFlag = getChiralFlag(mol.stereoElements());
678+
677679
writer.write("BEGIN CTAB\n");
678680
writer.write("COUNTS ")
679681
.write(mol.getAtomCount())
680682
.write(' ')
681683
.write(mol.getBondCount())
682684
.write(' ')
683685
.write(numSgroups)
684-
.write(" 0 0\n");
686+
.write(" 0")
687+
.write(chiralFlag == 1 ? " 1" : " 0")
688+
.write("\n");
685689

686690
// fast lookup atom indexes, MDL indexing starts at 1
687691
Map<IChemObject, Integer> idxs = new HashMap<>();
@@ -704,12 +708,55 @@ private void writeMol(IAtomContainer mol) throws IOException, CDKException {
704708
writeAtomBlock(mol, atoms, idxs, atomToStereo);
705709
writeBondBlock(mol, idxs);
706710
writeSgroupBlock(sgroups, idxs);
711+
if (chiralFlag > 1)
712+
writeEnhancedStereo(mol, idxs);
707713

708714
writer.write("END CTAB\n");
709715
writer.writeDirect("M END\n");
710716
writer.writer.flush();
711717
}
712718

719+
private void writeEnhancedStereo(IAtomContainer mol, Map<IChemObject, Integer> idxs) throws IOException {
720+
// group together
721+
Map<Integer,List<IAtom>> groups = new TreeMap<>();
722+
for (IStereoElement<?,?> se : mol.stereoElements()) {
723+
if (se.getConfigClass() == IStereoElement.TH) {
724+
groups.computeIfAbsent(se.getGroupInfo(), e -> new ArrayList<>())
725+
.add((IAtom)se.getFocus());
726+
}
727+
}
728+
writer.write("BEGIN COLLECTION\n");
729+
int numRel = 0;
730+
int numRac = 0;
731+
for (Map.Entry<Integer,List<IAtom>> e : groups.entrySet()) {
732+
int grpInfo = e.getKey();
733+
List<IAtom> atoms = e.getValue();
734+
writer.write("MDLV30/STE");
735+
switch (grpInfo & IStereoElement.GRP_TYPE_MASK) {
736+
case IStereoElement.GRP_ABS:
737+
writer.write("ABS");
738+
break;
739+
case IStereoElement.GRP_RAC:
740+
writer.write("RAC");
741+
writer.write(++numRac);
742+
break;
743+
case IStereoElement.GRP_REL:
744+
writer.write("REL");
745+
writer.write(++numRel);
746+
break;
747+
default:
748+
throw new IllegalStateException("Unexpected ");
749+
}
750+
writer.write(" ATOMS=(");
751+
writer.write(idxs.get(atoms.get(0)));
752+
for (int i=1; i<atoms.size(); i++) {
753+
writer.write(' ');
754+
writer.write(idxs.get(atoms.get(i)));
755+
}
756+
writer.write(")\n");
757+
}
758+
writer.write("END COLLECTION\n");
759+
}
713760

714761
/**
715762
* Writes a molecule to the V3000 format. {@inheritDoc}
@@ -957,4 +1004,30 @@ public void customizeJob() {
9571004
fireIOSettingQuestion(setting);
9581005
}
9591006
}
1007+
1008+
/**
1009+
* Determines the chiral flag, a molecule is chiral if all it's tetrahedral stereocenters are marked as absolute.
1010+
*
1011+
* @param stereo tetrahedral stereo
1012+
* @return the chiral status, 0=not chiral, 1=chiral (all abs), 2=enhanced
1013+
*/
1014+
static int getChiralFlag(Iterable<? extends IStereoElement> stereo) {
1015+
boolean init = false;
1016+
int grp = 0;
1017+
for (IStereoElement<?,?> se : stereo) {
1018+
if (se.getConfigClass() == IStereoElement.TH) {
1019+
if (!init) {
1020+
init = true;
1021+
grp = se.getGroupInfo();
1022+
} else if (grp != se.getGroupInfo()) {
1023+
return 2; // mixed
1024+
}
1025+
}
1026+
}
1027+
if (!init)
1028+
return 0;
1029+
if (grp == IStereoElement.GRP_ABS)
1030+
return 1;
1031+
return 2;
1032+
}
9601033
}

storage/ctab/src/main/java/org/openscience/cdk/io/SDFWriter.java

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import org.openscience.cdk.interfaces.IChemModel;
4343
import org.openscience.cdk.interfaces.IChemObject;
4444
import org.openscience.cdk.interfaces.IChemSequence;
45+
import org.openscience.cdk.interfaces.IStereoElement;
4546
import org.openscience.cdk.io.formats.IResourceFormat;
4647
import org.openscience.cdk.io.formats.SDFFormat;
4748
import org.openscience.cdk.io.setting.BooleanIOSetting;
@@ -342,7 +343,29 @@ private boolean writeV3000(IAtomContainer container) {
342343
return true;
343344
if (container.getBondCount() > 999)
344345
return true;
345-
// check for positional variation, this can be output in base V3000 and not V2000
346+
347+
// enhanced stereo check, if every tetrahedral element is Absolute (ABS) or in the same Racemic (RAC) group then
348+
// we can use V2000
349+
boolean init = false;
350+
int grp = 0;
351+
for (IStereoElement<?,?> se : container.stereoElements()) {
352+
if (se.getConfigClass() == IStereoElement.TH) {
353+
if (!init) {
354+
init = true;
355+
grp = se.getGroupInfo();
356+
} else if (grp != se.getGroupInfo()) {
357+
// >1 group types e.g. &1 &2, &1 or1 etc, use V3000
358+
return true;
359+
}
360+
}
361+
}
362+
363+
// original V2000 didn't distinguish racemic and relative stereo (flag=0) however
364+
// MDL/Accelrys/BIOVIA decided these should be read as racemic, so even if all the centres are in the same relative (REL) group we need V3000 to keep them relative
365+
if ((grp & IStereoElement.GRP_TYPE_MASK) == IStereoElement.GRP_REL)
366+
return true;
367+
368+
// check for positional variation, this can be output in V3000 and not V2000
346369
List<Sgroup> sgroups = container.getProperty(CDKConstants.CTAB_SGROUPS);
347370
if (sgroups != null) {
348371
for (Sgroup sgroup : sgroups)

storage/ctab/src/test/java/org/openscience/cdk/io/MDLV2000WriterTest.java

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.openscience.cdk.interfaces.IChemObjectBuilder;
3939
import org.openscience.cdk.interfaces.IPseudoAtom;
4040
import org.openscience.cdk.interfaces.ISingleElectron;
41+
import org.openscience.cdk.interfaces.IStereoElement;
4142
import org.openscience.cdk.interfaces.ITetrahedralChirality;
4243
import org.openscience.cdk.io.listener.PropertiesListener;
4344
import org.openscience.cdk.sgroup.Sgroup;
@@ -50,6 +51,7 @@
5051
import javax.vecmath.Point3d;
5152
import java.io.IOException;
5253
import java.io.InputStream;
54+
import java.io.StringReader;
5355
import java.io.StringWriter;
5456
import java.util.ArrayList;
5557
import java.util.Collections;
@@ -62,6 +64,7 @@
6264
import static org.hamcrest.CoreMatchers.is;
6365
import static org.hamcrest.CoreMatchers.not;
6466
import static org.hamcrest.MatcherAssert.assertThat;
67+
import static org.junit.Assert.assertTrue;
6568
import static org.openscience.cdk.CDKConstants.ISAROMATIC;
6669

6770
/**
@@ -452,9 +455,6 @@ public void testAtomParity() throws CDKException, IOException {
452455
writer.write(molecule);
453456
writer.close();
454457

455-
456-
System.out.println(sw.toString());
457-
458458
Assert.assertTrue(sw.toString().contains(
459459
" -1.1749 0.1436 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0"));
460460

@@ -955,7 +955,7 @@ public void skipDefaultProps() throws Exception {
955955
mdlw.write(mdlr.read(new AtomContainer()));
956956
String output = sw.toString();
957957
assertThat(output, containsString("\n"
958-
+ " 5 4 0 0 1 0 0 0 0 0999 V2000\n"
958+
+ " 5 4 0 0 0 0 0 0 0 0999 V2000\n"
959959
+ " 0.0000 0.0000 0.0000 C 0 0 1 0 0 0\n"
960960
+ " 0.0000 0.0000 0.0000 C 0 0\n"
961961
+ " 0.0000 0.0000 0.0000 C 0 0\n"
@@ -1072,4 +1072,64 @@ public void dataSgroupRoundTrip() {
10721072
Assert.fail(e.getMessage());
10731073
}
10741074
}
1075+
1076+
@Test
1077+
public void testNoChiralFlag() throws Exception {
1078+
final String input = "\n" +
1079+
" Mrv1810 02052112282D \n" +
1080+
"\n" +
1081+
" 7 7 0 0 0 0 999 V2000\n" +
1082+
" -1.1468 6.5972 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0\n" +
1083+
" -1.8613 6.1847 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
1084+
" -1.8613 5.3597 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
1085+
" -1.1468 4.9472 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
1086+
" -0.4323 5.3597 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
1087+
" -0.4323 6.1847 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" +
1088+
" -1.1468 7.4222 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" +
1089+
" 1 2 1 0 0 0 0\n" +
1090+
" 2 3 1 0 0 0 0\n" +
1091+
" 3 4 1 0 0 0 0\n" +
1092+
" 4 5 1 0 0 0 0\n" +
1093+
" 5 6 1 0 0 0 0\n" +
1094+
" 1 6 1 0 0 0 0\n" +
1095+
" 1 7 1 1 0 0 0\n" +
1096+
"M END\n";
1097+
IChemObjectBuilder bldr = SilentChemObjectBuilder.getInstance();
1098+
StringWriter sw = new StringWriter();
1099+
try (MDLV2000Reader mdlr = new MDLV2000Reader(new StringReader(input));
1100+
MDLV2000Writer mdlw = new MDLV2000Writer(sw)) {
1101+
mdlw.write(mdlr.read(bldr.newAtomContainer()));
1102+
}
1103+
assertThat(sw.toString(), containsString(" 7 7 0 0 0 0"));
1104+
}
1105+
1106+
@Test
1107+
public void testChiralFlag() throws Exception {
1108+
final String input = "\n" +
1109+
" Mrv1810 02052112282D \n" +
1110+
"\n" +
1111+
" 7 7 0 0 1 0 999 V2000\n" +
1112+
" -1.1468 6.5972 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0\n" +
1113+
" -1.8613 6.1847 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
1114+
" -1.8613 5.3597 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
1115+
" -1.1468 4.9472 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
1116+
" -0.4323 5.3597 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
1117+
" -0.4323 6.1847 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" +
1118+
" -1.1468 7.4222 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" +
1119+
" 1 2 1 0 0 0 0\n" +
1120+
" 2 3 1 0 0 0 0\n" +
1121+
" 3 4 1 0 0 0 0\n" +
1122+
" 4 5 1 0 0 0 0\n" +
1123+
" 5 6 1 0 0 0 0\n" +
1124+
" 1 6 1 0 0 0 0\n" +
1125+
" 1 7 1 1 0 0 0\n" +
1126+
"M END\n";
1127+
IChemObjectBuilder bldr = SilentChemObjectBuilder.getInstance();
1128+
StringWriter sw = new StringWriter();
1129+
try (MDLV2000Reader mdlr = new MDLV2000Reader(new StringReader(input));
1130+
MDLV2000Writer mdlw = new MDLV2000Writer(sw)) {
1131+
mdlw.write(mdlr.read(bldr.newAtomContainer()));
1132+
}
1133+
assertThat(sw.toString(), containsString(" 7 7 0 0 1 0"));
1134+
}
10751135
}

0 commit comments

Comments
 (0)