Skip to content

Commit eb3a48b

Browse files
committed
write out ALS lines in molfile
1 parent c2f7e62 commit eb3a48b

File tree

2 files changed

+97
-19
lines changed

2 files changed

+97
-19
lines changed

storage/ctab/src/main/java/org/openscience/cdk/io/MDLV2000Writer.java

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@
2424
*/
2525
package org.openscience.cdk.io;
2626

27+
import org.openscience.cdk.AtomRef;
2728
import org.openscience.cdk.CDKConstants;
29+
import org.openscience.cdk.config.Elements;
2830
import org.openscience.cdk.config.Isotopes;
2931
import org.openscience.cdk.exception.CDKException;
3032
import org.openscience.cdk.interfaces.IAtom;
@@ -44,6 +46,8 @@
4446
import org.openscience.cdk.io.setting.IOSetting;
4547
import org.openscience.cdk.io.setting.StringIOSetting;
4648
import org.openscience.cdk.isomorphism.matchers.Expr;
49+
import org.openscience.cdk.isomorphism.matchers.IQueryAtom;
50+
import org.openscience.cdk.isomorphism.matchers.QueryAtom;
4751
import org.openscience.cdk.isomorphism.matchers.QueryBond;
4852
import org.openscience.cdk.sgroup.Sgroup;
4953
import org.openscience.cdk.sgroup.SgroupBracket;
@@ -63,19 +67,10 @@
6367
import java.nio.charset.StandardCharsets;
6468
import java.text.NumberFormat;
6569
import java.text.SimpleDateFormat;
66-
import java.util.ArrayList;
67-
import java.util.Arrays;
68-
import java.util.Collection;
69-
import java.util.HashMap;
70-
import java.util.Iterator;
71-
import java.util.LinkedHashMap;
72-
import java.util.List;
73-
import java.util.Locale;
74-
import java.util.Map;
75-
import java.util.Set;
76-
import java.util.TreeMap;
70+
import java.util.*;
7771
import java.util.regex.Matcher;
7872
import java.util.regex.Pattern;
73+
import java.util.stream.Collectors;
7974

8075
/**
8176
* Writes MDL molfiles, which contains a single molecule (see {@cdk.cite DAL92}).
@@ -414,6 +409,7 @@ public void writeMolecule(IAtomContainer container) throws Exception {
414409
writer.write(line.toString());
415410
writer.write('\n');
416411

412+
Set<IAtom> atomLists = new LinkedHashSet<>();
417413
// write Atom block
418414
for (int f = 0; f < container.getAtomCount(); f++) {
419415
IAtom atom = container.getAtom(f);
@@ -490,6 +486,14 @@ public void writeMolecule(IAtomContainer container) throws Exception {
490486
}
491487
}
492488

489+
}else if(container.getAtom(f) instanceof IQueryAtom){
490+
QueryAtom queryAtom = (QueryAtom) AtomRef.deref(container.getAtom(f));
491+
Expr expr = queryAtom.getExpression();
492+
String symbol = getSymbolForAtomExpression(expr);
493+
line.append(formatMDLString(symbol, 3));
494+
if("L".equals(symbol)){
495+
atomLists.add(container.getAtom(f));
496+
}
493497
} else {
494498
line.append(formatMDLString(container.getAtom(f).getSymbol(), 3));
495499
}
@@ -761,6 +765,28 @@ else if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 2).or(new Expr(Expr.Type.I
761765

762766
}
763767
}
768+
//write atom lists
769+
for(IAtom a : atomLists){
770+
QueryAtom qa = (QueryAtom) AtomRef.deref(a);
771+
//atom lists are limited to just a list of ELEMENTS OR'ed together
772+
//with the whole expression possibly negated
773+
774+
Expr expression = qa.getExpression();
775+
List<String> elements=getAtomList(expression);
776+
writer.write("M ALS ");
777+
writer.write(formatMDLInt(a.getIndex()+1, 3));
778+
writer.write(formatMDLInt(elements.size(), 3));
779+
//root expression type is either OR or NOT
780+
if(expression.type() == Expr.Type.NOT){
781+
writer.write(" T ");
782+
}else {
783+
writer.write(" F ");
784+
}
785+
for(String symbol : elements){
786+
writer.write(formatMDLString(symbol, 4));
787+
}
788+
writer.write('\n');
789+
}
764790

765791
writeSgroups(container, writer, atomindex);
766792

@@ -769,7 +795,34 @@ else if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 2).or(new Expr(Expr.Type.I
769795
writer.write('\n');
770796
writer.flush();
771797
}
798+
private static String getSymbolForAtomExpression(Expr exp){
799+
List<Expr> elist = new ArrayList<>();
800+
getLeafNodes(exp, elist);
801+
if(!elist.isEmpty() && elist.stream()
802+
.allMatch(ex->ex.type().equals(Expr.Type.ELEMENT))){
803+
return "L";
804+
}else{
805+
return "A";
806+
}
807+
}
808+
private static List<String> getAtomList(Expr exp){
809+
List<Expr> elist = new ArrayList<>();
810+
getLeafNodes(exp, elist);
811+
return elist.stream().map(expr->Elements.ofNumber(expr.value()).symbol())
812+
.collect(Collectors.toList());
813+
814+
}
772815

816+
private static void getLeafNodes(Expr exr, List<Expr> elist){
817+
if(exr.type().equals(Expr.Type.OR) || exr.type().equals(Expr.Type.AND)){
818+
getLeafNodes(exr.left(), elist);
819+
getLeafNodes(exr.right(), elist);
820+
}else if(exr.type().equals(Expr.Type.NOT)){
821+
getLeafNodes(exr.left(), elist);
822+
}else{
823+
elist.add(exr);
824+
}
825+
}
773826
// 0 = uncharged or value other than these, 1 = +3, 2 = +2, 3 = +1,
774827
// 4 = doublet radical, 5 = -1, 6 = -2, 7 = -3
775828
private int determineCharge(IAtomContainer mol, IAtom atom) {

storage/ctab/src/test/java/org/openscience/cdk/io/MDLV2000WriterTest.java

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,7 @@
2626
import org.junit.Assert;
2727
import org.junit.BeforeClass;
2828
import org.junit.Test;
29-
import org.openscience.cdk.Atom;
30-
import org.openscience.cdk.AtomContainer;
31-
import org.openscience.cdk.Bond;
32-
import org.openscience.cdk.CDKConstants;
33-
import org.openscience.cdk.ChemFile;
34-
import org.openscience.cdk.ChemModel;
35-
import org.openscience.cdk.DefaultChemObjectBuilder;
36-
import org.openscience.cdk.PseudoAtom;
29+
import org.openscience.cdk.*;
3730
import org.openscience.cdk.exception.CDKException;
3831
import org.openscience.cdk.interfaces.IAtom;
3932
import org.openscience.cdk.interfaces.IAtomContainer;
@@ -1001,4 +994,36 @@ public void writeParentAtomSgroupAsList() throws Exception{
1001994
assertThat(sw.toString(), containsString("SPA 1 1"));
1002995

1003996
}
997+
998+
@Test
999+
public void roundTripWithNotAtomList() throws Exception {
1000+
try (InputStream in = getClass().getResourceAsStream("query_notatomlist.mol");
1001+
MDLV2000Reader mdlr = new MDLV2000Reader(in)) {
1002+
1003+
IAtomContainer mol = mdlr.read(SilentChemObjectBuilder.getInstance().newAtomContainer());
1004+
1005+
StringWriter sw = new StringWriter();
1006+
try (MDLV2000Writer mdlw = new MDLV2000Writer(sw)) {
1007+
mdlw.write(mol);
1008+
}
1009+
String writtenMol = sw.toString();
1010+
// M ALS 1 3 F F N O
1011+
assertThat(writtenMol, containsString("M ALS 1 3 T F N O"));
1012+
}
1013+
}
1014+
@Test
1015+
public void roundTripWithAtomList() throws Exception {
1016+
try (InputStream in = getClass().getResourceAsStream("query_atomlist.mol");
1017+
MDLV2000Reader mdlr = new MDLV2000Reader(in)) {
1018+
1019+
IAtomContainer mol = mdlr.read(SilentChemObjectBuilder.getInstance().newAtomContainer());
1020+
1021+
StringWriter sw = new StringWriter();
1022+
try (MDLV2000Writer mdlw = new MDLV2000Writer(sw)) {
1023+
mdlw.write(mol);
1024+
}
1025+
String writtenMol = sw.toString();
1026+
assertThat(writtenMol, containsString("M ALS 1 3 F F N O"));
1027+
}
1028+
}
10041029
}

0 commit comments

Comments
 (0)