Skip to content

Commit 1fa020e

Browse files
committed
added support for reading and writing legacy atomlist blocks
1 parent f518209 commit 1fa020e

File tree

8 files changed

+248
-33
lines changed

8 files changed

+248
-33
lines changed

storage/ctab/src/main/java/org/openscience/cdk/io/MDLV2000Reader.java

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -943,6 +943,45 @@ void readPropertiesFast(final BufferedReader input, final IAtomContainer contain
943943
if (group == null) return;
944944
break;
945945

946+
// Newer programs use the M ALS item in the properties block in place of the atom list
947+
// block. The atom list block is retained for compatibility, but information in an M ALS item
948+
// supersedes atom list block information.
949+
// aaa kSSSSn 111 222 333 444 555
950+
// 0123456789012345
951+
// aaa = number of atom (L) where list is attached
952+
// k = T = [NOT] list, = F = normal list
953+
// n = number of entries in list; maximum is 5
954+
// 111...555 = atomic number of each atom on the list
955+
// S = space
956+
case LEGACY_ATOM_LIST:
957+
index = readUInt(line, 0, 3)-1;
958+
{
959+
boolean negate = line.charAt(3) == 'T' ||
960+
line.charAt(4) == 'T';
961+
Expr expr = new Expr(Expr.Type.TRUE);
962+
StringBuilder sb = new StringBuilder();
963+
for (int i = 11; i < line.length(); i+=4) {
964+
int atomicNumber = readUInt(line, i, 3);
965+
expr.or(new Expr(Expr.Type.ELEMENT, atomicNumber));
966+
967+
}
968+
969+
if (negate)
970+
expr.negate();
971+
IAtom atom = container.getAtom(index);
972+
if (AtomRef.deref(atom) instanceof QueryAtom) {
973+
QueryAtom ref = (QueryAtom)AtomRef.deref(atom);
974+
ref.setExpression(expr);
975+
} else {
976+
QueryAtom queryAtom = new QueryAtom(expr);
977+
//keep coordinates from old atom
978+
queryAtom.setPoint2d(atom.getPoint2d());
979+
queryAtom.setPoint3d(atom.getPoint3d());
980+
container.setAtom(index, queryAtom);
981+
}
982+
}
983+
break;
984+
946985
// M ALS aaannn e 11112222 ...
947986
// 012345678901234567
948987
// aaa: atom index
@@ -2394,11 +2433,14 @@ enum PropertyKey {
23942433
M_END,
23952434

23962435
/** Non-property header. */
2397-
UNKNOWN;
2436+
UNKNOWN,
2437+
/** old atom list superseded by {@link #M_ALS} */
2438+
LEGACY_ATOM_LIST;
23982439

23992440
/** Index of 'M XXX' properties for quick lookup. */
24002441
private static final Map<String, PropertyKey> mSuffix = new HashMap<String, PropertyKey>(60);
24012442

2443+
private static Pattern LEGACY_ATOM_LIST_PATTERN = Pattern.compile("^[0-9 ][0-9 ][0-9 ] [T|F]");
24022444
static {
24032445
for (PropertyKey p : values()) {
24042446
if (p.name().charAt(0) == 'M') mSuffix.put(p.name().substring(2, 5), p);
@@ -2432,6 +2474,10 @@ static PropertyKey of(final String line) {
24322474
if (property != null) return property;
24332475
return UNKNOWN;
24342476
}
2477+
Matcher matcher = LEGACY_ATOM_LIST_PATTERN.matcher(line);
2478+
if(matcher.find()){
2479+
return LEGACY_ATOM_LIST;
2480+
}
24352481
return UNKNOWN;
24362482
}
24372483

storage/ctab/src/main/java/org/openscience/cdk/io/MDLV2000Writer.java

Lines changed: 74 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -413,15 +413,29 @@ public void writeMolecule(IAtomContainer container) throws Exception {
413413
// write Counts line
414414
line.append(formatMDLInt(container.getAtomCount(), 3));
415415
line.append(formatMDLInt(container.getBondCount(), 3));
416-
line.append(" 0 0");
416+
417+
418+
//find all the query atoms whose expressions can be written as atom lists
419+
Map<Integer, IAtom> atomLists = new LinkedHashMap<>();
420+
421+
for (int f = 0; f < container.getAtomCount(); f++) {
422+
if (container.getAtom(f) instanceof IQueryAtom) {
423+
QueryAtom queryAtom = (QueryAtom) AtomRef.deref(container.getAtom(f));
424+
Expr expr = queryAtom.getExpression();
425+
if (isValidAtomListExpression(expr)) {
426+
atomLists.put(f, container.getAtom(f));
427+
}
428+
}
429+
}
430+
//write number of atom lists
431+
line.append(formatMDLInt(atomLists.size(), 3));
432+
line.append(" 0");
417433
// we mark all stereochemistry to absolute for now
418434
line.append(atomstereo.isEmpty() ? " 0" : " 1");
419435
line.append(" 0 0 0 0 0999 V2000");
420436
writer.write(line.toString());
421437
writer.write('\n');
422438

423-
//map of ALS atoms to write and their indexes since some atom getIndex() can return -1
424-
Map<IAtom, Integer> atomLists = new LinkedHashMap<>();
425439
// write Atom block
426440
for (int f = 0; f < container.getAtomCount(); f++) {
427441
IAtom atom = container.getAtom(f);
@@ -498,15 +512,8 @@ public void writeMolecule(IAtomContainer container) throws Exception {
498512
}
499513
}
500514

501-
} else if(container.getAtom(f) instanceof IQueryAtom) {
502-
QueryAtom queryAtom = (QueryAtom) AtomRef.deref(container.getAtom(f));
503-
Expr expr = queryAtom.getExpression();
504-
if(isValidAtomListExpression(expr)){
505-
line.append(formatMDLString("L", 3));
506-
atomLists.put(container.getAtom(f), f);
507-
}else{
508-
line.append(formatMDLString(container.getAtom(f).getSymbol(), 3));
509-
}
515+
} else if(atomLists.containsKey(f)) {
516+
line.append(formatMDLString("L", 3));
510517
} else {
511518

512519
line.append(formatMDLString(container.getAtom(f).getSymbol(), 3));
@@ -780,35 +787,67 @@ else if (e.equals(new Expr(Expr.Type.ALIPHATIC_ORDER, 2).or(new Expr(Expr.Type.I
780787
}
781788
}
782789
//write atom lists
783-
for(Map.Entry<IAtom, Integer> entry : atomLists.entrySet()){
784-
QueryAtom qa = (QueryAtom) AtomRef.deref(entry.getKey());
790+
writeAtomLists(atomLists, writer);
791+
792+
writeSgroups(container, writer, atomindex);
793+
794+
// close molecule
795+
writer.write("M END");
796+
writer.write('\n');
797+
writer.flush();
798+
}
799+
800+
private static void writeAtomLists(Map<Integer, IAtom> atomLists, BufferedWriter writer) throws IOException {
801+
//Each atom list is written twice: first in the legacy atom list way and then in the M ALS way.
802+
//Since there should only be a few lines to write each way,
803+
//it's easier to build both in one pass through our Map
804+
//and save the lines into temp Lists that are written out at the end, legacy lines first.
805+
List<String> legacyLines = new ArrayList<>(atomLists.size());
806+
List<String> alsLines = new ArrayList<>(atomLists.size());
807+
808+
for(Map.Entry<Integer, IAtom> entry : atomLists.entrySet()){
809+
QueryAtom qa = (QueryAtom) AtomRef.deref(entry.getValue());
785810
//atom lists are limited to just a list of ELEMENTS OR'ed together
786811
//with the whole expression possibly negated
787812

788813
Expr expression = qa.getExpression();
789814
List<String> elements=getAtomList(expression);
790-
writer.write("M ALS ");
791-
writer.write(formatMDLInt(entry.getValue()+1, 3));
792-
writer.write(formatMDLInt(elements.size(), 3));
815+
StringBuilder legacyBuilder = new StringBuilder(80);
816+
StringBuilder alsBuilder = new StringBuilder(80);
817+
alsBuilder.append("M ALS ");
818+
alsBuilder.append(formatMDLInt(entry.getKey()+1, 3));
819+
alsBuilder.append(formatMDLInt(elements.size(), 3));
820+
821+
legacyBuilder.append(formatMDLInt(entry.getKey()+1, 3));
793822
//root expression type is either OR or NOT
794823
if(expression.type() == Expr.Type.NOT){
795-
writer.write(" T ");
824+
alsBuilder.append(" T ");
825+
legacyBuilder.append(" T ");
796826
}else {
797-
writer.write(" F ");
827+
alsBuilder.append(" F ");
828+
legacyBuilder.append(" F ");
798829
}
799830
for(String symbol : elements){
800-
writer.write(formatMDLString(symbol, 4));
831+
alsBuilder.append(formatMDLString(symbol, 4));
801832
}
802-
writer.write('\n');
803-
}
804-
805-
writeSgroups(container, writer, atomindex);
833+
legacyBuilder.append(formatMDLInt(elements.size(), 1));
834+
for(Integer atomicNumber : getAtomListNumbers(expression)){
835+
legacyBuilder.append(" ").append(formatMDLInt(atomicNumber, 3));
836+
}
837+
alsBuilder.append('\n');
838+
legacyBuilder.append('\n');
806839

807-
// close molecule
808-
writer.write("M END");
809-
writer.write('\n');
810-
writer.flush();
840+
alsLines.add(alsBuilder.toString());
841+
legacyLines.add(legacyBuilder.toString());
842+
}
843+
for(String line: legacyLines){
844+
writer.write(line);
845+
}
846+
for(String line: alsLines){
847+
writer.write(line);
848+
}
811849
}
850+
812851
private static boolean isValidAtomListExpression(Expr exp){
813852

814853
Expr rootToCheck;
@@ -837,6 +876,13 @@ private static List<String> getAtomList(Expr exp){
837876
return elist.stream().map(expr->Elements.ofNumber(expr.value()).symbol())
838877
.collect(Collectors.toList());
839878

879+
}
880+
private static List<Integer> getAtomListNumbers(Expr exp){
881+
List<Expr> elist = new ArrayList<>();
882+
getLeafNodes(exp, elist);
883+
return elist.stream().map(Expr::value)
884+
.collect(Collectors.toList());
885+
840886
}
841887

842888
private static void getLeafNodes(Expr exr, List<Expr> elist){

storage/ctab/src/test/java/org/openscience/cdk/io/MDLV2000ReaderTest.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1850,6 +1850,20 @@ public void testBadAtomCoordinateFormat() throws Exception {
18501850
assertThat(expr, is(expected));
18511851
}
18521852
}
1853+
@Test public void legacyAtomList() throws Exception {
1854+
try (InputStream in = getClass().getResourceAsStream("query_legacyatomlist.mol");
1855+
MDLV2000Reader mdlr = new MDLV2000Reader(in)) {
1856+
IQueryAtomContainer mol = mdlr.read(new QueryAtomContainer(SilentChemObjectBuilder.getInstance()));
1857+
IAtom deref = AtomRef.deref(mol.getAtom(0));
1858+
assertThat(deref, CoreMatchers.<IAtom>instanceOf(QueryAtom.class));
1859+
QueryAtom queryAtom = (QueryAtom) deref;
1860+
Expr expr = queryAtom.getExpression();
1861+
Expr expected = new Expr(Expr.Type.ELEMENT, 9) // F
1862+
.or(new Expr(Expr.Type.ELEMENT, 7)) // N
1863+
.or(new Expr(Expr.Type.ELEMENT, 8)); // O
1864+
assertThat(expr, is(expected));
1865+
}
1866+
}
18531867

18541868
@Test public void notatomList() throws Exception {
18551869
try (InputStream in = getClass().getResourceAsStream("query_notatomlist.mol");
@@ -1866,6 +1880,21 @@ public void testBadAtomCoordinateFormat() throws Exception {
18661880
assertThat(expr, is(expected));
18671881
}
18681882
}
1883+
@Test public void legacynotatomList() throws Exception {
1884+
try (InputStream in = getClass().getResourceAsStream("query_legacynotatomlist.mol");
1885+
MDLV2000Reader mdlr = new MDLV2000Reader(in)) {
1886+
IQueryAtomContainer mol = mdlr.read(new QueryAtomContainer(SilentChemObjectBuilder.getInstance()));
1887+
IAtom deref = AtomRef.deref(mol.getAtom(0));
1888+
assertThat(deref, CoreMatchers.<IAtom>instanceOf(QueryAtom.class));
1889+
QueryAtom queryAtom = (QueryAtom) deref;
1890+
Expr expr = queryAtom.getExpression();
1891+
Expr expected = new Expr(Expr.Type.ELEMENT, 9) // F
1892+
.or(new Expr(Expr.Type.ELEMENT, 7)) // N
1893+
.or(new Expr(Expr.Type.ELEMENT, 8)); // O
1894+
expected = expected.negate();
1895+
assertThat(expr, is(expected));
1896+
}
1897+
}
18691898

18701899
@Test public void sgroupsAbbrRoundTrip() throws IOException, CDKException {
18711900
StringWriter sw = new StringWriter();

storage/ctab/src/test/java/org/openscience/cdk/io/MDLV2000WriterTest.java

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,8 +1008,9 @@ public void roundTripWithNotAtomList() throws Exception {
10081008
mdlw.write(mol);
10091009
}
10101010
String writtenMol = sw.toString();
1011-
// M ALS 1 3 F F N O
1012-
assertThat(writtenMol, containsString("M ALS 1 3 T F N O"));
1011+
assertThat(writtenMol, containsString(
1012+
" 1 T 3 9 7 8\n" +
1013+
"M ALS 1 3 T F N O"));
10131014
}
10141015
}
10151016
@Test
@@ -1024,7 +1025,32 @@ public void roundTripWithAtomList() throws Exception {
10241025
mdlw.write(mol);
10251026
}
10261027
String writtenMol = sw.toString();
1027-
assertThat(writtenMol, containsString("M ALS 1 3 F F N O"));
1028+
1029+
assertThat(writtenMol, containsString(
1030+
" 1 F 3 9 7 8\n"+
1031+
"M ALS 1 3 F F N O"));
1032+
}
1033+
}
1034+
@Test
1035+
public void roundTripWithMultipleLegacyAtomLists() throws Exception {
1036+
try (InputStream in = getClass().getResourceAsStream("query_manylegacyatomlist.mol");
1037+
MDLV2000Reader mdlr = new MDLV2000Reader(in)) {
1038+
1039+
IAtomContainer mol = mdlr.read(SilentChemObjectBuilder.getInstance().newAtomContainer());
1040+
1041+
StringWriter sw = new StringWriter();
1042+
try (MDLV2000Writer mdlw = new MDLV2000Writer(sw)) {
1043+
mdlw.write(mol);
1044+
}
1045+
String writtenMol = sw.toString();
1046+
1047+
assertThat(writtenMol, containsString(
1048+
" 4 F 2 8 7\n" +
1049+
" 5 F 2 7 8\n" +
1050+
" 6 F 2 7 8\n"+
1051+
"M ALS 4 2 F O N \n" +
1052+
"M ALS 5 2 F N O \n" +
1053+
"M ALS 6 2 F N O"));
10281054
}
10291055
}
10301056

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
2+
Mrv1810 05242013152D
3+
4+
7 7 1 0 0 0 999 V2000
5+
0.7145 2.0625 0.0000 L 0 0 0 0 0 0 0 0 0 0 0 0
6+
0.7145 1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7+
1.4289 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
8+
1.4289 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
9+
0.7145 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
10+
0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
11+
0.0000 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
12+
1 2 1 0 0 0 0
13+
2 3 1 0 0 0 0
14+
3 4 1 0 0 0 0
15+
4 5 1 0 0 0 0
16+
5 6 1 0 0 0 0
17+
6 7 1 0 0 0 0
18+
2 7 1 0 0 0 0
19+
1 F 3 9 7 8
20+
M END
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
2+
Mrv1810 05242013152D
3+
4+
7 7 1 0 0 0 999 V2000
5+
0.7145 2.0625 0.0000 L 0 0 0 0 0 0 0 0 0 0 0 0
6+
0.7145 1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7+
1.4289 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
8+
1.4289 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
9+
0.7145 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
10+
0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
11+
0.0000 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
12+
1 2 1 0 0 0 0
13+
2 3 1 0 0 0 0
14+
3 4 1 0 0 0 0
15+
4 5 1 0 0 0 0
16+
5 6 1 0 0 0 0
17+
6 7 1 0 0 0 0
18+
2 7 1 0 0 0 0
19+
1 T 3 9 7 8
20+
M END
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
2+
ACCLDraw10012012192D
3+
4+
10 10 3 0 0 0 0 0 0 0999 V2000
5+
12.8286 -7.3697 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
6+
12.0309 -7.3697 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7+
13.2300 -8.0490 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
8+
11.5986 -6.6106 0.0000 L 0 0 0 0 0 0 0 0 0 0 0 0
9+
11.5909 -8.0335 0.0000 L 0 0 0 0 0 0 0 0 0 0 0 0
10+
12.8363 -8.7514 0.0000 L 0 0 0 0 0 0 0 0 0 0 0 0
11+
13.2300 -6.6260 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
12+
14.0379 -8.0490 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
13+
14.0714 -6.6260 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
14+
14.4779 -7.3439 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
15+
2 1 1 0 0 0 0
16+
3 1 2 0 0 0 0
17+
4 2 2 0 0 0 0
18+
5 2 1 0 0 0 0
19+
6 3 1 0 0 0 0
20+
7 1 1 0 0 0 0
21+
8 3 1 0 0 0 0
22+
9 7 2 0 0 0 0
23+
10 9 1 0 0 0 0
24+
10 8 2 0 0 0 0
25+
4 F 2 8 7
26+
5 F 2 7 8
27+
6 F 2 7 8
28+
M END

storage/ctab/src/test/resources/org/openscience/cdk/io/query_notatomlist.mol

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,6 @@
1616
5 6 1 0 0 0 0
1717
6 7 1 0 0 0 0
1818
2 7 1 0 0 0 0
19-
1 F 3 9 7 8
19+
1 T 3 9 7 8
2020
M ALS 1 3 T F N O
2121
M END

0 commit comments

Comments
 (0)