Skip to content

Commit 778f877

Browse files
committed
More correctly handle query atoms/bonds - note we load the query as drawn but it won't round trip quite right to SMARTS.
1 parent ae25719 commit 778f877

File tree

3 files changed

+98
-10
lines changed

3 files changed

+98
-10
lines changed

storage/ctab/src/main/java/org/openscience/cdk/io/MDLV2000Reader.java

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,7 @@ private static IChemModel newModel(final IAtomContainer container) {
315315
*/
316316
private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKException {
317317

318+
boolean isQuery = molecule instanceof IQueryAtomContainer;
318319
IAtomContainer outputContainer = null;
319320
Map<IAtom,Integer> parities = new HashMap<>();
320321

@@ -394,7 +395,7 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
394395
line = input.readLine();
395396
linecount++;
396397

397-
final IAtom atom = readAtomFast(line, molecule.getBuilder(), parities, linecount);
398+
final IAtom atom = readAtomFast(line, molecule.getBuilder(), parities, linecount, isQuery);
398399

399400
atoms[i] = atom;
400401

@@ -429,19 +430,18 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
429430
}
430431
}
431432

432-
boolean hasQueryBonds = false;
433433
for (int i = 0; i < nBonds; i++) {
434434
line = input.readLine();
435435
linecount++;
436436

437-
bonds[i] = readBondFast(line, molecule.getBuilder(), atoms, explicitValence, linecount);
438-
hasQueryBonds = hasQueryBonds ||
437+
bonds[i] = readBondFast(line, molecule.getBuilder(), atoms, explicitValence, linecount, isQuery);
438+
isQuery = isQuery ||
439439
bonds[i] instanceof IQueryBond ||
440440
(bonds[i].getOrder() == IBond.Order.UNSET &&
441441
!bonds[i].getFlag(CDKConstants.ISAROMATIC));
442442
}
443443

444-
if (!hasQueryBonds)
444+
if (!isQuery)
445445
outputContainer = molecule;
446446
else
447447
outputContainer = new QueryAtomContainer(molecule.getBuilder());
@@ -514,7 +514,7 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
514514
for (int i = offset; i < outputContainer.getAtomCount(); i++) {
515515
int valence = explicitValence[i - offset];
516516
if (valence < 0) {
517-
hasQueryBonds = true; // also counts aromatic bond as query
517+
isQuery = true; // also counts aromatic bond as query
518518
} else {
519519
int unpaired = outputContainer.getConnectedSingleElectronsCount(outputContainer.getAtom(i));
520520
applyMDLValenceModel(outputContainer.getAtom(i), valence + unpaired, unpaired);
@@ -638,7 +638,7 @@ private String removeNonDigits(String input) {
638638
}
639639

640640
IAtom readAtomFast(String line, IChemObjectBuilder builder, int lineNum) throws CDKException, IOException {
641-
return readAtomFast(line, builder, Collections.<IAtom,Integer>emptyMap(), lineNum);
641+
return readAtomFast(line, builder, Collections.<IAtom,Integer>emptyMap(), lineNum, false);
642642
}
643643

644644
/**
@@ -661,7 +661,7 @@ IAtom readAtomFast(String line, IChemObjectBuilder builder, int lineNum) throws
661661
* @param lineNum the line number - for printing error messages
662662
* @return a new atom instance
663663
*/
664-
IAtom readAtomFast(String line, IChemObjectBuilder builder, Map<IAtom,Integer> parities, int lineNum) throws CDKException, IOException {
664+
IAtom readAtomFast(String line, IChemObjectBuilder builder, Map<IAtom,Integer> parities, int lineNum, boolean isQuery) throws CDKException, IOException {
665665

666666
// The line may be truncated and it's checked in reverse at the specified
667667
// lengths:
@@ -672,7 +672,7 @@ IAtom readAtomFast(String line, IChemObjectBuilder builder, Map<IAtom,Integer> p
672672

673673
String symbol;
674674
double x, y, z;
675-
int massDiff = 0, charge = 0, parity = 0, valence = 0, mapping = 0;
675+
int massDiff = 0, charge = 0, parity = 0, valence = 0, mapping = 0, hcount = 0;
676676

677677
int length = length(line);
678678
if (length > 69) // excess data we should check all fields
@@ -692,6 +692,7 @@ IAtom readAtomFast(String line, IChemObjectBuilder builder, Map<IAtom,Integer> p
692692
valence = readMolfileInt(line, 48);
693693
case 48: // bbb: stereo care [query]
694694
case 45: // hhh: hydrogen count + 1 [query]
695+
hcount = readMolfileInt(line, 42);
695696
case 42: // sss: stereo parity
696697
parity = toInt(line.charAt(41));
697698
case 39: // ccc: charge
@@ -713,6 +714,17 @@ IAtom readAtomFast(String line, IChemObjectBuilder builder, Map<IAtom,Integer> p
713714

714715
IAtom atom = createAtom(symbol, builder, lineNum);
715716

717+
if (isQuery) {
718+
Expr expr = new Expr(Expr.Type.ELEMENT, atom.getAtomicNumber());
719+
if (hcount != 0) {
720+
if (hcount < 0)
721+
hcount = 0;
722+
expr.and(new Expr(Expr.Type.IMPL_H_COUNT, hcount));
723+
}
724+
atom = new QueryAtom(builder);
725+
((QueryAtom)atom).setExpression(expr);
726+
}
727+
716728
atom.setPoint3d(new Point3d(x, y, z));
717729
atom.setFormalCharge(charge);
718730
atom.setStereoParity(parity);
@@ -732,9 +744,15 @@ IAtom readAtomFast(String line, IChemObjectBuilder builder, Map<IAtom,Integer> p
732744

733745
if (mapping != 0) atom.setProperty(CDKConstants.ATOM_ATOM_MAPPING, mapping);
734746

747+
735748
return atom;
736749
}
737750

751+
// for testing
752+
IBond readBondFast(String line, IChemObjectBuilder builder, IAtom[] atoms, int[] explicitValence, int lineNum) throws CDKException {
753+
return readBondFast(line, builder, atoms, explicitValence, lineNum, false);
754+
}
755+
738756
/**
739757
* Read a bond from a line in the MDL bond block. The bond block is
740758
* formatted as follows, {@code 111222tttsssxxxrrrccc}, where:
@@ -756,7 +774,8 @@ IAtom readAtomFast(String line, IChemObjectBuilder builder, Map<IAtom,Integer> p
756774
* @throws CDKException thrown if the input was malformed or didn't make
757775
* sense
758776
*/
759-
IBond readBondFast(String line, IChemObjectBuilder builder, IAtom[] atoms, int[] explicitValence, int lineNum)
777+
IBond readBondFast(String line, IChemObjectBuilder builder, IAtom[] atoms, int[] explicitValence, int lineNum,
778+
boolean isQuery)
760779
throws CDKException {
761780

762781
// The line may be truncated and it's checked in reverse at the specified
@@ -831,6 +850,18 @@ IBond readBondFast(String line, IChemObjectBuilder builder, IAtom[] atoms, int[]
831850
explicitValence[u] = explicitValence[v] = Integer.MIN_VALUE;
832851
}
833852

853+
if (isQuery && bond.getClass() != QueryBond.class) {
854+
IBond.Order order = bond.getOrder();
855+
Expr expr = null;
856+
if (bond.isAromatic()) {
857+
expr = new Expr(Expr.Type.IS_AROMATIC);
858+
} else {
859+
expr = new Expr(Expr.Type.ORDER,
860+
bond.getOrder().numeric());
861+
}
862+
bond = new QueryBond(atoms[u], atoms[v], expr);
863+
}
864+
834865
return bond;
835866
}
836867

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* =====================================
3+
* Copyright (c) 2020 NextMove Software
4+
* =====================================
5+
*/
6+
7+
package org.openscience.cdk.smarts;
8+
9+
import org.hamcrest.CoreMatchers;
10+
import org.junit.Test;
11+
import org.openscience.cdk.io.MDLV2000Reader;
12+
import org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer;
13+
import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer;
14+
import org.openscience.cdk.silent.SilentChemObjectBuilder;
15+
16+
import java.io.InputStream;
17+
18+
import static org.hamcrest.CoreMatchers.is;
19+
import static org.hamcrest.MatcherAssert.assertThat;
20+
21+
22+
public class Mdl2SmartsTest {
23+
24+
@Test
25+
public void atomList() throws Exception {
26+
try (InputStream in = getClass().getResourceAsStream("mdlquery.mol");
27+
MDLV2000Reader mdlr = new MDLV2000Reader(in)) {
28+
IQueryAtomContainer mol = mdlr.read(new QueryAtomContainer(SilentChemObjectBuilder.getInstance()));
29+
// Important! MDL => SMARTS is not exact since SMARTS has no way of
30+
// expressing double bond, = means "double aliphatic" and will
31+
// not match benzene (for example) whereas the MDL query would
32+
assertThat(Smarts.generate(mol),
33+
is("[F,#7,#8]-[#6]1-[#6h0]=[#6h1]-[#6]=[#6]-[#6]=1"));
34+
}
35+
}
36+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
2+
Mrv1810 05242013152D
3+
4+
7 7 1 0 0 0 999 V2000
5+
0.7145 2.0625 0.0000 L 0 0 0 0 0 0 0 0 0 0 0 0
6+
0.7145 1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7+
1.4289 0.8250 0.0000 C 0 0 0 -1 0 0 0 0 0 0 0 0
8+
1.4289 -0.0000 0.0000 C 0 0 0 1 0 0 0 0 0 0 0 0
9+
0.7145 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
10+
0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
11+
0.0000 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
12+
1 2 1 0 0 0 0
13+
2 3 1 0 0 0 0
14+
3 4 2 0 0 0 0
15+
4 5 1 0 0 0 0
16+
5 6 2 0 0 0 0
17+
6 7 1 0 0 0 0
18+
2 7 2 0 0 0 0
19+
1 F 3 9 7 8
20+
M ALS 1 3 F F N O
21+
M END

0 commit comments

Comments
 (0)