Skip to content

Commit f70b038

Browse files
committed
Useful utility to suppress query hydrogens (e.g. in SMARTS).
1 parent 7a18f1a commit f70b038

File tree

2 files changed

+143
-5
lines changed

2 files changed

+143
-5
lines changed

base/isomorphism/src/main/java/org/openscience/cdk/isomorphism/matchers/QueryAtomContainerCreator.java

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,19 @@
1818
*/
1919
package org.openscience.cdk.isomorphism.matchers;
2020

21+
import org.openscience.cdk.AtomRef;
22+
import org.openscience.cdk.BondRef;
23+
import org.openscience.cdk.interfaces.IAtom;
2124
import org.openscience.cdk.interfaces.IAtomContainer;
2225
import org.openscience.cdk.interfaces.IBond;
26+
import org.openscience.cdk.interfaces.IChemObject;
27+
import org.openscience.cdk.interfaces.IStereoElement;
2328

29+
import java.util.HashMap;
30+
import java.util.HashSet;
2431
import java.util.Iterator;
32+
import java.util.Map;
33+
import java.util.Set;
2534

2635
/**
2736
* Utilities for creating queries from 'real' molecules. Note that most of this
@@ -102,9 +111,9 @@ public static QueryAtomContainer createSymbolChargeIDQueryContainer(IAtomContain
102111
}
103112
Iterator<IBond> bonds = container.bonds().iterator();
104113
while (bonds.hasNext()) {
105-
IBond bond = bonds.next();
106-
int index1 = container.indexOf(bond.getBegin());
107-
int index2 = container.indexOf(bond.getEnd());
114+
IBond bond = bonds.next();
115+
int index1 = container.indexOf(bond.getBegin());
116+
int index2 = container.indexOf(bond.getEnd());
108117
if (bond.isAromatic()) {
109118
QueryBond qbond = new QueryBond(queryContainer.getAtom(index1),
110119
queryContainer.getAtom(index2),
@@ -135,7 +144,7 @@ public static QueryAtomContainer createSymbolChargeIDQueryContainer(IAtomContain
135144
* Expr.Type.ORDER);
136145
* </pre>
137146
*
138-
* @param container The AtomContainer that stands as model
147+
* @param container The AtomContainer that stands as model
139148
* @param aromaticity option flag
140149
* @return The new QueryAtomContainer created from container.
141150
*/
@@ -160,7 +169,7 @@ public static QueryAtomContainer createAnyAtomContainer(IAtomContainer container
160169
* QueryAtomContainer.create(container);
161170
* </pre>
162171
*
163-
* @param container The AtomContainer that stands as model
172+
* @param container The AtomContainer that stands as model
164173
* @param aromaticity option flag
165174
* @return The new QueryAtomContainer created from container.
166175
*/
@@ -190,4 +199,68 @@ public static QueryAtomContainer createAnyAtomForPseudoAtomQueryContainer(IAtomC
190199
Expr.Type.IS_AROMATIC,
191200
Expr.Type.ALIPHATIC_ORDER);
192201
}
202+
203+
static boolean isSimpleHydrogen(Expr expr) {
204+
switch (expr.type()) {
205+
case ELEMENT:
206+
case ALIPHATIC_ELEMENT:
207+
return expr.value() == 1;
208+
default:
209+
return false;
210+
}
211+
}
212+
213+
public static IAtomContainer suppressQueryHydrogens(IAtomContainer mol) {
214+
215+
// pre-checks
216+
for (IAtom atom : mol.atoms()) {
217+
if (!(AtomRef.deref(atom) instanceof QueryAtom))
218+
throw new IllegalArgumentException("Non-query atoms found!");
219+
}
220+
for (IBond bond : mol.bonds()) {
221+
if (!(BondRef.deref(bond) instanceof QueryBond))
222+
throw new IllegalArgumentException("Non-query bonds found!");
223+
}
224+
225+
Map<IChemObject,IChemObject> plainHydrogens = new HashMap<>();
226+
for (IAtom atom : mol.atoms()) {
227+
int hcnt = 0;
228+
for (IAtom nbor : mol.getConnectedAtomsList(atom)) {
229+
QueryAtom qnbor = (QueryAtom) AtomRef.deref(nbor);
230+
if (mol.getConnectedBondsCount(nbor) == 1 &&
231+
isSimpleHydrogen(qnbor.getExpression())) {
232+
hcnt++;
233+
plainHydrogens.put(nbor, atom);
234+
}
235+
}
236+
if (hcnt > 0) {
237+
QueryAtom qatom = (QueryAtom) AtomRef.deref(atom);
238+
Expr e = qatom.getExpression();
239+
Expr hexpr = new Expr();
240+
for (int i = 0; i < hcnt; i++)
241+
hexpr.and(new Expr(Expr.Type.TOTAL_H_COUNT, i).negate());
242+
e.and(hexpr);
243+
}
244+
}
245+
246+
// nothing to do
247+
if (plainHydrogens.isEmpty())
248+
return mol;
249+
250+
IAtomContainer res = new QueryAtomContainer(mol.getBuilder());
251+
for (IAtom atom : mol.atoms()) {
252+
if (!plainHydrogens.containsKey(atom))
253+
res.addAtom(atom);
254+
}
255+
for (IBond bond : mol.bonds()) {
256+
if (!plainHydrogens.containsKey(bond.getBegin()) &&
257+
!plainHydrogens.containsKey(bond.getEnd()))
258+
res.addBond(bond);
259+
}
260+
for (IStereoElement se : mol.stereoElements()) {
261+
res.addStereoElement(se.map(plainHydrogens));
262+
}
263+
264+
return res;
265+
}
193266
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* Copyright (C) 2019 The Chemistry Development Kit (CDK) project
3+
*
4+
* Contact: cdk-devel@lists.sourceforge.net
5+
*
6+
* This program is free software; you can redistribute it and/or modify it
7+
* under the terms of the GNU Lesser General Public License as published by
8+
* the Free Software Foundation; either version 2.1 of the License, or (at
9+
* your option) any later version. All we ask is that proper credit is given
10+
* for our work, which includes - but is not limited to - adding the above
11+
* copyright notice to the beginning of your source code files, and to any
12+
* copyright notice that you may distribute with programs based on this work.
13+
*
14+
* This program is distributed in the hope that it will be useful, but WITHOUT
15+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
17+
* License for more details.
18+
*
19+
* You should have received a copy of the GNU Lesser General Public License
20+
* along with this program; if not, write to the Free Software
21+
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22+
*/
23+
24+
package org.openscience.cdk.isomorphism;
25+
26+
import org.junit.Test;
27+
import org.openscience.cdk.interfaces.IAtomContainer;
28+
import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer;
29+
import org.openscience.cdk.isomorphism.matchers.QueryAtomContainerCreator;
30+
import org.openscience.cdk.smarts.Smarts;
31+
32+
import static org.hamcrest.CoreMatchers.is;
33+
import static org.junit.Assert.assertThat;
34+
import static org.junit.Assert.assertTrue;
35+
36+
public class HydrogenSuppressionTest {
37+
38+
private static void test(String smaexp,
39+
String smainp) {
40+
IAtomContainer qry = new QueryAtomContainer(null);
41+
assertTrue(Smarts.parse(qry, smainp));
42+
IAtomContainer sup = QueryAtomContainerCreator.suppressQueryHydrogens(qry);
43+
String smaact = Smarts.generate(sup);
44+
assertThat(smaact, is(smaexp));
45+
}
46+
47+
@Test public void oneHydrogen() {
48+
test("[c!H0]", "c[H]");
49+
test("[c!H0]", "c[#1]");
50+
}
51+
52+
@Test public void twoHydrogens() {
53+
test("[c!H0!H1]", "c([H])[H]");
54+
test("[c!H0!H1]", "c([#1])[#1]");
55+
}
56+
57+
@Test public void deuteriumIsKept() {
58+
test("[c!H0][2#1]", "c([2H])[H]");
59+
test("[c!H0][2#1]", "c([2#1])[#1]");
60+
}
61+
62+
@Test public void bridgingIsKept() {
63+
test("B[#1]B", "B[H]B");
64+
}
65+
}

0 commit comments

Comments
 (0)