Skip to content

Commit abb10a2

Browse files
committed
Allow queries to be created in a custom destination molecule. If the input already has query features, use those; otherwise create them using the specified query options.
1 parent d7df812 commit abb10a2

File tree

1 file changed

+207
-112
lines changed

1 file changed

+207
-112
lines changed

base/isomorphism/src/main/java/org/openscience/cdk/isomorphism/matchers/QueryAtomContainer.java

Lines changed: 207 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
import java.util.Map;
2929
import java.util.Set;
3030

31+
import org.openscience.cdk.AtomRef;
32+
import org.openscience.cdk.BondRef;
3133
import org.openscience.cdk.CDKConstants;
3234
import org.openscience.cdk.interfaces.IAtom;
3335
import org.openscience.cdk.interfaces.IAtomContainer;
@@ -1646,13 +1648,14 @@ public void setStereoElements(List<IStereoElement> elements) {
16461648
}
16471649

16481650
/**
1649-
* Create a query from a molecule and a provided set of expressions. The
1651+
* Populate a query from a molecule and a provided set of expressions. The
16501652
* molecule is converted and any features specified in the {@code opts}
16511653
* will be matched. <br><br>
16521654
* A good starting point is the following options:
16531655
* <pre>{@code
16541656
* // [nH]1ccc(=O)cc1 => n1:c:c:c(=O):c:c:1
1655-
* QueryAtomContainer.create(mol, ALIPHATIC_ELEMENT,
1657+
* QueryAtomContainer.create(qry, mol,
1658+
* ALIPHATIC_ELEMENT,
16561659
* AROMATIC_ELEMENT,
16571660
* SINGLE_OR_AROMATIC,
16581661
* ALIPHATIC_ORDER,
@@ -1665,7 +1668,8 @@ public void setStereoElements(List<IStereoElement> elements) {
16651668
* <br>
16661669
* <pre>{@code
16671670
* // [nH]1ccc(=O)cc1 => [nD2]1:[cD2]:[cD2]:[cD2](=[OD1]):[cD2]:[cD2]:1
1668-
* QueryAtomContainer.create(mol, ALIPHATIC_ELEMENT,
1671+
* QueryAtomContainer.create(qry, mol,
1672+
* ALIPHATIC_ELEMENT,
16691673
* AROMATIC_ELEMENT,
16701674
* DEGREE,
16711675
* SINGLE_OR_AROMATIC,
@@ -1681,7 +1685,8 @@ public void setStereoElements(List<IStereoElement> elements) {
16811685
* // [nH]1ccc(=O)cc1 =>
16821686
* // [nx2+0]1:[cx2+0]:[cx2+0]:[cx2+0](=[O&x0+0]):[cx2+0]:[cx2+0]:1
16831687
* // IMPORTANT! use Cycles.markRingAtomsAndBonds(mol) to set ring status
1684-
* QueryAtomContainer.create(mol, ALIPHATIC_ELEMENT,
1688+
* QueryAtomContainer.create(qry, mol,
1689+
* ALIPHATIC_ELEMENT,
16851690
* AROMATIC_ELEMENT,
16861691
* FORMAL_CHARGE,
16871692
* ISOTOPE,
@@ -1698,7 +1703,8 @@ public void setStereoElements(List<IStereoElement> elements) {
16981703
* <pre>{@code
16991704
* // [nH]1ccc(=O)cc1 =>
17001705
* // [0n+0]1:[0c+0]:[0c+0]:[0c+0](=[O+0]):[0c+0]:[0c+0]:1
1701-
* QueryAtomContainer.create(mol, ALIPHATIC_ELEMENT,
1706+
* QueryAtomContainer.create(qry, mol,
1707+
* ALIPHATIC_ELEMENT,
17021708
* AROMATIC_ELEMENT,
17031709
* FORMAL_CHARGE,
17041710
* ISOTOPE,
@@ -1710,95 +1716,103 @@ public void setStereoElements(List<IStereoElement> elements) {
17101716
* Please note that not all {@link Expr.Type}s are currently supported; if you
17111717
* require a specific type that you think is useful please open an issue.
17121718
*
1713-
* @param mol the molecule
1719+
* @param dst the output destination
1720+
* @param src the input molecule
17141721
* @param opts set of the expr types to match
1715-
* @return the query molecule
17161722
*/
1717-
public static QueryAtomContainer create(IAtomContainer mol, Expr.Type... opts) {
1723+
public static void create(IAtomContainer dst,
1724+
IAtomContainer src,
1725+
Expr.Type... opts) {
17181726
Set<Expr.Type> optset = EnumSet.noneOf(Expr.Type.class);
17191727
optset.addAll(Arrays.asList(opts));
17201728

1721-
QueryAtomContainer query = new QueryAtomContainer(mol.getBuilder());
17221729
Map<IChemObject, IChemObject> mapping = new HashMap<>();
17231730
Map<IChemObject, IStereoElement> stereos = new HashMap<>();
17241731

1725-
for (IStereoElement se : mol.stereoElements())
1732+
for (IStereoElement se : src.stereoElements())
17261733
stereos.put(se.getFocus(), se);
17271734
List<IStereoElement> qstereo = new ArrayList<>();
17281735

1729-
for (IAtom atom : mol.atoms()) {
1730-
Expr expr = new Expr();
1731-
1732-
// isotope first
1733-
if (optset.contains(ISOTOPE) && atom.getMassNumber() != null)
1734-
expr.and(new Expr(ISOTOPE, atom.getMassNumber()));
1735-
1736-
if (atom.getAtomicNumber() != null &&
1737-
atom.getAtomicNumber() != 0) {
1738-
if (atom.isAromatic()) {
1739-
if (optset.contains(AROMATIC_ELEMENT)) {
1740-
expr.and(new Expr(AROMATIC_ELEMENT,
1741-
atom.getAtomicNumber()));
1742-
} else {
1743-
if (optset.contains(IS_AROMATIC)) {
1744-
if (optset.contains(ELEMENT))
1745-
expr.and(new Expr(AROMATIC_ELEMENT,
1746-
atom.getAtomicNumber()));
1747-
else
1748-
expr.and(new Expr(Expr.Type.IS_AROMATIC));
1749-
} else if (optset.contains(ELEMENT)) {
1750-
expr.and(new Expr(ELEMENT,
1751-
atom.getAtomicNumber()));
1736+
for (IAtom atom : src.atoms()) {
1737+
Expr expr;
1738+
if (atom instanceof IQueryAtom) {
1739+
expr = ((QueryAtom)AtomRef.deref(atom)).getExpression();
1740+
IStereoElement se = stereos.get(atom);
1741+
if (se != null) qstereo.add(se);
1742+
} else {
1743+
expr = new Expr();
1744+
1745+
// isotope first
1746+
if (optset.contains(ISOTOPE) && atom.getMassNumber() != null)
1747+
expr.and(new Expr(ISOTOPE, atom.getMassNumber()));
1748+
1749+
if (atom.getAtomicNumber() != null &&
1750+
atom.getAtomicNumber() != 0) {
1751+
if (atom.isAromatic()) {
1752+
if (optset.contains(AROMATIC_ELEMENT)) {
1753+
expr.and(new Expr(AROMATIC_ELEMENT,
1754+
atom.getAtomicNumber()));
1755+
} else {
1756+
if (optset.contains(IS_AROMATIC)) {
1757+
if (optset.contains(ELEMENT))
1758+
expr.and(new Expr(AROMATIC_ELEMENT,
1759+
atom.getAtomicNumber()));
1760+
else
1761+
expr.and(new Expr(Expr.Type.IS_AROMATIC));
1762+
} else if (optset.contains(ELEMENT)) {
1763+
expr.and(new Expr(ELEMENT,
1764+
atom.getAtomicNumber()));
1765+
}
17521766
}
1753-
}
1754-
} else {
1755-
if (optset.contains(ALIPHATIC_ELEMENT)) {
1756-
expr.and(new Expr(ALIPHATIC_ELEMENT,
1757-
atom.getAtomicNumber()));
1758-
} else {
1759-
if (optset.contains(IS_ALIPHATIC)) {
1760-
if (optset.contains(ELEMENT))
1761-
expr.and(new Expr(ALIPHATIC_ELEMENT,
1762-
atom.getAtomicNumber()));
1763-
else
1764-
expr.and(new Expr(Expr.Type.IS_ALIPHATIC));
1765-
} else if (optset.contains(ELEMENT)) {
1766-
expr.and(new Expr(ELEMENT,
1767-
atom.getAtomicNumber()));
1767+
} else {
1768+
if (optset.contains(ALIPHATIC_ELEMENT)) {
1769+
expr.and(new Expr(ALIPHATIC_ELEMENT,
1770+
atom.getAtomicNumber()));
1771+
} else {
1772+
if (optset.contains(IS_ALIPHATIC)) {
1773+
if (optset.contains(ELEMENT))
1774+
expr.and(new Expr(ALIPHATIC_ELEMENT,
1775+
atom.getAtomicNumber()));
1776+
else
1777+
expr.and(new Expr(Expr.Type.IS_ALIPHATIC));
1778+
} else if (optset.contains(ELEMENT)) {
1779+
expr.and(new Expr(ELEMENT,
1780+
atom.getAtomicNumber()));
1781+
}
17681782
}
17691783
}
17701784
}
1771-
}
17721785

1773-
if (optset.contains(DEGREE))
1774-
expr.and(new Expr(DEGREE,
1775-
atom.getBondCount()));
1776-
if (optset.contains(TOTAL_DEGREE))
1777-
expr.and(new Expr(DEGREE,
1778-
atom.getBondCount() + atom.getImplicitHydrogenCount()));
1779-
if (optset.contains(IS_IN_RING) && atom.isInRing())
1780-
expr.and(new Expr(IS_IN_RING));
1781-
if (optset.contains(IS_IN_CHAIN) && !atom.isInRing())
1782-
expr.and(new Expr(IS_IN_CHAIN));
1783-
if (optset.contains(IMPL_H_COUNT))
1784-
expr.and(new Expr(IMPL_H_COUNT));
1785-
if (optset.contains(RING_BOND_COUNT)) {
1786-
int rbonds = 0;
1787-
for (IBond bond : mol.getConnectedBondsList(atom))
1788-
if (bond.isInRing())
1789-
rbonds++;
1790-
1791-
expr.and(new Expr(RING_BOND_COUNT, rbonds));
1792-
}
1793-
if (optset.contains(FORMAL_CHARGE) && atom.getFormalCharge() != null)
1794-
expr.and(new Expr(FORMAL_CHARGE, atom.getFormalCharge()));
1795-
1796-
IStereoElement se = stereos.get(atom);
1797-
if (se != null &&
1798-
se.getConfigClass() == IStereoElement.TH &&
1799-
optset.contains(STEREOCHEMISTRY)) {
1800-
expr.and(new Expr(STEREOCHEMISTRY, se.getConfigOrder()));
1801-
qstereo.add(se);
1786+
if (optset.contains(DEGREE))
1787+
expr.and(new Expr(DEGREE,
1788+
atom.getBondCount()));
1789+
if (optset.contains(TOTAL_DEGREE))
1790+
expr.and(new Expr(DEGREE,
1791+
atom.getBondCount() + atom.getImplicitHydrogenCount()));
1792+
if (optset.contains(IS_IN_RING) && atom.isInRing())
1793+
expr.and(new Expr(IS_IN_RING));
1794+
if (optset.contains(IS_IN_CHAIN) && !atom.isInRing())
1795+
expr.and(new Expr(IS_IN_CHAIN));
1796+
if (optset.contains(IMPL_H_COUNT))
1797+
expr.and(new Expr(IMPL_H_COUNT));
1798+
if (optset.contains(RING_BOND_COUNT)) {
1799+
int rbonds = 0;
1800+
for (IBond bond : src.getConnectedBondsList(atom))
1801+
if (bond.isInRing())
1802+
rbonds++;
1803+
1804+
expr.and(new Expr(RING_BOND_COUNT, rbonds));
1805+
}
1806+
if (optset.contains(FORMAL_CHARGE) && atom.getFormalCharge() != null)
1807+
expr.and(new Expr(FORMAL_CHARGE, atom.getFormalCharge()));
1808+
1809+
IStereoElement se = stereos.get(atom);
1810+
if (se != null &&
1811+
se.getConfigClass() == IStereoElement.TH &&
1812+
optset.contains(STEREOCHEMISTRY)) {
1813+
expr.and(new Expr(STEREOCHEMISTRY, se.getConfigOrder()));
1814+
qstereo.add(se);
1815+
}
18021816
}
18031817

18041818
QueryAtom qatom = new QueryAtom(expr);
@@ -1816,37 +1830,44 @@ public static QueryAtomContainer create(IAtomContainer mol, Expr.Type... opts) {
18161830
qatom.setIsAromatic(atom.isAromatic());
18171831

18181832
mapping.put(atom, qatom);
1819-
query.addAtom(qatom);
1820-
}
1821-
1822-
for (IBond bond : mol.bonds()) {
1823-
Expr expr = new Expr();
1824-
1825-
if (bond.isAromatic() &&
1826-
(optset.contains(SINGLE_OR_AROMATIC) ||
1827-
optset.contains(DOUBLE_OR_AROMATIC) ||
1828-
optset.contains(IS_AROMATIC)))
1829-
expr.and(new Expr(Expr.Type.IS_AROMATIC));
1830-
else if ((optset.contains(SINGLE_OR_AROMATIC) ||
1831-
optset.contains(DOUBLE_OR_AROMATIC) ||
1832-
optset.contains(ALIPHATIC_ORDER)) && !bond.isAromatic())
1833-
expr.and(new Expr(ALIPHATIC_ORDER, bond.getOrder().numeric()));
1834-
else if (bond.isAromatic() && optset.contains(IS_ALIPHATIC))
1835-
expr.and(new Expr(IS_ALIPHATIC));
1836-
else if (optset.contains(ORDER))
1837-
expr.and(new Expr(ORDER, bond.getOrder().numeric()));
1838-
1839-
1840-
if (optset.contains(IS_IN_RING) && bond.isInRing())
1841-
expr.and(new Expr(IS_IN_RING));
1842-
else if (optset.contains(IS_IN_CHAIN) && !bond.isInRing())
1843-
expr.and(new Expr(IS_IN_CHAIN));
1844-
1845-
IStereoElement se = stereos.get(bond);
1846-
if (se != null &&
1847-
optset.contains(STEREOCHEMISTRY)) {
1848-
expr.and(new Expr(STEREOCHEMISTRY, se.getConfigOrder()));
1849-
qstereo.add(se);
1833+
dst.addAtom(qatom);
1834+
}
1835+
1836+
for (IBond bond : src.bonds()) {
1837+
Expr expr;
1838+
if (bond instanceof IQueryBond) {
1839+
expr = ((QueryBond)BondRef.deref(bond)).getExpression();
1840+
IStereoElement se = stereos.get(bond);
1841+
if (se != null) qstereo.add(se);
1842+
} else {
1843+
expr = new Expr();
1844+
1845+
if (bond.isAromatic() &&
1846+
(optset.contains(SINGLE_OR_AROMATIC) ||
1847+
optset.contains(DOUBLE_OR_AROMATIC) ||
1848+
optset.contains(IS_AROMATIC)))
1849+
expr.and(new Expr(Expr.Type.IS_AROMATIC));
1850+
else if ((optset.contains(SINGLE_OR_AROMATIC) ||
1851+
optset.contains(DOUBLE_OR_AROMATIC) ||
1852+
optset.contains(ALIPHATIC_ORDER)) && !bond.isAromatic())
1853+
expr.and(new Expr(ALIPHATIC_ORDER, bond.getOrder().numeric()));
1854+
else if (bond.isAromatic() && optset.contains(IS_ALIPHATIC))
1855+
expr.and(new Expr(IS_ALIPHATIC));
1856+
else if (optset.contains(ORDER))
1857+
expr.and(new Expr(ORDER, bond.getOrder().numeric()));
1858+
1859+
1860+
if (optset.contains(IS_IN_RING) && bond.isInRing())
1861+
expr.and(new Expr(IS_IN_RING));
1862+
else if (optset.contains(IS_IN_CHAIN) && !bond.isInRing())
1863+
expr.and(new Expr(IS_IN_CHAIN));
1864+
1865+
IStereoElement se = stereos.get(bond);
1866+
if (se != null &&
1867+
optset.contains(STEREOCHEMISTRY)) {
1868+
expr.and(new Expr(STEREOCHEMISTRY, se.getConfigOrder()));
1869+
qstereo.add(se);
1870+
}
18501871
}
18511872

18521873
QueryBond qbond = new QueryBond((IAtom) mapping.get(bond.getBegin()),
@@ -1863,12 +1884,86 @@ else if (optset.contains(IS_IN_CHAIN) && !bond.isInRing())
18631884
qbond.setIsAromatic(bond.isAromatic());
18641885

18651886
mapping.put(bond, qbond);
1866-
query.addBond(qbond);
1887+
dst.addBond(qbond);
18671888
}
18681889

18691890
for (IStereoElement se : qstereo)
1870-
query.addStereoElement(se.map(mapping));
1891+
dst.addStereoElement(se.map(mapping));
1892+
}
18711893

1872-
return query;
1894+
/**
1895+
* Create a query from a molecule and a provided set of expressions. The
1896+
* molecule is converted and any features specified in the {@code opts}
1897+
* will be matched. <br><br>
1898+
* A good starting point is the following options:
1899+
* <pre>{@code
1900+
* // [nH]1ccc(=O)cc1 => n1:c:c:c(=O):c:c:1
1901+
* QueryAtomContainer.create(mol, ALIPHATIC_ELEMENT,
1902+
* AROMATIC_ELEMENT,
1903+
* SINGLE_OR_AROMATIC,
1904+
* ALIPHATIC_ORDER,
1905+
* STEREOCHEMISTRY);
1906+
* }</pre>
1907+
* <br>
1908+
* Specifying {@link Expr.Type#DEGREE} (or {@link Expr.Type#TOTAL_DEGREE} +
1909+
* {@link Expr.Type#IMPL_H_COUNT}) means the molecule will not match as a
1910+
* substructure.
1911+
* <br>
1912+
* <pre>{@code
1913+
* // [nH]1ccc(=O)cc1 => [nD2]1:[cD2]:[cD2]:[cD2](=[OD1]):[cD2]:[cD2]:1
1914+
* QueryAtomContainer.create(mol, ALIPHATIC_ELEMENT,
1915+
* AROMATIC_ELEMENT,
1916+
* DEGREE,
1917+
* SINGLE_OR_AROMATIC,
1918+
* ALIPHATIC_ORDER);
1919+
* }</pre>
1920+
* <br>
1921+
* The {@link Expr.Type#RING_BOND_COUNT} property is useful for locking in
1922+
* ring systems. Specifying the ring bond count on benzene means it will
1923+
* not match larger ring systems (e.g. naphthalene) but can still be
1924+
* substituted.
1925+
* <br>
1926+
* <pre>{@code
1927+
* // [nH]1ccc(=O)cc1 =>
1928+
* // [nx2+0]1:[cx2+0]:[cx2+0]:[cx2+0](=[O&x0+0]):[cx2+0]:[cx2+0]:1
1929+
* // IMPORTANT! use Cycles.markRingAtomsAndBonds(mol) to set ring status
1930+
* QueryAtomContainer.create(mol, ALIPHATIC_ELEMENT,
1931+
* AROMATIC_ELEMENT,
1932+
* FORMAL_CHARGE,
1933+
* ISOTOPE,
1934+
* RING_BOND_COUNT,
1935+
* SINGLE_OR_AROMATIC,
1936+
* ALIPHATIC_ORDER);
1937+
* }</pre>
1938+
* <br>
1939+
* Note that {@link Expr.Type#FORMAL_CHARGE},
1940+
* {@link Expr.Type#IMPL_H_COUNT}, and {@link Expr.Type#ISOTOPE} are ignored
1941+
* if null. Explicitly setting these to zero (only required for Isotope from
1942+
* SMILES) forces their inclusion.
1943+
* <br>
1944+
* <pre>{@code
1945+
* // [nH]1ccc(=O)cc1 =>
1946+
* // [0n+0]1:[0c+0]:[0c+0]:[0c+0](=[O+0]):[0c+0]:[0c+0]:1
1947+
* QueryAtomContainer.create(mol, ALIPHATIC_ELEMENT,
1948+
* AROMATIC_ELEMENT,
1949+
* FORMAL_CHARGE,
1950+
* ISOTOPE,
1951+
* RING_BOND_COUNT,
1952+
* SINGLE_OR_AROMATIC,
1953+
* ALIPHATIC_ORDER);
1954+
* }</pre>
1955+
*
1956+
* Please note that not all {@link Expr.Type}s are currently supported; if you
1957+
* require a specific type that you think is useful please open an issue.
1958+
*
1959+
* @param src the input molecule
1960+
* @param opts set of the expr types to match
1961+
* @return the query container
1962+
*/
1963+
public static QueryAtomContainer create(IAtomContainer src,
1964+
Expr.Type... opts) {
1965+
// Start from an empty query container built with the source molecule's builder.
QueryAtomContainer dst = new QueryAtomContainer(src.getBuilder());
1966+
// Delegate to the (dst, src, opts) overload, which adds the query atoms,
// bonds and mapped stereo elements derived from src to dst.
create(dst, src, opts);
1967+
return dst;
18731968
}
18741969
}

0 commit comments

Comments
 (0)