Skip to content

Commit d19b6bb

Browse files
committed
Tweak extended fingerprinter to allow base fp options to be set and correct version info.
1 parent 41592d8 commit d19b6bb

File tree

1 file changed

+80
-28
lines changed

1 file changed

+80
-28
lines changed

descriptor/fingerprint/src/main/java/org/openscience/cdk/fingerprint/ExtendedFingerprinter.java

Lines changed: 80 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (C) 2002-2007 Stefan Kuhn <shk3@users.sf.net>
1+
/* Copyright (C) 2002-2007,2020 Stefan Kuhn <shk3@users.sf.net>
22
*
33
* Contact: cdk-devel@lists.sourceforge.net
44
*
@@ -22,35 +22,43 @@
2222
*/
2323
package org.openscience.cdk.fingerprint;
2424

25+
import org.openscience.cdk.CDK;
2526
import org.openscience.cdk.exception.CDKException;
2627
import org.openscience.cdk.graph.Cycles;
2728
import org.openscience.cdk.interfaces.IAtomContainer;
2829
import org.openscience.cdk.interfaces.IRingSet;
2930
import org.openscience.cdk.ringsearch.RingPartitioner;
3031
import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator;
3132

33+
import java.util.BitSet;
3234
import java.util.List;
3335
import java.util.Map;
3436

3537
/**
3638
* Generates an extended fingerprint for a given {@link IAtomContainer}, that
37-
* extends the {@link Fingerprinter} with additional bits describing ring
38-
* features.
39+
* extends the {@link Fingerprinter} with additional (25) bits describing ring
40+
* features and isotopic masses.
3941
*
40-
* @author shk3
41-
* @cdk.created 2006-01-13
42-
* @cdk.keyword fingerprint
43-
* @cdk.keyword similarity
44-
* @cdk.module fingerprint
45-
* @cdk.githash
42+
* <i>JWM Comment: It's better to actually just hash the rings over the entire
43+
* length simply using a different seed.
44+
* The original version of the class used non-unique SSSR which of course
45+
* doesn't work for substructure screening so this fingerprint can only
46+
* be used for similarity.</i>
4647
*
47-
* @see org.openscience.cdk.fingerprint.Fingerprinter
48+
* @author shk3
49+
* @cdk.created 2006-01-13
50+
* @cdk.keyword fingerprint
51+
* @cdk.keyword similarity
52+
* @cdk.module fingerprint
53+
* @cdk.githash
54+
* @see org.openscience.cdk.fingerprint.Fingerprinter
4855
*/
49-
public class ExtendedFingerprinter extends Fingerprinter implements IFingerprinter {
56+
public class ExtendedFingerprinter implements IFingerprinter {
5057

51-
private final int RESERVED_BITS = 25;
58+
// number of bits to hash rings into
59+
private final int RESERVED_BITS = 25;
5260

53-
private Fingerprinter fingerprinter = null;
61+
private final Fingerprinter fingerprinter;
5462

5563
/**
5664
* Creates a fingerprint generator of length <code>DEFAULT_SIZE</code>
@@ -69,8 +77,8 @@ public ExtendedFingerprinter(int size) {
6977
* the given size, using a generation algorithm with the given search
7078
* depth.
7179
*
72-
* @param size The desired size of the fingerprint
73-
* @param searchDepth The desired depth of search
80+
* @param size The desired size of the fingerprint
81+
* @param searchDepth The desired depth of search
7482
*/
7583
public ExtendedFingerprinter(int size, int searchDepth) {
7684
this.fingerprinter = new Fingerprinter(size - RESERVED_BITS, searchDepth);
@@ -84,15 +92,17 @@ public ExtendedFingerprinter(int size, int searchDepth) {
8492
* (referring to smallest set of smallest rings) and bits which tell if
8593
* there is a fused ring system with 1,2...8 or more rings in it
8694
*
87-
*@param container The AtomContainer for which a Fingerprint is generated
88-
*@return a bit fingerprint for the given <code>IAtomContainer</code>.
95+
* @param container The AtomContainer for which a Fingerprint is generated
96+
* @return a bit fingerprint for the given <code>IAtomContainer</code>.
8997
*/
9098
@Override
9199
public IBitFingerprint getBitFingerprint(IAtomContainer container) throws CDKException {
92100
return this.getBitFingerprint(container, null, null);
93101
}
94102

95-
/** {@inheritDoc} */
103+
/**
104+
* {@inheritDoc}
105+
*/
96106
@Override
97107
public Map<String, Integer> getRawFingerprint(IAtomContainer iAtomContainer) throws CDKException {
98108
throw new UnsupportedOperationException();
@@ -108,14 +118,14 @@ public Map<String, Integer> getRawFingerprint(IAtomContainer iAtomContainer) thr
108118
* a smallestSetOfSmallestRings. The List must be a list of all ring
109119
* systems in the molecule.
110120
*
111-
* @param atomContainer The AtomContainer for which a Fingerprint is
112-
* generated
113-
* @param ringSet An SSSR RingSet of ac (if not available, use
114-
* getExtendedFingerprint(AtomContainer ac),
115-
* which does the calculation)
116-
* @param rslist A list of all ring systems in ac
117-
* @exception CDKException for example if input can not be cloned.
121+
* @param atomContainer The AtomContainer for which a Fingerprint is
122+
* generated
123+
* @param ringSet A SSSR RingSet of ac (if not available, use
124+
* getExtendedFingerprint(AtomContainer ac),
125+
* which does the calculation)
126+
* @param rslist A list of all ring systems in ac
118127
* @return a BitSet representing the fingerprint
128+
* @throws CDKException for example if input can not be cloned.
119129
*/
120130
public IBitFingerprint getBitFingerprint(IAtomContainer atomContainer, IRingSet ringSet, List<IRingSet> rslist)
121131
throws CDKException {
@@ -144,24 +154,66 @@ public IBitFingerprint getBitFingerprint(IAtomContainer atomContainer, IRingSet
144154
for (int i = 0; i < rslist.size(); i++) {
145155
if (((IRingSet) rslist.get(i)).getAtomContainerCount() > maximumringsystemsize)
146156

147-
maximumringsystemsize = ((IRingSet) rslist.get(i)).getAtomContainerCount();
157+
maximumringsystemsize = ((IRingSet) rslist.get(i)).getAtomContainerCount();
148158
}
149159
for (int i = 0; i < maximumringsystemsize && i < 9; i++) {
150160
fingerprint.set(size - 8 + i - 3);
151161
}
152162
return fingerprint;
153163
}
154164

155-
/** {@inheritDoc} */
165+
/**
166+
* {@inheritDoc}
167+
*/
156168
@Override
157169
public int getSize() {
158170
return fingerprinter.getSize() + RESERVED_BITS;
159171
}
160172

161-
/** {@inheritDoc} */
173+
/**
174+
* {@inheritDoc}
175+
*/
162176
@Override
163177
public ICountFingerprint getCountFingerprint(IAtomContainer container) throws CDKException {
164178
throw new UnsupportedOperationException();
165179
}
166180

181+
@Override
182+
public String getVersionDescription() {
183+
StringBuilder sb = new StringBuilder();
184+
sb.append("CDK-")
185+
.append(getClass().getSimpleName())
186+
.append("/")
187+
.append(CDK.getVersion()); // could version fingerprints separately
188+
for (Map.Entry<String, String> param : this.fingerprinter.getParameters()) {
189+
sb.append(' ').append(param.getKey()).append('=').append(param.getValue());
190+
}
191+
return sb.toString();
192+
}
193+
194+
@Override
195+
public BitSet getFingerprint(IAtomContainer mol) throws CDKException {
196+
return getBitFingerprint(mol).asBitSet();
197+
}
198+
199+
/**
200+
* Set the pathLimit for the base daylight/path fingerprint. If too many paths are generated from a single atom
201+
* an exception is thrown.
202+
* @param pathLimit the number of paths to generate from a node
203+
* @see Fingerprinter
204+
*/
205+
public void setPathLimit(int pathLimit) {
206+
this.fingerprinter.setPathLimit(pathLimit);
207+
}
208+
209+
/**
210+
* Set the hashPseudoAtoms for the base daylight/path fingerprint. This indicates whether pseudo-atoms should be
211+
* hashed. For substructure screening this is not desirable, but this fingerprint uses SSSR so it can't be used for
212+
* substructure screening regardless.
213+
* @param hashPseudoAtoms whether pseudo-atoms should be hashed
214+
* @see Fingerprinter
215+
*/
216+
public void setHashPseudoAtoms(boolean hashPseudoAtoms) {
217+
this.fingerprinter.setHashPseudoAtoms(hashPseudoAtoms);
218+
}
167219
}

0 commit comments

Comments
 (0)