1- /* Copyright (C) 2002-2007 Stefan Kuhn <shk3@users.sf.net>
1+ /* Copyright (C) 2002-2007,2020 Stefan Kuhn <shk3@users.sf.net>
22 *
33 * Contact: cdk-devel@lists.sourceforge.net
44 *
2222 */
2323package org .openscience .cdk .fingerprint ;
2424
25+ import org .openscience .cdk .CDK ;
2526import org .openscience .cdk .exception .CDKException ;
2627import org .openscience .cdk .graph .Cycles ;
2728import org .openscience .cdk .interfaces .IAtomContainer ;
2829import org .openscience .cdk .interfaces .IRingSet ;
2930import org .openscience .cdk .ringsearch .RingPartitioner ;
3031import org .openscience .cdk .tools .manipulator .MolecularFormulaManipulator ;
3132
33+ import java .util .BitSet ;
3234import java .util .List ;
3335import java .util .Map ;
3436
3537/**
3638 * Generates an extended fingerprint for a given {@link IAtomContainer}, that
37- * extends the {@link Fingerprinter} with additional bits describing ring
38- * features.
39+ * extends the {@link Fingerprinter} with additional (25) bits describing ring
40+ * features and isotopic masses .
3941 *
40- * @author shk3
41- * @cdk.created 2006-01-13
42- * @cdk.keyword fingerprint
43- * @cdk.keyword similarity
44- * @cdk.module fingerprint
45- * @cdk.githash
42+ * <i>JWM Comment: It's better to actually just hash the rings over the entire
43+ * length simply using a different seed.
44+ * The original version of the class used non-unique SSSR which of course
45+ * doesn't work for substructure screening so this fingerprint can only
46+ * be used for similarity.</i>
4647 *
47- * @see org.openscience.cdk.fingerprint.Fingerprinter
48+ * @author shk3
49+ * @cdk.created 2006-01-13
50+ * @cdk.keyword fingerprint
51+ * @cdk.keyword similarity
52+ * @cdk.module fingerprint
53+ * @cdk.githash
54+ * @see org.openscience.cdk.fingerprint.Fingerprinter
4855 */
49- public class ExtendedFingerprinter extends Fingerprinter implements IFingerprinter {
56+ public class ExtendedFingerprinter implements IFingerprinter {
5057
51- private final int RESERVED_BITS = 25 ;
58+ // number of bits to hash rings into
59+ private final int RESERVED_BITS = 25 ;
5260
53- private Fingerprinter fingerprinter = null ;
61+ private final Fingerprinter fingerprinter ;
5462
5563 /**
5664 * Creates a fingerprint generator of length <code>DEFAULT_SIZE</code>
@@ -69,8 +77,8 @@ public ExtendedFingerprinter(int size) {
6977 * the given size, using a generation algorithm with the given search
7078 * depth.
7179 *
72- * @param size The desired size of the fingerprint
73- * @param searchDepth The desired depth of search
80+ * @param size The desired size of the fingerprint
81+ * @param searchDepth The desired depth of search
7482 */
7583 public ExtendedFingerprinter (int size , int searchDepth ) {
7684 this .fingerprinter = new Fingerprinter (size - RESERVED_BITS , searchDepth );
@@ -84,15 +92,17 @@ public ExtendedFingerprinter(int size, int searchDepth) {
8492 * (referring to smallest set of smallest rings) and bits which tell if
8593 * there is a fused ring system with 1,2...8 or more rings in it
8694 *
87- *@param container The AtomContainer for which a Fingerprint is generated
88- *@return a bit fingerprint for the given <code>IAtomContainer</code>.
95+ * @param container The AtomContainer for which a Fingerprint is generated
96+ * @return a bit fingerprint for the given <code>IAtomContainer</code>.
8997 */
9098 @ Override
9199 public IBitFingerprint getBitFingerprint (IAtomContainer container ) throws CDKException {
92100 return this .getBitFingerprint (container , null , null );
93101 }
94102
95- /** {@inheritDoc} */
103+ /**
104+ * {@inheritDoc}
105+ */
96106 @ Override
97107 public Map <String , Integer > getRawFingerprint (IAtomContainer iAtomContainer ) throws CDKException {
98108 throw new UnsupportedOperationException ();
@@ -108,14 +118,14 @@ public Map<String, Integer> getRawFingerprint(IAtomContainer iAtomContainer) thr
108118 * a smallestSetOfSmallestRings. The List must be a list of all ring
109119 * systems in the molecule.
110120 *
111- * @param atomContainer The AtomContainer for which a Fingerprint is
112- * generated
113- * @param ringSet An SSSR RingSet of ac (if not available, use
114- * getExtendedFingerprint(AtomContainer ac),
115- * which does the calculation)
116- * @param rslist A list of all ring systems in ac
117- * @exception CDKException for example if input can not be cloned.
121+ * @param atomContainer The AtomContainer for which a Fingerprint is
122+ * generated
123+ * @param ringSet A SSSR RingSet of ac (if not available, use
124+ * getExtendedFingerprint(AtomContainer ac),
125+ * which does the calculation)
126+ * @param rslist A list of all ring systems in ac
118127 * @return a BitSet representing the fingerprint
128+ * @throws CDKException for example if input can not be cloned.
119129 */
120130 public IBitFingerprint getBitFingerprint (IAtomContainer atomContainer , IRingSet ringSet , List <IRingSet > rslist )
121131 throws CDKException {
@@ -144,24 +154,66 @@ public IBitFingerprint getBitFingerprint(IAtomContainer atomContainer, IRingSet
144154 for (int i = 0 ; i < rslist .size (); i ++) {
145155 if (((IRingSet ) rslist .get (i )).getAtomContainerCount () > maximumringsystemsize )
146156
147- maximumringsystemsize = ((IRingSet ) rslist .get (i )).getAtomContainerCount ();
157+ maximumringsystemsize = ((IRingSet ) rslist .get (i )).getAtomContainerCount ();
148158 }
149159 for (int i = 0 ; i < maximumringsystemsize && i < 9 ; i ++) {
150160 fingerprint .set (size - 8 + i - 3 );
151161 }
152162 return fingerprint ;
153163 }
154164
155- /** {@inheritDoc} */
165+ /**
166+ * {@inheritDoc}
167+ */
156168 @ Override
157169 public int getSize () {
158170 return fingerprinter .getSize () + RESERVED_BITS ;
159171 }
160172
161- /** {@inheritDoc} */
173+ /**
174+ * {@inheritDoc}
175+ */
162176 @ Override
163177 public ICountFingerprint getCountFingerprint (IAtomContainer container ) throws CDKException {
164178 throw new UnsupportedOperationException ();
165179 }
166180
181+ @ Override
182+ public String getVersionDescription () {
183+ StringBuilder sb = new StringBuilder ();
184+ sb .append ("CDK-" )
185+ .append (getClass ().getSimpleName ())
186+ .append ("/" )
187+ .append (CDK .getVersion ()); // could version fingerprints separately
188+ for (Map .Entry <String , String > param : this .fingerprinter .getParameters ()) {
189+ sb .append (' ' ).append (param .getKey ()).append ('=' ).append (param .getValue ());
190+ }
191+ return sb .toString ();
192+ }
193+
194+ @ Override
195+ public BitSet getFingerprint (IAtomContainer mol ) throws CDKException {
196+ return getBitFingerprint (mol ).asBitSet ();
197+ }
198+
199+ /**
200+ * Set the pathLimit for the base daylight/path fingerprint. If too many paths are generated from a single atom
201+ * an exception is thrown.
202+ * @param pathLimit the number of paths to generate from a node
203+ * @see Fingerprinter
204+ */
205+ public void setPathLimit (int pathLimit ) {
206+ this .fingerprinter .setPathLimit (pathLimit );
207+ }
208+
209+ /**
210+ * Set the hashPseudoAtoms for the base daylight/path fingerprint. This indicates whether pseudo-atoms should be
211+ * hashed, for substructure screening this is not desirable - but this fingerprint uses SSSR so can't be used for
212+ * substructure screening regardless.
213+ * @param hashPseudoAtoms the number of paths to generate from a node
214+ * @see Fingerprinter
215+ */
216+ public void setHashPseudoAtoms (boolean hashPseudoAtoms ) {
217+ this .fingerprinter .setHashPseudoAtoms (hashPseudoAtoms );
218+ }
167219}
0 commit comments