3333import java .io .IOException ;
3434import java .io .InputStreamReader ;
3535import java .util .Arrays ;
36- import java .util .Collections ;
3736import java .util .Comparator ;
3837
3938/**
@@ -102,6 +101,25 @@ private Canon(int[][] g, long[] partition, boolean[] hydrogens, boolean symOnly)
102101 symmetry = refine (labelling , hydrogens );
103102 }
104103
104+ /**
105+ * Compute the canonical labels for the provided structure. The labelling
106+ * does not consider isomer information or stereochemistry. The current
107+ * implementation does not fully distinguish all structure topologies
108+ * but in practice performs well in the majority of cases. A complete
109+ * canonical labelling can be obtained using the {@link InChINumbersTools}
110+ * but is computationally much more expensive.
111+ *
112+ * @param container structure
113+ * @param g adjacency list graph representation
114+ * @param opts canonical generation options see {@link CanonOpts}
115+ * @return the canonical labelling
116+ * @see EquivalentClassPartitioner
117+ * @see InChINumbersTools
118+ */
119+ public static long [] label (IAtomContainer container , int [][] g , int opts ) {
120+ return label (container , g , basicInvariants (container , g , opts ));
121+ }
122+
105123 /**
106124 * Compute the canonical labels for the provided structure. The labelling
107125 * does not consider isomer information or stereochemistry. The current
@@ -117,7 +135,7 @@ private Canon(int[][] g, long[] partition, boolean[] hydrogens, boolean symOnly)
117135 * @see InChINumbersTools
118136 */
119137 public static long [] label (IAtomContainer container , int [][] g ) {
120- return label (container , g , basicInvariants ( container , g ) );
138+ return label (container , g , CanonOpts . Default );
121139 }
122140
123141 /**
@@ -182,13 +200,32 @@ public static long[] label(IAtomContainer container,
182200 *
183201 * @param container structure
184202 * @param g adjacency list graph representation
203+ * @param opts canonical generation options see {@link CanonOpts}
185204 * @return symmetry classes
186205 * @see EquivalentClassPartitioner
187206 */
207+ public static long [] symmetry (IAtomContainer container , int [][] g , int opts ) {
208+ return new Canon (g , basicInvariants (container , g , opts ), terminalHydrogens (container , g ), true ).symmetry ;
209+ }
210+
211+ /**
212+ * Compute the symmetry classes for the provided structure. There are known
213+ * examples where symmetry is incorrectly found. The {@link
214+ * EquivalentClassPartitioner} gives more accurate symmetry perception but
215+ * this method is very quick and in practice successfully partitions the
216+ * majority of chemical structures.
217+ *
218+ * @param container structure
219+ * @param g adjacency list graph representation
220+ * @return symmetry classes
221+ * @see EquivalentClassPartitioner
222+ * @see #basicInvariants(IAtomContainer, int[][], int)
223+ */
188224 public static long [] symmetry (IAtomContainer container , int [][] g ) {
189- return new Canon ( g , basicInvariants ( container , g ), terminalHydrogens ( container , g ), true ). symmetry ;
225+ return symmetry ( container , g , CanonOpts . Default ) ;
190226 }
191227
228+
192229 /**
193230 * Internal - refine invariants to a canonical labelling and
194231 * symmetry classes.
@@ -304,6 +341,18 @@ private long primeProduct(int[] ws, long[] ranks, boolean[] hydrogens) {
304341 return prod ;
305342 }
306343
344+ /**
345+ * Delegates to {@link #basicInvariants(IAtomContainer, int[][], int)} with {@code CanonOpts.Default}.
346+ * @param container an atom container to generate labels for
347+ * @param graph graph representation (adjacency list)
348+ *
349+ * @return the initial invariants
350+ * @see #basicInvariants(IAtomContainer, int[][], int)
351+ */
352+ public static long [] basicInvariants (IAtomContainer container , int [][] graph ) {
353+ return basicInvariants (container , graph , CanonOpts .Default );
354+ }
355+
307356 /**
308357 * Generate the initial invariants for each atom in the {@code container}.
309358 * The labels use the invariants described in {@cdk.cite WEI89}.
@@ -328,11 +377,12 @@ private long primeProduct(int[] ws, long[] ranks, boolean[] hydrogens) {
328377 *
329378 * @param container an atom container to generate labels for
330379 * @param graph graph representation (adjacency list)
380+ * @param flav bit mask canon flavor (see {@link CanonOpts})
331381 * @return initial invariants
332382 * @throws NullPointerException an atom had unset atomic number, hydrogen
333383 * count or formal charge
334384 */
335- public static long [] basicInvariants (IAtomContainer container , int [][] graph ) {
385+ public static long [] basicInvariants (IAtomContainer container , int [][] graph , int flav ) {
336386
337387 long [] labels = new long [graph .length ];
338388
@@ -362,6 +412,16 @@ public static long[] basicInvariants(IAtomContainer container, int[][] graph) {
362412 label <<= 4 ; // hydrogen count <= 15 (4 bits)
363413 label |= impH + expH & 0xf ;
364414
415+ // atomic mass to split ties (if flavour requested), we can't do this
416+ // by default because "unique" smiles doesn't include the isotopic mass
417+ // so splitting on something that doesn't appear in the output would not
418+ // function correctly
419+ // n.b. the comparator based invariants are much more flexible still
420+ if ((flav & CanonOpts .AtomicMass ) != 0 && atom .getMassNumber () != null ) {
421+ label <<= 10 ;
422+ label |= atom .getMassNumber ();
423+ }
424+
365425 labels [v ] = label ;
366426 }
367427 return labels ;
0 commit comments