@@ -4548,211 +4548,6 @@ void PhaseIdealLoop::maybe_multiversion_for_auto_vectorization_runtime_checks(Id
45484548 do_multiversioning (lpt, old_new);
45494549}
45504550
4551- // Returns true if the Reduction node is unordered.
4552- static bool is_unordered_reduction (Node* n) {
4553- return n->is_Reduction () && !n->as_Reduction ()->requires_strict_order ();
4554- }
4555-
4556- // Having ReductionNodes in the loop is expensive. They need to recursively
4557- // fold together the vector values, for every vectorized loop iteration. If
4558- // we encounter the following pattern, we can vector accumulate the values
4559- // inside the loop, and only have a single UnorderedReduction after the loop.
4560- //
4561- // Note: UnorderedReduction represents a ReductionNode which does not require
4562- // calculating in strict order.
4563- //
4564- // CountedLoop init
4565- // | |
4566- // +------+ | +-----------------------+
4567- // | | | |
4568- // PhiNode (s) |
4569- // | |
4570- // | Vector |
4571- // | | |
4572- // UnorderedReduction (first_ur) |
4573- // | |
4574- // ... Vector |
4575- // | | |
4576- // UnorderedReduction (last_ur) |
4577- // | |
4578- // +---------------------+
4579- //
4580- // We patch the graph to look like this:
4581- //
4582- // CountedLoop identity_vector
4583- // | |
4584- // +-------+ | +---------------+
4585- // | | | |
4586- // PhiNode (v) |
4587- // | |
4588- // | Vector |
4589- // | | |
4590- // VectorAccumulator |
4591- // | |
4592- // ... Vector |
4593- // | | |
4594- // init VectorAccumulator |
4595- // | | | |
4596- // UnorderedReduction +-----------+
4597- //
4598- // We turned the scalar (s) Phi into a vectorized one (v). In the loop, we
4599- // use vector_accumulators, which do the same reductions, but only element
4600- // wise. This is a single operation per vector_accumulator, rather than many
4601- // for a UnorderedReduction. We can then reduce the last vector_accumulator
4602- // after the loop, and also reduce the init value into it.
4603- //
4604- // We can not do this with all reductions. Some reductions do not allow the
4605- // reordering of operations (for example float addition/multiplication require
4606- // strict order).
4607- void PhaseIdealLoop::move_unordered_reduction_out_of_loop (IdealLoopTree* loop) {
4608- assert (!C->major_progress () && loop->is_counted () && loop->is_innermost (), " sanity" );
4609-
4610- // Find all Phi nodes with an unordered Reduction on backedge.
4611- CountedLoopNode* cl = loop->_head ->as_CountedLoop ();
4612- for (DUIterator_Fast jmax, j = cl->fast_outs (jmax); j < jmax; j++) {
4613- Node* phi = cl->fast_out (j);
4614- // We have a phi with a single use, and an unordered Reduction on the backedge.
4615- if (!phi->is_Phi () || phi->outcnt () != 1 || !is_unordered_reduction (phi->in (2 ))) {
4616- continue ;
4617- }
4618-
4619- ReductionNode* last_ur = phi->in (2 )->as_Reduction ();
4620- assert (!last_ur->requires_strict_order (), " must be" );
4621-
4622- // Determine types
4623- const TypeVect* vec_t = last_ur->vect_type ();
4624- uint vector_length = vec_t ->length ();
4625- BasicType bt = vec_t ->element_basic_type ();
4626-
4627- // Convert opcode from vector-reduction -> scalar -> normal-vector-op
4628- const int sopc = VectorNode::scalar_opcode (last_ur->Opcode (), bt);
4629- const int vopc = VectorNode::opcode (sopc, bt);
4630- if (!Matcher::match_rule_supported_vector (vopc, vector_length, bt)) {
4631- DEBUG_ONLY ( last_ur->dump (); )
4632- assert (false , " do not have normal vector op for this reduction" );
4633- continue ; // not implemented -> fails
4634- }
4635-
4636- // Traverse up the chain of unordered Reductions, checking that it loops back to
4637- // the phi. Check that all unordered Reductions only have a single use, except for
4638- // the last (last_ur), which only has phi as a use in the loop, and all other uses
4639- // are outside the loop.
4640- ReductionNode* current = last_ur;
4641- ReductionNode* first_ur = nullptr ;
4642- while (true ) {
4643- assert (!current->requires_strict_order (), " sanity" );
4644-
4645- // Expect no ctrl and a vector_input from within the loop.
4646- Node* ctrl = current->in (0 );
4647- Node* vector_input = current->in (2 );
4648- if (ctrl != nullptr || get_ctrl (vector_input) != cl) {
4649- DEBUG_ONLY ( current->dump (1 ); )
4650- assert (false , " reduction has ctrl or bad vector_input" );
4651- break ; // Chain traversal fails.
4652- }
4653-
4654- assert (current->vect_type () != nullptr , " must have vector type" );
4655- if (current->vect_type () != last_ur->vect_type ()) {
4656- // Reductions do not have the same vector type (length and element type).
4657- break ; // Chain traversal fails.
4658- }
4659-
4660- // Expect single use of an unordered Reduction, except for last_ur.
4661- if (current == last_ur) {
4662- // Expect all uses to be outside the loop, except phi.
4663- for (DUIterator_Fast kmax, k = current->fast_outs (kmax); k < kmax; k++) {
4664- Node* use = current->fast_out (k);
4665- if (use != phi && ctrl_or_self (use) == cl) {
4666- DEBUG_ONLY ( current->dump (-1 ); )
4667- assert (false , " reduction has use inside loop" );
4668- // Should not be allowed by SuperWord::mark_reductions
4669- return ; // bail out of optimization
4670- }
4671- }
4672- } else {
4673- if (current->outcnt () != 1 ) {
4674- break ; // Chain traversal fails.
4675- }
4676- }
4677-
4678- // Expect another unordered Reduction or phi as the scalar input.
4679- Node* scalar_input = current->in (1 );
4680- if (is_unordered_reduction (scalar_input) &&
4681- scalar_input->Opcode () == current->Opcode ()) {
4682- // Move up the unordered Reduction chain.
4683- current = scalar_input->as_Reduction ();
4684- assert (!current->requires_strict_order (), " must be" );
4685- } else if (scalar_input == phi) {
4686- // Chain terminates at phi.
4687- first_ur = current;
4688- current = nullptr ;
4689- break ; // Success.
4690- } else {
4691- // scalar_input is neither phi nor a matching reduction
4692- // Can for example be scalar reduction when we have
4693- // partial vectorization.
4694- break ; // Chain traversal fails.
4695- }
4696- }
4697- if (current != nullptr ) {
4698- // Chain traversal was not successful.
4699- continue ;
4700- }
4701- assert (first_ur != nullptr , " must have successfully terminated chain traversal" );
4702-
4703- Node* identity_scalar = ReductionNode::make_identity_con_scalar (_igvn, sopc, bt);
4704- set_root_as_ctrl (identity_scalar);
4705- VectorNode* identity_vector = VectorNode::scalar2vector (identity_scalar, vector_length, bt);
4706- register_new_node (identity_vector, C->root ());
4707- assert (vec_t == identity_vector->vect_type (), " matching vector type" );
4708- VectorNode::trace_new_vector (identity_vector, " Unordered Reduction" );
4709-
4710- // Turn the scalar phi into a vector phi.
4711- _igvn.rehash_node_delayed (phi);
4712- Node* init = phi->in (1 ); // Remember init before replacing it.
4713- phi->set_req_X (1 , identity_vector, &_igvn);
4714- phi->as_Type ()->set_type (vec_t );
4715- _igvn.set_type (phi, vec_t );
4716-
4717- // Traverse down the chain of unordered Reductions, and replace them with vector_accumulators.
4718- current = first_ur;
4719- while (true ) {
4720- // Create vector_accumulator to replace current.
4721- Node* last_vector_accumulator = current->in (1 );
4722- Node* vector_input = current->in (2 );
4723- VectorNode* vector_accumulator = VectorNode::make (vopc, last_vector_accumulator, vector_input, vec_t );
4724- register_new_node (vector_accumulator, cl);
4725- _igvn.replace_node (current, vector_accumulator);
4726- VectorNode::trace_new_vector (vector_accumulator, " Unordered Reduction" );
4727- if (current == last_ur) {
4728- break ;
4729- }
4730- current = vector_accumulator->unique_out ()->as_Reduction ();
4731- assert (!current->requires_strict_order (), " must be" );
4732- }
4733-
4734- // Create post-loop reduction.
4735- Node* last_accumulator = phi->in (2 );
4736- Node* post_loop_reduction = ReductionNode::make (sopc, nullptr , init, last_accumulator, bt);
4737-
4738- // Take over uses of last_accumulator that are not in the loop.
4739- for (DUIterator i = last_accumulator->outs (); last_accumulator->has_out (i); i++) {
4740- Node* use = last_accumulator->out (i);
4741- if (use != phi && use != post_loop_reduction) {
4742- assert (ctrl_or_self (use) != cl, " use must be outside loop" );
4743- use->replace_edge (last_accumulator, post_loop_reduction, &_igvn);
4744- --i;
4745- }
4746- }
4747- register_new_node (post_loop_reduction, get_late_ctrl (post_loop_reduction, cl));
4748- VectorNode::trace_new_vector (post_loop_reduction, " Unordered Reduction" );
4749-
4750- assert (last_accumulator->outcnt () == 2 , " last_accumulator has 2 uses: phi and post_loop_reduction" );
4751- assert (post_loop_reduction->outcnt () > 0 , " should have taken over all non loop uses of last_accumulator" );
4752- assert (phi->outcnt () == 1 , " accumulator is the only use of phi" );
4753- }
4754- }
4755-
47564551void DataNodeGraph::clone_data_nodes (Node* new_ctrl) {
47574552 for (uint i = 0 ; i < _data_nodes.size (); i++) {
47584553 clone (_data_nodes[i], new_ctrl);
0 commit comments