settings.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the terms
00007 // of the GNU General Public License as published by the Free Software
00008 // Foundation; either version 2, or (at your option) any later
00009 // version.
00010 
00011 // This library is distributed in the hope that it will be useful, but
00012 // WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // General Public License for more details.
00015 
00016 // You should have received a copy of the GNU General Public License
00017 // along with this library; see the file COPYING.  If not, write to
00018 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
00019 // MA 02111-1307, USA.
00020 
00021 // As a special exception, you may use this file as part of a free
00022 // software library without restriction.  Specifically, if other files
00023 // instantiate templates or use macros or inline functions from this
00024 // file, or you compile this file and link it with other files to
00025 // produce an executable, this file does not by itself cause the
00026 // resulting executable to be covered by the GNU General Public
00027 // License.  This exception does not however invalidate any other
00028 // reasons why the executable file might be covered by the GNU General
00029 // Public License.
00030 
00031 /** @file parallel/settings.h
00032  *  @brief Runtime settings and tuning parameters, heuristics to decide
00033  *  whether to use parallelized algorithms.
00034  *  This file is a GNU parallel extension to the Standard C++ Library.
00035  *
00036  *  @section parallelization_decision 
00037  *  The decision whether to run an algorithm in parallel.
00038  *
00039  *  There are several ways the user can switch on and off the parallel
00040  *  execution of an algorithm, both at compile- and run-time.
00041  *
00042  *  Only sequential execution can be forced at compile-time.  This
00043  *  reduces code size and protects code parts that have
00044  *  non-thread-safe side effects.
00045  *
00046  *  Ultimately, forcing parallel execution at compile-time makes
00047  *  sense.  Often, the sequential algorithm implementation is used as
00048  *  a subroutine, so no reduction in code size can be achieved.  Also,
00049  *  the machine the program is run on might have only one processor
00050  *  core, so to avoid overhead, the algorithm is executed
00051  *  sequentially.
00052  *
00053  *  To force sequential execution of an algorithm ultimately at
00054  *  compile-time, the user must add the tag
00055  *  __gnu_parallel::sequential_tag() to the end of the parameter list,
00056  *  e. g.
00057  *
00058  *  \code
00059  *  std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag());
00060  *  \endcode
00061  *
00062  *  This is compatible with all overloaded algorithm variants.  No
00063  *  additional code will be instantiated, at all.  The same holds for
00064  *  most algorithm calls with iterators not providing random access.
00065  *
00066  *  If the algorithm call is not forced to be executed sequentially
00067  *  at compile-time, the decision is made at run-time.
00068  *  The global variable __gnu_parallel::_Settings::algorithm_strategy
00069  *  is checked. It is a tristate variable corresponding to:
00070  *
00071  *  a. force_sequential, meaning the sequential algorithm is executed.
00072  *  b. force_parallel, meaning the parallel algorithm is executed.
00073  *  c. heuristic
00074  *
00075  *  For heuristic, the parallel algorithm implementation is called
00076  *  only if the input size is sufficiently large.  For most
00077  *  algorithms, the input size is the (combined) length of the input
00078  *  sequence(s).  The threshold can be set by the user, individually
00079  *  for each algorithm.  The according variables are called
00080  *  __gnu_parallel::_Settings::[algorithm]_minimal_n .
00081  *
00082  *  For some of the algorithms, there are even more tuning options,
00083  *  e. g. the ability to choose from multiple algorithm variants.  See
00084  *  below for details.
00085  */
00086 
00087 // Written by Johannes Singler and Felix Putze.
00088 
00089 #ifndef _GLIBCXX_PARALLEL_SETTINGS_H
00090 #define _GLIBCXX_PARALLEL_SETTINGS_H 1
00091 
00092 #include <parallel/types.h>
00093 
00094 /** 
00095   * @brief Determine at compile(?)-time if the parallel variant of an
00096   * algorithm should be called.
00097   * @param c A condition that is convertible to bool that is overruled by
00098   * __gnu_parallel::_Settings::algorithm_strategy. Usually a decision
00099   * based on the input size.
00100   */
00101 #define _GLIBCXX_PARALLEL_CONDITION(c) (__gnu_parallel::_Settings::get().algorithm_strategy != __gnu_parallel::force_sequential && ((__gnu_parallel::get_max_threads() > 1 && (c)) || __gnu_parallel::_Settings::get().algorithm_strategy == __gnu_parallel::force_parallel))
00102 
00103 /*
00104 inline bool
00105 parallel_condition(bool c)
00106 {
00107   bool ret = false;
00108   const _Settings& s = _Settings::get();
00109   if (s.algorithm_strategy != force_seqential)
00110     {
00111       if (s.algorithm_strategy == force_parallel)
00112     ret = true;
00113       else
00114     ret = get_max_threads() > 1 && c;
00115     }
00116   return ret;
00117 }
00118 */
00119 
00120 namespace __gnu_parallel
00121 {
00122   /// class _Settings 
00123   /// Run-time settings for the parallel mode, including all tunable parameters.
00124   struct _Settings
00125   {
00126     _AlgorithmStrategy      algorithm_strategy;
00127     
00128     _SortAlgorithm      sort_algorithm;
00129     _PartialSumAlgorithm    partial_sum_algorithm;
00130     _MultiwayMergeAlgorithm     multiway_merge_algorithm;
00131     _FindAlgorithm      find_algorithm;
00132 
00133     _SplittingAlgorithm     sort_splitting;
00134     _SplittingAlgorithm     merge_splitting;
00135     _SplittingAlgorithm     multiway_merge_splitting;
00136 
00137     // Per-algorithm settings.
00138 
00139     /// Minimal input size for accumulate.
00140     sequence_index_t        accumulate_minimal_n;
00141 
00142     /// Minimal input size for adjacent_difference.
00143     unsigned int        adjacent_difference_minimal_n;
00144 
00145     /// Minimal input size for count and count_if.
00146     sequence_index_t        count_minimal_n;
00147 
00148     /// Minimal input size for fill.
00149     sequence_index_t        fill_minimal_n;
00150 
00151     /// Block size increase factor for find.
00152     double          find_increasing_factor;
00153 
00154     /// Initial block size for find.
00155     sequence_index_t        find_initial_block_size;
00156 
00157     /// Maximal block size for find.
00158     sequence_index_t        find_maximum_block_size;
00159 
00160     /// Start with looking for this many elements sequentially, for find.
00161     sequence_index_t        find_sequential_search_size;
00162 
00163     /// Minimal input size for for_each.
00164     sequence_index_t        for_each_minimal_n;
00165 
00166     /// Minimal input size for generate.
00167     sequence_index_t        generate_minimal_n;
00168 
00169     /// Minimal input size for max_element.
00170     sequence_index_t        max_element_minimal_n;
00171 
00172     /// Minimal input size for merge.
00173     sequence_index_t        merge_minimal_n;
00174 
00175     /// Oversampling factor for merge.
00176     unsigned int        merge_oversampling;
00177 
00178     /// Minimal input size for min_element.
00179     sequence_index_t        min_element_minimal_n;
00180 
00181     /// Minimal input size for multiway_merge.
00182     sequence_index_t        multiway_merge_minimal_n;
00183 
00184     /// Oversampling factor for multiway_merge.
00185     int             multiway_merge_minimal_k;
00186 
00187     /// Oversampling factor for multiway_merge.
00188     unsigned int        multiway_merge_oversampling;
00189 
00190     /// Minimal input size for nth_element.
00191     sequence_index_t        nth_element_minimal_n;
00192 
00193     /// Chunk size for partition.
00194     sequence_index_t        partition_chunk_size;
00195 
00196     /// Chunk size for partition, relative to input size.  If > 0.0,
00197     /// this value overrides partition_chunk_size.
00198     double          partition_chunk_share;
00199 
00200     /// Minimal input size for partition.
00201     sequence_index_t        partition_minimal_n;
00202 
00203     /// Minimal input size for partial_sort.
00204     sequence_index_t        partial_sort_minimal_n;
00205 
00206     /// Ratio for partial_sum. Assume "sum and write result" to be
00207     /// this factor slower than just "sum".
00208     float           partial_sum_dilation;
00209 
00210     /// Minimal input size for partial_sum.
00211     unsigned int        partial_sum_minimal_n;
00212 
00213     /// Minimal input size for random_shuffle.
00214     unsigned int        random_shuffle_minimal_n;
00215 
00216     /// Minimal input size for replace and replace_if.
00217     sequence_index_t        replace_minimal_n;
00218 
00219     /// Minimal input size for set_difference.
00220     sequence_index_t        set_difference_minimal_n;
00221 
00222     /// Minimal input size for set_intersection.
00223     sequence_index_t        set_intersection_minimal_n;
00224 
00225     /// Minimal input size for set_symmetric_difference.
00226     sequence_index_t        set_symmetric_difference_minimal_n;
00227 
00228     /// Minimal input size for set_union.
00229     sequence_index_t        set_union_minimal_n;
00230 
00231     /// Minimal input size for parallel sorting.
00232     sequence_index_t        sort_minimal_n;
00233 
00234     /// Oversampling factor for parallel std::sort (MWMS).
00235     unsigned int        sort_mwms_oversampling;
00236 
00237     /// Such many samples to take to find a good pivot (quicksort).
00238     unsigned int        sort_qs_num_samples_preset;
00239 
00240     /// Maximal subsequence length to switch to unbalanced base case.
00241     /// Applies to std::sort with dynamically load-balanced quicksort.
00242     sequence_index_t        sort_qsb_base_case_maximal_n;
00243 
00244     /// Minimal input size for parallel std::transform.
00245     sequence_index_t        transform_minimal_n;
00246 
00247     /// Minimal input size for unique_copy. 
00248     sequence_index_t        unique_copy_minimal_n;
00249 
00250     sequence_index_t        workstealing_chunk_size;
00251 
00252     // Hardware dependent tuning parameters.
00253 
00254     /// Size of the L1 cache in bytes (underestimation).
00255     unsigned long long      L1_cache_size;
00256 
00257     /// Size of the L2 cache in bytes (underestimation).
00258     unsigned long long      L2_cache_size;
00259 
00260     /// Size of the Translation Lookaside Buffer (underestimation).
00261     unsigned int        TLB_size;
00262 
00263     /// Overestimation of cache line size.  Used to avoid false
00264     /// sharing, i. e. elements of different threads are at least this
00265     /// amount apart.
00266     unsigned int        cache_line_size;
00267 
00268     // Statistics.
00269 
00270     /// The number of stolen ranges in load-balanced quicksort.
00271     sequence_index_t        qsb_steals;
00272 
00273     /// Get the global settings.
00274     static const _Settings&
00275     get() throw();
00276 
00277     /// Set the global settings.
00278     static void
00279     set(_Settings&) throw();
00280 
00281     explicit 
00282     _Settings() : algorithm_strategy(heuristic), sort_algorithm(MWMS), partial_sum_algorithm(LINEAR), multiway_merge_algorithm(LOSER_TREE), find_algorithm(CONSTANT_SIZE_BLOCKS), sort_splitting(EXACT), merge_splitting(EXACT), multiway_merge_splitting(EXACT), accumulate_minimal_n(1000), adjacent_difference_minimal_n(1000), count_minimal_n(1000), fill_minimal_n(1000), find_increasing_factor(2.0), find_initial_block_size(256), find_maximum_block_size(8192), find_sequential_search_size(256), for_each_minimal_n(1000), generate_minimal_n(1000), max_element_minimal_n(1000), merge_minimal_n(1000), merge_oversampling(10), min_element_minimal_n(1000), multiway_merge_minimal_n(1000), multiway_merge_minimal_k(2), multiway_merge_oversampling(10), nth_element_minimal_n(1000), partition_chunk_size(1000), partition_chunk_share(0.0), partition_minimal_n(1000), partial_sort_minimal_n(1000), partial_sum_dilation(1.0f), partial_sum_minimal_n(1000), random_shuffle_minimal_n(1000), replace_minimal_n(1000), set_difference_minimal_n(1000), set_intersection_minimal_n(1000), set_symmetric_difference_minimal_n(1000), set_union_minimal_n(1000), sort_minimal_n(1000), sort_mwms_oversampling(10), sort_qs_num_samples_preset(100), sort_qsb_base_case_maximal_n(100), transform_minimal_n(1000), unique_copy_minimal_n(10000), workstealing_chunk_size(100), L1_cache_size(16 << 10), L2_cache_size(256 << 10), TLB_size(128), cache_line_size(64), qsb_steals(0)
00283     { }
00284   };
00285 }
00286 
00287 #endif /* _GLIBCXX_SETTINGS_H */

Generated on Sat Dec 12 09:40:14 2009 for libstdc++ by  doxygen 1.5.6