00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
00040 #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
00041
00042 #include <omp.h>
00043 #include <new>
00044 #include <bits/stl_algobase.h>
00045 #include <parallel/parallel.h>
00046 #include <parallel/numericfwd.h>
00047
00048 namespace __gnu_parallel
00049 {
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060 template<typename InputIterator,
00061 typename OutputIterator,
00062 typename BinaryOperation>
00063 OutputIterator
00064 parallel_partial_sum_basecase(InputIterator begin, InputIterator end,
00065 OutputIterator result, BinaryOperation bin_op,
00066 typename std::iterator_traits
00067 <InputIterator>::value_type value)
00068 {
00069 if (begin == end)
00070 return result;
00071
00072 while (begin != end)
00073 {
00074 value = bin_op(value, *begin);
00075 *result = value;
00076 ++result;
00077 ++begin;
00078 }
00079 return result;
00080 }
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092 template<typename InputIterator,
00093 typename OutputIterator,
00094 typename BinaryOperation>
00095 OutputIterator
00096 parallel_partial_sum_linear(InputIterator begin, InputIterator end,
00097 OutputIterator result, BinaryOperation bin_op,
00098 typename std::iterator_traits
00099 <InputIterator>::difference_type n)
00100 {
00101 typedef std::iterator_traits<InputIterator> traits_type;
00102 typedef typename traits_type::value_type value_type;
00103 typedef typename traits_type::difference_type difference_type;
00104
00105 if (begin == end)
00106 return result;
00107
00108 thread_index_t num_threads =
00109 std::min<difference_type>(get_max_threads(), n - 1);
00110
00111 if (num_threads < 2)
00112 {
00113 *result = *begin;
00114 return parallel_partial_sum_basecase(
00115 begin + 1, end, result + 1, bin_op, *begin);
00116 }
00117
00118 difference_type* borders;
00119 value_type* sums;
00120
00121 const _Settings& __s = _Settings::get();
00122
00123 # pragma omp parallel num_threads(num_threads)
00124 {
00125 # pragma omp single
00126 {
00127 num_threads = omp_get_num_threads();
00128
00129 borders = new difference_type[num_threads + 2];
00130
00131 if (__s.partial_sum_dilation == 1.0f)
00132 equally_split(n, num_threads + 1, borders);
00133 else
00134 {
00135 difference_type chunk_length =
00136 ((double)n
00137 / ((double)num_threads + __s.partial_sum_dilation)),
00138 borderstart = n - num_threads * chunk_length;
00139 borders[0] = 0;
00140 for (int i = 1; i < (num_threads + 1); ++i)
00141 {
00142 borders[i] = borderstart;
00143 borderstart += chunk_length;
00144 }
00145 borders[num_threads + 1] = n;
00146 }
00147
00148 sums = static_cast<value_type*>(::operator new(sizeof(value_type)
00149 * num_threads));
00150 OutputIterator target_end;
00151 }
00152
00153 thread_index_t iam = omp_get_thread_num();
00154 if (iam == 0)
00155 {
00156 *result = *begin;
00157 parallel_partial_sum_basecase(begin + 1, begin + borders[1],
00158 result + 1, bin_op, *begin);
00159 ::new(&(sums[iam])) value_type(*(result + borders[1] - 1));
00160 }
00161 else
00162 {
00163 ::new(&(sums[iam]))
00164 value_type(std::accumulate(begin + borders[iam] + 1,
00165 begin + borders[iam + 1],
00166 *(begin + borders[iam]),
00167 bin_op,
00168 __gnu_parallel::sequential_tag()));
00169 }
00170
00171 # pragma omp barrier
00172
00173 # pragma omp single
00174 parallel_partial_sum_basecase(
00175 sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]);
00176
00177 # pragma omp barrier
00178
00179
00180 parallel_partial_sum_basecase(begin + borders[iam + 1],
00181 begin + borders[iam + 2],
00182 result + borders[iam + 1], bin_op,
00183 sums[iam]);
00184 }
00185
00186 ::operator delete(sums);
00187 delete[] borders;
00188
00189 return result + n;
00190 }
00191
00192
00193
00194
00195
00196
00197
00198 template<typename InputIterator,
00199 typename OutputIterator,
00200 typename BinaryOperation>
00201 OutputIterator
00202 parallel_partial_sum(InputIterator begin, InputIterator end,
00203 OutputIterator result, BinaryOperation bin_op)
00204 {
00205 _GLIBCXX_CALL(begin - end)
00206
00207 typedef std::iterator_traits<InputIterator> traits_type;
00208 typedef typename traits_type::value_type value_type;
00209 typedef typename traits_type::difference_type difference_type;
00210
00211 difference_type n = end - begin;
00212
00213 switch (_Settings::get().partial_sum_algorithm)
00214 {
00215 case LINEAR:
00216
00217 return parallel_partial_sum_linear(begin, end, result, bin_op, n);
00218 default:
00219
00220 _GLIBCXX_PARALLEL_ASSERT(0);
00221 return result + n;
00222 }
00223 }
00224 }
00225
00226 #endif