benchmark  1.7.0
perf_counters.h
1 // Copyright 2021 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef BENCHMARK_PERF_COUNTERS_H
16 #define BENCHMARK_PERF_COUNTERS_H
17 
18 #include <array>
19 #include <cstdint>
20 #include <memory>
21 #include <vector>
22 
23 #include "benchmark/benchmark.h"
24 #include "check.h"
25 #include "log.h"
26 #include "mutex.h"
27 
28 #ifndef BENCHMARK_OS_WINDOWS
29 #include <unistd.h>
30 #endif
31 
32 #if defined(_MSC_VER)
33 #pragma warning(push)
34 // C4251: <symbol> needs to have dll-interface to be used by clients of class
35 #pragma warning(disable : 4251)
36 #endif
37 
38 namespace benchmark {
39 namespace internal {
40 
41 // Typically, we can only read a small number of counters. There is also a
42 // padding preceding counter values, when reading multiple counters with one
43 // syscall (which is desirable). PerfCounterValues abstracts these details.
44 // The implementation ensures the storage is inlined, and allows 0-based
45 // indexing into the counter values.
46 // The object is used in conjunction with a PerfCounters object, by passing it
47 // to Snapshot(). The values are populated such that
48 // perfCounters->names()[i]'s value is obtained at position i (as given by
49 // operator[]) of this object.
51  public:
52  explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
53  BM_CHECK_LE(nr_counters_, kMaxCounters);
54  }
55 
56  uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; }
57 
58  static constexpr size_t kMaxCounters = 3;
59 
60  private:
61  friend class PerfCounters;
62  // Get the byte buffer in which perf counters can be captured.
63  // This is used by PerfCounters::Read
64  std::pair<char*, size_t> get_data_buffer() {
65  return {reinterpret_cast<char*>(values_.data()),
66  sizeof(uint64_t) * (kPadding + nr_counters_)};
67  }
68 
69  static constexpr size_t kPadding = 1;
70  std::array<uint64_t, kPadding + kMaxCounters> values_;
71  const size_t nr_counters_;
72 };
73 
74 // Collect PMU counters. The object, once constructed, is ready to be used by
75 // calling read(). PMU counter collection is enabled from the time create() is
76 // called, to obtain the object, until the object's destructor is called.
77 class BENCHMARK_EXPORT PerfCounters final {
78  public:
79  // True iff this platform supports performance counters.
80  static const bool kSupported;
81 
82  bool IsValid() const { return !counter_names_.empty(); }
83  static PerfCounters NoCounters() { return PerfCounters(); }
84 
85  ~PerfCounters() { CloseCounters(); }
86  PerfCounters(PerfCounters&&) = default;
87  PerfCounters(const PerfCounters&) = delete;
88  PerfCounters& operator=(PerfCounters&&) noexcept;
89  PerfCounters& operator=(const PerfCounters&) = delete;
90 
91  // Platform-specific implementations may choose to do some library
92  // initialization here.
93  static bool Initialize();
94 
95  // Return a PerfCounters object ready to read the counters with the names
96  // specified. The values are user-mode only. The counter name format is
97  // implementation and OS specific.
98  // TODO: once we move to C++-17, this should be a std::optional, and then the
99  // IsValid() boolean can be dropped.
100  static PerfCounters Create(const std::vector<std::string>& counter_names);
101 
102  // Take a snapshot of the current value of the counters into the provided
103  // valid PerfCounterValues storage. The values are populated such that:
104  // names()[i]'s value is (*values)[i]
105  BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
106 #ifndef BENCHMARK_OS_WINDOWS
107  assert(values != nullptr);
108  assert(IsValid());
109  auto buffer = values->get_data_buffer();
110  auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second);
111  return static_cast<size_t>(read_bytes) == buffer.second;
112 #else
113  (void)values;
114  return false;
115 #endif
116  }
117 
118  const std::vector<std::string>& names() const { return counter_names_; }
119  size_t num_counters() const { return counter_names_.size(); }
120 
121  private:
122  PerfCounters(const std::vector<std::string>& counter_names,
123  std::vector<int>&& counter_ids)
124  : counter_ids_(std::move(counter_ids)), counter_names_(counter_names) {}
125  PerfCounters() = default;
126 
127  void CloseCounters() const;
128 
129  std::vector<int> counter_ids_;
130  std::vector<std::string> counter_names_;
131 };
132 
133 // Typical usage of the above primitives.
134 class BENCHMARK_EXPORT PerfCountersMeasurement final {
135  public:
136  PerfCountersMeasurement(const std::vector<std::string>& counter_names);
138 
139  // The only way to get to `counters_` is after ctor-ing a
140  // `PerfCountersMeasurement`, which means that `counters_`'s state is, here,
141  // decided (either invalid or valid) and won't change again even if a ctor is
142  // concurrently running with this. This is preferring efficiency to
143  // maintainability, because the address of the static can be known at compile
144  // time.
145  bool IsValid() const {
146  MutexLock l(mutex_);
147  return counters_.IsValid();
148  }
149 
150  BENCHMARK_ALWAYS_INLINE void Start() {
151  assert(IsValid());
152  MutexLock l(mutex_);
153  // Tell the compiler to not move instructions above/below where we take
154  // the snapshot.
155  ClobberMemory();
156  valid_read_ &= counters_.Snapshot(&start_values_);
157  ClobberMemory();
158  }
159 
160  BENCHMARK_ALWAYS_INLINE bool Stop(
161  std::vector<std::pair<std::string, double>>& measurements) {
162  assert(IsValid());
163  MutexLock l(mutex_);
164  // Tell the compiler to not move instructions above/below where we take
165  // the snapshot.
166  ClobberMemory();
167  valid_read_ &= counters_.Snapshot(&end_values_);
168  ClobberMemory();
169 
170  for (size_t i = 0; i < counters_.names().size(); ++i) {
171  double measurement = static_cast<double>(end_values_[i]) -
172  static_cast<double>(start_values_[i]);
173  measurements.push_back({counters_.names()[i], measurement});
174  }
175 
176  return valid_read_;
177  }
178 
179  private:
180  static Mutex mutex_;
181  GUARDED_BY(mutex_) static int ref_count_;
182  GUARDED_BY(mutex_) static PerfCounters counters_;
183  bool valid_read_ = true;
184  PerfCounterValues start_values_;
185  PerfCounterValues end_values_;
186 };
187 
// Calls PerfCounters::Initialize() when this variable is initialized; the
// stored result is not read anywhere in this header.
// NOTE(review): the variable is `static` in a header, so every translation
// unit that includes it gets its own copy and its own Initialize() call --
// confirm Initialize() is safe to run more than once.
188 BENCHMARK_UNUSED static bool perf_init_anchor = PerfCounters::Initialize();
189 
190 } // namespace internal
191 } // namespace benchmark
192 
193 #if defined(_MSC_VER)
194 #pragma warning(pop)
195 #endif
196 
197 #endif // BENCHMARK_PERF_COUNTERS_H
Definition: mutex.h:87
Definition: perf_counters.h:50
Definition: perf_counters.h:134
Definition: perf_counters.h:77