cprover
c_wrangler.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: C Wrangler
4 
5 Author: Daniel Kroening, dkr@amazon.com
6 
7 \*******************************************************************/
8 
11 
12 #include "c_wrangler.h"
13 
14 #include "c_defines.h"
15 #include "ctokenit.h"
16 #include "mini_c_parser.h"
17 
18 #include <util/cprover_prefix.h>
19 #include <util/exception_utils.h>
20 #include <util/file_util.h>
21 #include <util/json.h>
22 #include <util/optional.h>
23 #include <util/prefix.h>
24 #include <util/run.h>
25 #include <util/string_utils.h>
26 #include <util/suffix.h>
27 #include <util/tempdir.h>
28 
29 #include <fstream>
30 #include <iostream>
31 #include <map>
32 #include <regex>
33 #include <sstream>
34 #include <unordered_map>
35 
37 {
38  // sources and preprocessing
39  std::vector<std::string> source_files;
40  std::vector<std::string> includes;
41  std::vector<std::string> defines;
42 
43  // transformations
45  {
46  std::string clause;
47  std::string content;
48  contract_clauset(std::string _clause, std::string _content)
49  : clause(std::move(_clause)), content(std::move(_content))
50  {
51  }
52  };
53 
55  {
56  std::string loop_type;
57  std::string identifier;
58  std::string content;
60  std::string _loop_type,
61  std::string _identifier,
62  std::string _content)
63  : loop_type(std::move(_loop_type)),
64  identifier(std::move(_identifier)),
65  content(std::move(_content))
66  {
67  }
68  };
69 
70  struct assertiont
71  {
72  std::string identifier;
73  std::string content;
74  assertiont(std::string _identifier, std::string _content)
75  : identifier(std::move(_identifier)), content(std::move(_content))
76  {
77  }
78  };
79 
80  struct functiont
81  {
82  // should be variant to preserve ordering
83  std::vector<contract_clauset> contract;
84  std::vector<loop_invariantt> loop_invariants;
85  std::vector<assertiont> assertions;
87  bool remove_static = false;
88  };
89 
90  using functionst = std::list<std::pair<std::regex, functiont>>;
92 
93  struct objectt
94  {
95  bool remove_static = false;
96  };
97 
98  using objectst = std::list<std::pair<std::regex, objectt>>;
100 
101  // output
102  std::string output;
103 
104  void configure_sources(const jsont &);
105  void configure_functions(const jsont &);
106  void configure_objects(const jsont &);
107  void configure_output(const jsont &);
108 };
109 
111 {
112  auto sources = config["sources"];
113 
114  if(!sources.is_null())
115  {
116  if(!sources.is_array())
117  throw deserialization_exceptiont("sources entry must be sequence");
118 
119  for(const auto &source : to_json_array(sources))
120  {
121  if(!source.is_string())
122  throw deserialization_exceptiont("source must be string");
123 
124  this->source_files.push_back(source.value);
125  }
126  }
127 
128  auto includes = config["includes"];
129 
130  if(!includes.is_null())
131  {
132  if(!includes.is_array())
133  throw deserialization_exceptiont("includes entry must be sequence");
134 
135  for(const auto &include : to_json_array(includes))
136  {
137  if(!include.is_string())
138  throw deserialization_exceptiont("include must be string");
139 
140  this->includes.push_back(include.value);
141  }
142  }
143 
144  auto defines = config["defines"];
145 
146  if(!defines.is_null())
147  {
148  if(!defines.is_array())
149  throw deserialization_exceptiont("defines entry must be sequence");
150 
151  for(const auto &define : to_json_array(defines))
152  {
153  if(!define.is_string())
154  throw deserialization_exceptiont("define must be string");
155 
156  this->defines.push_back(define.value);
157  }
158  }
159 }
160 
162 {
163  auto functions = config["functions"];
164 
165  if(functions.is_null())
166  return;
167 
168  if(!functions.is_array())
169  throw deserialization_exceptiont("functions entry must be sequence");
170 
171  for(const auto &function : to_json_array(functions))
172  {
173  if(!function.is_object())
174  throw deserialization_exceptiont("function entry must be object");
175 
176  for(const auto &function_entry : to_json_object(function))
177  {
178  const auto function_name = function_entry.first;
179  const auto &items = function_entry.second;
180 
181  if(!items.is_array())
182  throw deserialization_exceptiont("function entry must be sequence");
183 
184  this->functions.emplace_back(function_name, functiont{});
185  functiont &function_config = this->functions.back().second;
186 
187  for(const auto &function_item : to_json_array(items))
188  {
189  // These need to start with "ensures", "requires", "assigns",
190  // "invariant", "assert", "stub", "remove"
191  if(!function_item.is_string())
192  throw deserialization_exceptiont("function entry must be string");
193 
194  auto item_string = function_item.value;
195  auto split = split_string(item_string, ' ');
196  if(split.empty())
197  continue;
198 
199  if(
200  split[0] == "ensures" || split[0] == "requires" ||
201  split[0] == "assigns")
202  {
203  std::ostringstream rest;
204  join_strings(rest, split.begin() + 1, split.end(), ' ');
205 
206  function_config.contract.emplace_back(split[0], rest.str());
207  }
208  else if(split[0] == "assert" && split.size() >= 3)
209  {
210  std::ostringstream rest;
211  join_strings(rest, split.begin() + 2, split.end(), ' ');
212 
213  function_config.assertions.emplace_back(split[1], rest.str());
214  }
215  else if(
216  (split[0] == "for" && split.size() >= 3 && split[2] == "invariant") ||
217  (split[0] == "while" && split.size() >= 3 && split[2] == "invariant"))
218  {
219  std::ostringstream rest;
220  join_strings(rest, split.begin() + 3, split.end(), ' ');
221 
222  function_config.loop_invariants.emplace_back(
223  split[0], split[1], rest.str());
224  }
225  else if(split[0] == "stub")
226  {
227  std::ostringstream rest;
228  join_strings(rest, split.begin() + 1, split.end(), ' ');
229 
230  function_config.stub = rest.str();
231  }
232  else if(split[0] == "remove")
233  {
234  if(split.size() == 1)
235  throw deserialization_exceptiont("unexpected remove entry");
236 
237  if(split[1] == "static")
238  function_config.remove_static = true;
239  else
241  "unexpected remove entry " + split[1]);
242  }
243  else
245  "unexpected function entry " + split[0]);
246  }
247  }
248  }
249 }
250 
252 {
253  auto objects = config["objects"];
254 
255  if(objects.is_null())
256  return;
257 
258  if(!objects.is_array())
259  throw deserialization_exceptiont("objects entry must be sequence");
260 
261  for(const auto &object : to_json_array(objects))
262  {
263  if(!object.is_object())
264  throw deserialization_exceptiont("object entry must be object");
265 
266  for(const auto &object_entry : to_json_object(object))
267  {
268  const auto &object_name = object_entry.first;
269  const auto &items = object_entry.second;
270 
271  if(!items.is_array())
272  throw deserialization_exceptiont("object entry must be sequence");
273 
274  this->objects.emplace_back(object_name, objectt{});
275  objectt &object_config = this->objects.back().second;
276 
277  for(const auto &object_item : to_json_array(items))
278  {
279  // Needs to start with "remove"
280  if(!object_item.is_string())
281  throw deserialization_exceptiont("object entry must be string");
282 
283  auto item_string = object_item.value;
284  auto split = split_string(item_string, ' ');
285  if(split.empty())
286  continue;
287 
288  if(split[0] == "remove")
289  {
290  if(split.size() == 1)
291  throw deserialization_exceptiont("unexpected remove entry");
292 
293  if(split[1] == "static")
294  object_config.remove_static = true;
295  else
297  "unexpected remove entry " + split[1]);
298  }
299  else
301  "unexpected object entry " + split[0]);
302  }
303  }
304  }
305 }
306 
308 {
309  auto output = config["output"];
310 
311  if(output.is_null())
312  return;
313 
314  if(!output.is_string())
315  throw deserialization_exceptiont("output entry must be string");
316 
317  this->output = output.value;
318 }
319 
320 static std::string
321 preprocess(const std::string &source_file, const c_wranglert &c_wrangler)
322 {
323  std::vector<std::string> argv = {"cc", "-E", source_file};
324 
325  for(const auto &include : c_wrangler.includes)
326  {
327  argv.push_back("-I");
328  argv.push_back(include);
329  }
330 
331  for(const auto &define : c_wrangler.defines)
332  argv.push_back(std::string("-D") + define);
333 
334  std::ostringstream result;
335 
336  auto run_result = run("cc", argv, "", result, "");
337  if(run_result != 0)
338  throw system_exceptiont("preprocessing " + source_file + " has failed");
339 
340  return result.str();
341 }
342 
343 static c_definest
344 get_defines(const std::string &source_file, const c_wranglert &config)
345 {
346  std::vector<std::string> argv = {"cc", "-E", "-dM", source_file};
347 
348  for(const auto &include : config.includes)
349  {
350  argv.push_back("-I");
351  argv.push_back(include);
352  }
353 
354  std::ostringstream result;
355 
356  auto run_result = run("cc", argv, "", result, "");
357  if(run_result != 0)
358  throw system_exceptiont("preprocessing " + source_file + " has failed");
359 
360  c_definest defines;
361  defines.parse(result.str());
362  return defines;
363 }
364 
365 static void mangle_function(
366  const c_declarationt &declaration,
367  const c_definest &defines,
368  const c_wranglert::functiont &function_config,
369  std::ostream &out)
370 {
371  if(function_config.stub.has_value())
372  {
373  // replace by stub
374  out << function_config.stub.value();
375  }
376  else
377  {
378  if(function_config.remove_static)
379  {
380  for(auto &t : declaration.pre_declarator)
381  {
382  if(t.text == "static")
383  {
384  // we replace by white space
385  out << std::string(6, ' ');
386  }
387  else
388  out << t.text;
389  }
390  }
391  else
392  {
393  for(auto &t : declaration.pre_declarator)
394  out << t.text;
395  }
396 
397  for(auto &t : declaration.declarator)
398  out << t.text;
399  for(auto &t : declaration.post_declarator)
400  out << t.text;
401 
402  for(const auto &entry : function_config.contract)
403  out << ' ' << CPROVER_PREFIX << entry.clause << '('
404  << defines(entry.content) << ')';
405 
406  std::map<std::string, std::string> loop_invariants;
407 
408  for(const auto &entry : function_config.loop_invariants)
409  loop_invariants[entry.loop_type + entry.identifier] = entry.content;
410 
411  if(loop_invariants.empty())
412  {
413  for(auto &t : declaration.initializer)
414  out << t.text;
415  }
416  else
417  {
418  std::size_t for_count = 0, while_count = 0;
419  ctokenitt t(declaration.initializer);
420 
421  while(t)
422  {
423  const auto &token = *(t++);
424  out << token.text;
425 
426  if(token == "while")
427  {
428  while_count++;
429  const auto &invariant =
430  loop_invariants["while" + std::to_string(while_count)];
431 
432  if(!invariant.empty())
433  {
434  auto t_end = match_bracket(t, '(', ')');
435  for(; t != t_end; t++)
436  out << t->text;
437  out << ' ' << CPROVER_PREFIX << "loop_invariant("
438  << defines(invariant) << ')';
439  }
440  }
441  else if(token == "for")
442  {
443  for_count++;
444  const auto &invariant =
445  loop_invariants["for" + std::to_string(for_count)];
446 
447  if(!invariant.empty())
448  {
449  auto t_end = match_bracket(t, '(', ')');
450  for(; t != t_end; t++)
451  out << t->text;
452  out << ' ' << CPROVER_PREFIX << "invariant(" << defines(invariant)
453  << ')';
454  }
455  }
456  }
457  }
458  }
459 }
460 
461 static void mangle_object(
462  const c_declarationt &declaration,
463  const c_definest &defines,
464  const c_wranglert::objectt &object_config,
465  std::ostream &out)
466 {
467  if(object_config.remove_static)
468  {
469  for(auto &t : declaration.pre_declarator)
470  {
471  if(t.text == "static")
472  {
473  // we replace by white space
474  out << std::string(6, ' ');
475  }
476  else
477  out << t.text;
478  }
479  }
480  else
481  {
482  for(auto &t : declaration.pre_declarator)
483  out << t.text;
484  }
485 
486  for(auto &t : declaration.declarator)
487  out << t.text;
488  for(auto &t : declaration.post_declarator)
489  out << t.text;
490  for(auto &t : declaration.initializer)
491  out << t.text;
492 }
493 
494 static void mangle(
495  const c_declarationt &declaration,
496  const c_definest &defines,
497  const c_wranglert &config,
498  std::ostream &out)
499 {
500  auto name_opt = declaration.declared_identifier();
501  if(
502  declaration.is_function() && name_opt.has_value() && declaration.has_body())
503  {
504  for(const auto &entry : config.functions)
505  {
506  if(std::regex_match(name_opt->text, entry.first))
507  {
508  // we are to modify this function
509  mangle_function(declaration, defines, entry.second, out);
510 
511  return;
512  }
513  }
514  }
515  else if(!declaration.is_function() && name_opt.has_value())
516  {
517  for(const auto &entry : config.objects)
518  {
519  if(std::regex_match(name_opt->text, entry.first))
520  {
521  // we are to modify this function
522  mangle_object(declaration, defines, entry.second, out);
523 
524  return;
525  }
526  }
527  }
528 
529  // output
530  out << declaration;
531 }
532 
533 static std::string mangle(
534  const std::string &in,
535  const c_definest &defines,
536  const c_wranglert &config)
537 {
538  std::ostringstream out;
539  std::istringstream in_str(in);
540 
541  auto parsed = parse_c(in_str);
542 
543  for(const auto &declaration : parsed)
544  mangle(declaration, defines, config, out);
545 
546  return out.str();
547 }
548 
549 void c_wrangler(const jsont &config)
550 {
552 
553  c_wrangler.configure_sources(config);
554  c_wrangler.configure_functions(config);
555  c_wrangler.configure_objects(config);
556  c_wrangler.configure_output(config);
557 
558  for(auto &source_file : c_wrangler.source_files)
559  {
560  // first preprocess
561  auto preprocessed = preprocess(source_file, c_wrangler);
562 
563  // get the defines
564  auto defines = get_defines(source_file, c_wrangler);
565 
566  // now mangle
567  auto mangled = mangle(preprocessed, defines, c_wrangler);
568 
569  // now output
570  if(c_wrangler.output == "stdout" || c_wrangler.output.empty())
571  {
572  std::cout << mangled;
573  }
574  else
575  {
576  std::ofstream out(c_wrangler.output);
577  out << mangled;
578  }
579  }
580 }
c_defines
static void mangle_function(const c_declarationt &declaration, const c_definest &defines, const c_wranglert::functiont &function_config, std::ostream &out)
Definition: c_wrangler.cpp:365
static c_definest get_defines(const std::string &source_file, const c_wranglert &config)
Definition: c_wrangler.cpp:344
static void mangle_object(const c_declarationt &declaration, const c_definest &defines, const c_wranglert::objectt &object_config, std::ostream &out)
Definition: c_wrangler.cpp:461
void c_wrangler(const jsont &config)
Definition: c_wrangler.cpp:549
static std::string preprocess(const std::string &source_file, const c_wranglert &c_wrangler)
Definition: c_wrangler.cpp:321
static void mangle(const c_declarationt &declaration, const c_definest &defines, const c_wranglert &config, std::ostream &out)
Definition: c_wrangler.cpp:494
C Wrangler.
This class maintains a representation of one assignment to the preprocessor macros in a C program.
Definition: c_defines.h:24
void parse(const std::string &)
Definition: c_defines.cpp:21
std::string text
Definition: ctoken.h:40
Thrown when failing to deserialize a value from some low level format, like JSON or raw bytes.
Definition: json.h:27
Thrown when some external system fails unexpectedly.
configt config
Definition: config.cpp:25
#define CPROVER_PREFIX
ctokenitt match_bracket(ctokenitt t, char open, char close)
Definition: ctokenit.cpp:33
ctokenit
json_arrayt & to_json_array(jsont &json)
Definition: json.h:426
json_objectt & to_json_object(jsont &json)
Definition: json.h:444
c_translation_unitt parse_c(std::istream &in)
Mini C Parser.
nonstd::optional< T > optionalt
Definition: optional.h:35
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:48
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
void split_string(const std::string &s, char delim, std::vector< std::string > &result, bool strip, bool remove_empty)
Stream & join_strings(Stream &&os, const It b, const It e, const Delimiter &delimiter, TransformFunc &&transform_func)
Prints items to an stream, separated by a constant delimiter.
Definition: string_utils.h:62
bool has_body() const
bool is_function() const
tokenst post_declarator
Definition: mini_c_parser.h:29
tokenst initializer
Definition: mini_c_parser.h:30
optionalt< ctokent > declared_identifier() const
tokenst declarator
Definition: mini_c_parser.h:28
tokenst pre_declarator
Definition: mini_c_parser.h:27
assertiont(std::string _identifier, std::string _content)
Definition: c_wrangler.cpp:74
contract_clauset(std::string _clause, std::string _content)
Definition: c_wrangler.cpp:48
std::vector< loop_invariantt > loop_invariants
Definition: c_wrangler.cpp:84
std::vector< assertiont > assertions
Definition: c_wrangler.cpp:85
std::vector< contract_clauset > contract
Definition: c_wrangler.cpp:83
optionalt< std::string > stub
Definition: c_wrangler.cpp:86
loop_invariantt(std::string _loop_type, std::string _identifier, std::string _content)
Definition: c_wrangler.cpp:59
void configure_output(const jsont &)
Definition: c_wrangler.cpp:307
void configure_objects(const jsont &)
Definition: c_wrangler.cpp:251
objectst objects
Definition: c_wrangler.cpp:99
std::vector< std::string > source_files
Definition: c_wrangler.cpp:39
std::string output
Definition: c_wrangler.cpp:102
std::list< std::pair< std::regex, functiont > > functionst
Definition: c_wrangler.cpp:90
std::list< std::pair< std::regex, objectt > > objectst
Definition: c_wrangler.cpp:98
functionst functions
Definition: c_wrangler.cpp:91
void configure_sources(const jsont &)
Definition: c_wrangler.cpp:110
std::vector< std::string > includes
Definition: c_wrangler.cpp:40
std::vector< std::string > defines
Definition: c_wrangler.cpp:41
void configure_functions(const jsont &)
Definition: c_wrangler.cpp:161