cprover
ansi_c_entry_point.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #include "ansi_c_entry_point.h"
10 
11 #include <util/arith_tools.h>
12 #include <util/c_types.h>
13 #include <util/config.h>
14 #include <util/message.h>
15 #include <util/pointer_expr.h>
16 #include <util/range.h>
17 #include <util/symbol_table.h>
18 
20 
22 
24 #include "expr2c.h"
25 
27  const code_typet::parameterst &parameters,
28  code_blockt &init_code,
29  symbol_tablet &symbol_table,
30  const c_object_factory_parameterst &object_factory_parameters)
31 {
32  exprt::operandst main_arguments;
33  main_arguments.resize(parameters.size());
34 
35  for(std::size_t param_number=0;
36  param_number<parameters.size();
37  param_number++)
38  {
39  const code_typet::parametert &p=parameters[param_number];
40  const irep_idt base_name=p.get_base_name().empty()?
41  ("argument#"+std::to_string(param_number)):p.get_base_name();
42 
43  main_arguments[param_number] = c_nondet_symbol_factory(
44  init_code,
45  symbol_table,
46  base_name,
47  p.type(),
48  p.source_location(),
49  object_factory_parameters,
51  }
52 
53  return main_arguments;
54 }
55 
57  const symbolt &function,
58  code_blockt &init_code,
59  symbol_tablet &symbol_table)
60 {
61  bool has_return_value =
62  to_code_type(function.type).return_type() != void_type();
63 
64  if(has_return_value)
65  {
66  const symbolt &return_symbol = symbol_table.lookup_ref("return'");
67 
68  // record return value
69  init_code.add(code_outputt{
70  return_symbol.base_name, return_symbol.symbol_expr(), function.location});
71  }
72 
73  #if 0
74  std::size_t i=0;
75 
76  for(const auto &p : parameters)
77  {
78  if(p.get_identifier().empty())
79  continue;
80 
81  irep_idt identifier=p.get_identifier();
82 
83  const symbolt &symbol=symbol_table.lookup(identifier);
84 
85  if(symbol.type.id()==ID_pointer)
86  {
87  codet output(ID_output);
88  output.operands().resize(2);
89 
90  output.op0()=
94  from_integer(0, index_type())));
95  output.op1()=symbol.symbol_expr();
96  output.add_source_location()=p.source_location();
97 
98  init_code.add(std::move(output));
99  }
100 
101  i++;
102  }
103  #endif
104 }
105 
107  symbol_tablet &symbol_table,
108  message_handlert &message_handler,
109  const c_object_factory_parameterst &object_factory_parameters)
110 {
111  // check if entry point is already there
112  if(symbol_table.symbols.find(goto_functionst::entry_point())!=
113  symbol_table.symbols.end())
114  return false; // silently ignore
115 
116  irep_idt main_symbol;
117 
118  // find main symbol, if any is given
119  if(config.main.has_value())
120  {
121  std::list<irep_idt> matches;
122 
123  for(const auto &symbol_name_entry :
124  equal_range(symbol_table.symbol_base_map, config.main.value()))
125  {
126  // look it up
127  symbol_tablet::symbolst::const_iterator s_it =
128  symbol_table.symbols.find(symbol_name_entry.second);
129 
130  if(s_it==symbol_table.symbols.end())
131  continue;
132 
133  if(s_it->second.type.id()==ID_code)
134  matches.push_back(symbol_name_entry.second);
135  }
136 
137  if(matches.empty())
138  {
139  messaget message(message_handler);
140  message.error() << "main symbol '" << config.main.value() << "' not found"
141  << messaget::eom;
142  return true; // give up
143  }
144 
145  if(matches.size()>=2)
146  {
147  messaget message(message_handler);
148  message.error() << "main symbol '" << config.main.value()
149  << "' is ambiguous" << messaget::eom;
150  return true;
151  }
152 
153  main_symbol=matches.front();
154  }
155  else
156  main_symbol=ID_main;
157 
158  // look it up
159  symbol_tablet::symbolst::const_iterator s_it=
160  symbol_table.symbols.find(main_symbol);
161 
162  if(s_it==symbol_table.symbols.end())
163  return false; // give up silently
164 
165  const symbolt &symbol=s_it->second;
166 
167  // check if it has a body
168  if(symbol.value.is_nil())
169  {
170  messaget message(message_handler);
171  message.error() << "main symbol '" << id2string(main_symbol)
172  << "' has no body" << messaget::eom;
173  return false; // give up
174  }
175 
176  static_lifetime_init(symbol_table, symbol.location);
177 
179  symbol, symbol_table, message_handler, object_factory_parameters);
180 }
181 
192  const symbolt &symbol,
193  symbol_tablet &symbol_table,
194  message_handlert &message_handler,
195  const c_object_factory_parameterst &object_factory_parameters)
196 {
197  PRECONDITION(!symbol.value.is_nil());
198  code_blockt init_code;
199 
200  // add 'HIDE' label
201  init_code.add(code_labelt(CPROVER_PREFIX "HIDE", code_skipt()));
202 
203  // build call to initialization function
204 
205  {
206  symbol_tablet::symbolst::const_iterator init_it=
207  symbol_table.symbols.find(INITIALIZE_FUNCTION);
208 
209  if(init_it==symbol_table.symbols.end())
210  {
211  messaget message(message_handler);
212  message.error() << "failed to find " INITIALIZE_FUNCTION " symbol"
213  << messaget::eom;
214  return true;
215  }
216 
217  code_function_callt call_init(init_it->second.symbol_expr());
218  call_init.add_source_location()=symbol.location;
219 
220  init_code.add(std::move(call_init));
221  }
222 
223  // build call to main function
224 
225  code_function_callt call_main(symbol.symbol_expr());
226  call_main.add_source_location()=symbol.location;
227  call_main.function().add_source_location()=symbol.location;
228 
229  if(to_code_type(symbol.type).return_type() != void_type())
230  {
231  auxiliary_symbolt return_symbol;
232  return_symbol.mode=ID_C;
233  return_symbol.is_static_lifetime=false;
234  return_symbol.name="return'";
235  return_symbol.base_name = "return'";
236  return_symbol.type=to_code_type(symbol.type).return_type();
237 
238  symbol_table.add(return_symbol);
239  call_main.lhs()=return_symbol.symbol_expr();
240  }
241 
242  const code_typet::parameterst &parameters=
243  to_code_type(symbol.type).parameters();
244 
245  if(symbol.name==ID_main)
246  {
247  if(parameters.empty())
248  {
249  // ok
250  }
251  else if(parameters.size()==2 || parameters.size()==3)
252  {
253  namespacet ns(symbol_table);
254 
255  {
256  symbolt argc_symbol;
257 
258  argc_symbol.base_name = "argc'";
259  argc_symbol.name = "argc'";
260  argc_symbol.type = signed_int_type();
261  argc_symbol.is_static_lifetime = true;
262  argc_symbol.is_lvalue = true;
263  argc_symbol.mode = ID_C;
264 
265  auto r = symbol_table.insert(argc_symbol);
266  if(!r.second && r.first != argc_symbol)
267  {
268  messaget message(message_handler);
269  message.error() << "argc already exists but is not usable"
270  << messaget::eom;
271  return true;
272  }
273  }
274 
275  const symbolt &argc_symbol = ns.lookup("argc'");
276 
277  {
278  // we make the type of this thing an array of pointers
279  // need to add one to the size -- the array is terminated
280  // with NULL
281  const exprt one_expr = from_integer(1, argc_symbol.type);
282  const plus_exprt size_expr(argc_symbol.symbol_expr(), one_expr);
283  const array_typet argv_type(pointer_type(char_type()), size_expr);
284 
285  symbolt argv_symbol;
286 
287  argv_symbol.base_name = "argv'";
288  argv_symbol.name = "argv'";
289  argv_symbol.type = argv_type;
290  argv_symbol.is_static_lifetime = true;
291  argv_symbol.is_lvalue = true;
292  argv_symbol.mode = ID_C;
293 
294  auto r = symbol_table.insert(argv_symbol);
295  if(!r.second && r.first != argv_symbol)
296  {
297  messaget message(message_handler);
298  message.error() << "argv already exists but is not usable"
299  << messaget::eom;
300  return true;
301  }
302  }
303 
304  const symbolt &argv_symbol=ns.lookup("argv'");
305 
306  {
307  // Assume argc is at least zero. Note that we don't assume it's
308  // at least one, since this isn't guaranteed, as exemplified by
309  // https://www.qualys.com/2022/01/25/cve-2021-4034/pwnkit.txt
310  // The C standard only guarantees "The value of argc shall be
311  // nonnegative." and "argv[argc] shall be a null pointer."
312  exprt zero = from_integer(0, argc_symbol.type);
313 
315  argc_symbol.symbol_expr(), ID_ge, std::move(zero));
316 
317  init_code.add(code_assumet(std::move(ge)));
318  }
319 
320  if(config.ansi_c.max_argc.has_value())
321  {
322  exprt bound_expr =
323  from_integer(*config.ansi_c.max_argc, argc_symbol.type);
324 
326  argc_symbol.symbol_expr(), ID_le, std::move(bound_expr));
327 
328  init_code.add(code_assumet(std::move(le)));
329  }
330 
331  // record argc as an input
332  init_code.add(code_inputt{"argc", argc_symbol.symbol_expr()});
333 
334  if(parameters.size()==3)
335  {
336  {
337  symbolt envp_size_symbol;
338  envp_size_symbol.base_name = "envp_size'";
339  envp_size_symbol.name = "envp_size'";
340  envp_size_symbol.type = size_type();
341  envp_size_symbol.is_static_lifetime = true;
342  envp_size_symbol.mode = ID_C;
343 
344  if(!symbol_table.insert(std::move(envp_size_symbol)).second)
345  {
346  messaget message(message_handler);
347  message.error()
348  << "failed to insert envp_size symbol" << messaget::eom;
349  return true;
350  }
351  }
352 
353  const symbolt &envp_size_symbol=ns.lookup("envp_size'");
354 
355  {
356  symbolt envp_symbol;
357  envp_symbol.base_name = "envp'";
358  envp_symbol.name = "envp'";
359  envp_symbol.type = array_typet(
360  pointer_type(char_type()), envp_size_symbol.symbol_expr());
361  envp_symbol.is_static_lifetime = true;
362  envp_symbol.mode = ID_C;
363 
364  if(!symbol_table.insert(std::move(envp_symbol)).second)
365  {
366  messaget message(message_handler);
367  message.error() << "failed to insert envp symbol" << messaget::eom;
368  return true;
369  }
370  }
371 
372  // assume envp_size is INTMAX-1
373  const mp_integer max =
374  to_integer_bitvector_type(envp_size_symbol.type).largest();
375 
376  exprt max_minus_one=from_integer(max-1, envp_size_symbol.type);
377 
379  envp_size_symbol.symbol_expr(), ID_le, std::move(max_minus_one));
380 
381  init_code.add(code_assumet(le));
382  }
383 
384  {
385  /* zero_string doesn't work yet */
386 
387  /*
388  exprt zero_string(ID_zero_string, array_typet());
389  zero_string.type().subtype()=char_type();
390  zero_string.type().set(ID_size, "infinity");
391  const index_exprt index(zero_string, from_integer(0, uint_type()));
392  exprt address_of =
393  typecast_exprt::conditional_cast(
394  address_of_exprt(index, pointer_type(char_type())),
395  argv_symbol.type.subtype());
396 
397  // assign argv[*] to the address of a string-object
398  array_of_exprt array_of(address_of, argv_symbol.type);
399 
400  init_code.copy_to_operands(
401  code_assignt(argv_symbol.symbol_expr(), array_of));
402  */
403  }
404 
405  {
406  // assign argv[argc] to NULL
407  const null_pointer_exprt null(
409 
410  index_exprt index_expr(
411  argv_symbol.symbol_expr(), argc_symbol.symbol_expr());
412 
413  // disable bounds check on that one
414  index_expr.set(ID_C_bounds_check, false);
415 
416  init_code.add(code_frontend_assignt(index_expr, null));
417  }
418 
419  if(parameters.size()==3)
420  {
421  const symbolt &envp_symbol=ns.lookup("envp'");
422  const symbolt &envp_size_symbol=ns.lookup("envp_size'");
423 
424  // assume envp[envp_size] is NULL
425  null_pointer_exprt null(
427 
428  index_exprt index_expr(
429  envp_symbol.symbol_expr(), envp_size_symbol.symbol_expr());
430  // disable bounds check on that one
431  index_expr.set(ID_C_bounds_check, false);
432 
433  equal_exprt is_null(std::move(index_expr), std::move(null));
434 
435  init_code.add(code_assumet(is_null));
436  }
437 
438  {
439  exprt::operandst &operands=call_main.arguments();
440 
441  if(parameters.size()==3)
442  operands.resize(3);
443  else
444  operands.resize(2);
445 
446  exprt &op0=operands[0];
447  exprt &op1=operands[1];
448 
450  argc_symbol.symbol_expr(), parameters[0].type());
451 
452  {
453  index_exprt index_expr(
454  argv_symbol.symbol_expr(), from_integer(0, c_index_type()));
455 
456  // disable bounds check on that one
457  index_expr.set(ID_C_bounds_check, false);
458 
459  const pointer_typet &pointer_type =
460  to_pointer_type(parameters[1].type());
461 
463  address_of_exprt(index_expr), pointer_type);
464  }
465 
466  // do we need envp?
467  if(parameters.size()==3)
468  {
469  const symbolt &envp_symbol=ns.lookup("envp'");
470 
471  index_exprt index_expr(
472  envp_symbol.symbol_expr(), from_integer(0, c_index_type()));
473 
474  const pointer_typet &pointer_type =
475  to_pointer_type(parameters[2].type());
476 
477  operands[2] = typecast_exprt::conditional_cast(
478  address_of_exprt(index_expr), pointer_type);
479  }
480  }
481  }
482  else
483  {
484  const namespacet ns{symbol_table};
485  const std::string main_signature = type2c(symbol.type, ns);
486  messaget message(message_handler);
487  message.error().source_location = symbol.location;
488  message.error() << "'main' with signature '" << main_signature
489  << "' found,"
490  << " but expecting one of:\n"
491  << " int main(void)\n"
492  << " int main(int argc, char *argv[])\n"
493  << " int main(int argc, char *argv[], char *envp[])\n"
494  << "If this is a non-standard main entry point please "
495  "provide a custom\n"
496  << "entry function and use --function instead"
497  << messaget::eom;
498  return true;
499  }
500  }
501  else
502  {
503  // produce nondet arguments
505  parameters, init_code, symbol_table, object_factory_parameters);
506  }
507 
508  init_code.add(std::move(call_main));
509 
510  // TODO: add read/modified (recursively in call graph) globals as INPUT/OUTPUT
511 
512  record_function_outputs(symbol, init_code, symbol_table);
513 
514  // now call destructor functions (a GCC extension)
515 
516  for(const auto &symbol_table_entry : symbol_table.symbols)
517  {
518  const symbolt &symbol = symbol_table_entry.second;
519 
520  if(symbol.type.id() != ID_code)
521  continue;
522 
523  const code_typet &code_type = to_code_type(symbol.type);
524  if(
525  code_type.return_type().id() == ID_destructor &&
526  code_type.parameters().empty())
527  {
528  code_function_callt destructor_call(symbol.symbol_expr());
529  destructor_call.add_source_location() = symbol.location;
530  init_code.add(std::move(destructor_call));
531  }
532  }
533 
534  // add the entry point symbol
535  symbolt new_symbol;
536 
537  new_symbol.name=goto_functionst::entry_point();
538  new_symbol.type = code_typet({}, void_type());
539  new_symbol.value.swap(init_code);
540  new_symbol.mode=symbol.mode;
541 
542  if(!symbol_table.insert(std::move(new_symbol)).second)
543  {
544  messaget message(message_handler);
545  message.error() << "failed to insert main symbol" << messaget::eom;
546  return true;
547  }
548 
549  return false;
550 }
@ AUTOMATIC_LOCAL
Allocate local objects with automatic lifetime.
exprt::operandst build_function_environment(const code_typet::parameterst &parameters, code_blockt &init_code, symbol_tablet &symbol_table, const c_object_factory_parameterst &object_factory_parameters)
bool generate_ansi_c_start_function(const symbolt &symbol, symbol_tablet &symbol_table, message_handlert &message_handler, const c_object_factory_parameterst &object_factory_parameters)
Generate a _start function for a specific function.
void record_function_outputs(const symbolt &function, code_blockt &init_code, symbol_tablet &symbol_table)
bool ansi_c_entry_point(symbol_tablet &symbol_table, message_handlert &message_handler, const c_object_factory_parameterst &object_factory_parameters)
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
const integer_bitvector_typet & to_integer_bitvector_type(const typet &type)
Cast a typet to an integer_bitvector_typet.
symbol_exprt c_nondet_symbol_factory(code_blockt &init_code, symbol_tablet &symbol_table, const irep_idt base_name, const typet &type, const source_locationt &loc, const c_object_factory_parameterst &object_factory_parameters, const lifetimet lifetime)
Creates a symbol and generates code so that it can vary over all possible values for its type.
C Nondet Symbol Factory.
bitvector_typet index_type()
Definition: c_types.cpp:22
unsignedbv_typet size_type()
Definition: c_types.cpp:68
empty_typet void_type()
Definition: c_types.cpp:263
signedbv_typet signed_int_type()
Definition: c_types.cpp:40
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:253
bitvector_typet char_type()
Definition: c_types.cpp:124
bitvector_typet c_index_type()
Definition: c_types.cpp:16
Operator to return the address of an object.
Definition: pointer_expr.h:361
Arrays with given size.
Definition: std_types.h:763
const typet & element_type() const
The type of the elements of the array.
Definition: std_types.h:777
Internally generated symbol table entryThis is a symbol generated as part of translation to or modifi...
Definition: symbol.h:147
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:674
An assumption, which must hold in subsequent code.
Definition: std_code.h:217
A codet representing sequential composition of program statements.
Definition: std_code.h:130
void add(const codet &code)
Definition: std_code.h:168
A codet representing an assignment in the program.
Definition: std_code.h:24
codet representation of a function call statement.
A codet representing the declaration that an input of a particular description has a value which corr...
codet representation of a label for branch targets.
Definition: std_code.h:959
A codet representing the declaration that an output of a particular description has a value which cor...
A codet representing a skip statement.
Definition: std_code.h:322
const irep_idt & get_base_name() const
Definition: std_types.h:595
Base type of functions.
Definition: std_types.h:539
std::vector< parametert > parameterst
Definition: std_types.h:541
const typet & return_type() const
Definition: std_types.h:645
const parameterst & parameters() const
Definition: std_types.h:655
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
exprt & op1()
Definition: expr.h:102
exprt & op0()
Definition: expr.h:99
optionalt< std::string > main
Definition: config.h:326
struct configt::ansi_ct ansi_c
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
bool empty() const
Definition: dstring.h:88
Equality.
Definition: std_expr.h:1225
Base class for all expressions.
Definition: expr.h:54
std::vector< exprt > operandst
Definition: expr.h:56
source_locationt & add_source_location()
Definition: expr.h:235
const source_locationt & source_location() const
Definition: expr.h:230
typet & type()
Return the type of the expression.
Definition: expr.h:82
operandst & operands()
Definition: expr.h:92
static irep_idt entry_point()
Get the identifier of the entry point to a goto model.
Array index operator.
Definition: std_expr.h:1328
mp_integer largest() const
Return the largest value that can be represented using this type.
void set(const irep_idt &name, const irep_idt &value)
Definition: irep.h:420
const irep_idt & id() const
Definition: irep.h:396
void swap(irept &irep)
Definition: irep.h:442
bool is_nil() const
Definition: irep.h:376
source_locationt source_location
Definition: message.h:247
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:155
mstreamt & error() const
Definition: message.h:399
static eomt eom
Definition: message.h:297
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:91
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:138
The null pointer constant.
Definition: pointer_expr.h:723
The plus expression Associativity is not specified.
Definition: std_expr.h:914
The pointer type These are both 'bitvector_typet' (they have a width) and 'type_with_subtypet' (they ...
Definition: pointer_expr.h:24
const symbol_base_mapt & symbol_base_map
Read-only field, used to look up symbol names given their base names.
const symbolt * lookup(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
The symbol table.
Definition: symbol_table.h:14
virtual std::pair< symbolt &, bool > insert(symbolt symbol) override
Author: Diffblue Ltd.
Symbol table entry.
Definition: symbol.h:28
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:46
bool is_static_lifetime
Definition: symbol.h:65
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
typet type
Type of symbol.
Definition: symbol.h:31
irep_idt name
The unique identifier.
Definition: symbol.h:40
bool is_lvalue
Definition: symbol.h:66
exprt value
Initial value of symbol.
Definition: symbol.h:34
irep_idt mode
Language mode.
Definition: symbol.h:49
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:1928
configt config
Definition: config.cpp:25
#define CPROVER_PREFIX
std::string type2c(const typet &type, const namespacet &ns, const expr2c_configurationt &configuration)
Definition: expr2c.cpp:4027
Goto Programs with Functions.
const std::string & id2string(const irep_idt &d)
Definition: irep.h:47
static int8_t r
Definition: irep_hash.h:60
API to expression classes for Pointers.
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:79
Ranges: pair of begin and end iterators, which can be initialized from containers,...
ranget< typename multimapt::const_iterator > equal_range(const multimapt &multimap, const typename multimapt::key_type &key)
Utility function to make equal_range method of multimap easier to use by returning a ranget object.
Definition: range.h:541
BigInt mp_integer
Definition: smt_terms.h:12
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
static optionalt< codet > static_lifetime_init(const irep_idt &identifier, symbol_tablet &symbol_table)
#define INITIALIZE_FUNCTION
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:744
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:832
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
static exprt is_null(const array_string_exprt &string, array_poolt &array_pool)
Expression which is true when the string is equal to the literal "null".
optionalt< mp_integer > max_argc
Maximum value of argc, which is operating-systems dependent: Windows limits the number of characters ...
Definition: config.h:265
Author: Diffblue Ltd.