cprover
symex_dereference.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Symbolic Execution of ANSI-C
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "goto_symex.h"
13 
14 #include <util/arith_tools.h>
15 #include <util/byte_operators.h>
16 #include <util/c_types.h>
17 #include <util/exception_utils.h>
18 #include <util/expr_iterator.h>
19 #include <util/expr_util.h>
20 #include <util/invariant.h>
22 
24 
26 
41  const exprt &expr,
42  statet &state,
43  bool keep_array)
44 {
45  exprt result;
46 
47  if(expr.id()==ID_byte_extract_little_endian ||
48  expr.id()==ID_byte_extract_big_endian)
49  {
50  // address_of(byte_extract(op, offset, t)) is
51  // address_of(op) + offset with adjustments for arrays
52 
54 
55  // recursive call
56  result = address_arithmetic(be.op(), state, keep_array);
57 
58  if(be.op().type().id() == ID_array && result.id() == ID_address_of)
59  {
61 
62  // turn &a of type T[i][j] into &(a[0][0])
63  for(const typet *t = &(a.type().subtype());
64  t->id() == ID_array && expr.type() != *t;
65  t = &(t->subtype()))
67  }
68 
69  // do (expr.type() *)(((char *)op)+offset)
70  result=typecast_exprt(result, pointer_type(char_type()));
71 
72  // there could be further dereferencing in the offset
73  exprt offset=be.offset();
74  dereference_rec(offset, state, false);
75 
76  result=plus_exprt(result, offset);
77 
78  // treat &array as &array[0]
79  const typet &expr_type = expr.type();
80  typet dest_type_subtype;
81 
82  if(expr_type.id()==ID_array && !keep_array)
83  dest_type_subtype=expr_type.subtype();
84  else
85  dest_type_subtype=expr_type;
86 
87  result=typecast_exprt(result, pointer_type(dest_type_subtype));
88  }
89  else if(expr.id()==ID_index ||
90  expr.id()==ID_member)
91  {
93  ode.build(expr, ns);
94 
95  const byte_extract_exprt be(
96  byte_extract_id(), ode.root_object(), ode.offset(), expr.type());
97 
98  // recursive call
99  result = address_arithmetic(be, state, keep_array);
100 
101  do_simplify(result);
102  }
103  else if(expr.id()==ID_dereference)
104  {
105  // ANSI-C guarantees &*p == p no matter what p is,
106  // even if it's complete garbage
107  // just grab the pointer, but be wary of further dereferencing
108  // in the pointer itself
109  result=to_dereference_expr(expr).pointer();
110  dereference_rec(result, state, false);
111  }
112  else if(expr.id()==ID_if)
113  {
114  if_exprt if_expr=to_if_expr(expr);
115 
116  // the condition is not an address
117  dereference_rec(if_expr.cond(), state, false);
118 
119  // recursive call
120  if_expr.true_case() =
121  address_arithmetic(if_expr.true_case(), state, keep_array);
122  if_expr.false_case() =
123  address_arithmetic(if_expr.false_case(), state, keep_array);
124 
125  result=if_expr;
126  }
127  else if(expr.id()==ID_symbol ||
128  expr.id()==ID_string_constant ||
129  expr.id()==ID_label ||
130  expr.id()==ID_array)
131  {
132  // give up, just dereference
133  result=expr;
134  dereference_rec(result, state, false);
135 
136  // turn &array into &array[0]
137  if(result.type().id() == ID_array && !keep_array)
138  result=index_exprt(result, from_integer(0, index_type()));
139 
140  // handle field-sensitive SSA symbol
141  mp_integer offset=0;
142  if(is_ssa_expr(expr))
143  {
144  auto offset_opt = compute_pointer_offset(expr, ns);
145  PRECONDITION(offset_opt.has_value());
146  offset = *offset_opt;
147  }
148 
149  if(offset>0)
150  {
151  const byte_extract_exprt be(
152  byte_extract_id(),
153  to_ssa_expr(expr).get_l1_object(),
154  from_integer(offset, index_type()),
155  expr.type());
156 
157  result = address_arithmetic(be, state, keep_array);
158 
159  do_simplify(result);
160  }
161  else
162  result=address_of_exprt(result);
163  }
164  else if(expr.id() == ID_typecast)
165  {
166  const typecast_exprt &tc_expr = to_typecast_expr(expr);
167 
168  result = address_arithmetic(tc_expr.op(), state, keep_array);
169 
170  // treat &array as &array[0]
171  const typet &expr_type = expr.type();
172  typet dest_type_subtype;
173 
174  if(expr_type.id() == ID_array && !keep_array)
175  dest_type_subtype = expr_type.subtype();
176  else
177  dest_type_subtype = expr_type;
178 
179  result = typecast_exprt(result, pointer_type(dest_type_subtype));
180  }
181  else
183  "goto_symext::address_arithmetic does not handle " + expr.id_string());
184 
185  const typet &expr_type = expr.type();
186  INVARIANT(
187  (expr_type.id() == ID_array && !keep_array) ||
188  pointer_type(expr_type) == result.type(),
189  "either non-persistent array or pointer to result");
190 
191  return result;
192 }
193 
201 void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
202 {
203  if(expr.id()==ID_dereference)
204  {
205  bool expr_is_not_null = false;
206 
207  if(state.threads.size() == 1)
208  {
209  const irep_idt &expr_function = state.source.function_id;
210  if(!expr_function.empty())
211  {
212  const dereference_exprt to_check =
214 
215  expr_is_not_null = path_storage.safe_pointers.at(expr_function)
216  .is_safe_dereference(to_check, state.source.pc);
217  }
218  }
219 
220  exprt tmp1;
221  tmp1.swap(to_dereference_expr(expr).pointer());
222 
223  // first make sure there are no dereferences in there
224  dereference_rec(tmp1, state, false);
225 
226  // Depending on the nature of the pointer expression, the recursive deref
227  // operation might have introduced a construct such as
228  // (x == &o1 ? o1 : o2).field, in which case we should simplify to push the
229  // member operator inside the if, then apply field-sensitivity to yield
230  // (x == &o1 ? o1..field : o2..field). value_set_dereferencet can then
231  // apply the dereference operation to each of o1..field and o2..field
232  // independently, as it special-cases the ternary conditional operator.
233  // There may also be index operators in tmp1 which can now be resolved to
234  // constant array cell references, so we replace symbols with constants
235  // first, hoping for a transformation such as
236  // (x == &o1 ? o1 : o2)[idx] =>
237  // (x == &o1 ? o1 : o2)[2] =>
238  // (x == &o1 ? o1[[2]] : o2[[2]])
239  // Note we don't L2 rename non-constant symbols at this point, because the
240  // value-set works in terms of L1 names and we don't want to ask it to
241  // dereference an L2 pointer, which it would not have an entry for.
242 
243  tmp1 = state.rename<L1_WITH_CONSTANT_PROPAGATION>(tmp1, ns).get();
244 
245  do_simplify(tmp1);
246 
248  {
249  // make sure simplify has not re-introduced any dereferencing that
250  // had previously been cleaned away
251  INVARIANT(
252  !has_subexpr(tmp1, ID_dereference),
253  "simplify re-introduced dereferencing");
254  }
255 
256  tmp1 = state.field_sensitivity.apply(ns, state, std::move(tmp1), false);
257 
258  // we need to set up some elaborate call-backs
259  symex_dereference_statet symex_dereference_state(state, ns);
260 
262  ns,
263  state.symbol_table,
264  symex_dereference_state,
266  expr_is_not_null,
267  log);
268 
269  // std::cout << "**** " << format(tmp1) << '\n';
270  exprt tmp2 =
271  dereference.dereference(tmp1, symex_config.show_points_to_sets);
272  // std::cout << "**** " << format(tmp2) << '\n';
273 
274  expr.swap(tmp2);
275 
276  // this may yield a new auto-object
277  trigger_auto_object(expr, state);
278  }
279  else if(
280  expr.id() == ID_index && to_index_expr(expr).array().id() == ID_member &&
281  to_array_type(to_index_expr(expr).array().type()).size().is_zero())
282  {
283  // This is an expression of the form x.a[i],
284  // where a is a zero-sized array. This gets
285  // re-written into *(&x.a+i)
286 
287  index_exprt index_expr=to_index_expr(expr);
288 
289  address_of_exprt address_of_expr(index_expr.array());
290  address_of_expr.type()=pointer_type(expr.type());
291 
292  dereference_exprt tmp{plus_exprt{address_of_expr, index_expr.index()}};
294 
295  // recursive call
296  dereference_rec(tmp, state, write);
297 
298  expr.swap(tmp);
299  }
300  else if(expr.id()==ID_index &&
301  to_index_expr(expr).array().type().id()==ID_pointer)
302  {
303  // old stuff, will go away
304  UNREACHABLE;
305  }
306  else if(expr.id()==ID_address_of)
307  {
308  address_of_exprt &address_of_expr=to_address_of_expr(expr);
309 
310  exprt &object=address_of_expr.object();
311 
312  expr = address_arithmetic(
313  object,
314  state,
315  to_pointer_type(expr.type()).subtype().id() == ID_array);
316  }
317  else if(expr.id()==ID_typecast)
318  {
319  exprt &tc_op=to_typecast_expr(expr).op();
320 
321  // turn &array into &array[0] when casting to pointer-to-element-type
322  if(
323  tc_op.id() == ID_address_of &&
324  to_address_of_expr(tc_op).object().type().id() == ID_array &&
325  expr.type() ==
326  pointer_type(to_address_of_expr(tc_op).object().type().subtype()))
327  {
328  expr=
330  index_exprt(
331  to_address_of_expr(tc_op).object(),
332  from_integer(0, index_type())));
333 
334  dereference_rec(expr, state, write);
335  }
336  else
337  {
338  dereference_rec(tc_op, state, write);
339  }
340  }
341  else
342  {
343  Forall_operands(it, expr)
344  dereference_rec(*it, state, write);
345  }
346 }
347 
348 static exprt
349 apply_to_objects_in_dereference(exprt e, const std::function<exprt(exprt)> &f)
350 {
351  if(auto deref = expr_try_dynamic_cast<dereference_exprt>(e))
352  {
353  deref->op() = f(std::move(deref->op()));
354  return *deref;
355  }
356 
357  for(auto &sub : e.operands())
358  sub = apply_to_objects_in_dereference(std::move(sub), f);
359  return e;
360 }
361 
399 void goto_symext::dereference(exprt &expr, statet &state, bool write)
400 {
401  PRECONDITION(!state.call_stack().empty());
402 
403  // Symbols whose address is taken need to be renamed to level 1
404  // in order to distinguish addresses of local variables
405  // from different frames.
406  expr = apply_to_objects_in_dereference(std::move(expr), [&](exprt e) {
407  return state.field_sensitivity.apply(
408  ns, state, state.rename<L1>(std::move(e), ns).get(), false);
409  });
410 
411  // start the recursion!
412  dereference_rec(expr, state, write);
413  // dereferencing may introduce new symbol_exprt
414  // (like __CPROVER_memory)
415  expr = state.rename<L1>(std::move(expr), ns).get();
416 
417  // Dereferencing is likely to introduce new member-of-if constructs --
418  // for example, "x->field" may have become "(x == &o1 ? o1 : o2).field."
419  // Run expression simplification, which converts that to
420  // (x == &o1 ? o1.field : o2.field))
421  // before applying field sensitivity. Field sensitivity can then turn such
422  // field-of-symbol expressions into atomic SSA expressions instead of having
423  // to rewrite all of 'o1' otherwise.
424  // Even without field sensitivity this can be beneficial: for example,
425  // "(b ? s1 : s2).member := X" results in
426  // (b ? s1 : s2) := (b ? s1 : s2) with (member := X)
427  // and then
428  // s1 := b ? ((b ? s1 : s2) with (member := X)) : s1
429  // when all we need is
430  // s1 := s1 with (member := X) [and guard b]
431  // s2 := s2 with (member := X) [and guard !b]
432  do_simplify(expr);
433 
435  {
436  // make sure simplify has not re-introduced any dereferencing that
437  // had previously been cleaned away
438  INVARIANT(
439  !has_subexpr(expr, ID_dereference),
440  "simplify re-introduced dereferencing");
441  }
442 
443  expr = state.field_sensitivity.apply(ns, state, std::move(expr), write);
444 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
Expression classes for byte-level operators.
const byte_extract_exprt & to_byte_extract_expr(const exprt &expr)
bitvector_typet index_type()
Definition: c_types.cpp:16
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
bitvector_typet char_type()
Definition: c_types.cpp:114
Operator to return the address of an object.
Definition: pointer_expr.h:200
exprt & object()
Definition: pointer_expr.h:209
Expression of type type extracted from some object op starting at position offset (given in number of...
Operator to dereference a pointer.
Definition: pointer_expr.h:256
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
bool empty() const
Definition: dstring.h:88
Base class for all expressions.
Definition: expr.h:54
source_locationt & add_source_location()
Definition: expr.h:239
const source_locationt & source_location() const
Definition: expr.h:234
typet & type()
Return the type of the expression.
Definition: expr.h:82
operandst & operands()
Definition: expr.h:96
NODISCARD exprt apply(const namespacet &ns, goto_symex_statet &state, exprt expr, bool write) const
Turn an expression expr into a field-sensitive SSA expression.
Central data structure: state.
NODISCARD renamedt< exprt, level > rename(exprt expr, const namespacet &ns)
Rewrites symbol expressions in exprt, applying a suffix to each symbol reflecting its most recent ver...
call_stackt & call_stack()
symbol_tablet symbol_table
contains symbols that are minted during symbolic execution, such as dynamically created objects etc.
field_sensitivityt field_sensitivity
symex_targett::sourcet source
std::vector< threadt > threads
irep_idt language_mode
language_mode: ID_java, ID_C or another language identifier if we know the source language in use,...
Definition: goto_symex.h:239
path_storaget & path_storage
Symbolic execution paths to be resumed later.
Definition: goto_symex.h:797
namespacet ns
Initialized just before symbolic execution begins, to point to both outer_symbol_table and the symbol...
Definition: goto_symex.h:256
void trigger_auto_object(const exprt &, statet &)
exprt address_arithmetic(const exprt &, statet &, bool keep_array)
Transforms an lvalue expression by replacing any dereference operations it contains with explicit ref...
virtual void do_simplify(exprt &expr)
Definition: goto_symex.cpp:33
messaget log
The messaget to write log messages to.
Definition: goto_symex.h:276
const symex_configt symex_config
The configuration to use for this symbolic execution.
Definition: goto_symex.h:183
virtual void dereference(exprt &, statet &, bool write)
Replace all dereference operations within expr with explicit references to the objects they may refer...
void dereference_rec(exprt &, statet &, bool write)
If expr is a dereference_exprt, replace it with explicit references to the objects it may point to.
The trinary if-then-else operator.
Definition: std_expr.h:2087
exprt & true_case()
Definition: std_expr.h:2114
exprt & false_case()
Definition: std_expr.h:2124
exprt & cond()
Definition: std_expr.h:2104
Array index operator.
Definition: std_expr.h:1243
exprt & array()
Definition: std_expr.h:1259
exprt & index()
Definition: std_expr.h:1269
const std::string & id_string() const
Definition: irep.h:410
const irep_idt & id() const
Definition: irep.h:407
void swap(irept &irep)
Definition: irep.h:452
Split an expression into a base object and a (byte) offset.
Definition: pointer_expr.h:21
const exprt & root_object() const
void build(const exprt &expr, const namespacet &ns)
Given an expression expr, attempt to find the underlying object it represents by skipping over type c...
std::unordered_map< irep_idt, local_safe_pointerst > safe_pointers
Map function identifiers to local_safe_pointerst instances.
Definition: path_storage.h:100
The plus expression Associativity is not specified.
Definition: std_expr.h:831
Callback object that goto_symext::dereference_rec provides to value_set_dereferencet to provide value...
Semantic type conversion.
Definition: std_expr.h:1781
The type of an expression, extends irept.
Definition: type.h:28
const typet & subtype() const
Definition: type.h:47
const exprt & op() const
Definition: std_expr.h:294
Thrown when we encounter an instruction, parameters to an instruction etc.
Wrapper for a function dereferencing pointer expressions using a value set.
#define Forall_operands(it, expr)
Definition: expr.h:25
Forward depth-first search iterators These iterators' copy operations are expensive,...
bool has_subexpr(const exprt &expr, const std::function< bool(const exprt &)> &pred)
returns true if the expression has a subexpression that satisfies pred
Definition: expr_util.cpp:140
Deprecated expression utility functions.
Symbolic Execution.
BigInt mp_integer
Definition: mp_arith.h:19
const dereference_exprt & to_dereference_expr(const exprt &expr)
Cast an exprt to a dereference_exprt.
Definition: pointer_expr.h:312
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
Definition: pointer_expr.h:237
optionalt< mp_integer > compute_pointer_offset(const exprt &expr, const namespacet &ns)
Pointer Logic.
@ L1_WITH_CONSTANT_PROPAGATION
Definition: renamed.h:19
@ L1
Definition: renamed.h:18
exprt get_original_name(exprt expr)
Undo all levels of renaming.
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:504
#define PRECONDITION(CONDITION)
Definition: invariant.h:464
const ssa_exprt & to_ssa_expr(const exprt &expr)
Cast a generic exprt to an ssa_exprt.
Definition: ssa_expr.h:145
bool is_ssa_expr(const exprt &expr)
Definition: ssa_expr.h:125
const if_exprt & to_if_expr(const exprt &expr)
Cast an exprt to an if_exprt.
Definition: std_expr.h:2152
const typecast_exprt & to_typecast_expr(const exprt &expr)
Cast an exprt to a typecast_exprt.
Definition: std_expr.h:1815
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1297
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:1018
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: std_types.h:1533
bool show_points_to_sets
Definition: symex_config.h:49
bool run_validation_checks
Should the additional validation checks be run? If this flag is set the checks for renaming (both lev...
Definition: symex_config.h:44
goto_programt::const_targett pc
Definition: symex_target.h:43
static exprt apply_to_objects_in_dereference(exprt e, const std::function< exprt(exprt)> &f)
Symbolic Execution of ANSI-C.
irep_idt byte_extract_id()
Pointer Dereferencing.