cprover
remove_instanceof.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Remove Instance-of Operators
4 
5 Author: Chris Smowton, chris.smowton@diffblue.com
6 
7 \*******************************************************************/
8 
11 
12 #include "remove_instanceof.h"
13 #include "java_utils.h"
14 
18 
20 
21 #include <util/arith_tools.h>
22 
23 #include <sstream>
24 
26 {
27 public:
36  {
37  }
38 
39  // Lower instanceof for a single function
40  bool lower_instanceof(const irep_idt &function_identifier, goto_programt &);
41 
42  // Lower instanceof for a single instruction
43  bool lower_instanceof(
44  const irep_idt &function_identifier,
45  goto_programt &,
47 
48 protected:
53 
54  bool lower_instanceof(
55  const irep_idt &function_identifier,
56  exprt &,
57  goto_programt &,
59 };
60 
71  const exprt &classid_field,
72  const irep_idt &target_type,
73  const class_hierarchyt &class_hierarchy)
74 {
75  std::vector<irep_idt> children =
76  class_hierarchy.get_children_trans(target_type);
77  children.push_back(target_type);
78  // Sort alphabetically to make order of generated disjuncts
79  // independent of class loading order
80  std::sort(
81  children.begin(), children.end(), [](const irep_idt &a, const irep_idt &b) {
82  return a.compare(b) < 0;
83  });
84 
85  exprt::operandst or_ops;
86  for(const auto &class_name : children)
87  {
88  constant_exprt class_expr(class_name, string_typet());
89  equal_exprt test(classid_field, class_expr);
90  or_ops.push_back(test);
91  }
92 
93  return disjunction(or_ops);
94 }
95 
105  const irep_idt &function_identifier,
106  exprt &expr,
107  goto_programt &goto_program,
108  goto_programt::targett this_inst)
109 {
110  if(expr.id()!=ID_java_instanceof)
111  {
112  bool changed = false;
113  Forall_operands(it, expr)
114  changed |=
115  lower_instanceof(function_identifier, *it, goto_program, this_inst);
116  return changed;
117  }
118 
119  INVARIANT(
120  expr.operands().size()==2,
121  "java_instanceof should have two operands");
122 
123  const exprt &check_ptr = to_binary_expr(expr).op0();
124  INVARIANT(
125  check_ptr.type().id()==ID_pointer,
126  "instanceof first operand should be a pointer");
127 
128  const exprt &target_arg = to_binary_expr(expr).op1();
129  INVARIANT(
130  target_arg.id()==ID_type,
131  "instanceof second operand should be a type");
132 
133  INVARIANT(
134  target_arg.type().id() == ID_struct_tag,
135  "instanceof second operand should have a simple type");
136 
137  const auto &target_type = to_struct_tag_type(target_arg.type());
138 
139  const auto underlying_type_and_dimension =
141 
142  bool target_type_is_reference_array =
143  underlying_type_and_dimension.second >= 1 &&
144  can_cast_type<java_reference_typet>(underlying_type_and_dimension.first);
145 
146  // If we're casting to a reference array type, check
147  // @class_identifier == "java::array[reference]" &&
148  // @array_dimension == target_array_dimension &&
149  // @array_element_type (subtype of) target_array_element_type
150  // Otherwise just check
151  // @class_identifier (subtype of) target_type
152 
153  // Exception: when the target type is Object, nothing needs checking; when
154  // it is Object[], Object[][] etc, then we check that
155  // @array_dimension >= target_array_dimension (because
156  // String[][] (subtype of) Object[] for example) and don't check the element
157  // type.
158 
159  // All tests are made directly against a pointer to the object whose type is
160  // queried. By operating directly on a pointer rather than using a temporary
161  // (x->@clsid == "A" rather than id = x->@clsid; id == "A") we enable symex's
162  // value-set filtering to discard no-longer-viable candidates for "x" (in the
163  // case where 'x' is a symbol, not a general expression like x->y->@clsid).
164  // This means there are more clean dereference operations (ones where there
165  // is no possibility of reading a null, invalid or incorrectly-typed object),
166  // and less pessimistic aliasing assumptions possibly causing goto-symex to
167  // explore in-fact-unreachable paths.
168 
169  // In all cases require the input pointer is not null for any cast to succeed:
170 
171  std::vector<exprt> test_conjuncts;
172  test_conjuncts.push_back(notequal_exprt(
173  check_ptr, null_pointer_exprt(to_pointer_type(check_ptr.type()))));
174 
175  auto jlo = to_struct_tag_type(java_lang_object_type().subtype());
176 
177  exprt object_class_identifier_field =
178  get_class_identifier_field(check_ptr, jlo, ns);
179 
180  if(target_type_is_reference_array)
181  {
182  const auto &underlying_type =
183  to_struct_tag_type(underlying_type_and_dimension.first.subtype());
184 
185  test_conjuncts.push_back(equal_exprt(
186  object_class_identifier_field,
188 
189  exprt object_array_dimension = get_array_dimension_field(check_ptr);
190  constant_exprt target_array_dimension = from_integer(
191  underlying_type_and_dimension.second, object_array_dimension.type());
192 
193  if(underlying_type == jlo)
194  {
195  test_conjuncts.push_back(binary_relation_exprt(
196  object_array_dimension, ID_ge, target_array_dimension));
197  }
198  else
199  {
200  test_conjuncts.push_back(
201  equal_exprt(object_array_dimension, target_array_dimension));
202  test_conjuncts.push_back(subtype_expr(
203  get_array_element_type_field(check_ptr),
204  underlying_type.get_identifier(),
205  class_hierarchy));
206  }
207  }
208  else if(target_type != jlo)
209  {
210  test_conjuncts.push_back(subtype_expr(
211  get_class_identifier_field(check_ptr, jlo, ns),
212  target_type.get_identifier(),
213  class_hierarchy));
214  }
215 
216  expr = conjunction(test_conjuncts);
217 
218  return true;
219 }
220 
221 static bool contains_instanceof(const exprt &e)
222 {
223  if(e.id() == ID_java_instanceof)
224  return true;
225 
226  for(const exprt &subexpr : e.operands())
227  {
228  if(contains_instanceof(subexpr))
229  return true;
230  }
231 
232  return false;
233 }
234 
243  const irep_idt &function_identifier,
244  goto_programt &goto_program,
245  goto_programt::targett target)
246 {
247  if(target->is_target() &&
248  (contains_instanceof(target->code) || contains_instanceof(target->guard)))
249  {
250  // If this is a branch target, add a skip beforehand so we can splice new
251  // GOTO programs before the target instruction without inserting into the
252  // wrong basic block.
253  goto_program.insert_before_swap(target);
254  target->turn_into_skip();
255  // Actually alter the now-moved instruction:
256  ++target;
257  }
258 
259  return lower_instanceof(
260  function_identifier, target->code, goto_program, target) |
262  function_identifier, target->guard, goto_program, target);
263 }
264 
272  const irep_idt &function_identifier,
273  goto_programt &goto_program)
274 {
275  bool changed=false;
276  for(goto_programt::instructionst::iterator target=
277  goto_program.instructions.begin();
278  target!=goto_program.instructions.end();
279  ++target)
280  {
281  changed =
282  lower_instanceof(function_identifier, goto_program, target) || changed;
283  }
284  if(!changed)
285  return false;
286  goto_program.update();
287  return true;
288 }
289 
300  const irep_idt &function_identifier,
301  goto_programt::targett target,
302  goto_programt &goto_program,
303  symbol_table_baset &symbol_table,
304  const class_hierarchyt &class_hierarchy,
305  message_handlert &message_handler)
306 {
307  remove_instanceoft rem(symbol_table, class_hierarchy, message_handler);
308  rem.lower_instanceof(function_identifier, goto_program, target);
309 }
310 
320  const irep_idt &function_identifier,
322  symbol_table_baset &symbol_table,
323  const class_hierarchyt &class_hierarchy,
324  message_handlert &message_handler)
325 {
326  remove_instanceoft rem(symbol_table, class_hierarchy, message_handler);
327  rem.lower_instanceof(function_identifier, function.body);
328 }
329 
338  goto_functionst &goto_functions,
339  symbol_table_baset &symbol_table,
340  const class_hierarchyt &class_hierarchy,
341  message_handlert &message_handler)
342 {
343  remove_instanceoft rem(symbol_table, class_hierarchy, message_handler);
344  bool changed=false;
345  for(auto &f : goto_functions.function_map)
346  changed = rem.lower_instanceof(f.first, f.second.body) || changed;
347  if(changed)
348  goto_functions.compute_location_numbers();
349 }
350 
360  goto_modelt &goto_model,
361  const class_hierarchyt &class_hierarchy,
362  message_handlert &message_handler)
363 {
365  goto_model.goto_functions,
366  goto_model.symbol_table,
367  class_hierarchy,
368  message_handler);
369 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
Class Hierarchy.
exprt get_class_identifier_field(const exprt &this_expr_in, const struct_tag_typet &suggested_type, const namespacet &ns)
Extract class identifier.
exprt & op1()
Definition: expr.h:106
exprt & op0()
Definition: expr.h:103
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:675
Non-graph-based representation of the class hierarchy.
idst get_children_trans(const irep_idt &id) const
A constant literal expression.
Definition: std_expr.h:2668
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
Equality.
Definition: std_expr.h:1140
Base class for all expressions.
Definition: expr.h:54
std::vector< exprt > operandst
Definition: expr.h:56
typet & type()
Return the type of the expression.
Definition: expr.h:82
operandst & operands()
Definition: expr.h:96
A collection of goto functions.
function_mapt function_map
::goto_functiont goto_functiont
void compute_location_numbers()
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:30
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:33
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:74
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:556
void update()
Update all indices.
void insert_before_swap(targett target)
Insertion that preserves jumps to "target".
Definition: goto_program.h:577
instructionst::iterator targett
Definition: goto_program.h:550
const irep_idt & id() const
Definition: irep.h:407
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:92
Disequality.
Definition: std_expr.h:1198
The null pointer constant.
Definition: std_expr.h:2751
bool lower_instanceof(const irep_idt &function_identifier, goto_programt &)
Replace every instanceof in the passed function body with an explicit class-identifier test.
message_handlert & message_handler
remove_instanceoft(symbol_table_baset &symbol_table, const class_hierarchyt &class_hierarchy, message_handlert &message_handler)
const class_hierarchyt & class_hierarchy
symbol_table_baset & symbol_table
String type.
Definition: std_types.h:1669
The symbol table base class interface.
#define Forall_operands(it, expr)
Definition: expr.h:25
Program Transformation.
std::pair< typet, std::size_t > java_array_dimension_and_element_type(const struct_tag_typet &type)
Returns the underlying element type and array dimensionality of Java struct type.
Definition: java_types.cpp:188
exprt get_array_element_type_field(const exprt &pointer)
Definition: java_types.cpp:217
exprt get_array_dimension_field(const exprt &pointer)
Definition: java_types.cpp:205
java_reference_typet java_lang_object_type()
Definition: java_types.cpp:99
#define JAVA_REFERENCE_ARRAY_CLASSID
Definition: java_types.h:658
bool can_cast_type< java_reference_typet >(const typet &type)
Definition: java_types.h:626
static exprt subtype_expr(const exprt &classid_field, const irep_idt &target_type, const class_hierarchyt &class_hierarchy)
Produce an expression of the form classid_field == "A" || classid_field == "B" || ....
static bool contains_instanceof(const exprt &e)
void remove_instanceof(const irep_idt &function_identifier, goto_programt::targett target, goto_programt &goto_program, symbol_table_baset &symbol_table, const class_hierarchyt &class_hierarchy, message_handlert &message_handler)
Replace an instanceof in the expression or guard of the passed instruction of the given function body...
Remove Instance-of Operators.
exprt conjunction(const exprt::operandst &op)
1) generates a conjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:41
exprt disjunction(const exprt::operandst &op)
1) generates a disjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:29
const binary_exprt & to_binary_expr(const exprt &expr)
Cast an exprt to a binary_exprt.
Definition: std_expr.h:628
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: std_types.h:1533
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:523