cprover
string_constraint_generator_code_points.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints for Java functions dealing with
4  code points
5 
6 Author: Romain Brenguier, romain.brenguier@diffblue.com
7 
8 \*******************************************************************/
9 
12 
14 
20 std::pair<exprt, string_constraintst>
22  const array_string_exprt &res,
23  const exprt &code_point)
24 {
25  string_constraintst constraints;
26  const typet &char_type = res.content().type().subtype();
27  const typet &type = code_point.type();
28  PRECONDITION(type.id() == ID_signedbv);
29 
30  // We add axioms:
31  // a1 : code_point<0x010000 => |res|=1
32  // a2 : code_point>=0x010000 => |res|=2
33  // a3 : code_point<0x010000 => res[0]=code_point
34  // a4 : code_point>=0x010000 => res[0]=0xD800+(code_point-0x10000)/0x0400
35  // a5 : code_point>=0x010000 => res[1]=0xDC00+(code_point-0x10000)/0x0400
36  // For more explenations about this conversion, see:
37  // https://en.wikipedia.org/wiki/UTF-16
38 
39  exprt hex010000 = from_integer(0x010000, type);
40  exprt hexD800 = from_integer(0xD800, type);
41  exprt hexDC00 = from_integer(0xDC00, type);
42  exprt hex0400 = from_integer(0x0400, type);
43 
44  binary_relation_exprt small(code_point, ID_lt, hex010000);
46  constraints.existential.push_back(a1);
47 
48  implies_exprt a2(
50  constraints.existential.push_back(a2);
51 
52  typecast_exprt code_point_as_char(code_point, char_type);
53  implies_exprt a3(small, equal_exprt(res[0], code_point_as_char));
54  constraints.existential.push_back(a3);
55 
56  plus_exprt first_char(
57  hexD800, div_exprt(minus_exprt(code_point, hex010000), hex0400));
58  implies_exprt a4(
59  not_exprt(small),
60  equal_exprt(res[0], typecast_exprt(first_char, char_type)));
61  constraints.existential.push_back(a4);
62 
63  plus_exprt second_char(hexDC00, mod_exprt(code_point, hex0400));
64  implies_exprt a5(
65  not_exprt(small),
66  equal_exprt(res[1], typecast_exprt(second_char, char_type)));
67  constraints.existential.push_back(a5);
68 
69  return {from_integer(0, get_return_code_type()), constraints};
70 }
71 
78 static exprt is_high_surrogate(const exprt &chr)
79 {
80  return and_exprt(
81  binary_relation_exprt(chr, ID_ge, from_integer(0xD800, chr.type())),
82  binary_relation_exprt(chr, ID_le, from_integer(0xDBFF, chr.type())));
83 }
84 
91 static exprt is_low_surrogate(const exprt &chr)
92 {
93  return and_exprt(
94  binary_relation_exprt(chr, ID_ge, from_integer(0xDC00, chr.type())),
95  binary_relation_exprt(chr, ID_le, from_integer(0xDFFF, chr.type())));
96 }
97 
107 exprt pair_value(exprt char1, exprt char2, typet return_type)
108 {
109  exprt hex010000 = from_integer(0x010000, return_type);
110  exprt hex0800 = from_integer(0x0800, return_type);
111  exprt hex0400 = from_integer(0x0400, return_type);
112  mult_exprt m1(mod_exprt(char1, hex0800), hex0400);
113  mod_exprt m2(char2, hex0400);
114  return plus_exprt(hex010000, plus_exprt(m1, m2));
115 }
116 
121 std::pair<exprt, string_constraintst>
124 {
125  string_constraintst constraints;
126  const typet &return_type = f.type();
127  PRECONDITION(return_type.id() == ID_signedbv);
128  PRECONDITION(f.arguments().size() == 2);
130  const exprt &pos = f.arguments()[1];
131 
132  const symbol_exprt result = fresh_symbol("char", return_type);
133  const exprt index1 = from_integer(1, str.length_type());
134  const exprt &char1 = str[pos];
135  const exprt &char2 = str[plus_exprt(pos, index1)];
136  const typecast_exprt char1_as_int(char1, return_type);
137  const typecast_exprt char2_as_int(char2, return_type);
138  const exprt pair = pair_value(char1_as_int, char2_as_int, return_type);
139  const exprt is_low = is_low_surrogate(str[plus_exprt(pos, index1)]);
140  const and_exprt return_pair(is_high_surrogate(str[pos]), is_low);
141 
142  constraints.existential.push_back(
143  implies_exprt(return_pair, equal_exprt(result, pair)));
144  constraints.existential.push_back(
145  implies_exprt(not_exprt(return_pair), equal_exprt(result, char1_as_int)));
146  return {result, constraints};
147 }
148 
153 std::pair<exprt, string_constraintst>
156 {
158  PRECONDITION(args.size() == 2);
159  typet return_type = f.type();
160  PRECONDITION(return_type.id() == ID_signedbv);
161  symbol_exprt result = fresh_symbol("char", return_type);
163  string_constraintst constraints;
164 
165  const exprt &char1 = str[minus_exprt(
166  args[1], from_integer(2, array_pool.get_or_create_length(str).type()))];
167  const exprt &char2 = str[minus_exprt(
168  args[1], from_integer(1, array_pool.get_or_create_length(str).type()))];
169  const typecast_exprt char1_as_int(char1, return_type);
170  const typecast_exprt char2_as_int(char2, return_type);
171 
172  const exprt pair = pair_value(char1_as_int, char2_as_int, return_type);
173  const and_exprt return_pair(
174  is_high_surrogate(char1), is_low_surrogate(char2));
175 
176  constraints.existential.push_back(
177  implies_exprt(return_pair, equal_exprt(result, pair)));
178  constraints.existential.push_back(
179  implies_exprt(not_exprt(return_pair), equal_exprt(result, char2_as_int)));
180  return {result, constraints};
181 }
182 
188 std::pair<exprt, string_constraintst>
191 {
192  PRECONDITION(f.arguments().size() == 3);
193  string_constraintst constraints;
194  const exprt &begin = f.arguments()[1];
195  const exprt &end = f.arguments()[2];
196  const typet &return_type = f.type();
197  const symbol_exprt result = fresh_symbol("code_point_count", return_type);
198  const minus_exprt length(end, begin);
199  const div_exprt minimum(length, from_integer(2, length.type()));
200  constraints.existential.push_back(
201  binary_relation_exprt(result, ID_le, length));
202  constraints.existential.push_back(
203  binary_relation_exprt(result, ID_ge, minimum));
204 
205  return {result, constraints};
206 }
207 
214 std::pair<exprt, string_constraintst>
217 {
218  PRECONDITION(f.arguments().size() == 3);
219  string_constraintst constraints;
220  const exprt &index = f.arguments()[1];
221  const exprt &offset = f.arguments()[2];
222  const typet &return_type = f.type();
223  const symbol_exprt result = fresh_symbol("offset_by_code_point", return_type);
224 
225  const exprt minimum = plus_exprt(index, offset);
226  const exprt maximum = plus_exprt(minimum, offset);
227  constraints.existential.push_back(
228  binary_relation_exprt(result, ID_le, maximum));
229  constraints.existential.push_back(
230  binary_relation_exprt(result, ID_ge, minimum));
231 
232  return {result, constraints};
233 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
array_string_exprt get_string_expr(array_poolt &array_pool, const exprt &expr)
Fetch the string_exprt corresponding to the given refined_string_exprt.
Definition: array_pool.cpp:198
bitvector_typet char_type()
Definition: c_types.cpp:114
Boolean AND.
Definition: std_expr.h:1835
exprt get_or_create_length(const array_string_exprt &s)
Get the length of an array_string_exprt from the array_pool.
Definition: array_pool.cpp:26
const typet & length_type() const
Definition: string_expr.h:69
exprt & content()
Definition: string_expr.h:74
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:675
Division.
Definition: std_expr.h:981
Equality.
Definition: std_expr.h:1140
Base class for all expressions.
Definition: expr.h:54
typet & type()
Return the type of the expression.
Definition: expr.h:82
Application of (mathematical) function.
Boolean implication.
Definition: std_expr.h:1898
const irep_idt & id() const
Definition: irep.h:407
Binary minus.
Definition: std_expr.h:890
Modulo.
Definition: std_expr.h:1050
Binary multiplication Associativity is not specified.
Definition: std_expr.h:936
Boolean negation.
Definition: std_expr.h:2042
The plus expression Associativity is not specified.
Definition: std_expr.h:831
std::pair< exprt, string_constraintst > add_axioms_for_offset_by_code_point(const function_application_exprt &f)
Add axioms corresponding the String.offsetByCodePointCount java function.
std::pair< exprt, string_constraintst > add_axioms_for_code_point(const array_string_exprt &res, const exprt &code_point)
add axioms for the conversion of an integer representing a java code point to a utf-16 string
std::pair< exprt, string_constraintst > add_axioms_for_code_point_count(const function_application_exprt &f)
Add axioms corresponding the String.codePointCount java function.
std::pair< exprt, string_constraintst > add_axioms_for_code_point_at(const function_application_exprt &f)
add axioms corresponding to the String.codePointAt java function
std::pair< exprt, string_constraintst > add_axioms_for_code_point_before(const function_application_exprt &f)
add axioms corresponding to the String.codePointBefore java function
Expression to hold a symbol (variable)
Definition: std_expr.h:81
Semantic type conversion.
Definition: std_expr.h:1781
The type of an expression, extends irept.
Definition: type.h:28
const typet & subtype() const
Definition: type.h:47
literalt pos(literalt a)
Definition: literal.h:194
#define PRECONDITION(CONDITION)
Definition: invariant.h:464
Generates string constraints to link results from string functions with their arguments.
exprt maximum(const exprt &a, const exprt &b)
exprt minimum(const exprt &a, const exprt &b)
signedbv_typet get_return_code_type()
static exprt is_low_surrogate(const exprt &chr)
the output is true when the character is a low surrogate for UTF-16 encoding, see https://en....
static exprt is_high_surrogate(const exprt &chr)
the output is true when the character is a high surrogate for UTF-16 encoding, see https://en....
exprt pair_value(exprt char1, exprt char2, typet return_type)
the output corresponds to the unicode character given by the pair of characters of inputs assuming it...
equal_exprt equal_to(exprt lhs, exprt rhs)
Definition: string_expr.h:54
Collection of constraints of different types: existential formulas, universal formulas,...
std::vector< exprt > existential