diff --git a/.travis.yml b/.travis.yml index e8e19bc01ea..2f1c8103990 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,6 +28,7 @@ matrix: packages: - libwww-perl - clang-3.7 + - libstdc++-5-dev - libubsan0 before_install: - mkdir bin ; ln -s /usr/bin/clang-3.7 bin/gcc diff --git a/regression/cbmc-java/lazyloading1/A.class b/regression/cbmc-java/lazyloading1/A.class new file mode 100644 index 00000000000..d6a62ca1ce4 Binary files /dev/null and b/regression/cbmc-java/lazyloading1/A.class differ diff --git a/regression/cbmc-java/lazyloading1/B.class b/regression/cbmc-java/lazyloading1/B.class new file mode 100644 index 00000000000..2d693bf7383 Binary files /dev/null and b/regression/cbmc-java/lazyloading1/B.class differ diff --git a/regression/cbmc-java/lazyloading1/test.class b/regression/cbmc-java/lazyloading1/test.class new file mode 100644 index 00000000000..25d52a223f4 Binary files /dev/null and b/regression/cbmc-java/lazyloading1/test.class differ diff --git a/regression/cbmc-java/lazyloading1/test.desc b/regression/cbmc-java/lazyloading1/test.desc new file mode 100644 index 00000000000..c0011248128 --- /dev/null +++ b/regression/cbmc-java/lazyloading1/test.desc @@ -0,0 +1,8 @@ +CORE +test.class +--lazy-methods --verbosity 10 --function test.main +^EXIT=0$ +^SIGNAL=0$ +elaborate java::A\.f:\(\)V +-- +elaborate java::B\.g:\(\)V diff --git a/regression/cbmc-java/lazyloading1/test.java b/regression/cbmc-java/lazyloading1/test.java new file mode 100644 index 00000000000..e0686d8c4d0 --- /dev/null +++ b/regression/cbmc-java/lazyloading1/test.java @@ -0,0 +1,21 @@ +// The most basic lazy loading test: A::f is directly called, B::g should be unreachable + +public class test +{ + A a; + B b; + public static void main() + { + A.f(); + } +} + +class A +{ + public static void f() {} +} + +class B +{ + public static void g() {} +} diff --git a/regression/cbmc-java/lazyloading2/A.class b/regression/cbmc-java/lazyloading2/A.class new file mode 100644 index 00000000000..6d789cdb6ec Binary files /dev/null and b/regression/cbmc-java/lazyloading2/A.class differ diff --git a/regression/cbmc-java/lazyloading2/B.class b/regression/cbmc-java/lazyloading2/B.class new file mode 100644 index 00000000000..d9cf49c418f Binary files /dev/null and b/regression/cbmc-java/lazyloading2/B.class differ diff --git a/regression/cbmc-java/lazyloading2/test.class b/regression/cbmc-java/lazyloading2/test.class new file mode 100644 index 00000000000..054809ef0f6 Binary files /dev/null and b/regression/cbmc-java/lazyloading2/test.class differ diff --git a/regression/cbmc-java/lazyloading2/test.desc b/regression/cbmc-java/lazyloading2/test.desc new file mode 100644 index 00000000000..c0011248128 --- /dev/null +++ b/regression/cbmc-java/lazyloading2/test.desc @@ -0,0 +1,8 @@ +CORE +test.class +--lazy-methods --verbosity 10 --function test.main +^EXIT=0$ +^SIGNAL=0$ +elaborate java::A\.f:\(\)V +-- +elaborate java::B\.g:\(\)V diff --git a/regression/cbmc-java/lazyloading2/test.java b/regression/cbmc-java/lazyloading2/test.java new file mode 100644 index 00000000000..b53bae71fa4 --- /dev/null +++ b/regression/cbmc-java/lazyloading2/test.java @@ -0,0 +1,23 @@ +// This test checks that because A is instantiated in main and B is not, +// A::f is reachable and B::g is not + +public class test +{ + A a; + B b; + public static void main() + { + A a = new A(); + a.f(); + } +} + +class A +{ + public void f() {} +} + +class B +{ + public void g() {} +} diff --git a/regression/cbmc-java/lazyloading3/A.class b/regression/cbmc-java/lazyloading3/A.class new file mode 100644 index 00000000000..affb565d625 Binary files /dev/null and b/regression/cbmc-java/lazyloading3/A.class differ diff --git a/regression/cbmc-java/lazyloading3/B.class b/regression/cbmc-java/lazyloading3/B.class new file mode 100644 index 00000000000..9a4ab54d369 Binary files /dev/null and b/regression/cbmc-java/lazyloading3/B.class differ diff --git a/regression/cbmc-java/lazyloading3/C.class b/regression/cbmc-java/lazyloading3/C.class new file mode 100644 index 00000000000..c249e24ace4 Binary files /dev/null and b/regression/cbmc-java/lazyloading3/C.class differ diff --git a/regression/cbmc-java/lazyloading3/D.class b/regression/cbmc-java/lazyloading3/D.class new file mode 100644 index 00000000000..7e16bd6527d Binary files /dev/null and b/regression/cbmc-java/lazyloading3/D.class differ diff --git a/regression/cbmc-java/lazyloading3/test.class b/regression/cbmc-java/lazyloading3/test.class new file mode 100644 index 00000000000..8e470f64650 Binary files /dev/null and b/regression/cbmc-java/lazyloading3/test.class differ diff --git a/regression/cbmc-java/lazyloading3/test.desc b/regression/cbmc-java/lazyloading3/test.desc new file mode 100644 index 00000000000..c0011248128 --- /dev/null +++ b/regression/cbmc-java/lazyloading3/test.desc @@ -0,0 +1,8 @@ +CORE +test.class +--lazy-methods --verbosity 10 --function test.main +^EXIT=0$ +^SIGNAL=0$ +elaborate java::A\.f:\(\)V +-- +elaborate java::B\.g:\(\)V diff --git a/regression/cbmc-java/lazyloading3/test.java b/regression/cbmc-java/lazyloading3/test.java new file mode 100644 index 00000000000..6d3129d1261 --- /dev/null +++ b/regression/cbmc-java/lazyloading3/test.java @@ -0,0 +1,30 @@ +// This test checks that because `main` has a parameter of type C, which has a field of type A, +// A::f is considered reachable, but B::g is not. + +public class test +{ + public static void main(C c) + { + c.a.f(); + } +} + +class A +{ + public void f() {} +} + +class B +{ + public void g() {} +} + +class C +{ + A a; +} + +class D +{ + B b; +} diff --git a/regression/failed-tests-printer.pl b/regression/failed-tests-printer.pl index 832ba2d2c64..40767185d5c 100755 --- a/regression/failed-tests-printer.pl +++ b/regression/failed-tests-printer.pl @@ -4,23 +4,25 @@ open LOG,") { chomp; if (/^Test '(.+)'/) { $current_test = $1; - $ignore = 0; - } elsif (1 == $ignore) { - next; + $printed_this_test = 0; } elsif (/\[FAILED\]\s*$/) { - $ignore = 1; - print "Failed test: $current_test\n"; - my $outf = `sed -n '2p' $current_test/test.desc`; - $outf =~ s/\..*$/.out/; - system("cat $current_test/$outf"); - print "\n\n"; + if(0 == $printed_this_test) { + $printed_this_test = 1; + print "\n\n"; + print "Failed test: $current_test\n"; + my $outf = `sed -n '2p' $current_test/test.desc`; + $outf =~ s/\..*$/.out/; + system("cat $current_test/$outf"); + print "\n\nFailed test.desc lines:\n"; + } + print "$_\n"; } } diff --git a/src/Makefile b/src/Makefile index 3a452b7fe09..269ddeda140 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,7 +1,7 @@ DIRS = ansi-c big-int cbmc cpp goto-cc goto-instrument goto-programs \ goto-symex langapi pointer-analysis solvers util linking xmllang \ - assembler analyses java_bytecode aa-path-symex path-symex musketeer \ - json cegis goto-analyzer jsil symex goto-diff aa-symex clobber \ + assembler analyses java_bytecode path-symex musketeer \ + json cegis goto-analyzer jsil symex goto-diff clobber \ memory-models all: cbmc.dir goto-cc.dir goto-instrument.dir symex.dir goto-analyzer.dir goto-diff.dir diff --git a/src/cbmc/cbmc_parse_options.h b/src/cbmc/cbmc_parse_options.h index 87e9beb45d2..adc6e2ee517 100644 --- a/src/cbmc/cbmc_parse_options.h +++ b/src/cbmc/cbmc_parse_options.h @@ -56,6 +56,7 @@ class optionst; "(graphml-witness):" \ "(java-max-vla-length):(java-unwind-enum-static)" \ "(localize-faults)(localize-faults-method):" \ + "(lazy-methods)" \ "(fixedbv)(floatbv)(all-claims)(all-properties)" // legacy, and will eventually disappear // NOLINT(whitespace/line_length) class cbmc_parse_optionst: diff --git a/src/goto-cc/compile.cpp b/src/goto-cc/compile.cpp index f900b9824a7..04d50e76f82 100644 --- a/src/goto-cc/compile.cpp +++ b/src/goto-cc/compile.cpp @@ -544,8 +544,8 @@ bool compilet::parse(const std::string &file_name) language_filet language_file; - std::pair - res=language_files.filemap.insert( + std::pair + res=language_files.file_map.insert( std::pair(file_name, language_file)); language_filet &lf=res.first->second; @@ -736,7 +736,7 @@ bool compilet::parse_source(const std::string &file_name) return true; // so we remove it from the list afterwards - language_files.filemap.erase(file_name); + language_files.file_map.erase(file_name); return false; } diff --git a/src/goto-programs/get_goto_model.cpp b/src/goto-programs/get_goto_model.cpp index 5afef435596..02aae876030 100644 --- a/src/goto-programs/get_goto_model.cpp +++ b/src/goto-programs/get_goto_model.cpp @@ -75,8 +75,8 @@ bool get_goto_modelt::operator()(const std::vector &files) return true; } - std::pair - result=language_files.filemap.insert( + std::pair + result=language_files.file_map.insert( std::pair(filename, language_filet())); language_filet &lf=result.first->second; diff --git a/src/goto-programs/remove_virtual_functions.cpp b/src/goto-programs/remove_virtual_functions.cpp index 4b950593aed..c93b50a2500 100644 --- a/src/goto-programs/remove_virtual_functions.cpp +++ b/src/goto-programs/remove_virtual_functions.cpp @@ -339,7 +339,8 @@ void remove_virtual_functionst::get_functions( component_name, functions); - functions.push_back(root_function); + if(root_function.symbol_expr!=symbol_exprt()) + functions.push_back(root_function); } /*******************************************************************\ diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 8d42611bfe0..f21b0bb1ce2 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -14,6 +14,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_root_class.h" #include "java_types.h" #include "java_bytecode_convert_method.h" +#include "java_bytecode_language.h" #include #include @@ -27,11 +28,17 @@ class java_bytecode_convert_classt:public messaget symbol_tablet &_symbol_table, message_handlert &_message_handler, bool _disable_runtime_checks, - size_t _max_array_length): + size_t _max_array_length, + lazy_methodst& _lazy_methods, + lazy_methods_modet _lazy_methods_mode, + bool _string_refinement_enabled): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), - max_array_length(_max_array_length) + max_array_length(_max_array_length), + lazy_methods(_lazy_methods), + lazy_methods_mode(_lazy_methods_mode), + string_refinement_enabled(_string_refinement_enabled) { } @@ -41,6 +48,9 @@ class java_bytecode_convert_classt:public messaget if(parse_tree.loading_successful) convert(parse_tree.parsed_class); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.String") + add_string_type(); else generate_class_stub(parse_tree.parsed_class.name); } @@ -52,6 +62,9 @@ class java_bytecode_convert_classt:public messaget symbol_tablet &symbol_table; const bool disable_runtime_checks; const size_t max_array_length; + lazy_methodst &lazy_methods; + lazy_methods_modet lazy_methods_mode; + bool string_refinement_enabled; // conversion void convert(const classt &c); @@ -59,6 +72,7 @@ class java_bytecode_convert_classt:public messaget void generate_class_stub(const irep_idt &class_name); void add_array_types(); + void add_string_type(); }; /*******************************************************************\ @@ -75,6 +89,13 @@ Function: java_bytecode_convert_classt::convert void java_bytecode_convert_classt::convert(const classt &c) { + std::string qualified_classname="java::"+id2string(c.name); + if(symbol_table.has_symbol(qualified_classname)) + { + debug() << "Skip class " << c.name << " (already loaded)" << eom; + return; + } + class_typet class_type; class_type.set_tag(c.name); @@ -107,7 +128,7 @@ void java_bytecode_convert_classt::convert(const classt &c) symbolt new_symbol; new_symbol.base_name=c.name; new_symbol.pretty_name=c.name; - new_symbol.name="java::"+id2string(c.name); + new_symbol.name=qualified_classname; class_type.set(ID_name, new_symbol.name); new_symbol.type=class_type; new_symbol.mode=ID_java; @@ -128,13 +149,35 @@ void java_bytecode_convert_classt::convert(const classt &c) // now do methods for(const auto &method : c.methods) - java_bytecode_convert_method( + { + const irep_idt method_identifier= + id2string(qualified_classname)+ + "."+id2string(method.name)+ + ":"+method.signature; + // Always run the lazy pre-stage, as it symbol-table + // registers the function. + java_bytecode_convert_method_lazy( *class_symbol, + method_identifier, method, - symbol_table, - get_message_handler(), - disable_runtime_checks, - max_array_length); + symbol_table); + if(lazy_methods_mode==LAZY_METHODS_MODE_EAGER) + { + // Upgrade to a fully-realized symbol now: + java_bytecode_convert_method( + *class_symbol, + method, + symbol_table, + get_message_handler(), + disable_runtime_checks, + max_array_length); + } + else + { + // Wait for our caller to decide what needs elaborating. + lazy_methods[method_identifier]=std::make_pair(class_symbol, &method); + } + } // is this a root class? if(c.extends.empty()) @@ -322,13 +365,19 @@ bool java_bytecode_convert_class( symbol_tablet &symbol_table, message_handlert &message_handler, bool disable_runtime_checks, - size_t max_array_length) + size_t max_array_length, + lazy_methodst &lazy_methods, + lazy_methods_modet lazy_methods_mode, + bool string_refinement_enabled) { java_bytecode_convert_classt java_bytecode_convert_class( symbol_table, message_handler, disable_runtime_checks, - max_array_length); + max_array_length, + lazy_methods, + lazy_methods_mode, + string_refinement_enabled); try { @@ -352,3 +401,64 @@ bool java_bytecode_convert_class( return true; } + +/*******************************************************************\ + +Function: java_bytecode_convert_classt::add_string_type + + Purpose: Implements the java.lang.String type in the case that + we provide an internal implementation. + +\*******************************************************************/ + +void java_bytecode_convert_classt::add_string_type() +{ + class_typet string_type; + string_type.set_tag("java.lang.String"); + string_type.components().resize(3); + string_type.components()[0].set_name("@java.lang.Object"); + string_type.components()[0].set_pretty_name("@java.lang.Object"); + string_type.components()[0].type()=symbol_typet("java::java.lang.Object"); + string_type.components()[1].set_name("length"); + string_type.components()[1].set_pretty_name("length"); + string_type.components()[1].type()=java_int_type(); + string_type.components()[2].set_name("data"); + string_type.components()[2].set_pretty_name("data"); + // Use a pointer-to-unbounded-array instead of a pointer-to-char. + // Saves some casting in the string refinement algorithm but may + // be unnecessary. + string_type.components()[2].type()=pointer_typet( + array_typet(java_char_type(), infinity_exprt(java_int_type()))); + string_type.add_base(symbol_typet("java::java.lang.Object")); + + symbolt string_symbol; + string_symbol.name="java::java.lang.String"; + string_symbol.base_name="java.lang.String"; + string_symbol.type=string_type; + string_symbol.is_type=true; + + symbol_table.add(string_symbol); + + // Also add a stub of `String.equals` so that remove-virtual-functions + // generates a check for Object.equals vs. String.equals. + // No need to fill it in, as pass_preprocess will replace the calls again. + symbolt string_equals_symbol; + string_equals_symbol.name= + "java::java.lang.String.equals:(Ljava/lang/Object;)Z"; + string_equals_symbol.base_name="java.lang.String.equals"; + string_equals_symbol.pretty_name="java.lang.String.equals"; + string_equals_symbol.mode=ID_java; + + code_typet string_equals_type; + string_equals_type.return_type()=java_boolean_type(); + code_typet::parametert thisparam; + thisparam.set_this(); + thisparam.type()=pointer_typet(symbol_typet(string_symbol.name)); + code_typet::parametert otherparam; + otherparam.type()=pointer_typet(symbol_typet("java::java.lang.Object")); + string_equals_type.parameters().push_back(thisparam); + string_equals_type.parameters().push_back(otherparam); + string_equals_symbol.type=std::move(string_equals_type); + + symbol_table.add(string_equals_symbol); +} diff --git a/src/java_bytecode/java_bytecode_convert_class.h b/src/java_bytecode/java_bytecode_convert_class.h index ffaaf6e34da..0d2be3d8202 100644 --- a/src/java_bytecode/java_bytecode_convert_class.h +++ b/src/java_bytecode/java_bytecode_convert_class.h @@ -13,12 +13,16 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "java_bytecode_parse_tree.h" +#include "java_bytecode_language.h" bool java_bytecode_convert_class( const java_bytecode_parse_treet &parse_tree, symbol_tablet &symbol_table, message_handlert &message_handler, bool disable_runtime_checks, - size_t max_array_length); + size_t max_array_length, + lazy_methodst &, + lazy_methods_modet, + bool string_refinement_enabled); #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_CLASS_H diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 54412747708..f101d19c824 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -183,43 +183,93 @@ const exprt java_bytecode_convert_methodt::variable( /*******************************************************************\ -Function: java_bytecode_convert_methodt::convert +Function: java_bytecode_convert_method_lazy - Inputs: + Inputs: `class_symbol`: class this method belongs to + `method_identifier`: fully qualified method name, including + type signature (e.g. "x.y.z.f:(I)") + `m`: parsed method object to convert + `symbol_table`: global symbol table (will be modified) Outputs: - Purpose: + Purpose: This creates a method symbol in the symtab, but doesn't + actually perform method conversion just yet. The caller + should call java_bytecode_convert_method later to give the + symbol/method a body. \*******************************************************************/ -void java_bytecode_convert_methodt::convert( +void java_bytecode_convert_method_lazy( const symbolt &class_symbol, - const methodt &m) + const irep_idt &method_identifier, + const java_bytecode_parse_treet::methodt &m, + symbol_tablet &symbol_table) { + symbolt method_symbol; typet member_type=java_type_from_string(m.signature); - assert(member_type.id()==ID_code); - - const irep_idt method_identifier= - id2string(class_symbol.name)+"."+id2string(m.name)+":"+m.signature; - method_id=method_identifier; + method_symbol.name=method_identifier; + method_symbol.base_name=m.base_name; + method_symbol.mode=ID_java; + method_symbol.location=m.source_location; + method_symbol.location.set_function(method_identifier); - code_typet &code_type=to_code_type(member_type); - method_return_type=code_type.return_type(); - code_typet::parameterst ¶meters=code_type.parameters(); + if(method_symbol.base_name=="") + { + method_symbol.pretty_name= + id2string(class_symbol.pretty_name)+"."+ + id2string(class_symbol.base_name)+"()"; + member_type.set(ID_constructor, true); + } + else + method_symbol.pretty_name= + id2string(class_symbol.pretty_name)+"."+ + id2string(m.base_name)+"()"; // do we need to add 'this' as a parameter? if(!m.is_static) { + code_typet &code_type=to_code_type(member_type); + code_typet::parameterst ¶meters=code_type.parameters(); code_typet::parametert this_p; - const reference_typet object_ref_type( - symbol_typet(class_symbol.name)); + const reference_typet object_ref_type(symbol_typet(class_symbol.name)); this_p.type()=object_ref_type; this_p.set_this(); parameters.insert(parameters.begin(), this_p); } + method_symbol.type=member_type; + symbol_table.add(method_symbol); +} + +/*******************************************************************\ + +Function: java_bytecode_convert_methodt::convert + + Inputs: + + Outputs: + + Purpose: + +\*******************************************************************/ + +void java_bytecode_convert_methodt::convert( + const symbolt &class_symbol, + const methodt &m) +{ + const irep_idt method_identifier= + id2string(class_symbol.name)+"."+id2string(m.name)+":"+m.signature; + method_id=method_identifier; + + const auto &old_sym=symbol_table.lookup(method_identifier); + + typet member_type=old_sym.type; + code_typet &code_type=to_code_type(member_type); + method_return_type=code_type.return_type(); + code_typet::parameterst ¶meters=code_type.parameters(); + variables.clear(); // find parameter names in the local variable table: @@ -347,10 +397,10 @@ void java_bytecode_convert_methodt::convert( if((!m.is_abstract) && (!m.is_native)) method_symbol.value=convert_instructions(m, code_type); - // do we have the method symbol already? + // Replace the existing stub symbol with the real deal: const auto s_it=symbol_table.symbols.find(method.get_name()); - if(s_it!=symbol_table.symbols.end()) - symbol_table.symbols.erase(s_it); // erase, we stubbed it + assert(s_it!=symbol_table.symbols.end()); + symbol_table.symbols.erase(s_it); symbol_table.add(method_symbol); } @@ -1037,7 +1087,11 @@ codet java_bytecode_convert_methodt::convert_instructions( { if(as_string(arg0.get(ID_identifier)) .find("")!=std::string::npos) + { + if(needed_classes) + needed_classes->insert(classname); code_type.set(ID_constructor, true); + } else code_type.set(ID_java_super_method_call, true); } @@ -1119,11 +1173,15 @@ codet java_bytecode_convert_methodt::convert_instructions( assert(use_this); assert(!call.arguments().empty()); call.function()=arg0; + // Populate needed methods later, + // once we know what object types can exist. } else { // static binding call.function()=symbol_exprt(arg0.get(ID_identifier), arg0.type()); + if(needed_methods) + needed_methods->push_back(arg0.get(ID_identifier)); } call.function().add_source_location()=loc; @@ -1665,6 +1723,11 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_exprt symbol_expr(arg0.type()); const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); + if(needed_classes && arg0.type().id()==ID_symbol) + { + needed_classes->insert( + to_symbol_type(arg0.type()).get_identifier()); + } results[0]=java_bytecode_promotion(symbol_expr); // set $assertionDisabled to false @@ -1682,6 +1745,11 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_exprt symbol_expr(arg0.type()); const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); + if(needed_classes && arg0.type().id()==ID_symbol) + { + needed_classes->insert( + to_symbol_type(arg0.type()).get_identifier()); + } c=code_assignt(symbol_expr, op[0]); } else if(statement==patternt("?2?")) // i2c etc. @@ -2144,13 +2212,17 @@ void java_bytecode_convert_method( symbol_tablet &symbol_table, message_handlert &message_handler, bool disable_runtime_checks, - size_t max_array_length) + size_t max_array_length, + safe_pointer > needed_methods, + safe_pointer > needed_classes) { java_bytecode_convert_methodt java_bytecode_convert_method( symbol_table, message_handler, disable_runtime_checks, - max_array_length); + max_array_length, + needed_methods, + needed_classes); java_bytecode_convert_method(class_symbol, method); } diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 8945af95bd1..e81881f44e1 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -11,15 +11,46 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include "java_bytecode_parse_tree.h" +class class_hierarchyt; + void java_bytecode_convert_method( const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &, symbol_tablet &symbol_table, message_handlert &message_handler, bool disable_runtime_checks, - size_t max_array_length); + size_t max_array_length, + safe_pointer > needed_methods, + safe_pointer > needed_classes); + +// Must provide both the optional parameters or neither. +inline void java_bytecode_convert_method( + const symbolt &class_symbol, + const java_bytecode_parse_treet::methodt &method, + symbol_tablet &symbol_table, + message_handlert &message_handler, + bool disable_runtime_checks, + size_t max_array_length) +{ + java_bytecode_convert_method( + class_symbol, + method, + symbol_table, + message_handler, + disable_runtime_checks, + max_array_length, + safe_pointer >::create_null(), + safe_pointer >::create_null()); +} + +void java_bytecode_convert_method_lazy( + const symbolt &class_symbol, + const irep_idt &method_identifier, + const java_bytecode_parse_treet::methodt &, + symbol_tablet &symbol_table); #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_METHOD_H diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 8c0e708a530..ae4ba711640 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -13,8 +13,10 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include #include "java_bytecode_parse_tree.h" +#include "java_bytecode_convert_class.h" #include #include @@ -29,11 +31,15 @@ class java_bytecode_convert_methodt:public messaget symbol_tablet &_symbol_table, message_handlert &_message_handler, bool _disable_runtime_checks, - size_t _max_array_length): + size_t _max_array_length, + safe_pointer > _needed_methods, + safe_pointer > _needed_classes): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), - max_array_length(_max_array_length) + max_array_length(_max_array_length), + needed_methods(_needed_methods), + needed_classes(_needed_classes) { } @@ -52,6 +58,8 @@ class java_bytecode_convert_methodt:public messaget symbol_tablet &symbol_table; const bool disable_runtime_checks; const size_t max_array_length; + safe_pointer > needed_methods; + safe_pointer > needed_classes; irep_idt method_id; irep_idt current_method; diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 1d9d9f57b41..668960dbe17 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -14,8 +14,11 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include + #include "java_bytecode_language.h" #include "java_bytecode_convert_class.h" +#include "java_bytecode_convert_method.h" #include "java_bytecode_internal_additions.h" #include "java_bytecode_typecheck.h" #include "java_entry_point.h" @@ -40,11 +43,18 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd) { disable_runtime_checks=cmd.isset("disable-runtime-check"); assume_inputs_non_null=cmd.isset("java-assume-inputs-non-null"); + string_refinement_enabled=cmd.isset("string-refine"); if(cmd.isset("java-max-input-array-length")) max_nondet_array_length= std::stoi(cmd.get_value("java-max-input-array-length")); if(cmd.isset("java-max-vla-length")) max_user_array_length=std::stoi(cmd.get_value("java-max-vla-length")); + if(cmd.isset("lazy-methods-context-sensitive")) + lazy_methods_mode=LAZY_METHODS_MODE_CONTEXT_SENSITIVE; + else if(cmd.isset("lazy-methods")) + lazy_methods_mode=LAZY_METHODS_MODE_CONTEXT_INSENSITIVE; + else + lazy_methods_mode=LAZY_METHODS_MODE_EAGER; } /*******************************************************************\ @@ -147,6 +157,8 @@ bool java_bytecode_languaget::parse( { status() << "JAR file without entry point: loading it all" << eom; java_class_loader.load_entire_jar(path); + for(const auto &kv : java_class_loader.jar_map.at(path).entries) + main_jar_classes.push_back(kv.first); } else java_class_loader.add_jar_file(path); @@ -170,6 +182,287 @@ bool java_bytecode_languaget::parse( /*******************************************************************\ +Function: get_virtual_method_target + + Inputs: `needed_classes`: set of classes that can be instantiated. + Any potential callee not in this set will be ignored. + `call_basename`: unqualified function name with type + signature (e.g. "f:(I)") + `classname`: class name that may define or override a + function named `call_basename`. + `symbol_table`: global symtab + + Outputs: Returns the fully qualified name of `classname`'s definition + of `call_basename` if found and `classname` is present in + `needed_classes`, or irep_idt() otherwise. + + Purpose: Find a virtual callee, if one is defined and the callee type + is known to exist. + +\*******************************************************************/ + +static irep_idt get_virtual_method_target( + const std::set &needed_classes, + const irep_idt &call_basename, + const irep_idt &classname, + const symbol_tablet &symbol_table) +{ + // Program-wide, is this class ever instantiated? + if(!needed_classes.count(classname)) + return irep_idt(); + auto methodid=id2string(classname)+"."+id2string(call_basename); + if(symbol_table.has_symbol(methodid)) + return methodid; + else + return irep_idt(); +} + +/*******************************************************************\ + +Function: get_virtual_method_target + + Inputs: `c`: function call whose potential target functions should + be determined. + `needed_classes`: set of classes that can be instantiated. + Any potential callee not in this set will be ignored. + `symbol_table`: global symtab + `class_hierarchy`: global class hierarchy + + Outputs: Populates `needed_methods` with all possible `c` callees, + taking `needed_classes` into account (virtual function + overrides defined on classes that are not 'needed' are + ignored) + + Purpose: Find possible callees, excluding types that are not known + to be instantiated. + +\*******************************************************************/ + +static void get_virtual_method_targets( + const code_function_callt &c, + const std::set &needed_classes, + std::vector &needed_methods, + symbol_tablet &symbol_table, + const class_hierarchyt &class_hierarchy) +{ + const auto &called_function=c.function(); + assert(called_function.id()==ID_virtual_function); + + const auto &call_class=called_function.get(ID_C_class); + assert(call_class!=irep_idt()); + const auto &call_basename=called_function.get(ID_component_name); + assert(call_basename!=irep_idt()); + + auto old_size=needed_methods.size(); + + auto child_classes=class_hierarchy.get_children_trans(call_class); + for(const auto &child_class : child_classes) + { + auto child_method= + get_virtual_method_target( + needed_classes, + call_basename, + child_class, + symbol_table); + if(child_method!=irep_idt()) + needed_methods.push_back(child_method); + } + + irep_idt parent_class_id=call_class; + while(1) + { + auto parent_method= + get_virtual_method_target( + needed_classes, + call_basename, + parent_class_id, + symbol_table); + if(parent_method!=irep_idt()) + { + needed_methods.push_back(parent_method); + break; + } + else + { + auto findit=class_hierarchy.class_map.find(parent_class_id); + if(findit==class_hierarchy.class_map.end()) + break; + else + { + const auto &entry=findit->second; + if(entry.parents.empty()) + break; + else + parent_class_id=entry.parents[0]; + } + } + } + + if(needed_methods.size()==old_size) + { + // Didn't find any candidate callee. Generate a stub. + std::string stubname=id2string(call_class)+"."+id2string(call_basename); + symbolt symbol; + symbol.name=stubname; + symbol.base_name=call_basename; + symbol.type=c.function().type(); + symbol.value.make_nil(); + symbol.mode=ID_java; + symbol_table.add(symbol); + } +} + +/*******************************************************************\ + +Function: gather_virtual_callsites + + Inputs: `e`: expression tree to search + + Outputs: Populates `result` with pointers to each function call + within e that calls a virtual function. + + Purpose: See output + +\*******************************************************************/ + +static void gather_virtual_callsites( + const exprt &e, + std::vector &result) +{ + if(e.id()!=ID_code) + return; + const codet &c=to_code(e); + if(c.get_statement()==ID_function_call && + to_code_function_call(c).function().id()==ID_virtual_function) + result.push_back(&to_code_function_call(c)); + else + forall_operands(it, e) + gather_virtual_callsites(*it, result); +} + +/*******************************************************************\ + +Function: gather_needed_globals + + Inputs: `e`: expression tree to search + `symbol_table`: global symtab + + Outputs: Populates `needed` with global variable symbols referenced + from `e` or its children. + + Purpose: See output + +\*******************************************************************/ + +static void gather_needed_globals( + const exprt &e, + const symbol_tablet &symbol_table, + symbol_tablet &needed) +{ + if(e.id()==ID_symbol) + { + const auto &sym=symbol_table.lookup(to_symbol_expr(e).get_identifier()); + if(sym.is_static_lifetime) + needed.add(sym); + } + else + forall_operands(opit, e) + gather_needed_globals(*opit, symbol_table, needed); +} + +/*******************************************************************\ + +Function: gather_field_types + + Inputs: `class_type`: root of class tree to search + `ns`: global namespace + + Outputs: Populates `needed_classes` with all Java reference types + reachable starting at `class_type`. For example if + `class_type` is `symbol_typet("java::A")` and A has a B + field, then `B` (but not `A`) will be added to + `needed_classes`. + + Purpose: See output + +\*******************************************************************/ + +static void gather_field_types( + const typet &class_type, + const namespacet &ns, + std::set &needed_classes) +{ + const auto &underlying_type=to_struct_type(ns.follow(class_type)); + for(const auto &field : underlying_type.components()) + { + if(field.type().id()==ID_struct || field.type().id()==ID_symbol) + gather_field_types(field.type(), ns, needed_classes); + else if(field.type().id()==ID_pointer) + { + // Skip array primitive pointers, for example: + if(field.type().subtype().id()!=ID_symbol) + continue; + const auto &field_classid= + to_symbol_type(field.type().subtype()).get_identifier(); + if(needed_classes.insert(field_classid).second) + gather_field_types(field.type().subtype(), ns, needed_classes); + } + } +} + +/*******************************************************************\ + +Function: initialize_needed_classes + + Inputs: `entry_points`: list of fully-qualified function names that + we should assume are reachable + `ns`: global namespace + `ch`: global class hierarchy + + Outputs: Populates `needed_classes` with all Java reference types + whose references may be passed, directly or indirectly, + to any of the functions in `entry_points`. + + Purpose: See output + +\*******************************************************************/ + +static void initialize_needed_classes( + const std::vector &entry_points, + const namespacet &ns, + const class_hierarchyt &ch, + std::set &needed_classes) +{ + for(const auto &mname : entry_points) + { + const auto &symbol=ns.lookup(mname); + const auto &mtype=to_code_type(symbol.type); + for(const auto ¶m : mtype.parameters()) + { + if(param.type().id()==ID_pointer) + { + const auto ¶m_classid= + to_symbol_type(param.type().subtype()).get_identifier(); + std::vector class_and_parents= + ch.get_parents_trans(param_classid); + class_and_parents.push_back(param_classid); + for(const auto &classid : class_and_parents) + needed_classes.insert(classid); + gather_field_types(param.type().subtype(), ns, needed_classes); + } + } + } + + // Also add classes whose instances are magically + // created by the JVM and so won't be spotted by + // looking for constructors and calls as usual: + needed_classes.insert("java::java.lang.String"); + needed_classes.insert("java::java.lang.Class"); + needed_classes.insert("java::java.lang.Object"); +} + +/*******************************************************************\ + Function: java_bytecode_languaget::typecheck Inputs: @@ -200,13 +493,25 @@ bool java_bytecode_languaget::typecheck( symbol_table, get_message_handler(), disable_runtime_checks, - max_user_array_length)) + max_user_array_length, + lazy_methods, + lazy_methods_mode, + string_refinement_enabled)) + return true; + } + + // Now incrementally elaborate methods + // that are reachable from this entry point. + if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE) + { + // ci: context-insensitive. + if(do_ci_lazy_method_conversion(symbol_table, lazy_methods)) return true; } // now typecheck all if(java_bytecode_typecheck( - symbol_table, get_message_handler())) + symbol_table, get_message_handler(), string_refinement_enabled)) return true; return false; @@ -214,6 +519,221 @@ bool java_bytecode_languaget::typecheck( /*******************************************************************\ +Function: java_bytecode_languaget::do_ci_lazy_method_conversion + + Inputs: `symbol_table`: global symbol table + `lazy_methods`: map from method names to relevant symbol + and parsed-method objects. + + Outputs: Elaborates lazily-converted methods that may be reachable + starting from the main entry point (usually provided with + the --function command-line option) (side-effect on the + symbol_table). Returns false on success. + + Purpose: Uses a simple context-insensitive ('ci') analysis to + determine which methods may be reachable from the main + entry point. In brief, static methods are reachable if we + find a callsite in another reachable site, while virtual + methods are reachable if we find a virtual callsite + targeting a compatible type *and* a constructor callsite + indicating an object of that type may be instantiated (or + evidence that an object of that type exists before the + main function is entered, such as being passed as a + parameter). + +\*******************************************************************/ + +bool java_bytecode_languaget::do_ci_lazy_method_conversion( + symbol_tablet &symbol_table, + lazy_methodst &lazy_methods) +{ + class_hierarchyt ch; + ch(symbol_table); + + std::vector method_worklist1; + std::vector method_worklist2; + + auto main_function= + get_main_symbol(symbol_table, main_class, get_message_handler(), true); + if(main_function.stop_convert) + { + // Failed, mark all functions in the given main class(es) reachable. + std::vector reachable_classes; + if(!main_class.empty()) + reachable_classes.push_back(main_class); + else + reachable_classes=main_jar_classes; + for(const auto &classname : reachable_classes) + { + const auto &methods= + java_class_loader.class_map.at(classname).parsed_class.methods; + for(const auto &method : methods) + { + const irep_idt methodid="java::"+id2string(classname)+"."+ + id2string(method.name)+":"+ + id2string(method.signature); + method_worklist2.push_back(methodid); + } + } + } + else + method_worklist2.push_back(main_function.main_function.name); + + std::set needed_classes; + initialize_needed_classes( + method_worklist2, + namespacet(symbol_table), + ch, + needed_classes); + + std::set methods_already_populated; + std::vector virtual_callsites; + + bool any_new_methods; + do + { + any_new_methods=false; + while(method_worklist2.size()!=0) + { + std::swap(method_worklist1, method_worklist2); + for(const auto &mname : method_worklist1) + { + if(!methods_already_populated.insert(mname).second) + continue; + auto findit=lazy_methods.find(mname); + if(findit==lazy_methods.end()) + { + debug() << "Skip " << mname << eom; + continue; + } + debug() << "CI lazy methods: elaborate " << mname << eom; + const auto &parsed_method=findit->second; + java_bytecode_convert_method( + *parsed_method.first, + *parsed_method.second, + symbol_table, + get_message_handler(), + disable_runtime_checks, + max_user_array_length, + safe_pointer >::create_non_null( + &method_worklist2), + safe_pointer >::create_non_null( + &needed_classes)); + gather_virtual_callsites( + symbol_table.lookup(mname).value, + virtual_callsites); + any_new_methods=true; + } + method_worklist1.clear(); + } + + // Given the object types we now know may be created, populate more + // possible virtual function call targets: + + debug() << "CI lazy methods: add virtual method targets (" + << virtual_callsites.size() + << " callsites)" + << eom; + + for(const auto &callsite : virtual_callsites) + { + // This will also create a stub if a virtual callsite has no targets. + get_virtual_method_targets( + *callsite, + needed_classes, + method_worklist2, + symbol_table, + ch); + } + } + while(any_new_methods); + + // Remove symbols for methods that were declared but never used: + symbol_tablet keep_symbols; + + for(const auto &sym : symbol_table.symbols) + { + if(sym.second.is_static_lifetime) + continue; + if(lazy_methods.count(sym.first) && + !methods_already_populated.count(sym.first)) + { + continue; + } + if(sym.second.type.id()==ID_code) + gather_needed_globals(sym.second.value, symbol_table, keep_symbols); + keep_symbols.add(sym.second); + } + + debug() << "CI lazy methods: removed " + << symbol_table.symbols.size() - keep_symbols.symbols.size() + << " unreachable methods and globals" + << eom; + + symbol_table.swap(keep_symbols); + + return false; +} + +/*******************************************************************\ + +Function: java_bytecode_languaget::lazy_methods_provided + + Inputs: None + + Outputs: Populates `methods` with the complete list of lazy methods + that are available to convert (those which are valid + parameters for `convert_lazy_method`) + + Purpose: Provide feedback to `language_filest` so that when asked + for a lazy method, it can delegate to this instance of + java_bytecode_languaget. + +\*******************************************************************/ + +void java_bytecode_languaget::lazy_methods_provided( + std::set &methods) const +{ + for(const auto &kv : lazy_methods) + methods.insert(kv.first); +} + +/*******************************************************************\ + +Function: java_bytecode_languaget::convert_lazy_method + + Inputs: `id`: method ID to convert + `symtab`: global symbol table + + Outputs: Amends the symbol table entry for function `id`, which + should be a lazy method provided by this instance of + `java_bytecode_languaget`. It should initially have a nil + value. After this method completes, it will have a value + representing the method body, identical to that produced + using eager method conversion. + + Purpose: Promote a lazy-converted method (one whose type is known + but whose body hasn't been converted) into a fully- + elaborated one. + +\*******************************************************************/ + +void java_bytecode_languaget::convert_lazy_method( + const irep_idt &id, + symbol_tablet &symtab) +{ + const auto &lazy_method_entry=lazy_methods.at(id); + java_bytecode_convert_method( + *lazy_method_entry.first, + *lazy_method_entry.second, + symtab, + get_message_handler(), + disable_runtime_checks, + max_user_array_length); +} + +/*******************************************************************\ + Function: java_bytecode_languaget::final Inputs: diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index 5937f6f4fd4..643eacd5b7f 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -16,6 +16,20 @@ Author: Daniel Kroening, kroening@kroening.com #define MAX_NONDET_ARRAY_LENGTH_DEFAULT 5 +enum lazy_methods_modet +{ + LAZY_METHODS_MODE_EAGER, + LAZY_METHODS_MODE_CONTEXT_INSENSITIVE, + LAZY_METHODS_MODE_CONTEXT_SENSITIVE +}; + +typedef std::pair< + const symbolt *, + const java_bytecode_parse_treet::methodt *> + lazy_method_valuet; +typedef std::map + lazy_methodst; + class java_bytecode_languaget:public languaget { public: @@ -69,9 +83,15 @@ class java_bytecode_languaget:public languaget std::set extensions() const override; void modules_provided(std::set &modules) override; + virtual void lazy_methods_provided(std::set &) const override; + virtual void convert_lazy_method( + const irep_idt &id, symbol_tablet &) override; protected: + bool do_ci_lazy_method_conversion(symbol_tablet &, lazy_methodst &); + irep_idt main_class; + std::vector main_jar_classes; java_class_loadert java_class_loader; bool assume_inputs_non_null; // assume inputs variables to be non-null @@ -82,6 +102,9 @@ class java_bytecode_languaget:public languaget // - array size for newarray size_t max_nondet_array_length; // maximal length for non-det array creation size_t max_user_array_length; // max size for user code created arrays + lazy_methodst lazy_methods; + lazy_methods_modet lazy_methods_mode; + bool string_refinement_enabled; }; languaget *new_java_bytecode_language(); diff --git a/src/java_bytecode/java_bytecode_typecheck.cpp b/src/java_bytecode/java_bytecode_typecheck.cpp index e72b80a885d..4f09e366452 100644 --- a/src/java_bytecode/java_bytecode_typecheck.cpp +++ b/src/java_bytecode/java_bytecode_typecheck.cpp @@ -126,10 +126,11 @@ Function: java_bytecode_typecheck bool java_bytecode_typecheck( symbol_tablet &symbol_table, - message_handlert &message_handler) + message_handlert &message_handler, + bool string_refinement_enabled) { java_bytecode_typecheckt java_bytecode_typecheck( - symbol_table, message_handler); + symbol_table, message_handler, string_refinement_enabled); return java_bytecode_typecheck.typecheck_main(); } diff --git a/src/java_bytecode/java_bytecode_typecheck.h b/src/java_bytecode/java_bytecode_typecheck.h index cceba54dee5..618b6fce44d 100644 --- a/src/java_bytecode/java_bytecode_typecheck.h +++ b/src/java_bytecode/java_bytecode_typecheck.h @@ -21,7 +21,8 @@ Author: Daniel Kroening, kroening@kroening.com bool java_bytecode_typecheck( symbol_tablet &symbol_table, - message_handlert &message_handler); + message_handlert &message_handler, + bool string_refinement_enabled); bool java_bytecode_typecheck( exprt &expr, @@ -33,10 +34,12 @@ class java_bytecode_typecheckt:public typecheckt public: java_bytecode_typecheckt( symbol_tablet &_symbol_table, - message_handlert &_message_handler): + message_handlert &_message_handler, + bool _string_refinement_enabled): typecheckt(_message_handler), symbol_table(_symbol_table), - ns(symbol_table) + ns(symbol_table), + string_refinement_enabled(_string_refinement_enabled) { } @@ -48,6 +51,7 @@ class java_bytecode_typecheckt:public typecheckt protected: symbol_tablet &symbol_table; const namespacet ns; + bool string_refinement_enabled; void typecheck_type_symbol(symbolt &); void typecheck_non_type_symbol(symbolt &); diff --git a/src/java_bytecode/java_bytecode_typecheck_expr.cpp b/src/java_bytecode/java_bytecode_typecheck_expr.cpp index c2ba2949d7d..11e488ff7fe 100644 --- a/src/java_bytecode/java_bytecode_typecheck_expr.cpp +++ b/src/java_bytecode/java_bytecode_typecheck_expr.cpp @@ -10,9 +10,14 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include +#include + +#include #include "java_bytecode_typecheck.h" #include "java_pointer_casts.h" +#include "java_types.h" /*******************************************************************\ @@ -114,6 +119,27 @@ static std::string escape_non_alnum(const std::string &toescape) /*******************************************************************\ +Function: utf16_to_array + + Inputs: `in`: wide string to convert + + Outputs: Returns a Java char array containing the same wchars. + + Purpose: Convert UCS-2 or UTF-16 to an array expression. + +\*******************************************************************/ + +static array_exprt utf16_to_array(const std::wstring &in) +{ + const auto jchar=java_char_type(); + array_exprt ret(array_typet(jchar, infinity_exprt(java_int_type()))); + for(const auto c : in) + ret.copy_to_operands(from_integer(c, jchar)); + return ret; +} + +/*******************************************************************\ + Function: java_bytecode_typecheckt::typecheck_expr_java_string_literal Inputs: @@ -136,14 +162,14 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) auto findit=symbol_table.symbols.find(escaped_symbol_name); if(findit!=symbol_table.symbols.end()) { - expr=findit->second.symbol_expr(); + expr=address_of_exprt(findit->second.symbol_expr()); return; } // Create a new symbol: symbolt new_symbol; new_symbol.name=escaped_symbol_name; - new_symbol.type=pointer_typet(string_type); + new_symbol.type=string_type; new_symbol.base_name="Literal"; new_symbol.pretty_name=value; new_symbol.mode=ID_java; @@ -151,13 +177,91 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) new_symbol.is_lvalue=true; new_symbol.is_static_lifetime=true; // These are basically const global data. + // Regardless of string refinement setting, at least initialize + // the literal with @clsid = String and @lock = false: + symbol_typet jlo_symbol("java::java.lang.Object"); + const auto &jlo_struct=to_struct_type(ns.follow(jlo_symbol)); + struct_exprt jlo_init(jlo_symbol); + const auto &jls_struct=to_struct_type(ns.follow(string_type)); + + jlo_init.copy_to_operands( + constant_exprt( + "java::java.lang.String", + jlo_struct.components()[0].type())); + jlo_init.copy_to_operands( + from_integer( + 0, + jlo_struct.components()[1].type())); + + // If string refinement *is* around, populate the actual + // contents as well: + if(string_refinement_enabled) + { + struct_exprt literal_init(new_symbol.type); + literal_init.move_to_operands(jlo_init); + + // Initialize the string with a constant utf-16 array: + symbolt array_symbol; + array_symbol.name=escaped_symbol_name+"_constarray"; + array_symbol.type=array_typet( + java_char_type(), infinity_exprt(java_int_type())); + array_symbol.base_name="Literal_constarray"; + array_symbol.pretty_name=value; + array_symbol.mode=ID_java; + array_symbol.is_type=false; + array_symbol.is_lvalue=true; + // These are basically const global data: + array_symbol.is_static_lifetime=true; + array_symbol.is_state_var=true; + auto literal_array=utf16_to_array( + utf8_to_utf16_little_endian(id2string(value))); + array_symbol.value=literal_array; + + if(symbol_table.add(array_symbol)) + throw "failed to add constarray symbol to symbol table"; + + literal_init.copy_to_operands( + from_integer(literal_array.operands().size(), + jls_struct.components()[1].type())); + literal_init.copy_to_operands( + address_of_exprt(array_symbol.symbol_expr())); + + new_symbol.value=literal_init; + } + else if(jls_struct.components().size()>=1 && + jls_struct.components()[0].get_name()=="@java.lang.Object") + { + // Case where something defined java.lang.String, so it has + // a proper base class (always java.lang.Object in practical + // JDKs seen so far) + struct_exprt literal_init(new_symbol.type); + literal_init.move_to_operands(jlo_init); + for(const auto &comp : jls_struct.components()) + { + if(comp.get_name()=="@java.lang.Object") + continue; + // Other members of JDK's java.lang.String we don't understand + // without string-refinement. Just zero-init them; consider using + // test-gen-like nondet object trees instead. + literal_init.copy_to_operands( + zero_initializer(comp.type(), expr.source_location(), ns)); + } + new_symbol.value=literal_init; + } + else if(jls_struct.get_bool(ID_incomplete_class)) + { + // Case where java.lang.String was stubbed, and so directly defines + // @class_identifier and @lock: + new_symbol.value=jlo_init; + } + if(symbol_table.add(new_symbol)) { error() << "failed to add string literal symbol to symbol table" << eom; throw 0; } - expr=new_symbol.symbol_expr(); + expr=address_of_exprt(new_symbol.symbol_expr()); } /*******************************************************************\ diff --git a/src/java_bytecode/java_class_loader.h b/src/java_bytecode/java_class_loader.h index 4d67b3e7164..b2a4953445a 100644 --- a/src/java_bytecode/java_class_loader.h +++ b/src/java_bytecode/java_class_loader.h @@ -37,7 +37,6 @@ class java_class_loadert:public messaget jar_poolt jar_pool; -protected: class jar_map_entryt { public: diff --git a/src/java_bytecode/java_entry_point.cpp b/src/java_bytecode/java_entry_point.cpp index dcc3dead0a8..44a59091ba2 100644 --- a/src/java_bytecode/java_entry_point.cpp +++ b/src/java_bytecode/java_entry_point.cpp @@ -327,11 +327,11 @@ void java_record_outputs( } } - main_function_resultt get_main_symbol( symbol_tablet &symbol_table, const irep_idt &main_class, - message_handlert &message_handler) + message_handlert &message_handler, + bool allow_no_body) { symbolt symbol; main_function_resultt res; @@ -415,7 +415,7 @@ main_function_resultt get_main_symbol( } // check if it has a body - if(symbol.value.is_nil()) + if(symbol.value.is_nil() && !allow_no_body) { message.error() << "main method `" << main_class << "' has no body" << messaget::eom; @@ -480,7 +480,7 @@ main_function_resultt get_main_symbol( symbol=symbol_table.symbols.find(*matches.begin())->second; // check if it has a body - if(symbol.value.is_nil()) + if(symbol.value.is_nil() && !allow_no_body) { message.error() << "main method `" << main_class << "' has no body" << messaget::eom; diff --git a/src/java_bytecode/java_entry_point.h b/src/java_bytecode/java_entry_point.h index 7fa562de89a..e6575734d80 100644 --- a/src/java_bytecode/java_entry_point.h +++ b/src/java_bytecode/java_entry_point.h @@ -28,6 +28,7 @@ typedef struct main_function_resultt get_main_symbol( symbol_tablet &symbol_table, const irep_idt &main_class, - message_handlert &); + message_handlert &, + bool allow_no_body=false); #endif // CPROVER_JAVA_BYTECODE_JAVA_ENTRY_POINT_H diff --git a/src/langapi/language_ui.cpp b/src/langapi/language_ui.cpp index 8640d735631..95b5d104a4f 100644 --- a/src/langapi/language_ui.cpp +++ b/src/langapi/language_ui.cpp @@ -104,8 +104,8 @@ bool language_uit::parse(const std::string &filename) return true; } - std::pair - result=language_files.filemap.insert( + std::pair + result=language_files.file_map.insert( std::pair(filename, language_filet())); language_filet &lf=result.first->second; diff --git a/src/solvers/refinement/refined_string_type.cpp b/src/solvers/refinement/refined_string_type.cpp index cfe44ba4c29..012d30eb8de 100644 --- a/src/solvers/refinement/refined_string_type.cpp +++ b/src/solvers/refinement/refined_string_type.cpp @@ -10,10 +10,10 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com \*******************************************************************/ -#include -#include #include +#include "refined_string_type.h" + /*******************************************************************\ Constructor: refined_string_typet::refined_string_typet @@ -22,11 +22,12 @@ Constructor: refined_string_typet::refined_string_typet \*******************************************************************/ -refined_string_typet::refined_string_typet(typet char_type) +refined_string_typet::refined_string_typet( + const typet &index_type, const typet &char_type) { - infinity_exprt infinite_index(refined_string_typet::index_type()); + infinity_exprt infinite_index(index_type); array_typet char_array(char_type, infinite_index); - components().emplace_back("length", refined_string_typet::index_type()); + components().emplace_back("length", index_type); components().emplace_back("content", char_array); } diff --git a/src/solvers/refinement/refined_string_type.h b/src/solvers/refinement/refined_string_type.h index 5ad67cc2c31..3ecc0ac3a9f 100644 --- a/src/solvers/refinement/refined_string_type.h +++ b/src/solvers/refinement/refined_string_type.h @@ -17,14 +17,12 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include #include -#include -#include // Internal type used for string refinement class refined_string_typet: public struct_typet { public: - explicit refined_string_typet(typet char_type); + refined_string_typet(const typet &index_type, const typet &char_type); // Type for the content (list of characters) of a string const array_typet &get_content_type() const @@ -33,7 +31,7 @@ class refined_string_typet: public struct_typet return to_array_type(components()[1].type()); } - const typet &get_char_type() + const typet &get_char_type() const { assert(components().size()==2); return components()[0].type(); @@ -44,16 +42,6 @@ class refined_string_typet: public struct_typet return get_content_type().size().type(); } - static typet index_type() - { - return signed_int_type(); - } - - static typet java_index_type() - { - return java_int_type(); - } - // For C the unrefined string type is __CPROVER_string, for java it is a // pointer to a struct with tag java.lang.String @@ -67,22 +55,6 @@ class refined_string_typet: public struct_typet static bool is_java_char_sequence_type(const typet &type); - static typet get_char_type(const exprt &expr) - { - if(is_c_string_type(expr.type())) - return char_type(); - else - return java_char_type(); - } - - static typet get_index_type(const exprt &expr) - { - if(is_c_string_type(expr.type())) - return index_type(); - else - return java_index_type(); - } - static bool is_unrefined_string_type(const typet &type) { return ( diff --git a/src/solvers/refinement/string_constraint.h b/src/solvers/refinement/string_constraint.h index 1f5a56ae9b7..61a6f3ebdd6 100644 --- a/src/solvers/refinement/string_constraint.h +++ b/src/solvers/refinement/string_constraint.h @@ -111,7 +111,7 @@ class string_not_contains_constraintt: public exprt { public: // string_not contains_constraintt are formula of the form: - // forall x in [lb,ub[. p(x) => exists y in [lb,ub[. s1[x+y] != s2[y] + // forall x in [lb,ub[. p(x) => exists y in [lb,ub[. s0[x+y] != s1[y] string_not_contains_constraintt( exprt univ_lower_bound, @@ -119,8 +119,8 @@ class string_not_contains_constraintt: public exprt exprt premise, exprt exists_bound_inf, exprt exists_bound_sup, - exprt s0, - exprt s1) : + const string_exprt &s0, + const string_exprt &s1): exprt(ID_string_not_contains_constraint) { copy_to_operands(univ_lower_bound, univ_bound_sup, premise); @@ -153,14 +153,14 @@ class string_not_contains_constraintt: public exprt return operands()[4]; } - const exprt &s0() const + const string_exprt &s0() const { - return operands()[5]; + return to_string_expr(operands()[5]); } - const exprt &s1() const + const string_exprt &s1() const { - return operands()[6]; + return to_string_expr(operands()[6]); } }; diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index a78f1473510..2a56f09fddd 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -13,9 +13,8 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #ifndef CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H #define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H -#include +#include #include -#include #include class string_constraint_generatort @@ -26,7 +25,7 @@ class string_constraint_generatort // to the axiom list. string_constraint_generatort(): - mode(ID_unknown), refined_string_type(char_type()) + mode(ID_unknown) { } void set_mode(irep_idt _mode) @@ -34,25 +33,10 @@ class string_constraint_generatort // only C and java modes supported assert((_mode==ID_java) || (_mode==ID_C)); mode=_mode; - refined_string_type=refined_string_typet(get_char_type()); } irep_idt &get_mode() { return mode; } - typet get_char_type() const; - typet get_index_type() const - { - if(mode==ID_java) - return refined_string_typet::java_index_type(); - assert(mode==ID_C); - return refined_string_typet::index_type(); - } - - const refined_string_typet &get_refined_string_type() const - { - return refined_string_type; - } - // Axioms are of three kinds: universally quantified string constraint, // not contains string constraints and simple formulas. std::list axioms; @@ -73,9 +57,14 @@ class string_constraint_generatort return index_exprt(witness.at(c), univ_val); } - symbol_exprt fresh_exist_index(const irep_idt &prefix); - symbol_exprt fresh_univ_index(const irep_idt &prefix); + static unsigned next_symbol_id; + + static symbol_exprt fresh_symbol( + const irep_idt &prefix, const typet &type=bool_typet()); + symbol_exprt fresh_exist_index(const irep_idt &prefix, const typet &type); + symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type); symbol_exprt fresh_boolean(const irep_idt &prefix); + string_exprt fresh_string(const refined_string_typet &type); // We maintain a map from symbols to strings. std::map symbol_to_string; @@ -95,7 +84,7 @@ class string_constraint_generatort exprt add_axioms_for_function_application( const function_application_exprt &expr); - constant_exprt constant_char(int i) const; + static constant_exprt constant_char(int i, const typet &char_type); private: // The integer with the longest string is Integer.MIN_VALUE which is -2^31, @@ -107,7 +96,7 @@ class string_constraint_generatort const std::size_t MAX_FLOAT_LENGTH=15; const std::size_t MAX_DOUBLE_LENGTH=30; - irep_idt extract_java_string(const symbol_exprt &s) const; + static irep_idt extract_java_string(const symbol_exprt &s); exprt axiom_for_is_positive_index(const exprt &x); @@ -123,7 +112,6 @@ class string_constraint_generatort exprt add_axioms_for_contains(const function_application_exprt &f); exprt add_axioms_for_equals(const function_application_exprt &f); exprt add_axioms_for_equals_ignore_case(const function_application_exprt &f); - exprt add_axioms_for_data(const function_application_exprt &f); // Add axioms corresponding to the String.hashCode java function // The specification is partial: the actual value is not actually computed @@ -153,7 +141,8 @@ class string_constraint_generatort string_exprt add_axioms_for_concat_float(const function_application_exprt &f); string_exprt add_axioms_for_concat_code_point( const function_application_exprt &f); - string_exprt add_axioms_for_constant(irep_idt sval); + string_exprt add_axioms_for_constant( + irep_idt sval, const refined_string_typet &ref_type); string_exprt add_axioms_for_delete( const string_exprt &str, const exprt &start, const exprt &end); string_exprt add_axioms_for_delete(const function_application_exprt &expr); @@ -173,15 +162,19 @@ class string_constraint_generatort const function_application_exprt &f); string_exprt add_axioms_from_literal(const function_application_exprt &f); string_exprt add_axioms_from_int(const function_application_exprt &f); - string_exprt add_axioms_from_int(const exprt &i, size_t max_size); - string_exprt add_axioms_from_int_hex(const exprt &i); + string_exprt add_axioms_from_int( + const exprt &i, size_t max_size, const refined_string_typet &ref_type); + string_exprt add_axioms_from_int_hex( + const exprt &i, const refined_string_typet &ref_type); string_exprt add_axioms_from_int_hex(const function_application_exprt &f); string_exprt add_axioms_from_long(const function_application_exprt &f); string_exprt add_axioms_from_long(const exprt &i, size_t max_size); string_exprt add_axioms_from_bool(const function_application_exprt &f); - string_exprt add_axioms_from_bool(const exprt &i); + string_exprt add_axioms_from_bool( + const exprt &i, const refined_string_typet &ref_type); string_exprt add_axioms_from_char(const function_application_exprt &f); - string_exprt add_axioms_from_char(const exprt &i); + string_exprt add_axioms_from_char( + const exprt &i, const refined_string_typet &ref_type); string_exprt add_axioms_from_char_array(const function_application_exprt &f); string_exprt add_axioms_from_char_array( const exprt &length, @@ -257,7 +250,8 @@ class string_constraint_generatort // TODO: not working correctly at the moment string_exprt add_axioms_for_value_of(const function_application_exprt &f); - string_exprt add_axioms_for_code_point(const exprt &code_point); + string_exprt add_axioms_for_code_point( + const exprt &code_point, const refined_string_typet &ref_type); string_exprt add_axioms_for_java_char_array(const exprt &char_array); string_exprt add_axioms_for_if(const if_exprt &expr); exprt add_axioms_for_char_literal(const function_application_exprt &f); @@ -287,9 +281,6 @@ class string_constraint_generatort // Tells which language is used. C and Java are supported irep_idt mode; - // Type of strings used in the refinement - refined_string_typet refined_string_type; - // assert that the number of argument is equal to nb and extract them static const function_application_exprt::argumentst &args( const function_application_exprt &expr, size_t nb) @@ -299,7 +290,7 @@ class string_constraint_generatort return args; } - exprt int_of_hex_char(exprt chr) const; + exprt int_of_hex_char(const exprt &chr) const; exprt is_high_surrogate(const exprt &chr) const; exprt is_low_surrogate(const exprt &chr) const; static exprt character_equals_ignore_case( diff --git a/src/solvers/refinement/string_constraint_generator_code_points.cpp b/src/solvers/refinement/string_constraint_generator_code_points.cpp index 807587cbc45..7c035a0d3e4 100644 --- a/src/solvers/refinement/string_constraint_generator_code_points.cpp +++ b/src/solvers/refinement/string_constraint_generator_code_points.cpp @@ -23,9 +23,9 @@ Function: string_constraint_generatort::add_axioms_for_code_point \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_for_code_point( - const exprt &code_point) + const exprt &code_point, const refined_string_typet &ref_type) { - string_exprt res(get_char_type()); + string_exprt res=fresh_string(ref_type); const typet &type=code_point.type(); assert(type.id()==ID_signedbv); @@ -50,7 +50,7 @@ string_exprt string_constraint_generatort::add_axioms_for_code_point( implies_exprt a2(not_exprt(small), res.axiom_for_has_length(2)); axioms.push_back(a2); - typecast_exprt code_point_as_char(code_point, get_char_type()); + typecast_exprt code_point_as_char(code_point, ref_type.get_char_type()); implies_exprt a3(small, equal_exprt(res[0], code_point_as_char)); axioms.push_back(a3); @@ -58,13 +58,13 @@ string_exprt string_constraint_generatort::add_axioms_for_code_point( hexD800, div_exprt(minus_exprt(code_point, hex010000), hex0400)); implies_exprt a4( not_exprt(small), - equal_exprt(res[0], typecast_exprt(first_char, get_char_type()))); + equal_exprt(res[0], typecast_exprt(first_char, ref_type.get_char_type()))); axioms.push_back(a4); plus_exprt second_char(hexDC00, mod_exprt(code_point, hex0400)); implies_exprt a5( not_exprt(small), - equal_exprt(res[1], typecast_exprt(second_char, get_char_type()))); + equal_exprt(res[1], typecast_exprt(second_char, ref_type.get_char_type()))); axioms.push_back(a5); return res; @@ -88,8 +88,8 @@ Function: string_constraint_generatort::is_high_surrogate exprt string_constraint_generatort::is_high_surrogate(const exprt &chr) const { return and_exprt( - binary_relation_exprt(chr, ID_ge, constant_char(0xD800)), - binary_relation_exprt(chr, ID_le, constant_char(0xDBFF))); + binary_relation_exprt(chr, ID_ge, constant_char(0xD800, chr.type())), + binary_relation_exprt(chr, ID_le, constant_char(0xDBFF, chr.type()))); } /*******************************************************************\ @@ -110,8 +110,8 @@ Function: string_constraint_generatort::is_low_surrogate exprt string_constraint_generatort::is_low_surrogate(const exprt &chr) const { return and_exprt( - binary_relation_exprt(chr, ID_ge, constant_char(0xDC00)), - binary_relation_exprt(chr, ID_le, constant_char(0xDFFF))); + binary_relation_exprt(chr, ID_ge, constant_char(0xDC00, chr.type())), + binary_relation_exprt(chr, ID_le, constant_char(0xDFFF, chr.type()))); } /*******************************************************************\ @@ -163,8 +163,8 @@ exprt string_constraint_generatort::add_axioms_for_code_point_at( string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); const exprt &pos=args(f, 2)[1]; - symbol_exprt result=string_exprt::fresh_symbol("char", return_type); - exprt index1=from_integer(1, get_index_type()); + symbol_exprt result=fresh_symbol("char", return_type); + exprt index1=from_integer(1, str.length().type()); const exprt &char1=str[pos]; const exprt &char2=str[plus_exprt(pos, index1)]; exprt char1_as_int=typecast_exprt(char1, return_type); @@ -198,13 +198,13 @@ exprt string_constraint_generatort::add_axioms_for_code_point_before( assert(args.size()==2); typet return_type=f.type(); assert(return_type.id()==ID_signedbv); - symbol_exprt result=string_exprt::fresh_symbol("char", return_type); + symbol_exprt result=fresh_symbol("char", return_type); string_exprt str=add_axioms_for_string_expr(args[0]); const exprt &char1= - str[minus_exprt(args[1], from_integer(2, get_index_type()))]; + str[minus_exprt(args[1], from_integer(2, str.length().type()))]; const exprt &char2= - str[minus_exprt(args[1], from_integer(1, get_index_type()))]; + str[minus_exprt(args[1], from_integer(1, str.length().type()))]; exprt char1_as_int=typecast_exprt(char1, return_type); exprt char2_as_int=typecast_exprt(char2, return_type); @@ -238,10 +238,9 @@ exprt string_constraint_generatort::add_axioms_for_code_point_count( const exprt &begin=args(f, 3)[1]; const exprt &end=args(f, 3)[2]; const typet &return_type=f.type(); - symbol_exprt result= - string_exprt::fresh_symbol("code_point_count", return_type); + symbol_exprt result=fresh_symbol("code_point_count", return_type); minus_exprt length(end, begin); - div_exprt minimum(length, from_integer(2, get_index_type())); + div_exprt minimum(length, from_integer(2, length.type())); axioms.push_back(binary_relation_exprt(result, ID_le, length)); axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); @@ -270,8 +269,7 @@ exprt string_constraint_generatort::add_axioms_for_offset_by_code_point( const exprt &index=args(f, 3)[1]; const exprt &offset=args(f, 3)[2]; const typet &return_type=f.type(); - symbol_exprt result= - string_exprt::fresh_symbol("offset_by_code_point", return_type); + symbol_exprt result=fresh_symbol("offset_by_code_point", return_type); exprt minimum=plus_exprt(index, offset); exprt maximum=plus_exprt(index, plus_exprt(offset, offset)); diff --git a/src/solvers/refinement/string_constraint_generator_comparison.cpp b/src/solvers/refinement/string_constraint_generator_comparison.cpp index 1cec8cf5442..321282ee9c1 100644 --- a/src/solvers/refinement/string_constraint_generator_comparison.cpp +++ b/src/solvers/refinement/string_constraint_generator_comparison.cpp @@ -31,6 +31,7 @@ exprt string_constraint_generatort::add_axioms_for_equals( string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + typet index_type=s1.length().type(); // We want to write: // eq <=> (s1.length=s2.length &&forall i |s1|=|s2| @@ -130,14 +133,15 @@ exprt string_constraint_generatort::add_axioms_for_equals_ignore_case( implies_exprt a1(eq, s1.axiom_for_has_same_length_as(s2)); axioms.push_back(a1); - symbol_exprt qvar=fresh_univ_index("QA_equal_ignore_case"); + symbol_exprt qvar=fresh_univ_index("QA_equal_ignore_case", index_type); exprt constr2=character_equals_ignore_case( s1[qvar], s2[qvar], char_a, char_A, char_Z); string_constraintt a2(qvar, s1.length(), eq, constr2); axioms.push_back(a2); - symbol_exprt witness=fresh_exist_index("witness_unequal_ignore_case"); - exprt zero=from_integer(0, get_index_type()); + symbol_exprt witness=fresh_exist_index( + "witness_unequal_ignore_case", index_type); + exprt zero=from_integer(0, witness.type()); and_exprt bound_witness( binary_relation_exprt(witness, ID_lt, s1.length()), binary_relation_exprt(witness, ID_ge, zero)); @@ -172,12 +176,13 @@ exprt string_constraint_generatort::add_axioms_for_hash_code( { string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); typet return_type=f.type(); + typet index_type=str.length().type(); // initialisation of the missing pool variable std::map::iterator it; for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) if(hash.find(it->second)==hash.end()) - hash[it->second]=string_exprt::fresh_symbol("hash", return_type); + hash[it->second]=fresh_symbol("hash", return_type); // for each string s. either: // c1: hash(str)=hash(s) @@ -187,7 +192,7 @@ exprt string_constraint_generatort::add_axioms_for_hash_code( // WARNING: the specification may be incomplete for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) { - symbol_exprt i=fresh_exist_index("index_hash"); + symbol_exprt i=fresh_exist_index("index_hash", index_type); equal_exprt c1(hash[it->second], hash[str]); not_exprt c2(equal_exprt(it->second.length(), str.length())); and_exprt c3( @@ -220,7 +225,8 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); const typet &return_type=f.type(); - symbol_exprt res=string_exprt::fresh_symbol("compare_to", return_type); + symbol_exprt res=fresh_symbol("compare_to", return_type); + typet index_type=s1.length().type(); // In the lexicographic comparison, x is the first point where the two // strings differ. @@ -241,11 +247,11 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( implies_exprt a1(res_null, s1.axiom_for_has_same_length_as(s2)); axioms.push_back(a1); - symbol_exprt i=fresh_univ_index("QA_compare_to"); + symbol_exprt i=fresh_univ_index("QA_compare_to", index_type); string_constraintt a2(i, s1.length(), res_null, equal_exprt(s1[i], s2[i])); axioms.push_back(a2); - symbol_exprt x=fresh_exist_index("index_compare_to"); + symbol_exprt x=fresh_exist_index("index_compare_to", index_type); equal_exprt ret_char_diff( res, minus_exprt( @@ -300,20 +306,19 @@ symbol_exprt string_constraint_generatort::add_axioms_for_intern( { string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); const typet &return_type=f.type(); + typet index_type=str.length().type(); // initialisation of the missing pool variable std::map::iterator it; for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) if(pool.find(it->second)==pool.end()) - pool[it->second]=string_exprt::fresh_symbol("pool", return_type); + pool[it->second]=fresh_symbol("pool", return_type); // intern(str)=s_0 || s_1 || ... // for each string s. // intern(str)=intern(s) || |str|!=|s| // || (|str|==|s| &&exists i<|s|. s[i]!=str[i]) - // symbol_exprt intern=string_exprt::fresh_symbol("intern",return_type); - exprt disj=false_exprt(); for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) disj=or_exprt( @@ -326,7 +331,7 @@ symbol_exprt string_constraint_generatort::add_axioms_for_intern( for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) if(it->second!=str) { - symbol_exprt i=fresh_exist_index("index_intern"); + symbol_exprt i=fresh_exist_index("index_intern", index_type); axioms.push_back( or_exprt( equal_exprt(pool[it->second], pool[str]), diff --git a/src/solvers/refinement/string_constraint_generator_concat.cpp b/src/solvers/refinement/string_constraint_generator_concat.cpp index c6a1a7e52e7..a7d9c4921c4 100644 --- a/src/solvers/refinement/string_constraint_generator_concat.cpp +++ b/src/solvers/refinement/string_constraint_generator_concat.cpp @@ -25,7 +25,8 @@ Function: string_constraint_generatort::add_axioms_for_concat string_exprt string_constraint_generatort::add_axioms_for_concat( const string_exprt &s1, const string_exprt &s2) { - string_exprt res(get_char_type()); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt res=fresh_string(ref_type); // We add axioms: // a1 : |res|=|s1|+|s2| @@ -40,11 +41,11 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( axioms.push_back(s1.axiom_for_is_shorter_than(res)); axioms.push_back(s2.axiom_for_is_shorter_than(res)); - symbol_exprt idx=fresh_univ_index("QA_index_concat"); + symbol_exprt idx=fresh_univ_index("QA_index_concat", res.length().type()); string_constraintt a4(idx, s1.length(), equal_exprt(s1[idx], res[idx])); axioms.push_back(a4); - symbol_exprt idx2=fresh_univ_index("QA_index_concat2"); + symbol_exprt idx2=fresh_univ_index("QA_index_concat2", res.length().type()); equal_exprt res_eq(s2[idx2], res[plus_exprt(idx2, s1.length())]); string_constraintt a5(idx2, s2.length(), res_eq); axioms.push_back(a5); @@ -93,8 +94,10 @@ Function: string_constraint_generatort::add_axioms_for_concat_int string_exprt string_constraint_generatort::add_axioms_for_concat_int( const function_application_exprt &f) { + const refined_string_typet &ref_type=to_refined_string_type(f.type()); string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_int(args(f, 2)[1], MAX_INTEGER_LENGTH); + string_exprt s2=add_axioms_from_int( + args(f, 2)[1], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); } @@ -114,8 +117,9 @@ Function: string_constraint_generatort::add_axioms_for_long string_exprt string_constraint_generatort::add_axioms_for_concat_long( const function_application_exprt &f) { + const refined_string_typet &ref_type=to_refined_string_type(f.type()); string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_int(args(f, 2)[1], MAX_LONG_LENGTH); + string_exprt s2=add_axioms_from_int(args(f, 2)[1], MAX_LONG_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); } @@ -135,7 +139,8 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_bool( const function_application_exprt &f) { string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_bool(args(f, 2)[1]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_bool(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); } @@ -155,7 +160,8 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_char( const function_application_exprt &f) { string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_char(args(f, 2)[1]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_char(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); } @@ -217,6 +223,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_code_point( const function_application_exprt &f) { string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_code_point(args(f, 2)[1]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_for_code_point(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); } diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index aa3466a2e57..90769747949 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -25,7 +25,7 @@ Function: string_constraint_generatort::extract_java_string \*******************************************************************/ irep_idt string_constraint_generatort::extract_java_string( - const symbol_exprt &s) const + const symbol_exprt &s) { std::string tmp=id2string(s.get_identifier()); std::string prefix("java::java.lang.String.Literal."); @@ -48,9 +48,9 @@ Function: string_constraint_generatort::add_axioms_for_constant \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_for_constant( - irep_idt sval) + irep_idt sval, const refined_string_typet &ref_type) { - string_exprt res(get_char_type()); + string_exprt res=fresh_string(ref_type); std::string c_str=id2string(sval); std::wstring str; @@ -65,13 +65,13 @@ string_exprt string_constraint_generatort::add_axioms_for_constant( for(std::size_t i=0; i str[n]!=c // a5 : forall m, from_index<=n<|str|. !contains => str[m]!=c - exprt minus1=from_integer(-1, get_index_type()); + exprt minus1=from_integer(-1, index_type); and_exprt a1( binary_relation_exprt(index, ID_ge, minus1), binary_relation_exprt(index, ID_lt, str.length())); @@ -51,12 +52,12 @@ exprt string_constraint_generatort::add_axioms_for_index_of( equal_exprt(str[index], c))); axioms.push_back(a3); - symbol_exprt n=fresh_univ_index("QA_index_of"); + symbol_exprt n=fresh_univ_index("QA_index_of", index_type); string_constraintt a4( n, from_index, index, contains, not_exprt(equal_exprt(str[n], c))); axioms.push_back(a4); - symbol_exprt m=fresh_univ_index("QA_index_of"); + symbol_exprt m=fresh_univ_index("QA_index_of", index_type); string_constraintt a5( m, from_index, @@ -86,7 +87,8 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( const string_exprt &substring, const exprt &from_index) { - symbol_exprt offset=fresh_exist_index("index_of"); + const typet &index_type=str.length().type(); + symbol_exprt offset=fresh_exist_index("index_of", index_type); symbol_exprt contains=fresh_boolean("contains_substring"); // We add axioms: @@ -104,10 +106,10 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( implies_exprt a2( not_exprt(contains), - equal_exprt(offset, from_integer(-1, get_index_type()))); + equal_exprt(offset, from_integer(-1, index_type))); axioms.push_back(a2); - symbol_exprt qvar=fresh_univ_index("QA_index_of_string"); + symbol_exprt qvar=fresh_univ_index("QA_index_of_string", index_type); string_constraintt a3( qvar, substring.length(), @@ -123,7 +125,8 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of_string( const string_exprt &substring, const exprt &from_index) { - symbol_exprt offset=fresh_exist_index("index_of"); + const typet &index_type=str.length().type(); + symbol_exprt offset=fresh_exist_index("index_of", index_type); symbol_exprt contains=fresh_boolean("contains_substring"); // We add axioms: @@ -141,10 +144,10 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of_string( implies_exprt a2( not_exprt(contains), - equal_exprt(offset, from_integer(-1, get_index_type()))); + equal_exprt(offset, from_integer(-1, index_type))); axioms.push_back(a2); - symbol_exprt qvar=fresh_univ_index("QA_index_of_string"); + symbol_exprt qvar=fresh_univ_index("QA_index_of_string", index_type); equal_exprt constr3(str[plus_exprt(qvar, offset)], substring[qvar]); string_constraintt a3(qvar, substring.length(), contains, constr3); axioms.push_back(a3); @@ -170,13 +173,14 @@ exprt string_constraint_generatort::add_axioms_for_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - assert(f.type()==get_index_type()); string_exprt str=add_axioms_for_string_expr(args[0]); const exprt &c=args[1]; + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + assert(f.type()==ref_type.get_index_type()); exprt from_index; if(args.size()==2) - from_index=from_integer(0, get_index_type()); + from_index=from_integer(0, ref_type.get_index_type()); else if(args.size()==3) from_index=args[2]; else @@ -189,13 +193,15 @@ exprt string_constraint_generatort::add_axioms_for_index_of( } else return add_axioms_for_index_of( - str, typecast_exprt(c, get_char_type()), from_index); + str, typecast_exprt(c, ref_type.get_char_type()), from_index); } exprt string_constraint_generatort::add_axioms_for_last_index_of( const string_exprt &str, const exprt &c, const exprt &from_index) { - symbol_exprt index=fresh_exist_index("last_index_of"); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &index_type=ref_type.get_index_type(); + symbol_exprt index=fresh_exist_index("last_index_of", index_type); symbol_exprt contains=fresh_boolean("contains_in_last_index_of"); // We add axioms: @@ -205,8 +211,8 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( // a4 : forall n. i+1 <= n < from_index +1 &&contains => s[n]!=c // a5 : forall m. 0 <= m < from_index +1 &&!contains => s[m]!=c - exprt index1=from_integer(1, get_index_type()); - exprt minus1=from_integer(-1, get_index_type()); + exprt index1=from_integer(1, index_type); + exprt minus1=from_integer(-1, index_type); exprt from_index_plus_one=plus_exprt(from_index, index1); and_exprt a1( binary_relation_exprt(index, ID_ge, minus1), @@ -223,7 +229,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( equal_exprt(str[index], c))); axioms.push_back(a3); - symbol_exprt n=fresh_univ_index("QA_last_index_of"); + symbol_exprt n=fresh_univ_index("QA_last_index_of", index_type); string_constraintt a4( n, plus_exprt(index, index1), @@ -232,7 +238,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( not_exprt(equal_exprt(str[n], c))); axioms.push_back(a4); - symbol_exprt m=fresh_univ_index("QA_last_index_of"); + symbol_exprt m=fresh_univ_index("QA_last_index_of", index_type); string_constraintt a5( m, from_index_plus_one, @@ -261,13 +267,14 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - assert(f.type()==get_index_type()); string_exprt str=add_axioms_for_string_expr(args[0]); exprt c=args[1]; + const refined_string_typet &ref_type=to_refined_string_type(str.type()); exprt from_index; + assert(f.type()==ref_type.get_index_type()); if(args.size()==2) - from_index=minus_exprt(str.length(), from_integer(1, get_index_type())); + from_index=minus_exprt(str.length(), from_integer(1, str.length().type())); else if(args.size()==3) from_index=args[2]; else @@ -280,6 +287,5 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( } else return add_axioms_for_last_index_of( - str, typecast_exprt(c, get_char_type()), from_index); + str, typecast_exprt(c, ref_type.get_char_type()), from_index); } - diff --git a/src/solvers/refinement/string_constraint_generator_insert.cpp b/src/solvers/refinement/string_constraint_generator_insert.cpp index 8073ab4cb1f..6a080a5dc6c 100644 --- a/src/solvers/refinement/string_constraint_generator_insert.cpp +++ b/src/solvers/refinement/string_constraint_generator_insert.cpp @@ -24,9 +24,9 @@ Function: string_constraint_generatort::add_axioms_for_insert string_exprt string_constraint_generatort::add_axioms_for_insert( const string_exprt &s1, const string_exprt &s2, const exprt &offset) { - assert(offset.type()==get_index_type()); + assert(offset.type()==s1.length().type()); string_exprt pref=add_axioms_for_substring( - s1, from_integer(0, get_index_type()), offset); + s1, from_integer(0, offset.type()), offset); string_exprt suf=add_axioms_for_substring(s1, offset, s1.length()); string_exprt concat1=add_axioms_for_concat(pref, s2); return add_axioms_for_concat(concat1, suf); @@ -69,8 +69,10 @@ Function: string_constraint_generatort::add_axioms_for_insert_int string_exprt string_constraint_generatort::add_axioms_for_insert_int( const function_application_exprt &f) { + const refined_string_typet &ref_type=to_refined_string_type(f.type()); string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_int(args(f, 3)[2], MAX_INTEGER_LENGTH); + string_exprt s2=add_axioms_from_int( + args(f, 3)[2], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -90,8 +92,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_long string_exprt string_constraint_generatort::add_axioms_for_insert_long( const function_application_exprt &f) { + const refined_string_typet &ref_type=to_refined_string_type(f.type()); string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_int(args(f, 3)[2], MAX_LONG_LENGTH); + string_exprt s2=add_axioms_from_int(args(f, 3)[2], MAX_LONG_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -111,8 +114,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_bool string_exprt string_constraint_generatort::add_axioms_for_insert_bool( const function_application_exprt &f) { + const refined_string_typet &ref_type=to_refined_string_type(f.type()); string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_bool(args(f, 3)[2]); + string_exprt s2=add_axioms_from_bool(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -133,7 +137,8 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_char( const function_application_exprt &f) { string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_char(args(f, 3)[2]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_char(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -208,7 +213,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_char_array( { assert(f.arguments().size()==4); count=f.arguments()[2]; - offset=from_integer(0, get_index_type()); + offset=from_integer(0, count.type()); } string_exprt str=add_axioms_for_string_expr(f.arguments()[0]); diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index e76ae3f1f3d..5a269e5ad3b 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -17,12 +17,15 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include +unsigned string_constraint_generatort::next_symbol_id=1; + /*******************************************************************\ Function: string_constraint_generatort::constant_char - Inputs: integer representing a character, we do not use char type here - because java characters use more than 8 bits. + Inputs: integer representing a character, and a type for characters; + we do not use char type here because in some languages + (for instance java) characters use more than 8 bits. Outputs: constant expression corresponding to the character. @@ -30,29 +33,32 @@ Function: string_constraint_generatort::constant_char \*******************************************************************/ -constant_exprt string_constraint_generatort::constant_char(int i) const +constant_exprt string_constraint_generatort::constant_char( + int i, const typet &char_type) { - return from_integer(i, get_char_type()); + return from_integer(i, char_type); } /*******************************************************************\ -Function: string_constraint_generatort::get_char_type +Function: string_constraint_generator::fresh_symbol + + Inputs: a prefix and a type - Outputs: a type for characters + Outputs: a symbol of type tp whose name starts with + "string_refinement#" followed by prefix - Purpose: returns the type of characters that is adapted to the current mode + Purpose: generate a new symbol expression of the given type with some prefix \*******************************************************************/ -typet string_constraint_generatort::get_char_type() const +symbol_exprt string_constraint_generatort::fresh_symbol( + const irep_idt &prefix, const typet &type) { - if(mode==ID_C) - return char_type(); - else if(mode==ID_java) - return java_char_type(); - else - assert(false); // only C and java modes supported + std::ostringstream buf; + buf << "string_refinement#" << prefix << "#" << (next_symbol_id++); + irep_idt name(buf.str()); + return symbol_exprt(name, type); } /*******************************************************************\ @@ -69,9 +75,9 @@ Function: string_constraint_generatort::fresh_univ_index \*******************************************************************/ symbol_exprt string_constraint_generatort::fresh_univ_index( - const irep_idt &prefix) + const irep_idt &prefix, const typet &type) { - return string_exprt::fresh_symbol(prefix, get_index_type()); + return fresh_symbol(prefix, type); } /*******************************************************************\ @@ -87,9 +93,9 @@ Function: string_constraint_generatort::fresh_exist_index \*******************************************************************/ symbol_exprt string_constraint_generatort::fresh_exist_index( - const irep_idt &prefix) + const irep_idt &prefix, const typet &type) { - symbol_exprt s=string_exprt::fresh_symbol(prefix, get_index_type()); + symbol_exprt s=fresh_symbol(prefix, type); index_symbols.push_back(s); return s; } @@ -109,13 +115,35 @@ Function: string_constraint_generatort::fresh_boolean symbol_exprt string_constraint_generatort::fresh_boolean( const irep_idt &prefix) { - symbol_exprt b=string_exprt::fresh_symbol(prefix, bool_typet()); + symbol_exprt b=fresh_symbol(prefix, bool_typet()); boolean_symbols.push_back(b); return b; } /*******************************************************************\ +Function: string_constraint_generatort::fresh_string + + Inputs: a type for string + + Outputs: a string expression + + Purpose: construct a string expression whose length and content are new + variables + +\*******************************************************************/ + +string_exprt string_constraint_generatort::fresh_string( + const refined_string_typet &type) +{ + symbol_exprt length= + fresh_symbol("string_length", type.get_index_type()); + symbol_exprt content=fresh_symbol("string_content", type.get_content_type()); + return string_exprt(length, content, type); +} + +/*******************************************************************\ + Function: string_constraint_generatort::add_axioms_for_string_expr Inputs: an expression of type string @@ -137,7 +165,6 @@ string_exprt string_constraint_generatort::add_axioms_for_string_expr( { exprt res=add_axioms_for_function_application( to_function_application_expr(unrefined_string)); - assert(res.type()==refined_string_type); s=to_string_expr(res); } else if(unrefined_string.id()==ID_symbol) @@ -158,7 +185,6 @@ string_exprt string_constraint_generatort::add_axioms_for_string_expr( { exprt arg=to_typecast_expr(unrefined_string).op(); exprt res=add_axioms_for_string_expr(arg); - assert(res.type()==refined_string_typet(get_char_type())); s=to_string_expr(res); } else @@ -169,7 +195,7 @@ string_exprt string_constraint_generatort::add_axioms_for_string_expr( } axioms.push_back( - s.axiom_for_is_longer_than(from_integer(0, get_index_type()))); + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); return s; } @@ -188,22 +214,24 @@ Function: string_constraint_generatort::add_axioms_for_if string_exprt string_constraint_generatort::add_axioms_for_if( const if_exprt &expr) { - string_exprt res(get_char_type()); assert( refined_string_typet::is_unrefined_string_type(expr.true_case().type())); string_exprt t=add_axioms_for_string_expr(expr.true_case()); assert( refined_string_typet::is_unrefined_string_type(expr.false_case().type())); string_exprt f=add_axioms_for_string_expr(expr.false_case()); + const refined_string_typet &ref_type=to_refined_string_type(t.type()); + const typet &index_type=ref_type.get_index_type(); + string_exprt res=fresh_string(ref_type); axioms.push_back( implies_exprt(expr.cond(), res.axiom_for_has_same_length_as(t))); - symbol_exprt qvar=fresh_univ_index("QA_string_if_true"); + symbol_exprt qvar=fresh_univ_index("QA_string_if_true", index_type); equal_exprt qequal(res[qvar], t[qvar]); axioms.push_back(string_constraintt(qvar, t.length(), expr.cond(), qequal)); axioms.push_back( implies_exprt(not_exprt(expr.cond()), res.axiom_for_has_same_length_as(f))); - symbol_exprt qvar2=fresh_univ_index("QA_string_if_false"); + symbol_exprt qvar2=fresh_univ_index("QA_string_if_false", index_type); equal_exprt qequal2(res[qvar2], f[qvar2]); string_constraintt sc2(qvar2, f.length(), not_exprt(expr.cond()), qequal2); axioms.push_back(sc2); @@ -229,8 +257,9 @@ string_exprt string_constraint_generatort::find_or_add_string_of_symbol( const symbol_exprt &sym) { irep_idt id=sym.get_identifier(); - auto entry=symbol_to_string.insert( - std::make_pair(id, string_exprt(get_char_type()))); + const refined_string_typet &ref_type=to_refined_string_type(sym.type()); + string_exprt str=fresh_string(ref_type); + auto entry=symbol_to_string.insert(std::make_pair(id, str)); return entry.first->second; } @@ -376,11 +405,10 @@ exprt string_constraint_generatort::add_axioms_for_function_application( return add_axioms_for_delete_char_at(expr); else if(id==ID_cprover_string_replace_func) return add_axioms_for_replace(expr); - else if(id==ID_cprover_string_data_func) - return add_axioms_for_data(expr); else { - std::string msg("string_exprt::function_application: unknown symbol :"); + std::string msg( + "string_constraint_generator::function_application: unknown symbol :"); msg+=id2string(id); throw msg; } @@ -403,7 +431,8 @@ string_exprt string_constraint_generatort::add_axioms_for_copy( const function_application_exprt &f) { string_exprt s1=add_axioms_for_string_expr(args(f, 1)[0]); - string_exprt res(get_char_type()); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt res=fresh_string(ref_type); // We add axioms: // a1 : |res|=|s1| @@ -411,7 +440,7 @@ string_exprt string_constraint_generatort::add_axioms_for_copy( axioms.push_back(res.axiom_for_has_same_length_as(s1)); - symbol_exprt idx=fresh_univ_index("QA_index_copy"); + symbol_exprt idx=fresh_univ_index("QA_index_copy", ref_type.get_index_type()); string_constraintt a2(idx, s1.length(), equal_exprt(s1[idx], res[idx])); axioms.push_back(a2); return res; @@ -432,7 +461,8 @@ Function: string_constraint_generatort::add_axioms_for_java_char_array string_exprt string_constraint_generatort::add_axioms_for_java_char_array( const exprt &char_array) { - string_exprt res(get_char_type()); + string_exprt res=fresh_string( + refined_string_typet(java_int_type(), java_char_type())); exprt arr=to_address_of_expr(char_array).object(); exprt len=member_exprt(arr, "length", res.length().type()); exprt cont=member_exprt(arr, "data", res.content().type()); @@ -462,55 +492,6 @@ exprt string_constraint_generatort::add_axioms_for_length( /*******************************************************************\ -Function: string_constraint_generatort::add_axioms_for_data - - Inputs: function application with three arguments: one string, a java - Array object and the corresponding data field - - Outputs: an expression of type void - - Purpose: add axioms stating that the content of the string argument is - equal to the content of the array argument - -\*******************************************************************/ - -exprt string_constraint_generatort::add_axioms_for_data( - const function_application_exprt &f) -{ - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); - const exprt &tab_data=args(f, 3)[1]; - const exprt &data=args(f, 3)[2]; - symbol_exprt qvar=fresh_univ_index("QA_string_data"); - - // translating data[qvar] to the correct expression - // which is (signed int)byte_extract_little_endian - // (data, (2l*qvar) + POINTER_OFFSET(byte_extract_little_endian - // (tab.data, 0l, unsigned short int *)), unsigned short int) - mult_exprt qvar2( - from_integer(2, java_long_type()), - typecast_exprt(qvar, java_long_type())); - byte_extract_exprt extract( - ID_byte_extract_little_endian, - tab_data, - from_integer(0, java_long_type()), - pointer_typet(java_char_type())); - plus_exprt arg2(qvar2, pointer_offset(extract)); - - byte_extract_exprt extract2( - ID_byte_extract_little_endian, data, arg2, java_char_type()); - exprt char_in_tab=typecast_exprt(extract2, get_char_type()); - - string_constraintt eq( - qvar, str.length(), equal_exprt(str[qvar], char_in_tab)); - axioms.push_back(eq); - - exprt void_expr; - void_expr.type()=void_typet(); - return void_expr; -} - -/*******************************************************************\ - Function: string_constraint_generatort::add_axioms_from_char_array Inputs: a length expression, an array expression, a offset index, and a @@ -530,13 +511,16 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( const exprt &offset, const exprt &count) { - string_exprt str(get_char_type()); + const typet &char_type=to_array_type(data.type()).subtype(); + const typet &index_type=length.type(); + refined_string_typet ref_type(index_type, char_type); + string_exprt str=fresh_string(ref_type); // We add axioms: // a1 : forall q < count. str[q] = data[q+offset] // a2 : |str| = count - symbol_exprt qvar=fresh_univ_index("QA_string_of_char_array"); + symbol_exprt qvar=fresh_univ_index("QA_string_of_char_array", index_type); exprt char_in_tab=data; assert(char_in_tab.id()==ID_index); char_in_tab.op1()=plus_exprt(qvar, offset); @@ -576,7 +560,7 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( { assert(f.arguments().size()==2); count=f.arguments()[0]; - offset=from_integer(0, get_index_type()); + offset=from_integer(0, count.type()); } const exprt &tab_length=f.arguments()[0]; const exprt &data=f.arguments()[1]; @@ -598,7 +582,7 @@ Function: string_constraint_generatort::add_axioms_for_is_positive_index exprt string_constraint_generatort::axiom_for_is_positive_index(const exprt &x) { return binary_relation_exprt( - x, ID_ge, from_integer(0, get_index_type())); + x, ID_ge, from_integer(0, x.type())); } /*******************************************************************\ @@ -630,7 +614,7 @@ exprt string_constraint_generatort::add_axioms_for_char_literal( const string_constantt s=to_string_constant(arg.op0().op0().op0()); irep_idt sval=s.get_value(); assert(sval.size()==1); - return from_integer(unsigned(sval[0]), get_char_type()); + return from_integer(unsigned(sval[0]), arg.type()); } else { @@ -655,7 +639,8 @@ exprt string_constraint_generatort::add_axioms_for_char_at( const function_application_exprt &f) { string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); - symbol_exprt char_sym=string_exprt::fresh_symbol("char", get_char_type()); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + symbol_exprt char_sym=fresh_symbol("char", ref_type.get_char_type()); axioms.push_back(equal_exprt(char_sym, str[args(f, 2)[1]])); return char_sym; } diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index 6df308c533e..8b1c7bca9d4 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -27,6 +27,7 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( const string_exprt &prefix, const string_exprt &str, const exprt &offset) { symbol_exprt isprefix=fresh_boolean("isprefix"); + const typet &index_type=str.length().type(); // We add axioms: // a1 : isprefix => |str| >= |prefix|+offset @@ -41,7 +42,7 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))); axioms.push_back(a1); - symbol_exprt qvar=fresh_univ_index("QA_isprefix"); + symbol_exprt qvar=fresh_univ_index("QA_isprefix", index_type); string_constraintt a2( qvar, prefix.length(), @@ -49,7 +50,7 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar])); axioms.push_back(a2); - symbol_exprt witness=fresh_exist_index("witness_not_isprefix"); + symbol_exprt witness=fresh_exist_index("witness_not_isprefix", index_type); and_exprt witness_diff( axiom_for_is_positive_index(witness), and_exprt( @@ -91,7 +92,7 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); exprt offset; if(args.size()==2) - offset=from_integer(0, get_index_type()); + offset=from_integer(0, s0.length().type()); else if(args.size()==3) offset=args[2]; return typecast_exprt(add_axioms_for_is_prefix(s0, s1, offset), f.type()); @@ -151,6 +152,7 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( typecast_exprt tc_issuffix(issuffix, f.type()); string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + const typet &index_type=s0.length().type(); // We add axioms: // a1 : issufix => s0.length >= s1.length @@ -164,14 +166,14 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( implies_exprt a1(issuffix, s1.axiom_for_is_longer_than(s0)); axioms.push_back(a1); - symbol_exprt qvar=fresh_univ_index("QA_suffix"); + symbol_exprt qvar=fresh_univ_index("QA_suffix", index_type); exprt qvar_shifted=plus_exprt( qvar, minus_exprt(s1.length(), s0.length())); string_constraintt a2( qvar, s0.length(), issuffix, equal_exprt(s0[qvar], s1[qvar_shifted])); axioms.push_back(a2); - symbol_exprt witness=fresh_exist_index("witness_not_suffix"); + symbol_exprt witness=fresh_exist_index("witness_not_suffix", index_type); exprt shifted=plus_exprt( witness, minus_exprt(s1.length(), s0.length())); or_exprt constr3( @@ -207,6 +209,7 @@ exprt string_constraint_generatort::add_axioms_for_contains( typecast_exprt tc_contains(contains, f.type()); string_exprt s0=add_axioms_for_string_expr(args(f, 2)[0]); string_exprt s1=add_axioms_for_string_expr(args(f, 2)[1]); + const typet &index_type=s0.type(); // We add axioms: // a1 : contains => s0.length >= s1.length @@ -219,14 +222,14 @@ exprt string_constraint_generatort::add_axioms_for_contains( implies_exprt a1(contains, s0.axiom_for_is_longer_than(s1)); axioms.push_back(a1); - symbol_exprt startpos=fresh_exist_index("startpos_contains"); + symbol_exprt startpos=fresh_exist_index("startpos_contains", index_type); minus_exprt length_diff(s0.length(), s1.length()); and_exprt a2( axiom_for_is_positive_index(startpos), binary_relation_exprt(startpos, ID_le, length_diff)); axioms.push_back(a2); - symbol_exprt qvar=fresh_univ_index("QA_contains"); + symbol_exprt qvar=fresh_univ_index("QA_contains", index_type); exprt qvar_shifted=plus_exprt(qvar, startpos); string_constraintt a3( qvar, s1.length(), contains, equal_exprt(s1[qvar], s0[qvar_shifted])); @@ -236,10 +239,10 @@ exprt string_constraint_generatort::add_axioms_for_contains( // forall startpos <= |s0|-|s1|. (!contains &&|s0| >= |s1| ) // ==> exists witness<|s1|. s1[witness]!=s0[startpos+witness] string_not_contains_constraintt a4( - from_integer(0, get_index_type()), - plus_exprt(from_integer(1, get_index_type()), length_diff), + from_integer(0, index_type), + plus_exprt(from_integer(1, index_type), length_diff), and_exprt(not_exprt(contains), s0.axiom_for_is_longer_than(s1)), - from_integer(0, get_index_type()), s1.length(), s0, s1); + from_integer(0, index_type), s1.length(), s0, s1); axioms.push_back(a4); return tc_contains; diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index c361c9d4e48..83735e0d5b5 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -31,7 +31,8 @@ string_exprt string_constraint_generatort::add_axioms_for_set_length( { string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); exprt k=args(f, 2)[1]; - string_exprt res(get_char_type()); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt res=fresh_string(ref_type); // We add axioms: // a1 : |res|=k @@ -39,7 +40,8 @@ string_exprt string_constraint_generatort::add_axioms_for_set_length( axioms.push_back(res.axiom_for_has_length(k)); - symbol_exprt idx=fresh_univ_index("QA_index_set_length"); + symbol_exprt idx=fresh_univ_index( + "QA_index_set_length", ref_type.get_index_type()); string_constraintt a2( idx, k, and_exprt( implies_exprt( @@ -47,7 +49,7 @@ string_exprt string_constraint_generatort::add_axioms_for_set_length( equal_exprt(s1[idx], res[idx])), implies_exprt( s1.axiom_for_is_shorter_than(idx), - equal_exprt(s1[idx], constant_char(0))))); + equal_exprt(s1[idx], constant_char(0, ref_type.get_char_type()))))); axioms.push_back(a2); return res; @@ -106,10 +108,12 @@ Function: string_constraint_generatort::add_axioms_for_substring string_exprt string_constraint_generatort::add_axioms_for_substring( const string_exprt &str, const exprt &start, const exprt &end) { - symbol_exprt idx=fresh_exist_index("index_substring"); - assert(start.type()==get_index_type()); - assert(end.type()==get_index_type()); - string_exprt res(get_char_type()); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &index_type=ref_type.get_index_type(); + symbol_exprt idx=fresh_exist_index("index_substring", index_type); + assert(start.type()==index_type); + assert(end.type()==index_type); + string_exprt res=fresh_string(ref_type); // We add axioms: // a1 : start < end => |res| = end - start @@ -122,8 +126,7 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( res.axiom_for_has_length(minus_exprt(end, start))); axioms.push_back(a1); - exprt is_empty=res.axiom_for_has_length( - from_integer(0, get_index_type())); + exprt is_empty=res.axiom_for_has_length(from_integer(0, index_type)); implies_exprt a2(binary_relation_exprt(start, ID_ge, end), is_empty); axioms.push_back(a2); @@ -152,9 +155,11 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( const function_application_exprt &expr) { string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); - string_exprt res(get_char_type()); - symbol_exprt idx=fresh_exist_index("index_trim"); - exprt space_char=constant_char(' '); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &index_type=ref_type.get_index_type(); + string_exprt res=fresh_string(ref_type); + symbol_exprt idx=fresh_exist_index("index_trim", index_type); + exprt space_char=constant_char(' ', ref_type.get_char_type()); // We add axioms: // a1 : m + |s1| <= |str| @@ -171,25 +176,25 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( exprt a1=str.axiom_for_is_longer_than(plus_exprt(idx, res.length())); axioms.push_back(a1); - binary_relation_exprt a2(idx, ID_ge, from_integer(0, get_index_type())); + binary_relation_exprt a2(idx, ID_ge, from_integer(0, index_type)); axioms.push_back(a2); exprt a3=str.axiom_for_is_longer_than(idx); axioms.push_back(a3); exprt a4=res.axiom_for_is_longer_than( - from_integer(0, get_index_type())); + from_integer(0, index_type)); axioms.push_back(a4); exprt a5=res.axiom_for_is_shorter_than(str); axioms.push_back(a5); - symbol_exprt n=fresh_univ_index("QA_index_trim"); + symbol_exprt n=fresh_univ_index("QA_index_trim", index_type); binary_relation_exprt non_print(str[n], ID_le, space_char); string_constraintt a6(n, idx, non_print); axioms.push_back(a6); - symbol_exprt n2=fresh_univ_index("QA_index_trim2"); + symbol_exprt n2=fresh_univ_index("QA_index_trim2", index_type); minus_exprt bound(str.length(), plus_exprt(idx, res.length())); binary_relation_exprt eqn2( str[plus_exprt(idx, plus_exprt(res.length(), n2))], @@ -199,13 +204,13 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( string_constraintt a7(n2, bound, eqn2); axioms.push_back(a7); - symbol_exprt n3=fresh_univ_index("QA_index_trim3"); + symbol_exprt n3=fresh_univ_index("QA_index_trim3", index_type); equal_exprt eqn3(res[n3], str[plus_exprt(n3, idx)]); string_constraintt a8(n3, res.length(), eqn3); axioms.push_back(a8); minus_exprt index_before( - plus_exprt(idx, res.length()), from_integer(1, get_index_type())); + plus_exprt(idx, res.length()), from_integer(1, index_type)); binary_relation_exprt no_space_before(str[index_before], ID_gt, space_char); or_exprt a9( equal_exprt(idx, str.length()), @@ -232,11 +237,14 @@ string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( const function_application_exprt &expr) { string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); - string_exprt res(get_char_type()); - exprt char_a=constant_char('a'); - exprt char_A=constant_char('A'); - exprt char_z=constant_char('z'); - exprt char_Z=constant_char('Z'); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &char_type=ref_type.get_char_type(); + const typet &index_type=ref_type.get_index_type(); + string_exprt res=fresh_string(ref_type); + exprt char_a=constant_char('a', char_type); + exprt char_A=constant_char('A', char_type); + exprt char_z=constant_char('z', char_type); + exprt char_Z=constant_char('Z', char_type); // TODO: add support for locales using case mapping information // from the UnicodeData file. @@ -251,7 +259,7 @@ string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( exprt a1=res.axiom_for_has_same_length_as(str); axioms.push_back(a1); - symbol_exprt idx=fresh_univ_index("QA_lower_case"); + symbol_exprt idx=fresh_univ_index("QA_lower_case", index_type); exprt is_upper_case=and_exprt( binary_relation_exprt(char_A, ID_le, str[idx]), binary_relation_exprt(str[idx], ID_le, char_Z)); @@ -282,11 +290,14 @@ string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( const function_application_exprt &expr) { string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); - string_exprt res(get_char_type()); - exprt char_a=constant_char('a'); - exprt char_A=constant_char('A'); - exprt char_z=constant_char('z'); - exprt char_Z=constant_char('Z'); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &char_type=ref_type.get_char_type(); + const typet &index_type=ref_type.get_index_type(); + string_exprt res=fresh_string(ref_type); + exprt char_a=constant_char('a', char_type); + exprt char_A=constant_char('A', char_type); + exprt char_z=constant_char('z', char_type); + exprt char_Z=constant_char('Z', char_type); // TODO: add support for locales using case mapping information // from the UnicodeData file. @@ -299,7 +310,7 @@ string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( exprt a1=res.axiom_for_has_same_length_as(str); axioms.push_back(a1); - symbol_exprt idx=fresh_univ_index("QA_upper_case"); + symbol_exprt idx=fresh_univ_index("QA_upper_case", index_type); exprt is_lower_case=and_exprt( binary_relation_exprt(char_a, ID_le, str[idx]), binary_relation_exprt(str[idx], ID_le, char_z)); @@ -334,8 +345,9 @@ Function: string_constraint_generatort::add_axioms_for_char_set string_exprt string_constraint_generatort::add_axioms_for_char_set( const function_application_exprt &f) { - string_exprt res(get_char_type()); string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + string_exprt res=fresh_string(ref_type); with_exprt sarrnew(str.content(), args(f, 3)[1], args(f, 3)[2]); // We add axiom: @@ -367,9 +379,10 @@ string_exprt string_constraint_generatort::add_axioms_for_replace( const function_application_exprt &f) { string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); const exprt &old_char=args(f, 3)[1]; const exprt &new_char=args(f, 3)[2]; - string_exprt res(get_char_type()); + string_exprt res=fresh_string(ref_type); // We add axioms: // a1 : |res| = |str| @@ -379,7 +392,7 @@ string_exprt string_constraint_generatort::add_axioms_for_replace( axioms.push_back(res.axiom_for_has_same_length_as(str)); - symbol_exprt qvar=fresh_univ_index("QA_replace"); + symbol_exprt qvar=fresh_univ_index("QA_replace", ref_type.get_index_type()); implies_exprt case1( equal_exprt(str[qvar], old_char), equal_exprt(res[qvar], new_char)); @@ -409,7 +422,7 @@ string_exprt string_constraint_generatort::add_axioms_for_delete_char_at( const function_application_exprt &f) { string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); - exprt index_one=from_integer(1, get_index_type()); + exprt index_one=from_integer(1, str.length().type()); return add_axioms_for_delete( str, args(f, 2)[1], plus_exprt(args(f, 2)[1], index_one)); } @@ -431,10 +444,10 @@ Function: string_constraint_generatort::add_axioms_for_delete string_exprt string_constraint_generatort::add_axioms_for_delete( const string_exprt &str, const exprt &start, const exprt &end) { - assert(start.type()==get_index_type()); - assert(end.type()==get_index_type()); + assert(start.type()==str.length().type()); + assert(end.type()==str.length().type()); string_exprt str1=add_axioms_for_substring( - str, from_integer(0, get_index_type()), start); + str, from_integer(0, str.length().type()), start); string_exprt str2=add_axioms_for_substring(str, end, str.length()); return add_axioms_for_concat(str1, str2); } diff --git a/src/solvers/refinement/string_constraint_generator_valueof.cpp b/src/solvers/refinement/string_constraint_generator_valueof.cpp index 3712b297386..d1030e5e085 100644 --- a/src/solvers/refinement/string_constraint_generator_valueof.cpp +++ b/src/solvers/refinement/string_constraint_generator_valueof.cpp @@ -25,7 +25,8 @@ Function: string_constraint_generatort::add_axioms_from_int string_exprt string_constraint_generatort::add_axioms_from_int( const function_application_exprt &expr) { - return add_axioms_from_int(args(expr, 1)[0], MAX_INTEGER_LENGTH); + const refined_string_typet &ref_type=to_refined_string_type(expr.type()); + return add_axioms_from_int(args(expr, 1)[0], MAX_INTEGER_LENGTH, ref_type); } /*******************************************************************\ @@ -43,7 +44,8 @@ Function: string_constraint_generatort::add_axioms_from_long string_exprt string_constraint_generatort::add_axioms_from_long( const function_application_exprt &expr) { - return add_axioms_from_int(args(expr, 1)[0], MAX_LONG_LENGTH); + const refined_string_typet &ref_type=to_refined_string_type(expr.type()); + return add_axioms_from_int(args(expr, 1)[0], MAX_LONG_LENGTH, ref_type); } /*******************************************************************\ @@ -99,14 +101,16 @@ Function: string_constraint_generatort::add_axioms_from_float string_exprt string_constraint_generatort::add_axioms_from_float( const exprt &f, bool double_precision) { - typet char_type=get_char_type(); - string_exprt res(char_type); - const exprt &index24=from_integer(24, get_index_type()); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + const typet &index_type=ref_type.get_index_type(); + const typet &char_type=ref_type.get_char_type(); + string_exprt res=fresh_string(ref_type); + const exprt &index24=from_integer(24, ref_type.get_index_type()); axioms.push_back(res.axiom_for_is_shorter_than(index24)); - string_exprt magnitude(char_type); - string_exprt sign_string(char_type); - string_exprt nan_string=add_axioms_for_constant("NaN"); + string_exprt magnitude=fresh_string(ref_type); + string_exprt sign_string=fresh_string(ref_type); + string_exprt nan_string=add_axioms_for_constant("NaN", ref_type); // We add the axioms: // a1 : If the argument is NaN, the result length is that of "NaN". @@ -127,7 +131,7 @@ string_exprt string_constraint_generatort::add_axioms_from_float( implies_exprt a1(isnan, magnitude.axiom_for_has_same_length_as(nan_string)); axioms.push_back(a1); - symbol_exprt qvar=fresh_univ_index("QA_equal_nan"); + symbol_exprt qvar=fresh_univ_index("QA_equal_nan", index_type); string_constraintt a2( qvar, nan_string.length(), @@ -150,20 +154,21 @@ string_exprt string_constraint_generatort::add_axioms_from_float( implies_exprt a4(not_exprt(isneg), sign_string.axiom_for_has_length(0)); axioms.push_back(a4); - implies_exprt a5(isneg, equal_exprt(sign_string[0], constant_char('-'))); + implies_exprt a5( + isneg, equal_exprt(sign_string[0], constant_char('-', char_type))); axioms.push_back(a5); // If m is infinity, it is represented by the characters "Infinity"; // thus, positive infinity produces the result "Infinity" and negative // infinity produces the result "-Infinity". - string_exprt infinity_string=add_axioms_for_constant("Infinity"); + string_exprt infinity_string=add_axioms_for_constant("Infinity", ref_type); exprt isinf=float_bvt().isinf(f, fspec); implies_exprt a6( isinf, magnitude.axiom_for_has_same_length_as(infinity_string)); axioms.push_back(a6); - symbol_exprt qvar_inf=fresh_univ_index("QA_equal_infinity"); + symbol_exprt qvar_inf=fresh_univ_index("QA_equal_infinity", index_type); equal_exprt meq(magnitude[qvar_inf], infinity_string[qvar_inf]); string_constraintt a7(qvar_inf, infinity_string.length(), isinf, meq); axioms.push_back(a7); @@ -171,12 +176,12 @@ string_exprt string_constraint_generatort::add_axioms_from_float( // If m is zero, it is represented by the characters "0.0"; thus, negative // zero produces the result "-0.0" and positive zero produces "0.0". - string_exprt zero_string=add_axioms_for_constant("0.0"); + string_exprt zero_string=add_axioms_for_constant("0.0", ref_type); exprt iszero=float_bvt().is_zero(f, fspec); implies_exprt a8(iszero, magnitude.axiom_for_has_same_length_as(zero_string)); axioms.push_back(a8); - symbol_exprt qvar_zero=fresh_univ_index("QA_equal_zero"); + symbol_exprt qvar_zero=fresh_univ_index("QA_equal_zero", index_type); equal_exprt eq_zero(magnitude[qvar_zero], zero_string[qvar_zero]); string_constraintt a9(qvar_zero, zero_string.length(), iszero, eq_zero); axioms.push_back(a9); @@ -200,7 +205,8 @@ Function: string_constraint_generatort::add_axioms_from_bool string_exprt string_constraint_generatort::add_axioms_from_bool( const function_application_exprt &f) { - return add_axioms_from_bool(args(f, 1)[0]); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_bool(args(f, 1)[0], ref_type); } @@ -218,17 +224,17 @@ Function: string_constraint_generatort::add_axioms_from_bool \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_from_bool( - const exprt &b) + const exprt &b, const refined_string_typet &ref_type) { - typet char_type=get_char_type(); - string_exprt res(char_type); + string_exprt res=fresh_string(ref_type); + const typet &index_type=ref_type.get_index_type(); assert(b.type()==bool_typet() || b.type().id()==ID_c_bool); typecast_exprt eq(b, bool_typet()); - string_exprt true_string=add_axioms_for_constant("true"); - string_exprt false_string=add_axioms_for_constant("false"); + string_exprt true_string=add_axioms_for_constant("true", ref_type); + string_exprt false_string=add_axioms_for_constant("false", ref_type); // We add axioms: // a1 : eq => res = |"true"| @@ -238,7 +244,7 @@ string_exprt string_constraint_generatort::add_axioms_from_bool( implies_exprt a1(eq, res.axiom_for_has_same_length_as(true_string)); axioms.push_back(a1); - symbol_exprt qvar=fresh_univ_index("QA_equal_true"); + symbol_exprt qvar=fresh_univ_index("QA_equal_true", index_type); string_constraintt a2( qvar, true_string.length(), @@ -250,7 +256,7 @@ string_exprt string_constraint_generatort::add_axioms_from_bool( not_exprt(eq), res.axiom_for_has_same_length_as(false_string)); axioms.push_back(a3); - symbol_exprt qvar1=fresh_univ_index("QA_equal_false"); + symbol_exprt qvar1=fresh_univ_index("QA_equal_false", index_type); string_constraintt a4( qvar, false_string.length(), @@ -294,17 +300,19 @@ Function: string_constraint_generatort::add_axioms_from_int \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_from_int( - const exprt &i, size_t max_size) + const exprt &i, size_t max_size, const refined_string_typet &ref_type) { - string_exprt res(get_char_type()); + string_exprt res=fresh_string(ref_type); const typet &type=i.type(); assert(type.id()==ID_signedbv); exprt ten=from_integer(10, type); - exprt zero_char=constant_char('0'); - exprt nine_char=constant_char('9'); - exprt minus_char=constant_char('-'); - exprt zero=from_integer(0, get_index_type()); - exprt max=from_integer(max_size, get_index_type()); + const typet &char_type=ref_type.get_char_type(); + const typet &index_type=ref_type.get_index_type(); + exprt zero_char=constant_char('0', char_type); + exprt nine_char=constant_char('9', char_type); + exprt minus_char=constant_char('-', char_type); + exprt zero=from_integer(0, index_type); + exprt max=from_integer(max_size, index_type); // We add axioms: // a1 : 0 <|res|<=max_size @@ -371,7 +379,7 @@ string_exprt string_constraint_generatort::add_axioms_from_int( not_exprt(r0_zero)); axioms.push_back(a5); - exprt one=from_integer(1, get_index_type()); + exprt one=from_integer(1, index_type); equal_exprt r1_zero(res[one], zero_char); implies_exprt a6( and_exprt(premise, starts_with_minus), @@ -404,14 +412,14 @@ Function: string_constraint_generatort::int_of_hex_char \*******************************************************************/ -exprt string_constraint_generatort::int_of_hex_char(exprt chr) const +exprt string_constraint_generatort::int_of_hex_char(const exprt &chr) const { - exprt zero_char=constant_char('0'); - exprt nine_char=constant_char('9'); - exprt a_char=constant_char('a'); + exprt zero_char=constant_char('0', chr.type()); + exprt nine_char=constant_char('9', chr.type()); + exprt a_char=constant_char('a', chr.type()); return if_exprt( binary_relation_exprt(chr, ID_gt, nine_char), - plus_exprt(constant_char(10), minus_exprt(chr, a_char)), + plus_exprt(constant_char(10, chr.type()), minus_exprt(chr, a_char)), minus_exprt(chr, zero_char)); } @@ -429,17 +437,19 @@ Function: string_constraint_generatort::add_axioms_from_int_hex \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_from_int_hex( - const exprt &i) + const exprt &i, const refined_string_typet &ref_type) { - string_exprt res(get_char_type()); + string_exprt res=fresh_string(ref_type); const typet &type=i.type(); assert(type.id()==ID_signedbv); - exprt sixteen=from_integer(16, type); - exprt minus_char=constant_char('-'); - exprt zero_char=constant_char('0'); - exprt nine_char=constant_char('9'); - exprt a_char=constant_char('a'); - exprt f_char=constant_char('f'); + const typet &index_type=ref_type.get_index_type(); + const typet &char_type=ref_type.get_char_type(); + exprt sixteen=from_integer(16, index_type); + exprt minus_char=constant_char('-', char_type); + exprt zero_char=constant_char('0', char_type); + exprt nine_char=constant_char('9', char_type); + exprt a_char=constant_char('a', char_type); + exprt f_char=constant_char('f', char_type); size_t max_size=8; axioms.push_back( @@ -494,7 +504,8 @@ Function: string_constraint_generatort::add_axioms_for_int_hex string_exprt string_constraint_generatort::add_axioms_from_int_hex( const function_application_exprt &f) { - return add_axioms_from_int_hex(args(f, 1)[0]); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_int_hex(args(f, 1)[0], ref_type); } /*******************************************************************\ @@ -512,7 +523,8 @@ Function: string_constraint_generatort::add_axioms_from_char string_exprt string_constraint_generatort::add_axioms_from_char( const function_application_exprt &f) { - return add_axioms_from_char(args(f, 1)[0]); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_char(args(f, 1)[0], ref_type); } /*******************************************************************\ @@ -523,14 +535,15 @@ Function: string_constraint_generatort::add_axioms_from_char Outputs: a new string expression - Purpose: add axioms stating that the returned string as length 1 and + Purpose: add axioms stating that the returned string has length 1 and the character it contains correspond to the input expression \*******************************************************************/ -string_exprt string_constraint_generatort::add_axioms_from_char(const exprt &c) +string_exprt string_constraint_generatort::add_axioms_from_char( + const exprt &c, const refined_string_typet &ref_type) { - string_exprt res(get_char_type()); + string_exprt res=fresh_string(ref_type); and_exprt lemma(equal_exprt(res[0], c), res.axiom_for_has_length(1)); axioms.push_back(lemma); return res; @@ -555,7 +568,8 @@ string_exprt string_constraint_generatort::add_axioms_for_value_of( const function_application_exprt::argumentst &args=f.arguments(); if(args.size()==3) { - string_exprt res(get_char_type()); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + string_exprt res=fresh_string(ref_type); exprt char_array=args[0]; exprt offset=args[1]; exprt count=args[2]; @@ -567,7 +581,8 @@ string_exprt string_constraint_generatort::add_axioms_for_value_of( string_exprt str=add_axioms_for_java_char_array(char_array); axioms.push_back(res.axiom_for_has_length(count)); - symbol_exprt idx=fresh_univ_index("QA_index_value_of"); + symbol_exprt idx=fresh_univ_index( + "QA_index_value_of", ref_type.get_index_type()); equal_exprt eq(str[plus_exprt(idx, offset)], res[idx]); string_constraintt a2(idx, count, eq); axioms.push_back(a2); @@ -596,12 +611,13 @@ exprt string_constraint_generatort::add_axioms_for_parse_int( const function_application_exprt &f) { string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); - typet type=f.type(); - symbol_exprt i=string_exprt::fresh_symbol("parsed_int", type); - - exprt zero_char=constant_char('0'); - exprt minus_char=constant_char('-'); - exprt plus_char=constant_char('+'); + const typet &type=f.type(); + symbol_exprt i=fresh_symbol("parsed_int", type); + const refined_string_typet &ref_type=to_refined_string_type(str.type()); + const typet &char_type=ref_type.get_char_type(); + exprt zero_char=constant_char('0', char_type); + exprt minus_char=constant_char('-', char_type); + exprt plus_char=constant_char('+', char_type); assert(type.id()==ID_signedbv); exprt ten=from_integer(10, type); diff --git a/src/solvers/refinement/string_expr.cpp b/src/solvers/refinement/string_expr.cpp deleted file mode 100644 index 0c284621d8d..00000000000 --- a/src/solvers/refinement/string_expr.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/*******************************************************************\ - -Module: String expressions for the string solver - -Author: Romain Brenguier, romain.brenguier@diffblue.com - -\*******************************************************************/ - -#include - -unsigned string_exprt::next_symbol_id=1; - -/*******************************************************************\ - -Function: string_exprt::fresh_symbol - - Inputs: a prefix and a type - - Outputs: a symbol of type tp whose name starts with - "string_refinement#" followed by prefix - - Purpose: generate a new symbol expression of the given type with some prefix - -\*******************************************************************/ - -symbol_exprt string_exprt::fresh_symbol( - const irep_idt &prefix, const typet &type) -{ - std::ostringstream buf; - buf << "string_refinement#" << prefix << "#" << (next_symbol_id++); - irep_idt name(buf.str()); - return symbol_exprt(name, type); -} - -/*******************************************************************\ - -Constructor: string_exprt - - Inputs: a type for characters - - Purpose: construct a string expression whose length and content are new - variables - -\*******************************************************************/ - -string_exprt::string_exprt(typet char_type): - struct_exprt(refined_string_typet(char_type)) -{ - refined_string_typet t(char_type); - symbol_exprt length= - fresh_symbol("string_length", refined_string_typet::index_type()); - symbol_exprt content=fresh_symbol("string_content", t.get_content_type()); - move_to_operands(length, content); -} - diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index cb406cbacb2..16b89bc43c8 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -10,11 +10,11 @@ Author: Alberto Griggio, alberto.griggio@gmail.com \*******************************************************************/ +#include #include #include #include #include -#include #include #include @@ -254,7 +254,7 @@ Function: string_refinementt::dec_solve Outputs: result of the decision procedure - Purpose: use a refinement loop to instantiate universal axioms, + Purpose: use a refinement loop to instantiate universal axioms, call the sat solver, and instantiate more indexes if needed. \*******************************************************************/ @@ -271,11 +271,11 @@ decision_proceduret::resultt string_refinementt::dec_solve() { string_not_contains_constraintt nc_axiom= to_string_not_contains_constraint(axiom); - array_typet witness_type - (refined_string_typet::index_type(), - infinity_exprt(refined_string_typet::index_type())); + refined_string_typet rtype=to_refined_string_type(nc_axiom.s0().type()); + const typet &index_type=rtype.get_index_type(); + array_typet witness_type(index_type, infinity_exprt(index_type)); generator.witness[nc_axiom]= - string_exprt::fresh_symbol("not_contains_witness", witness_type); + generator.fresh_symbol("not_contains_witness", witness_type); not_contains_axioms.push_back(nc_axiom); } else @@ -464,23 +464,20 @@ Function: string_refinementt::get_array exprt string_refinementt::get_array(const exprt &arr, const exprt &size) { exprt val=get(arr); - typet chart; - if(arr.type().subtype()==generator.get_char_type()) - chart=generator.get_char_type(); - else - assert(false); + typet char_type=arr.type().subtype(); + typet index_type=size.type(); if(val.id()=="array-list") { - array_typet ret_type(chart, infinity_exprt(generator.get_index_type())); - exprt ret=array_of_exprt(generator.constant_char(0), ret_type); + array_typet ret_type(char_type, infinity_exprt(index_type)); + exprt ret=array_of_exprt(from_integer(0, char_type), ret_type); for(size_t i=0; i &m, bool negated) const { - exprt sum=from_integer(0, generator.get_index_type()); + exprt sum=nil_exprt(); mp_integer constants=0; + typet index_type; + if(m.empty()) + return nil_exprt(); + else + index_type=m.begin()->first.type(); for(const auto &term : m) { @@ -705,14 +707,14 @@ exprt string_refinementt::sum_over_map( switch(second) { case -1: - if(sum==from_integer(0, generator.get_index_type())) + if(sum.is_nil()) sum=unary_minus_exprt(t); else sum=minus_exprt(sum, t); break; case 1: - if(sum==from_integer(0, generator.get_index_type())) + if(sum.is_nil()) sum=t; else sum=plus_exprt(sum, t); @@ -733,8 +735,11 @@ exprt string_refinementt::sum_over_map( } } - exprt index_const=from_integer(constants, generator.get_index_type()); - return plus_exprt(sum, index_const); + exprt index_const=from_integer(constants, index_type); + if(sum.is_not_nil()) + return plus_exprt(sum, index_const); + else + return index_const; } /*******************************************************************\ @@ -910,7 +915,7 @@ void string_refinementt::initial_index_set(const string_constraintt &axiom) exprt e(i); minus_exprt kminus1( axiom.upper_bound(), - from_integer(1, generator.get_index_type())); + from_integer(1, axiom.upper_bound().type())); replace_expr(qvar, kminus1, e); current_index_set[s].insert(e); index_set[s].insert(e); @@ -1041,7 +1046,7 @@ exprt string_refinementt::instantiate( exprt bounds=and_exprt( axiom.univ_within_bounds(), binary_relation_exprt( - from_integer(0, generator.get_index_type()), ID_le, val)); + from_integer(0, val.type()), ID_le, val)); replace_expr(axiom.univ_var(), r, bounds); return implies_exprt(bounds, instance); } @@ -1077,7 +1082,7 @@ void string_refinementt::instantiate_not_contains( new_lemmas.push_back(lemma); // we put bounds on the witnesses: // 0 <= v <= |s0| - |s1| ==> 0 <= v+w[v] < |s0| && 0 <= w[v] < |s1| - exprt zero=from_integer(0, generator.get_index_type()); + exprt zero=from_integer(0, val.type()); binary_relation_exprt c1(zero, ID_le, plus_exprt(val, witness)); binary_relation_exprt c2 (to_string_expr(s0).length(), ID_gt, plus_exprt(val, witness)); diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index aac4eed6dab..be0cd332bc3 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -13,11 +13,8 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #ifndef CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_H #define CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_H -#include - -#include +#include #include -#include #include // Defines a limit on the string witnesses we will output. diff --git a/src/util/language.h b/src/util/language.h index edd83471cd5..b19615496ec 100644 --- a/src/util/language.h +++ b/src/util/language.h @@ -49,6 +49,15 @@ class languaget:public messaget virtual void modules_provided(std::set &modules) { } + // add lazy functions provided to set + + virtual void lazy_methods_provided(std::set &methods) const + { } + + // populate a lazy method + virtual void convert_lazy_method(const irep_idt &id, symbol_tablet &) + { } + // final adjustments, e.g., initialization and call to main() virtual bool final( diff --git a/src/util/language_file.cpp b/src/util/language_file.cpp index dffad975c45..46f51564b54 100644 --- a/src/util/language_file.cpp +++ b/src/util/language_file.cpp @@ -65,6 +65,13 @@ void language_filet::get_modules() language->modules_provided(modules); } +void language_filet::convert_lazy_method( + const irep_idt &id, + symbol_tablet &symbol_table) +{ + language->convert_lazy_method(id, symbol_table); +} + /*******************************************************************\ Function: language_filest::show_parse @@ -79,8 +86,8 @@ Function: language_filest::show_parse void language_filest::show_parse(std::ostream &out) { - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) it->second.language->show_parse(out); } @@ -98,8 +105,8 @@ Function: language_filest::parse bool language_filest::parse() { - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { // open file @@ -145,8 +152,8 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) { // typecheck interfaces - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { if(it->second.language->interfaces(symbol_table)) return true; @@ -156,8 +163,8 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) unsigned collision_counter=0; - for(filemapt::iterator fm_it=filemap.begin(); - fm_it!=filemap.end(); fm_it++) + for(file_mapt::iterator fm_it=file_map.begin(); + fm_it!=file_map.end(); fm_it++) { const language_filet::modulest &modules= fm_it->second.modules; @@ -170,7 +177,7 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) // these may collide, and then get renamed std::string module_name=*mo_it; - while(modulemap.find(module_name)!=modulemap.end()) + while(module_map.find(module_name)!=module_map.end()) { module_name=*mo_it+"#"+std::to_string(collision_counter); collision_counter++; @@ -179,25 +186,34 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) language_modulet module; module.file=&fm_it->second; module.name=module_name; - modulemap.insert( + module_map.insert( std::pair(module.name, module)); } } // typecheck files - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { if(it->second.modules.empty()) + { if(it->second.language->typecheck(symbol_table, "")) return true; + // register lazy methods. + // TODO: learn about modules and generalise this + // to module-providing languages if required. + std::set lazy_method_ids; + it->second.language->lazy_methods_provided(lazy_method_ids); + for(const auto &id : lazy_method_ids) + lazy_method_map[id]=&it->second; + } } // typecheck modules - for(modulemapt::iterator it=modulemap.begin(); - it!=modulemap.end(); it++) + for(module_mapt::iterator it=module_map.begin(); + it!=module_map.end(); it++) { if(typecheck_module(symbol_table, it->second)) return true; @@ -223,8 +239,8 @@ bool language_filest::final( { std::set languages; - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { if(languages.insert(it->second.language->id()).second) if(it->second.language->final(symbol_table)) @@ -249,8 +265,8 @@ Function: language_filest::interfaces bool language_filest::interfaces( symbol_tablet &symbol_table) { - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { if(it->second.language->interfaces(symbol_table)) return true; @@ -277,9 +293,9 @@ bool language_filest::typecheck_module( { // check module map - modulemapt::iterator it=modulemap.find(module); + module_mapt::iterator it=module_map.find(module); - if(it==modulemap.end()) + if(it==module_map.end()) { error() << "found no file that provides module " << module << eom; return true; diff --git a/src/util/language_file.h b/src/util/language_file.h index 533e8d5240e..d3184d48697 100644 --- a/src/util/language_file.h +++ b/src/util/language_file.h @@ -42,6 +42,10 @@ class language_filet void get_modules(); + void convert_lazy_method( + const irep_idt &id, + symbol_tablet &symbol_table); + language_filet(const language_filet &rhs); language_filet():language(NULL) @@ -54,15 +58,21 @@ class language_filet class language_filest:public messaget { public: - typedef std::map filemapt; - filemapt filemap; + typedef std::map file_mapt; + file_mapt file_map; + + // Contains pointers into file_mapt! + typedef std::map module_mapt; + module_mapt module_map; - typedef std::map modulemapt; - modulemapt modulemap; + // Contains pointers into filemapt! + // This is safe-ish as long as this is std::map. + typedef std::map lazy_method_mapt; + lazy_method_mapt lazy_method_map; void clear_files() { - filemap.clear(); + file_map.clear(); } bool parse(); @@ -75,10 +85,26 @@ class language_filest:public messaget bool interfaces(symbol_tablet &symbol_table); + bool has_lazy_method(const irep_idt &id) + { + return lazy_method_map.count(id); + } + + // The method must have been added to the symbol table and registered + // in lazy_method_map (currently always in language_filest::typecheck) + // for this to be legal. + void convert_lazy_method( + const irep_idt &id, + symbol_tablet &symbol_table) + { + return lazy_method_map.at(id)->convert_lazy_method(id, symbol_table); + } + void clear() { - filemap.clear(); - modulemap.clear(); + file_map.clear(); + module_map.clear(); + lazy_method_map.clear(); } protected: diff --git a/src/util/safe_pointer.h b/src/util/safe_pointer.h new file mode 100644 index 00000000000..47e52ac13fe --- /dev/null +++ b/src/util/safe_pointer.h @@ -0,0 +1,64 @@ +/*******************************************************************\ + +Module: Simple checked pointers + +Author: Chris Smowton, chris@smowton.net + +\*******************************************************************/ + +#ifndef CPROVER_UTIL_SAFE_POINTER_H +#define CPROVER_UTIL_SAFE_POINTER_H + +template class safe_pointer +{ + public: + operator bool() const + { + return !!ptr; + } + + T *get() const + { + assert(ptr && "dereferenced a null safe pointer"); + return ptr; + } + + T &operator*() const + { + return *get(); + } + + T *operator->() const + { + return get(); + } + + static safe_pointer create_null() + { + return safe_pointer(); + } + + static safe_pointer create_non_null( + T *target) + { + assert(target && "initialized safe pointer with null"); + return safe_pointer(target); + } + + static safe_pointer create_maybe_null( + T *target) + { + return safe_pointer(target); + } + + protected: + T *ptr; + + explicit safe_pointer(T *target) : ptr(target) + {} + + safe_pointer() : ptr(nullptr) + {} +}; + +#endif diff --git a/src/solvers/refinement/string_expr.h b/src/util/string_expr.h similarity index 71% rename from src/solvers/refinement/string_expr.h rename to src/util/string_expr.h index 29d6b12eae7..317e3c1ee6e 100644 --- a/src/solvers/refinement/string_expr.h +++ b/src/util/string_expr.h @@ -6,29 +6,27 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com \*******************************************************************/ -#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_EXPR_H -#define CPROVER_SOLVERS_REFINEMENT_STRING_EXPR_H +#ifndef CPROVER_UTIL_STRING_EXPR_H +#define CPROVER_UTIL_STRING_EXPR_H -#include +#include +#include -#include -#include -#include - - -// Expressions that encode strings class string_exprt: public struct_exprt { public: - // Initialize string from the type of characters - explicit string_exprt(typet char_type); + string_exprt(): struct_exprt() {} - // Default uses C character type - string_exprt() : string_exprt(char_type()) {} + explicit string_exprt(typet type): struct_exprt(type) + { + operands().resize(2); + } - // Generate a new symbol of the given type with a prefix - static symbol_exprt fresh_symbol( - const irep_idt &prefix, const typet &type=bool_typet()); + string_exprt(const exprt &_length, const exprt &_content, typet type): + struct_exprt(type) + { + copy_to_operands(_length, _content); + } // Expression corresponding to the length of the string const exprt &length() const { return op0(); } @@ -38,12 +36,6 @@ class string_exprt: public struct_exprt const exprt &content() const { return op1(); } exprt &content() { return op1(); } - // Type of the expression as a refined string type - const refined_string_typet &refined_type() const - { - return to_refined_string_type(type()); - } - static exprt within_bounds(const exprt &idx, const exprt &bound); // Expression of the character at position idx in the string @@ -54,7 +46,7 @@ class string_exprt: public struct_exprt index_exprt operator[] (int i) const { - return index_exprt(content(), refined_type().index_of_int(i)); + return index_exprt(content(), from_integer(i, length().type())); } // Comparison on the length of the strings @@ -84,7 +76,7 @@ class string_exprt: public struct_exprt binary_relation_exprt axiom_for_is_strictly_longer_than(int i) const { - return axiom_for_is_strictly_longer_than(refined_type().index_of_int(i)); + return axiom_for_is_strictly_longer_than(from_integer(i, length().type())); } binary_relation_exprt axiom_for_is_shorter_than( @@ -101,7 +93,7 @@ class string_exprt: public struct_exprt binary_relation_exprt axiom_for_is_shorter_than(int i) const { - return axiom_for_is_shorter_than(refined_type().index_of_int(i)); + return axiom_for_is_shorter_than(from_integer(i, length().type())); } binary_relation_exprt axiom_for_is_strictly_shorter_than( @@ -129,22 +121,24 @@ class string_exprt: public struct_exprt equal_exprt axiom_for_has_length(int i) const { - return axiom_for_has_length(refined_type().index_of_int(i)); + return axiom_for_has_length(from_integer(i, length().type())); } - static irep_idt extract_java_string(const symbol_exprt &s); - - static unsigned next_symbol_id; - friend inline string_exprt &to_string_expr(exprt &expr); }; - inline string_exprt &to_string_expr(exprt &expr) { assert(expr.id()==ID_struct); + assert(expr.operands().size()==2); return static_cast(expr); } +inline const string_exprt &to_string_expr(const exprt &expr) +{ + assert(expr.id()==ID_struct); + assert(expr.operands().size()==2); + return static_cast(expr); +} #endif diff --git a/src/util/unicode.cpp b/src/util/unicode.cpp index 82acd36d4ee..1e280783aff 100644 --- a/src/util/unicode.cpp +++ b/src/util/unicode.cpp @@ -7,6 +7,10 @@ Author: Daniel Kroening, kroening@kroening.com \*******************************************************************/ #include +#include +#include +#include +#include #include "unicode.h" @@ -258,3 +262,79 @@ const char **narrow_argv(int argc, const wchar_t **argv_wide) return argv_narrow; } + +/*******************************************************************\ + +Function: utf8_to_utf16_big_endian + + Inputs: String in UTF-8 format + + Outputs: String in UTF-16BE format + + Purpose: Note this requires g++-5 libstdc++ / libc++ / MSVC2010+ + +\*******************************************************************/ + +std::wstring utf8_to_utf16_big_endian(const std::string& in) +{ + std::wstring_convert > converter; + return converter.from_bytes(in); +} + +/*******************************************************************\ + +Function: utf8_to_utf16_little_endian + + Inputs: String in UTF-8 format + + Outputs: String in UTF-16LE format + + Purpose: Note this requires g++-5 libstdc++ / libc++ / MSVC2010+ + +\*******************************************************************/ + +std::wstring utf8_to_utf16_little_endian(const std::string& in) +{ + const std::codecvt_mode mode=std::codecvt_mode::little_endian; + + // default largest value codecvt_utf8_utf16 reads without error is 0x10ffff + // see: http://en.cppreference.com/w/cpp/locale/codecvt_utf8_utf16 + const unsigned long maxcode=0x10ffff; + + typedef std::codecvt_utf8_utf16 codecvt_utf8_utf16t; + std::wstring_convert converter; + return converter.from_bytes(in); +} + +/*******************************************************************\ + +Function: utf16_little_endian_to_ascii + + Inputs: String in UTF-16LE format + + Outputs: String in US-ASCII format, with \uxxxx escapes for other + characters + + Purpose: + +\*******************************************************************/ + +std::string utf16_little_endian_to_ascii(const std::wstring& in) +{ + std::ostringstream result; + std::locale loc; + for(const auto c : in) + { + if(c<=255 && isprint(c, loc)) + result << (unsigned char)c; + else + { + result << "\\u" + << std::hex + << std::setw(4) + << std::setfill('0') + << (unsigned int)c; + } + } + return result.str(); +} diff --git a/src/util/unicode.h b/src/util/unicode.h index edad95039f0..c4bcab617d4 100644 --- a/src/util/unicode.h +++ b/src/util/unicode.h @@ -22,6 +22,10 @@ std::wstring widen(const std::string &s); std::string utf32_to_utf8(const std::basic_string &s); std::string utf16_to_utf8(const std::basic_string &s); +std::wstring utf8_to_utf16_big_endian(const std::string &); +std::wstring utf8_to_utf16_little_endian(const std::string &); +std::string utf16_little_endian_to_ascii(const std::wstring &in); + const char **narrow_argv(int argc, const wchar_t **argv_wide); #endif // CPROVER_UTIL_UNICODE_H