From 68a8acc7538174f47b4199f792ec8438831655bb Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 27 Oct 2016 15:19:58 +0100 Subject: [PATCH 1/3] Add basic jsr / ret implementation. Fixes #280 jsr is a simple lightweight call-subroutine opcode (simply pushes a return address) and ret is the corresponding return, performing call/return within a compiled function. They are deprecated, but Java 5 compilers generate them and library JARs built to that standard are evidently still around. This implementation has a significant shortcoming: it considers the possibility of ret'ing to any jsr-site. This could be improved if we use basic dataflow analysis to determine which jsr targets can reach which ret instructions. --- .../java_bytecode_convert_method.cpp | 86 ++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 49b621aed89..dbe219d2b41 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -543,6 +543,9 @@ codet java_bytecode_convert_methodt::convert_instructions( address_mapt address_map; std::set targets; + std::vector jsr_ret_targets; + std::vector ret_instructions; + for(instructionst::const_iterator i_it=instructions.begin(); i_it!=instructions.end(); @@ -585,6 +588,15 @@ codet java_bytecode_convert_methodt::convert_instructions( targets.insert(target); a_entry.first->second.successors.push_back(target); + + if(i_it->statement=="jsr" || + i_it->statement=="jsr_w") + { + instructionst::const_iterator next=i_it; + assert(++next!=instructions.end() && "jsr without valid return address?"); + targets.insert(next->address); + jsr_ret_targets.push_back(next->address); + } } else if(i_it->statement=="tableswitch" || i_it->statement=="lookupswitch") @@ -604,6 +616,23 @@ codet java_bytecode_convert_methodt::convert_instructions( } } } + else if(i_it->statement=="ret") + { + // Finish these later, once we've seen all jsr instructions. + ret_instructions.push_back(i_it); + } + } + + // Draw edges from every `ret` to every `jsr` successor. + // Could do better with flow analysis to distinguish multiple subroutines within + // the same function. + for(const auto retinst : ret_instructions) + { + auto& a_entry=address_map.at(retinst->address); + a_entry.successors.insert( + a_entry.successors.end(), + jsr_ret_targets.begin(), + jsr_ret_targets.end()); } for(address_mapt::iterator @@ -925,6 +954,48 @@ codet java_bytecode_convert_methodt::convert_instructions( code_gotot code_goto(label(number)); c=code_goto; } + else if(statement=="jsr" || statement=="jsr_w") + { + // As 'goto', except we must also push the subroutine return address: + assert(op.empty() && results.size()==1); + irep_idt number=to_constant_expr(arg0).get_value(); + code_gotot code_goto(label(number)); + c=code_goto; + results[0]=as_number( + std::next(i_it)->address, + pointer_typet(void_typet(), 64)); + } + else if(statement=="ret") + { + // Since we have a bounded target set, make life easier on our analyses + // and write something like: + // if(retaddr==5) goto 5; else if(retaddr==10) goto 10; ... + assert(op.empty() && results.empty()); + code_blockt branches; + auto retvar=variable(arg0, 'a', i_it->address, INST_INDEX, NO_CAST); + assert(!jsr_ret_targets.empty()); + for(size_t idx=0, idxlim=jsr_ret_targets.size(); idx!=idxlim; ++idx) + { + irep_idt number=std::to_string(jsr_ret_targets[idx]); + code_gotot g(label(number)); + g.add_source_location()=i_it->source_location; + if(idx==idxlim-1) + branches.move_to_operands(g); + else + { + code_ifthenelset branch; + auto address_ptr=as_number( + jsr_ret_targets[idx], + pointer_typet(void_typet(), 64)); + branch.cond()=equal_exprt(retvar, address_ptr); + branch.cond().add_source_location()=i_it->source_location; + branch.then_case()=g; + branch.add_source_location()=i_it->source_location; + branches.move_to_operands(branch); + } + } + c=std::move(branches); + } else if(statement=="iconst_m1") { assert(results.size()==1); @@ -1530,8 +1601,19 @@ codet java_bytecode_convert_methodt::convert_instructions( else { c.make_block(); - forall_operands(o_it, more_code) - c.copy_to_operands(*o_it); + auto& last_statement=to_code_block(c).find_last_statement(); + if(last_statement.get_statement()==ID_goto) + { + // Insert stack twiddling before branch: + last_statement.make_block(); + last_statement.operands().insert( + last_statement.operands().begin(), + more_code.operands().begin(), + more_code.operands().end()); + } + else + forall_operands(o_it, more_code) + c.copy_to_operands(*o_it); } } From c90295b8de3b37612bab83d0e7ebe2037ebdb8ed Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 10 Jan 2017 14:01:01 +0000 Subject: [PATCH 2/3] Add basic test for understanding jsr instruction --- .../java/util/concurrent/ConcurrentHashMap.class | Bin 0 -> 9606 bytes regression/cbmc-java/jsr1/test.desc | 8 ++++++++ 2 files changed, 8 insertions(+) create mode 100644 regression/cbmc-java/jsr1/edu/emory/mathcs/backport/java/util/concurrent/ConcurrentHashMap.class create mode 100644 regression/cbmc-java/jsr1/test.desc diff --git a/regression/cbmc-java/jsr1/edu/emory/mathcs/backport/java/util/concurrent/ConcurrentHashMap.class b/regression/cbmc-java/jsr1/edu/emory/mathcs/backport/java/util/concurrent/ConcurrentHashMap.class new file mode 100644 index 0000000000000000000000000000000000000000..3ada3700fc1d191d20f89bc7ab0cff84fdd13b29 GIT binary patch literal 9606 zcmb_i34B!bo&Wyddoyol@_-OBsYy5j1_((YTn0HrV+b;k1eX8`s9{J3h$b_1X2P{r zD+O`aYHO=)u@x(=1zmQP$|)Y}v39pgZM9adc-yK~>#f~xx9s=#e=jphjJs^3eEi@4 z-+jIR_ZexP^_}qklMBC<<~biB;BqdT(u8mv&&#D3qAH6~at>9Kv7l-@?mZgO~%-`xga{ zl#<2>DN|Xt@)3jb%fz--z*a$9g>03h5U7i%lbf1iX$2<|kH?a=?a@>!mQwI9jkb5{ z!Gf3^u$7}QKp&5!W65Ybk+jtSg~8_~+tRVcYmPw&vWL+Q{N%ycaR4)LexjU1#bS*34<1TT{Du z;UaF&RVeJ*sa@DuyJXR#y2jdN&GmIl>*^J98*0vvG%RW82GQYCb5q?p4Rwu+n+SJN z-Qq=&x~Ar{>*g+8RM%X;uy%oB^Pu%RhwvUkF}&Ja~`(V5;VKA$hZi*+C*ziV7A3$ zjh!8Rwp3lmIw68rmn#?@ z1Z}LKUVI%bsZQRpT$J4AqN2H(dfC^qHr67uHR9-FJdEwp_?oJPt1hOlXO44;=)T0` zv>{QgHlUtcIb=K?ZHuSq*$UylAd`(wX4(HU>0 zX?9&8@^zi0_4Ga=^ZJ4w<**AvLtlOv5>+R50u_Gk<5Oh2hReK1HUfkb<&C&xfMw!s_vM5MID!DXHh zbdYcz-MEC#aY~Pk9=>g?=e{8^-lC@274|q=7w_zlK<@^EK&(9`F`CD6Hb}^KEk|L5 z$5UIvGp2=|X$re3os300v^CfF+doS)X@MFr)6rFoC0{_e>;aX5N@0HgNzKM!MOh7X zmD#s&=n-`rTVm@(#3@@1;+#Tj?UL;9-ja>PdkVI=7x3=;wqm*qH6=QeEwQ<65~B-h zd*_P@;@J)^#w8Bg(ClEXG=Gon3Zt@4A7MIJ6K$_auAx1|(;12#HCW{-%=@CUmSEl{ z&84;);^3RO&Q|%33abJKKgLy#`Vy&Ar$|$X?T#wKc3TxYYACLA@MR<&Y`{hbH>hC_ zcBtVB!?QdzcDA>ln`n!(a>>$<@8d=Xx2O>g;@D@a5=V_xqil7mqe}60M~zlvnAtTD z?NyRtb%=1(ST)8`Wo&Q@b~(6RjdQS9l}p3Xog@$KU_d^n-@;*lr8iS7s=`4644nOk zsbfb%I)^Zio`WN*%E3;K z)My;F)kH^467!$mZ_Jt4;;6}Lii3y6n9k}iScR&S0l}=7PO?U!#VX7?nHx;JpDio( z^?dCB3}8*V9BVWi*NSvCDPDupaMKgq#F$=jL5=F98oCA z1{(eS)R;)mm4c?*5lPuK$z+t(QX|X1FCckk^UQ!AAxdFjmYfEDM=)b{L^rbPDU0YL z-q%9f=&XT`L~E@(wAvF=ANo7v?jSb{p(_~$)>05|`QKON!@|Dz9j5AnKI)x_l&&-S z+W0Jl<$Xf!#;=Y#fi~}hR1kJAgSzxaO7gNY>RM%CMxGjD8z~DbuDkrM>piPoKkH^) z=mt3O3a`FOxyWZZ=#znyMGr{ z$YTFhDGPMZ+^c)0boN8}fWs1o6r?yyz~Ts)R8*8ZXyf2xU)i#9CWoO*x3KeX)Dyp+ z_%;^h4&1m>ujF33WiKxoNV6@1pY1NbeEkS{ET2xnt$eNe3Cgh>o!oI;1X%Bp$I{6| zUI&vffWhVap^DiUm$ET0n|vo1UEqFw`A!X}z&(sj*vx%iBOfAJC54;|Qf(nILGJA- zKLo2Z=Z2lgDgHj}VK*V?D16Hf!GHg-Jp0G+=h?H(YRjtjg?+YTxb? z@0aJ6gss*CFv_{a-ei9mfdiy5FDO@ovx>vM_G*9FANH*+-Ua_wzp|?BupRdAf)ln0 z$KHM7L$f*%4hU=a?Lv|C;lNRZmWA!oL&#~`8c@4Wyt0q9x}2^?A&2u}84HkS)T78~ zM2WEwry7ee#%RJgV==0XC75h1#Z2RT%rTas-dK($#syemT!@Q|i_mH`qr+H zzt6xx5_~Is#vK&Mop6kOC@_9XW4#Nbj02d&`84Bh8tGBg8Ta5q<32QV95Wu!%+02; z4C4E=7@vjN80(G5ipQ{zh9^OcUzNBJ*Wg-e+F?Nb25-m%JipgmAsEFJL5M>5R0|3LVCS#pB{l6l_E$jQf$Y1VE8^~EO&tdi?*gkR=10bMTh9cUB^NROb; ztq9wb4yQRJR93cek< zD2xu8`v5@sDt;#UYy|UJ$^Byb4VAR|M(xft`YmR)&#ZQRpprH>vC_3I<|F_3uRak@Dk5px0-n^joKwF}HiXf`LK)trL3ITahs)3D8~##QDtT+OxX%$fL( zISbz-texhW*u&od?_htxtifTk77vG%O|qKgz_ zn|2_J4@9oBSU5_^;y^yU?X5XtHeHs?5g#IGRDy~+P!Td7SnWQ>X$T!m(UAYS=gKnhfo&>HD8)SWec2r3kKl=ZXVD&-5Fkr@*#c z;UFr`+kN7Awsdd>a}C1gwY2STV2XKNhN=?Ve=of?}PCG#_o^D|LW-b&plc{~2e<9t1F2U?wDa%k&hJT@_iRYBz z4vtLv>mW^&_}AWyQKmh)l>BdgaS#)-dT^!k0HyLEZSo=7I z*b|B3*XTYEQ4?7efe4m~^%c}X#ct%5AH}3){A!!L`~W6qxpb}VCECi%83b9dKE|z| z^7B?o;{@*N1HqK;2&M{x(}-Y7Zv-C`!6%=CU_WaWLBN;$J`hxQM=(tgoK6JQy%1RZ z>k%vGa}XTxI5>zy9)c;_V)@y06w?Ldj4U`EaILTg<$i|*GcrEh7l4@pU{-GcBl-Yf zFc95M)$n0JO=T~ihMVc3XP9RU^(>-A`k|8>=@#Mf`!Rclo6-v{TFazDi%ETkztH~( zMpP8~_v6fiI7<&|IFN<2XZZQiyH!&CF@h-A4QGZegF^fMtp-#Sfjp+H zBo!<9MIgPK-{sdrNZsOW9>v>#g zy?~YM+ju8oy@a&&GB#PSXerEN7M;wbaVzttAE%<)%bg|6vG?F!nri`?a3AicR{VTs zk#bcGcyq?zH#1s4K;1paRq^YGxc@NzjjcSg@J)lOz--F0HY>{lSC-bjm~#+yn*G;# zVs3*pjf?~Qju=ob5lG_x8(HTomv0{HE%N_I^8YS|vL9uAgt6AgsI)%8MC(&bvHpZJ ztUqi1Bs%%|@FNw0!EE5xu?8ynB_|o$6xuSp%qB07c)a*|{vrH~?Z5Mkf&V}~`{k^E zr}5Ei0c)74>>JomV&BMqA^UUL{~h~9>}RrXVn3bzV)jeeFQqW#kCV=0pqRt{eD-Dh KYo?$0gZ~3UxCn9p literal 0 HcmV?d00001 diff --git a/regression/cbmc-java/jsr1/test.desc b/regression/cbmc-java/jsr1/test.desc new file mode 100644 index 00000000000..abcbe1427a1 --- /dev/null +++ b/regression/cbmc-java/jsr1/test.desc @@ -0,0 +1,8 @@ +CORE +edu/emory/mathcs/backport/java/util/concurrent/ConcurrentHashMap.class +--show-goto-functions +^EXIT=0$ +^SIGNAL=0$ +-- +\\"statement\\" (\\"jsr\\") +\\"statement\\" (\\"ret\\") From a4929357b0eaf65d8d07e78b0c26f09724aa8789 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 13 Jan 2017 15:33:19 +0000 Subject: [PATCH 3/3] Add a smaller jsr test This one is build from raw assembly and checks that the generated cfg (or at least, its nodes) matches expectations. --- regression/cbmc-java/jsr2/test.class | Bin 0 -> 199 bytes regression/cbmc-java/jsr2/test.desc | 13 +++++++++++++ regression/cbmc-java/jsr2/test.j | 28 +++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) create mode 100644 regression/cbmc-java/jsr2/test.class create mode 100644 regression/cbmc-java/jsr2/test.desc create mode 100644 regression/cbmc-java/jsr2/test.j diff --git a/regression/cbmc-java/jsr2/test.class b/regression/cbmc-java/jsr2/test.class new file mode 100644 index 0000000000000000000000000000000000000000..326874579e6776659f0538142b2c3bdf81c44f7a GIT binary patch literal 199 zcmX|)y9&ZU5Jm6gwdNscZ)2wx$q$H)U?XTD*spPs4KV?Y{*p+^U52+{Qrz#;S!d6iKrvqQArB`Pim-X_1CVztnzA~;i>>0(B(j#6g()V + aload_0 + invokenonvirtual java/lang/Object/()V + return +.end method + +.method public static main()V + .limit locals 4 + .limit stack 1 + goto runsrs +sr1: + astore_0 + ret 0 +runsrs: + jsr sr1 + jsr sr2 + return +sr2: + astore_3 + ret 3 +.end method +