diff --git a/.ci/ci.yaml b/.ci/ci.yaml new file mode 100644 index 0000000..4590c89 --- /dev/null +++ b/.ci/ci.yaml @@ -0,0 +1,28 @@ +version: v2.0 + +on: + push: ["*"] + mr: ["*"] + +stages: + - name: build and test stage + jobs: + job1: + name: build and test job + runs-on: + pool-name: docker + container: + image: mirrors.tencent.com/rust-ci/rust:latest + steps: + - checkout: self + - run: | + cargo build + cargo test + name: cargo build and test + - run: | + rustup component add clippy + name: install clippy + - run: | + cargo clippy --all-targets -- -D warnings + name: run clippy + diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..340577b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +/install +/target +/.vscode diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f31d813 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +/target +Cargo.lock +.vscode/ +*.so +*.dSYM +*.dylib +output/ +output_cov/ +install/ +core.* +*.data +*.log diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..df2c2dc --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,12 @@ +[workspace] +resolver = "2" +members = [ + 'hopper-core', + 'hopper-derive-impl', + 'hopper-derive', + 'hopper-compiler', + 'hopper-harness', +] + +# [patch.crates-io] +# bindgen = { path = "../rust-bindgen" } \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7e20875 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,44 @@ +FROM ubuntu:20.04 + +ENV HOPPER_BIN=/hopper/hopper \ + RUSTUP_HOME=/usr/local/rustup \ + CARGO_HOME=/usr/local/cargo \ + PATH=/hopper:/usr/local/cargo/bin:/root/.cargo/bin:$PATH \ + DEBIAN_FRONTEND=noninteractive + +# RUN sed -i 's/archive.ubuntu.com/mirrors.ustc.edu.cn/g' /etc/apt/sources.list +# RUN sed -i 's/security.ubuntu.com/mirrors.ustc.edu.cn/g' /etc/apt/sources.list + +# Install the build prerequisites; the apt package lists are removed in the same layer so they never persist in the image. +RUN apt-get update \ + && apt-get -y upgrade \ + && apt-get -y install build-essential wget curl cmake git unzip xxd 
protobuf-compiler libprotobuf-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# ENV RUSTUP_DIST_SERVER="https://mirrors.ustc.edu.cn/rust-static" +# ENV RUSTUP_UPDATE_ROOT="https://mirrors.ustc.edu.cn/rust-static/rustup" + +# Save the rustup installer to a file before running it: piping curl into sh would hide a failed download behind the exit status of sh. +RUN curl --proto '=https' --tlsv1.2 -sSf -o /tmp/rustup-init.sh https://sh.rustup.rs \ + && sh /tmp/rustup-init.sh -y --default-toolchain stable \ + && rm -f /tmp/rustup-init.sh + +# RUN echo '[source.crates-io]' > ${CARGO_HOME}/config && \ +# echo "replace-with = 'tencent'" >> ${CARGO_HOME}/config && \ +# echo '[source.tencent]' >> ${CARGO_HOME}/config && \ +# echo 'registry = "http://mirrors.tencent.com/rust/index"' >> ${CARGO_HOME}/config + +RUN mkdir -p /hopper +COPY . /hopper +WORKDIR /hopper + +RUN ./build.sh + +RUN mkdir /llvm +ENV PATH=/llvm/bin:$PATH +ENV LD_LIBRARY_PATH=/llvm/lib:$LD_LIBRARY_PATH + +RUN mkdir /fuzz_lib +RUN mkdir /fuzz +WORKDIR /fuzz diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..20bbb30 --- /dev/null +++ b/LICENSE @@ -0,0 +1,828 @@ +Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved. The below software in this distribution may have been modified by THL A29 Limited ("Tencent Modifications"). All Tencent Modifications are Copyright (C) THL A29 Limited. + +Hopper is licensed under the Apache License Version 2.0 except for the third-party components listed below. + +Apache License + +Version 2.0, January 2004 + +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of this License; and + +You must cause any modified files to carry prominent notices stating that You changed the files; and + +You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + +If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + +You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + + + +Other dependencies and licenses: + + +Open Source Software Licensed under the Apache License Version 2.0: +The below software in this distribution may have been modified by THL A29 Limited ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2023 THL A29 Limited. +-------------------------------------------------------------------- +1. AFL + Copyright 2013, 2014, 2015, 2016 Google Inc. All rights reserved. +Source code of this software can be obtained from: https://lcamtuf.coredump.cx/afl/ +Please note this software has been modified by Tencent in this distribution. + +2. 
Angora +Copyright (c) Angora original author and authors +Source code of this software can be obtained from: https://github.com/AngoraFuzzer/Angora +Please note this software has been modified by Tencent in this distribution. + + +A copy of the Apache License Version 2.0 is included in this file. + + + +Open Source Software Licensed under the Apache 2.0 or MIT: +The below software in this distribution may have been modified by Tencent. +-------------------------------------------------------------------- +1. rand +Copyright 2018 Developers of the Rand project +Copyright (c) 2014 The Rust Project Developers + + +A copy of the Apache 2.0 license is included in this file. +A copy of the MIT license is included in this file. + +Copyrights in the Rand project are retained by their contributors. No +copyright assignment is required to contribute to the Rand project. + +For full authorship information, see the version control history. + +Except as otherwise noted (below and/or in individual files), Rand is +licensed under the Apache License, Version 2.0 <LICENSE-APACHE> or +<http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +<LICENSE-MIT> or <http://opensource.org/licenses/MIT>, at your option. + +The Rand project includes code from the Rust project +published under these same licenses. + + + +Open Source Software Licensed under the MIT License: +The below software in this distribution may have been modified by Tencent. +-------------------------------------------------------------------- +1. lain +Copyright (c) Microsoft Corporation. All rights reserved. 
+ + +Terms of the MIT License: +-------------------------------------------------------------------- +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + + +Open Source Software Licensed under the GNU General Public License, version 3 and Other Licenses of the Third-Party Components therein: +-------------------------------------------------------------------- +1. e9patch +Copyright (C) 2022 National University of Singapore +Source code of this software can be obtained from: https://github.com/GJDuck/e9patch + + +Terms of the GNU General Public License, version 3: +-------------------------------------------------------------------- + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. 
+ + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. 
For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. 
The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
+ +For the license of other third party components, please refer to the following URL: +https://github.com/GJDuck/e9patch/blob/v1.0.0-rc7/LICENSE + + + +Open Source Software Licensed under the GNU General Public License, version 3 and Other Licenses of the Third-Party Components therein: +The below software in this distribution may have been modified by Tencent. +-------------------------------------------------------------------- +1. e9afl +Copyright (C) 2022 National University of Singapore +Source code of this software can be obtained from: https://github.com/GJDuck/e9afl + + +A copy of the GNU General Public License, version 3 is included in this file. + +For the license of other third party components, please refer to the following URL: +https://github.com/GJDuck/e9afl/blob/v0.9.0/LICENSE \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..e81f76c --- /dev/null +++ b/README.md @@ -0,0 +1,277 @@ +# Hopper + +Hopper is a tool for generating fuzzing test cases for libraries automatically using **interpretative fuzzing**. It transforms the problem of library fuzzing into the problem of interpreter fuzzing, enabling exploration of a vast range of API usages for library fuzzing out of the box. +Some key features of Hopper include: +- Interpretative API invoking without any fuzz driver. +- Type-aware mutation for arguments. +- Automatic intra- and inter-API constraint learning. +- Binary instrumentation support. + +To learn more about Hopper, check out our [paper](https://arxiv.org/pdf/2309.03496) at CCS '23. + +## Build Hopper +### Build Requirements +- Linux-amd64 (Tested on Ubuntu 20.04 and Debian Buster) +- Rust stable (>= 1.60), can be obtained using [rustup](https://rustup.rs/) +- Clang (>= 5.0, [Install Clang](https://rust-lang.github.io/rust-bindgen/requirements.html)), [rust-bindgen](https://rust-lang.github.io/rust-bindgen/) leverages libclang to preprocess, parse, and type check C and C++ header files.
+ +### Build Hopper itself +```sh +./build.sh +``` + +The script will create an `install` directory in hopper's root directory, then you can use `hopper`. + +### Using Docker +You can choose to use the Dockerfile, which builds the requirements and Hopper. +``` +docker build -t hopper ./ +docker run --name hopper_dev --privileged -v /path-to-lib:/fuzz -it --rm hopper /bin/bash +``` + +## Compile library with Hopper +Take `cJSON` for example ([More examples](./examples/)). +```sh +hopper compile --header ./cJSON.h --library ./libcjson.so --output output +``` + +Use `hopper compile --help` to see detailed usage. If the compiling reports errors about the header file, refer to the usage of [rust-bindgen](https://rust-lang.github.io/rust-bindgen/), which we use for parsing header files. +You may wrap the header file with the missing definitions. +Hopper uses [E9Patch](https://github.com/GJDuck/e9patch) to instrument binaries by default. + +After running `compile`, you will find that it generates the following files in the output directory: +- `bin/hopper-fuzzer`: generates inputs, maintains states, and uses `harness` to execute the inputs. +- `bin/hopper-harness`: executes the inputs. +- `bin/hopper-translate`: translates inputs to C source code. +- `bin/hopper-generator`: replays the generation process. +- `bin/hopper-sanitizer`: sanitizes and minimizes crashes. + +#### Header files +- If there are multiple header files, you can create a new header file, and *include* all of them. +- If header files are compiled depending on specific environment variables, you can set them via `BINDGEN_EXTRA_CLANG_ARGS`. +- If the header file includes API functions that you do not want to test, use `--func-pattern` to filter them while running the fuzzer. + +#### Environment variable for compiling +- `HOPPER_MAP_SIZE_POW2`: controls the size of coverage path. The default value is 16, and it should be in the range of [16, 20]. e.g. `HOPPER_MAP_SIZE_POW2=18`.
+- `HOPPER_INST_RATIO`: controls how likely a block will be chosen for instrumentation. The default value is 100, and it should be in the range of (0, 100]. e.g. `HOPPER_INST_RATIO=75`. +- `HOPPER_INCLUDE_SEARCH_PATH`: includes the search path of file in header files. e.g. `HOPPER_INCLUDE_SEARCH_PATH=../`. +- `HOPPER_FUNC_BLACKLIST`: includes function blacklists that hopper won't compile. `bindgen` will not generate code for the functions. e.g. `HOPPER_FUNC_BLACKLIST=f1,f2`. +- `HOPPER_TYPE_BLACKLIST`: includes type blacklists that hopper won't compile. `bindgen` will not generate code for the types. e.g. `HOPPER_TYPE_BLACKLIST=type1,type2`. +- `HOPPER_ITEM_BLACKLIST`: includes item(constants/variables) blacklists that hopper won't compile. `bindgen` will not generate code for the items. e.g. `HOPPER_ITEM_BLACKLIST=IPPORT_RESERVED` +- `HOPPER_CUSTOM_OPAQUE_LIST`: includes custom opaque types we defined. e.g. `HOPPER_CUSTOM_OPAQUE_LIST=type1`. + +#### Tips +- You can set the arguments and environment variables for compiling and running in a configuration file named `hopper.config`, see `examples/*` for details. + +- Reduce density: If density is larger than 20%, the IDs of edges is likely to have hash-collisions. We can a) increase `HOPPER_MAP_SIZE_POW2` or b) reduce `HOPPER_INST_RATIO`. + +- Multiple libraries: (1) merge the archives into one shared library, e.g. `gcc -shared -o c.so -Wl,--whole-archive a.a b.a -Wl,--no-whole-archive`; (2) pass all of them into hopper compiler by `--library a.so b.so`. + +## Fuzz Library with Hopper + +``` +hopper fuzz output --func-pattern cJSON_* +``` + +Use `hopper fuzz output --help` to see detailed usage. + +After running `fuzz`, it will generate following directories. +- `queue`: generated normal inputs. +- `hangs`: generated timeout inputs. +- `crashes`: generated crash inputs. +- `misc`: store some temporal files or stats. 
+ +#### Environment variable for running +- `DISABLE_CALL_DET`: disables call's deterministic mutating. +- `DISABLE_GEN_FAIL`: disables generating programs for functions that have failed to be invoked. +- `HOPPER_SEED_DIR`: provides seeds for byte-like arguments (default: output/seeds if it exists). +- `HOPPER_DICT`: provides dictionary for byte-like arguments. The grammar is the same as AFL's. +- `HOPPER_API_INSENSITIVE_COV`: disables API-sensitive branch counting. +- `HOPPER_FAST_EXECUTE_LOOP`: number of programs executed (in a loop) for each fork, set as 0 or 1 to break the loop. e.g. `HOPPER_FAST_EXECUTE_LOOP=10`. + +#### System configuration +Set system core dumps as AFL (on the host if you execute Hopper in a Docker container). +``` +echo core | sudo tee /proc/sys/kernel/core_pattern +``` + +### Function pattern +Hopper generates inputs for all functions in libraries by default. However, there are two ways to filter functions in Hopper: excluding functions or including functions. This way, it can focus on interesting functions. + +#### `--func-pattern` +``` +hopper fuzz output --func-pattern @cJSON_parse,!cJSON_InitHook,cJSON_* +``` + - The pattern can be a function name, e.g. `cJSON_parse`, or a simple pattern, e.g. `cJSON_*`. + - If you have multiple patterns, use `,` to join them, e.g `cJSON_*,HTTP_*`. + - You can use the `@` prefix to limit the fuzzer to only fuzz specific functions, while the others can be candidates that provide values for fields or arguments, e.g. `@cJSON_parse,cJSON_*`. + - `!` is used as prefix for excluding some specific functions, e.g `!cJSON_InitHook,cJSON_*`. + +#### `--custom-rules` +The patterns can be defined in the file passed by `--custom-rules`. + +```rust +// hopper fuzz output --custom-rules path-to-file +func_target cJSON_parse +func_exclude cJSON_InitHook +func_include cJSON_*,HTTP_* +``` + +### Constraints +Hopper infers both intra- and inter-API constraints to invoke the APIs correctly.
+The constraints are written in `output/misc/constraint.config`. You can remove the file to reset the constraints. +Additionally, users can define a file that describes custom constraints for API invocations, which is passed by `--custom-rules`. The constraints will override the inferred ones. +```java +// hopper fuzz output --custom-rules path-to-file +// Grammar: +// func, type : prefix for adding a rule for function or type +// $[0-9]+ : function's i-th argument, or index in array +// [a-zA-Z_]+ : object field +// 0, 128 .. : integer constants +// "xxxx" : string constants +// methods : $len, $range, $null, $non_null, $need_init, $read_file, $write_file, $ret_from, $cast_from, $use, $arr_len, $opaque, $len_factors +// others : pointer(&) , option(?), e.g &.$0.len, `len` field in the pointer's first element +// +// Set one argument in a function to be specific constant +func test_add[$0] = 128 +// One argument must be the length of another one +func test_arr[$1] = $len($0) +// Or one field must be the length of another field +func test_arr[$0][len] = $len([$0][name]) +// One argument must be in a certain range +func test_arr[$1] = $range(0, $len($0)) +// Argument should be non-null +func test_non_null[$0] = $non_null +// Argument should be null +func test_null[$0] = $null +// Argument should be specific string +func test_magic[$0] = "magic" +// Argument should be a file and the file will be read +func test_path[$0] = $read_file +// Argument should use the value of a specific function's return +func test_use[$0] = $ret_from(test_create) +// Argument should be specific type for void pointer. The type should start with *mut or *const.
+func test_void[$0] = $cast_from(*mut u8) +// The array suppose has a minimal array length +func test_void[$0][&] = $arr_len(256) +// The array's length is formed by the factors +func fread[$0][&] = $len_factors(1, $2) +// Or +func gzfread[$0][&] = $len_factors($1, $2) +// Field in argument should be specific constant +func test_field[$0][len] = 128 +// Deeper fields +func test_field[$0][&.elements.$0] = 128 + +// One field `len` in a type must be the length of another field `p` +type ArrayWrap[len] = $len(p) +// One nested union `inner_union` in a type must be set to `member2` +type ComplicatedStruct[inner_union] = $use(member2) +// Type is opaque that used as an opaque pointer +type Partial = $opaque +// A type should be init with specific function +type Partial = $init_with(test_init, 0) + +// ctx: set context for specific function +// Add a context for function +ctx test_use[$0] <- test_init +// Add implicit context +ctx test_use[*] <- test_init +// Add optional context that prefered to use +ctx test_use[$0] <- test_init ? +// Add forbidden context +ctx test_use[$0] <- ! test_init + +// alias: alias types across different function +alias handleA <- useA($0),createA($ret),freeA($0) + +// assert: adding specific assertions for calls +assert test_one == 1 +assert test_non_zero != 0 + +``` + +### Seeds for bytes arguments +If there is a `seeds` direcotry (Set by `HOPPER_SEED_DIR`), Hopper will try to read files inside it and uses them as the seeds for bytes arguments (e.g. char*). Also, you can indicate the seeds for specific argument via its parameter names, e.g make the subdirectory as `@buf` for parameter whose name is `buf`. + +### Logging +Hopper uses Rust's log crate to print log information. The default log level is `INFO`. If you want to print all logging information (`DEBUG` and `TRACE`), you can set the environment `LOG_TYPE` during running Hopper, e.g. `LOG_TYPE=trace ./hopper`. 
+The detailed logging will be written at `output/fuzzer_r*.log` and `output/harness_r*.log`. + +### Reproduce execution +Hopper can reproduce the execution of programs at output directories. + +- `hopper-harness` can parse and explain the inputs by Hopper's runtime. It will print the internal states during execution in detail. +``` +./bin/hopper-harness ./queue/id_000000 +``` + +- `hopper-translate` can translate the input to C source code. The C files can be a witness for reporting issues. +``` +./bin/hopper-translate --input ./queue/id_000000 --header path-to/xx.h --output test.c +# then compile it with specific library +gcc -I/path-to-head -L/path-to-lib -l:libcjson.so test.c -o test +``` + +- `hopper-generator` is able to replay input generation except execution. You can use it to analyse how the input was generated or mutated. +``` +./bin/hopper-generator ./queue/id_000000 +``` + +- `hopper-sanitizer` can minimize and verify the crashes generated by Hopper. It excludes crashes that violate constraints and de-duplicates crashes according to call stacks. +``` +./bin/hopper-sanitizer +``` + +## Test +### Test rust code +- Run all testcases +``` +RUST_BACKTRACE=1 cargo test -- --nocapture +``` + +### Testsuite (test libraries) +- [How to run and write testsuite](./testsuite/README.md) + +## Evaluating results via source-based code coverage +- Compile the libraries' source code with LLVM's source-based code coverage instrumentation (https://clang.llvm.org/docs/SourceBasedCodeCoverage.html). You should set the compiling flags, e.g. + +``` +export CFLAGS="${CFLAGS:-} -fprofile-instr-generate -fcoverage-mapping -gline-tables-only -g" +make +``` + +- Compile the libraries with `cov` instrumentation mode. e.g. +``` +hopper compile --instrument cov --header ./cJSON.h --library ./libcjson_cov.so --output output_cov +``` + +- Run the interpreter with all generated seed inputs (SEED_DIR). +``` +# run hopper and use llvm-cov to compute the coverage.
+SEED_DIR=./output/queue hopper cov output_cov +``` + +## Contributing guidelines +We have listed some tasks in [Roadmap](https://github.com/FuzzAnything/hopper/discussions/2). +If you are interested, please feel free to discuss with us and contribute your code. + +### Coding +- *Zero* `cargo check` warnings +- *Zero* `cargo clippy` warnings +- *Zero* `FAILED` in `cargo test` +- *Try* to write tests for your code + +### Profiling +- [Profiling Rust Applications](https://gist.github.com/KodrAus/97c92c07a90b1fdd6853654357fd557a) +- [Inferno](https://github.com/jonhoo/inferno) + +```bash +perf record --call-graph=dwarf ./bin/hopper-fuzzer +# use flamegraph directly +perf script | stackcollapse-perf.pl | rust-unmangle | flamegraph.pl > flame.svg +# use inferno +perf script | inferno-collapse-perf | inferno-flamegraph > flamegraph.svg +``` + +perf will produce huge intermediate data for analysis, so *do not* run the fuzzer for more than 2 minutes. diff --git a/README_Windows.md b/README_Windows.md new file mode 100644 index 0000000..07c1426 --- /dev/null +++ b/README_Windows.md @@ -0,0 +1,41 @@ +# Use Hopper in Windows +ATTN: The Windows feature is no longer being maintained. +ATTN: Hopper was tested in Windows10 **19044.1645**, and **fork() will fail after 19044.1646**. + +Since e9patch only works in a Linux environment, Hopper needs both Linux and Windows environments. +Hopper uses e9patch to instrument libraries in Linux, and then the patched library is copied to the Windows environment.
+ +## On Linux side +- Build hopper +``` +./build.sh +``` + +- Compile libraries +``` +./hopper --header ./cJSON.h --library ./libcjson.dll +``` + +## On Windows side +- Build hopper (toolchain: stable-x86_64-pc-windows-gnu) +``` +cargo build --release +``` + +- Compile libraries +```sh +# ./libcjson.dll is copied from linux side +/path-to-release/hopper-compiler.exe --header ./cJSON.h --library ./libcjson.dll --output output +``` + +## Fuzz library with Hopper +```sh +./path-to-output/bin/hopper-fuzzer.exe +``` + +### Environment variables +- `HOPPER_TASK`: task name. default: `libname_fuzz`. +- `HOPPER_E9_BLACK_LIST`: functions that should not be patched. e.g `export HOPPER_E9_BLACK_LIST=xx` +- `HOPPER_USE_THREAD`: `0` use `fork_loop`, `1` use `thread_loop`. +- `HOPPER_USE_THREAD_NUM`: Child process will exit after executing `HOPPER_USE_THREAD_NUM` threads. The higher the number, the faster the speed and the worse the stability. default: `100`. + diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..6307984 --- /dev/null +++ b/build.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +realpath() { + [[ $1 = /* ]] && echo "$1" || echo "$PWD/${1#./}" +} +BIN_PATH=$(realpath "$0") +ROOT_DIR=$(dirname $BIN_PATH) +INSTALL_DIR=$ROOT_DIR/install +PATCHELF_VERSION=0.14.5 + +source ${ROOT_DIR}/tools/style.sh + +mkdir -p $INSTALL_DIR +if [[ "$OSTYPE" == "linux-gnu"* ]]; then + info "start install e9path and hopper's e9 plugins ..." + cd hopper-instrument/e9-mode + PREFIX=$INSTALL_DIR ./build.sh + cd ../../ + + if [ ! -x $INSTALL_DIR/patchelf ]; then + info "download patchelf ..." + cd install + mkdir -p tmp + cd tmp + wget https://github.com/NixOS/patchelf/releases/download/${PATCHELF_VERSION}/patchelf-${PATCHELF_VERSION}-x86_64.tar.gz + tar -xvf patchelf-${PATCHELF_VERSION}-x86_64.tar.gz + cp bin/patchelf ../. + cd ../../ + fi +fi + +# info "start install hopper's llvm plugins ..." 
+# cd hopper-instrument/llvm-mode +# make PREFIX=$INSTALL_DIR + +BUILD_TYPE=${BUILD_TYPE:-debug} +# BUILD_TYPE=${BUILD_TYPE:-release} + +info "start build and install hopper fuzzer ..." +if [[ "$BUILD_TYPE" == "debug" ]]; then + cargo build +else + cargo build --release +fi + +ln -sf $ROOT_DIR/target/$BUILD_TYPE/hopper-compiler $INSTALL_DIR/ + +info "build and install hopper done!" diff --git a/clippy.toml b/clippy.toml new file mode 100644 index 0000000..85b8c83 --- /dev/null +++ b/clippy.toml @@ -0,0 +1 @@ +too-many-arguments-threshold=10 \ No newline at end of file diff --git a/examples/.gitignore b/examples/.gitignore new file mode 100644 index 0000000..4655ee4 --- /dev/null +++ b/examples/.gitignore @@ -0,0 +1,3 @@ +output/ +*.h +*.so diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..03f6ab4 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,18 @@ +# Examples for Hopper + +We have set configurations in `hopper.config` files for the examples. You can install the libraries and change the path for the header and shared libraries in your environment. + +If `hopper` detects the configuration file in current directory, it loads the setting. Therefore, you can run `hopper` directly by following commands. 
+ +``` sh +# compile fuzzer for library +hopper compile +# run fuzzer +hopper fuzz +# translate specific file +hopper translate +# clean files generated by fuzzing to restart from scratch +hopper clean +# sanitize crashes +hopper sanitize +``` \ No newline at end of file diff --git a/examples/cjson/hopper.config b/examples/cjson/hopper.config new file mode 100644 index 0000000..beb8c7c --- /dev/null +++ b/examples/cjson/hopper.config @@ -0,0 +1,13 @@ +# Configurations for hopper fuzzer + +# Full path for header file +TEST_HEADER=cJSON.h + +# Full path to shared library +TEST_LIBRARY=libcjson.so + +# Output directory +OUT_DIR=output + +# API functions that you are interested in +FUNC_PATTERN=cJSON_* diff --git a/examples/lcms2/custom.rule b/examples/lcms2/custom.rule new file mode 100644 index 0000000..7a20b48 --- /dev/null +++ b/examples/lcms2/custom.rule @@ -0,0 +1,14 @@ +// func_include cms* +// func_exclude cmsCreateBCHSWabstractProfile,cmsCreateBCHSWabstractProfileTHR +// func_exclude cmsSetLogErrorHandler,cmsSetLogErrorHandlerTHR +// func_exclude cmsPlugin,cmsPluginTHR,cmsUnregisterPlugins,cmsUnregisterPluginsTHR +func_include cmsOpenProfileFromMem,cmsCreate_sRGBProfile,cmsCreateTransform,cmsDoTransform + +// type _cmsContext_struct* = $null +// func cmsCreateContext[$0] = $null +// func cmsCreateContext[$1] = $null + +// alias hModel <- cmsCIECAM02Init[$ret],cmsCIECAM02Done[$0],cmsCIECAM02Forward[$0],cmsCIECAM02Reverse[$0] +// alias hDict <- cmsDictAlloc[$ret],cmsDictFree[$0],cmsDictDup[$0],cmsDictAddEntry[$0],cmsDictGetEntryList[$0] +// alias ITHandle <- cmsIT8LoadFromMem[$ret],cmsIT8Free[$0] +// alias hGBD <- cmsGBDAlloc[$ret],cmsGBDFree[$0],cmsGDBAddPoint[$0],cmsGDBCompute[$0],cmsGDBCheckPoint[$0] diff --git a/examples/lcms2/hopper.config b/examples/lcms2/hopper.config new file mode 100644 index 0000000..fcc22a4 --- /dev/null +++ b/examples/lcms2/hopper.config @@ -0,0 +1,25 @@ +# Configurations for hopper fuzzer + 
+LIB_DIR=/data/workspace/fuzzing_bench/lcms/build/hopper_build + +# Full path for header file +TEST_HEADER=${LIB_DIR}/include/lcms2.h + +# Full path to shared library +TEST_LIBRARY=${LIB_DIR}/src/.libs/liblcms2.so + +# Output directory +OUT_DIR=output + +# Custom rule for invoking API functions +CUSTOM_RULES=custom.rule + +# set map size for branch counting +HOPPER_MAP_SIZE_POW2=18 +# disable API-sensitive +HOPPER_API_INSENSITIVE_COV=1 +# disable fast loop for execution +HOPPER_FAST_EXECUTE_LOOP=1 + +# set seeds for hopper +# HOPPER_SEED_DIR=seeds \ No newline at end of file diff --git a/examples/libpng/custom.rule b/examples/libpng/custom.rule new file mode 100644 index 0000000..962c046 --- /dev/null +++ b/examples/libpng/custom.rule @@ -0,0 +1,17 @@ +func_include png_* +# func_include png_create_read_struct,png_create_write_struct,png_create_info_struct,png_init_io,png_set_sig_bytes,png_set_crc_action +# func_target png_set_unknown_chunks + +# DEPRECATED +func_exclude png_info_init_3,png_convert_to_rfc1123,png_malloc_default,png_free_default,png_get_io_chunk_name,png_reset_zstream +# lead to errors +func_exclude png_get_io_state,png_set_read_fn,png_set_write_fn,png_set_rows,png_set_user_transform_info,png_read_image,png_set_read_user_transform_fn,png_benign_error,png_set_error_fn,png_free_data + +# func png_create_read_struct[$0] = "1.6.37"; +# func png_create_read_struct[$1] = $null; +# func png_create_read_struct[$2] = $null; +# func png_create_read_struct[$3] = $null; + +func png_image_write_to_file[$4] = 0 +func png_image_write_to_stdio[$4] = 0 +func png_image_write_to_memory[$5] = 0 \ No newline at end of file diff --git a/examples/libpng/hopper.config b/examples/libpng/hopper.config new file mode 100644 index 0000000..24693f8 --- /dev/null +++ b/examples/libpng/hopper.config @@ -0,0 +1,21 @@ +# Configurations for hopper fuzzer + +LIB_DIR=/data/workspace/fuzzing_bench/libpng/build/hopper_build + +# Full path for header file 
+TEST_HEADER=${LIB_DIR}/include/png.h + +# Full path to shared library +TEST_LIBRARY=${LIB_DIR}/.libs/libpng16.so + +# Output directory +OUT_DIR=output + +# Custom rule for invoking API functions +CUSTOM_RULES=custom.rule + +# set map size for branch counting +HOPPER_MAP_SIZE_POW2=18 + +# set seeds for hopper +# HOPPER_SEED_DIR=seeds \ No newline at end of file diff --git a/examples/pcre2/custom.rule b/examples/pcre2/custom.rule new file mode 100644 index 0000000..4b1fa33 --- /dev/null +++ b/examples/pcre2/custom.rule @@ -0,0 +1 @@ +// func_include * diff --git a/examples/pcre2/hopper.config b/examples/pcre2/hopper.config new file mode 100644 index 0000000..4138c56 --- /dev/null +++ b/examples/pcre2/hopper.config @@ -0,0 +1,21 @@ +# Configurations for hopper fuzzer + +# Full path for header file +#$(echo '#include "pcre2posix.h"' | cpp -H -o /dev/null 2>&1 | head -n1 | cut -d ' ' -f 2) +TEST_HEADER= /usr/include/pcre2posix.h + +# Full path to shared library +#$(ldconfig -p | grep -Po 'libpcre2-posix.so.*>\s*\K.+' | head -n 1) +TEST_LIBRARY=/lib64/libpcre2-posix.so /usr/lib64/libpcre2-8.so + +# Output directory +OUT_DIR=output + +# Custom rule for invoking API functions +CUSTOM_RULES=custom.rule + +# set map size for branch counting +HOPPER_MAP_SIZE_POW2=18 + +# set seeds for hopper +# HOPPER_SEED_DIR=seeds diff --git a/examples/sqlite3/custom.rule b/examples/sqlite3/custom.rule new file mode 100644 index 0000000..94302d3 --- /dev/null +++ b/examples/sqlite3/custom.rule @@ -0,0 +1,22 @@ +func_include sqlite3_* +func_exclude sqlite3_sleep,sqlite3_mprintf,sqlite3_vmprintf,sqlite3_snprintf,sqlite3_vsnprintf +// func_key sqlite3_exec,sqlite3_get_table,sqlite3_step,sqlite3_complete +// sqlite3_prepare, sqlite3_prepare_v2, sqlite3_step +// our file name is not uft16 +func_exclude sqlite3_open16,sqlite3_realloc,sqlite3_realloc64 +func_exclude sqlite3_vtab_nochange,sqlite3_vtab_rhs_value,sqlite3_free_filename +func_exclude 
sqlite3_overload_function,sqlite3_test_control,sqlite3_drop_modules +func_exclude sqlite3_mutex_try,sqlite3_mutex_enter,sqlite3_mutex_free,sqlite3_mutex_leave,sqlite3_mutex_alloc +func_exclude sqlite3_filename_wal,sqlite3_value_free,sqlite3_free_table +func_exclude sqlite3_result_* +func_exclude sqlite3_context_db_handle,sqlite3_aggregate_context,sqlite3_user_data,sqlite3_aggregate_count + +// func sqlite3_open[$0] = $write_file +// func sqlite3_open_v2[$0] = $write_file +// func sqlite3_open16[$0] = $write_file +// func sqlite3_open[$1][&] = $non_null +// func sqlite3_open_v2[$1] = $non_null +// func sqlite3_open16[$1] = $non_null + +// type sqlite3* = $init_with(sqlite3_open, 1) +// type sqlite3_stmt* = $init_with(sqlite3_prepare_v2, 3) \ No newline at end of file diff --git a/examples/sqlite3/hopper.config b/examples/sqlite3/hopper.config new file mode 100644 index 0000000..eb6a372 --- /dev/null +++ b/examples/sqlite3/hopper.config @@ -0,0 +1,21 @@ +# Configurations for hopper fuzzer + +LIB_DIR=/data/workspace/fuzzing_bench/sqlite3/build/hopper_build + +# Full path for header file +TEST_HEADER=${LIB_DIR}/sqlite3.h + +# Full path to shared library +TEST_LIBRARY=${LIB_DIR}/.libs/libsqlite3.so + +# Output directory +OUT_DIR=output + +# Custom rule for invoking API functions +CUSTOM_RULES=custom.rule + +# set map size for branch counting +HOPPER_MAP_SIZE_POW2=20 + +# set seeds for hopper +# HOPPER_SEED_DIR=seeds \ No newline at end of file diff --git a/examples/zlib/custom.rule b/examples/zlib/custom.rule new file mode 100644 index 0000000..dff7ec2 --- /dev/null +++ b/examples/zlib/custom.rule @@ -0,0 +1,4 @@ +// func_include * +func_exclude gzprintf +// func_include gzfwrite,gzopen +// func_include gzopen,gzread,gzdopen,gzbuffer,gzclose \ No newline at end of file diff --git a/examples/zlib/hopper.config b/examples/zlib/hopper.config new file mode 100644 index 0000000..e199062 --- /dev/null +++ b/examples/zlib/hopper.config @@ -0,0 +1,19 @@ +# Configurations for 
hopper fuzzer + +# Full path for header file +TEST_HEADER=$(echo '#include "zlib.h"' | cpp -H -o /dev/null 2>&1 | head -n1 | cut -d ' ' -f 2) + +# Full path to shared library +TEST_LIBRARY=$(ldconfig -p | grep -Po 'libz.so.*=>\s*\K.+' | head -n 1) + +# Output directory +OUT_DIR=output + +# Custom rule for invoking API functions +CUSTOM_RULES=custom.rule + +# set map size for branch counting +HOPPER_MAP_SIZE_POW2=18 + +# set seeds for hopper +# HOPPER_SEED_DIR=seeds \ No newline at end of file diff --git a/hopper b/hopper new file mode 100755 index 0000000..ee040a4 --- /dev/null +++ b/hopper @@ -0,0 +1,194 @@ +#!/bin/bash + +# set -exuo pipefail + +realpath() { + [[ $1 = /* ]] && echo "$1" || echo "$PWD/${1#./}" +} +BIN_PATH=$(realpath "$0") +ROOT_DIR=$(dirname $BIN_PATH) +LOG_TYPE=${LOG_TYPE:-info} +INSTALL_DIR=$ROOT_DIR/install +HOPPER_HARNESS_ROOT="$ROOT_DIR/hopper-harness" +ENVS="RUST_LOG=${LOG_TYPE} RUST_BACKTRACE=1 HOPPER_PATH=$INSTALL_DIR" +COMPILER="$INSTALL_DIR/hopper-compiler" +USAGE="Usage: $(basename $0) [compile|fuzz|translate|clean|help] ... + compile --header [header] --library [library] --output [output] ... + use --help to see usage in detail. + fuzz [output] --func-pattern [pattern] ... + use --help to see usage in detail. + translate [output] --header [header] --input [dsl_input] + use --help to see usage in detail. + clean + clean generated data during fuzzing + You can also define the configurations in a file named hopper.config in current directory, which includes: + TEST_HEADER=xx + TEST_LIBRARY=xx + OUT_DIR=xx + CUSTOM_RULES=xx + ..." 
+ +source ${ROOT_DIR}/tools/style.sh +source ${ROOT_DIR}/tools/core_affinity.sh + +# load config if exists +if [[ -e "hopper.config" ]]; then + while IFS='=' read -r key value; do + if [[ $key && $value ]]; then + declare -x "$key=$value" + # echo "config file set $key = $value" + fi + done < "hopper.config" +fi + +CORE_PATTERN=$(cat /proc/sys/kernel/core_pattern) +if [ "$CORE_PATTERN" != "core" ]; then + warn "To avoid having crashes misinterpreted as timeouts, please log in as root" + warn "and temporarily modify /proc/sys/kernel/core_pattern, like so:" + warn "echo core > /proc/sys/kernel/core_pattern" + exit 1 +fi + +CMD=${1:-help} +ARGS="" +if [ ! -z ${OUT_DIR+x} ]; then + OUTPUT_DIR="$OUT_DIR" +fi +# if [ -z ${OUTPUT_DIR+x} ]; then +if [[ ${2:-} = \-* ]] ; then + ARGS="$ARGS ${@:2}" +else + ARGS="$ARGS ${@:3}" + OUTPUT_DIR="${2:-output}" + # info "set output dir as $OUTPUT_DIR" +fi + +check_output() { + if [ -z ${OUTPUT_DIR:-} ]; then + error "do not set output directory!!" + warn "$USAGE" + exit 1 + fi +} + +[ ! -d "${ROOT_DIR}/install" ] && warn "Please run ./build.sh to build hopper's code" + +find_filter() { + FILTER=$1 + for key in $ARGS; do + # echo "$key $FILTER" + if [[ "$key" == $FILTER ]]; then + # echo "key '$key' exists" + return 1 + fi + done + return 0 +} + +set_arg() { + # info "arg set $1 = $2" + if [ ! -z "$2" ]; then + find_filter $1 + if [[ $? 
-eq 0 ]]; then + ARGS="$ARGS $1 $2" + fi + fi +} + +case ${CMD} in +build) + cd $ROOT_DIR + ./build.sh + ;; +compile) + set_arg --header "${TEST_HEADER:-}" + set_arg --library "${TEST_LIBRARY:-}" + set_arg --output ${OUTPUT_DIR:-} + cmd="$ENVS $COMPILER $ARGS" + info "${cmd}" + eval ${cmd} + ;; +fuzz) + check_output + set_arg --timeout-limit ${TIMEOUT_LIMIT:-} + set_arg --mem-limit ${MEM_LIMIT:-} + set_arg --func-pattern ${FUNC_PATTERN:-} + set_arg --custom-rules ${CUSTOM_RULES:-} + find_core_for_task_set + FUZZER="$OUTPUT_DIR/bin/hopper-fuzzer" + cmd="$ENVS $TASK_SET_CMD $FUZZER ${ARGS}" + info "${cmd}" + eval ${cmd} + ;; +translate) + check_output + set_arg --header ${TEST_HEADER:-} + cmd="$OUTPUT_DIR/bin/hopper-translate ${ARGS}" + info "${cmd}" + eval ${cmd} + ;; +sanitize) + check_output + cmd="$OUTPUT_DIR/bin/hopper-sanitizer ${ARGS}" + info "${cmd}" + eval ${cmd} + ;; +cov) + check_output + if [ -z ${SEED_DIR:-} ]; then + error "you should set SEED_DIR that stores the seed inputs!!" + exit 1 + fi + info "worksapce: $OUTPUT_DIR, seed: $SEED_DIR" + PROFILE_DIR=$OUTPUT_DIR/profile + COV_DIR=$OUTPUT_DIR/cov + rm -rf $PROFILE_DIR + export RUST_LOG=error + export LD_LIBRARY_PATH=$OUTPUT_DIR + export HOPPER_TIMEOUT_LIMIT=5 + for FILE in ${SEED_DIR}/id_*; do + printf "iterate file $FILE: " + FILE_BASE=$(basename "$FILE") + { + LLVM_PROFILE_FILE="${PROFILE_DIR}/${FILE_BASE}.%m.profraw" $OUTPUT_DIR/bin/hopper-harness $FILE --execute > /dev/null 2>&1 + info "success" + } || { warn 'program execute failed'; } + set -e + done + # find .so file in output directory. 
+ BIN_FILE=$(find $OUTPUT_DIR -maxdepth 1 -type f -name \*.so) + BIN_FILE=$(IFS= ; echo "${BIN_FILE[*]}") + info "bin file: $BIN_FILE" + rm -rf $COV_DIR + mkdir -p $COV_DIR + DATA_FILE=$COV_DIR/all.profdata + find ${PROFILE_DIR} -type f > ${COV_DIR}/prof.list + llvm-profdata merge -sparse -f ${COV_DIR}/prof.list -o ${DATA_FILE} + COV_EXCLUDE='(fuzzing|fuzz|test|OT|cre2|oss|examples)/' + llvm-cov show -ignore-filename-regex=$COV_EXCLUDE --format=html $BIN_FILE -instr-profile=${DATA_FILE} >${COV_DIR}/coverage.html + # llvm-cov report -show-functions -ignore-filename-regex=$COV_EXCLUDE -instr-profile=${DATA_FILE} $BIN_FILE $SRC_DIR | tee ${COV_DIR}/coverage_funcs.report + llvm-cov report $BIN_FILE -instr-profile=${DATA_FILE} -ignore-filename-regex=$COV_EXCLUDE | tee ${COV_DIR}/coverage.report + ;; +clean) + check_output + [ ! -d "${OUTPUT_DIR}" ] && error "directory '${OUTPUT_DIR}' is not exist" && exit 1 + cd $OUTPUT_DIR + rm -rf crashes + rm -rf minimized_crashes + rm -rf hangs + rm -rf queue + rm -rf misc + rm -rf release + rm -rf working + find . -maxdepth 1 -type f ! -executable ! -name "test*" ! -name "*.log" ! -name "func_list" ! -name "custom_rule" ! 
-name "hopper.config" -delete + info "clean files in '$OUTPUT_DIR' directory" + ;; +help) + warn "$USAGE" + exit 0 + ;; +*) + warn "$USAGE" + exit 1 + ;; +esac diff --git a/hopper-compiler/Cargo.toml b/hopper-compiler/Cargo.toml new file mode 100644 index 0000000..8d3dda7 --- /dev/null +++ b/hopper-compiler/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "hopper-compiler" +version = "1.0.0" +edition = "2021" +authors = ["Peng Chen "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +rand = "0.8" +log = "0.4" +simplelog = "0.12" +goblin = { version = "0.6", optional = true } +clap = { version = "4.2", features = ["derive"] } +memmap = "0.7" +gimli = { version = "0.26", default-features = false, features = ["read"] } +object = { version = "0.28", default-features = false, features = ["read"] } +eyre = "0.6" +twoway = "0.2" + +[features] +default = ["elf", "dwarf"] +elf = ["goblin"] +dwarf = [] \ No newline at end of file diff --git a/hopper-compiler/src/binary_info.rs b/hopper-compiler/src/binary_info.rs new file mode 100644 index 0000000..872a853 --- /dev/null +++ b/hopper-compiler/src/binary_info.rs @@ -0,0 +1,213 @@ +use goblin::Object; +use std::collections::HashSet; +use std::fs; +use std::path::Path; + +#[derive(Debug)] +pub struct BinaryInfo { + // library type: elf or pe + pub lib_type: &'static str, + // all names in the symbol table + pub str_list: Vec, + // exported API in the symbol table + pub func_list: Vec, +} + +#[derive(Debug)] +pub struct FuncInfo { + pub name: String, + pub addr: u64, + pub size: u64, +} + +impl BinaryInfo { + pub fn parse(path: &Path) -> eyre::Result { + let file = fs::File::open(path)?; + let f = unsafe { memmap::MmapOptions::new().map(&file)? 
}; + let buf = f.as_ref(); + let result = Object::parse(buf).expect("fail to parse object"); + let lib_type; + let mut str_list = vec![]; + let mut func_list = vec![]; + match result { + Object::Elf(elf) => { + lib_type = "elf"; + for sym in &elf.dynsyms { + let name = elf.dynstrtab.get_at(sym.st_name).unwrap(); + if sym.st_shndx == 0 && !name.is_empty() { + str_list.push(name.to_string()); + } + if sym.is_function() + && sym.st_bind() == 1 + && sym.st_value > 0 + && sym.st_size > 0 + { + func_list.push(FuncInfo { + name: name.to_string(), + addr: sym.st_value, + size: sym.st_size, + }) + } + } + for sym in &elf.syms { + let name = elf.strtab.get_at(sym.st_name).unwrap(); + if sym.st_shndx == 0 && !name.is_empty() { + str_list.push(name.to_string()); + } + // Global: st_bind == 1 + if sym.is_function() + && sym.st_bind() == 1 + && sym.st_value > 0 + && sym.st_size > 0 + { + func_list.push(FuncInfo { + name: name.to_string(), + addr: sym.st_value, + size: sym.st_size, + }) + } + } + } + Object::PE(pe) => { + lib_type = "pe"; + let strtab = pe.header.coff_header.strings(buf)?; + str_list = strtab.to_vec()?.iter().map(|s| s.to_string()).collect(); + + let symbol_table = pe.header.coff_header.symbols(buf)?; + let symbol_size = pe.header.coff_header.number_of_symbol_table as usize; + let virtual_address = pe.sections[0].virtual_address as u64; + let image_base = pe.image_base as u64; + let mut index: usize = 0; + while index != symbol_size { + if let Some(mut sym) = symbol_table.get(index) { + if sym.1.typ == 0x20 { + // function + let offset = sym.1.value as u64; + let addr: u64 = image_base + virtual_address + offset; + let name = if let Some(name) = &sym.0 { + name + } else { + // strtab miss 4 bytes. 
+ // https://github.com/m4b/goblin/issues/171 + sym.1.set_name_offset(sym.1.name_offset().unwrap() - 4); + sym.1.name(&strtab).unwrap() + }; + func_list.push(FuncInfo { + name: name.to_string(), + addr, + size: 0, + }); + } + } + index += 1; + } + } + _ => eyre::bail!(format!("unimplemented!: {result:?}")), + } + // sort func list by address + func_list.sort_by(|a, b| a.addr.cmp(&b.addr)); + func_list.dedup_by_key(|f| f.addr); + str_list.dedup(); + Ok(Self { + lib_type, + str_list, + func_list, + }) + } + + /// Check if string tables contain specific strings + pub fn contain_func(&self, func: &str) -> bool { + self.str_list.iter().any(|name| { + if name == func || name.starts_with(&format!("{func}@")) { + return true; + } + #[cfg(target_os = "windows")] + if name.starts_with(&format!("__impl_{func}")) { + return true; + } + false + }) + } + + /// Get function's address range in the binary + pub fn get_function_addr_range(&self, name: &str) -> Option<(u64, u64)> { + if let Some(pos) = self.func_list.iter().position(|f| f.name == name) { + let f = &self.func_list[pos]; + let cur_addr = f.addr; + if pos == self.func_list.len() - 1 || f.size > 0 { + // register_frame_ctor should + 0x10 in the end + return Some((cur_addr, cur_addr + f.size)); + } else { + return Some((cur_addr, self.func_list[pos + 1].addr)); + } + } + None + } + + /// Function's range that should not be patched in the binary + /// used for e9patch + pub fn list_exclude_patch_range(&self) -> Vec<(u64, u64)> { + let mut blacklist = HashSet::new(); + if self.lib_type == "pe" { + blacklist.extend(PE_DEFAULT_BLACK_LIST); + } + let optional = std::env::var("HOPPER_E9_BLACK_LIST"); + if let Ok(list) = &optional { + for f in list.split(',') { + blacklist.insert(f); + } + } + let mut ranges = vec![]; + for name in blacklist { + if let Some(range) = self.get_function_addr_range(name) { + ranges.push(range); + } + } + ranges + } +} + +/// Save function list to disk +pub fn save_func_list(func_list: &[FuncInfo], 
output: &Path) -> eyre::Result<()> { + use std::io::Write; + let file = output.join("func_list"); + log::info!( + "save function list in the binary into {}", + file.to_string_lossy() + ); + let mut f = std::fs::File::create(file)?; + for func in func_list { + writeln!(f, "{}", func.name)?; + } + Ok(()) +} + +const PE_DEFAULT_BLACK_LIST: &[&str] = &[ + "_CRT_INIT", + "__DllMainCRTStartup", + "DllMainCRTStartup", + "__dyn_tls_dtor", + "__dyn_tls_init", + "_execute_onexit_table", + "_pei386_runtime_relocator", + "__mingw_TLScallback", + "__mingw_GetSectionCount", + "_ValidateImageBase.part.0", + "_register_onexit_function", + "ValidateImageBase", + "DllEntryPoint", + "DllMain", + "__do_global_dtors", + "__gcc_deregister_frame", + "__mingwthr_run_key_dtors.part.0", + "__security_init_cookie", +]; + +#[test] +fn test_parse_zlib() { + let zlib_path = std::path::Path::new("/usr/lib64/libz.so"); + if zlib_path.exists() { + let ret = BinaryInfo::parse(zlib_path).unwrap(); + println!("ret: {ret:?}"); + } +} diff --git a/hopper-compiler/src/cargo.rs b/hopper-compiler/src/cargo.rs new file mode 100644 index 0000000..2abd55a --- /dev/null +++ b/hopper-compiler/src/cargo.rs @@ -0,0 +1,149 @@ +use std::{ + collections::HashMap, + env, + path::{Path, PathBuf}, + process::Command, +}; + +use eyre::{bail, ensure, Context, ContextCompat, Result}; + +use crate::{config::Config, binary_info::FuncInfo}; + +fn cargo_path() -> PathBuf { + if let Ok(path) = env::var("CARGO") { + return path.into(); + } + "cargo".into() +} + +fn hopper_harness_path() -> Result { + if let Ok(path) = env::var("HOPPER_HARNESS_ROOT") { + return Ok(path.into()); + } + let crate_path: Option<&'static str> = option_env!("CARGO_MANIFEST_DIR"); + if let Some(p) = crate_path { + return Ok(PathBuf::from(p).parent().unwrap().join("hopper-harness")); + } + + bail!("Can't find harness path") +} + +fn task_name(library: &str) -> &str { + #[cfg(target_family = "unix")] + let (_, lib) = library.rsplit_once('/').unwrap(); + 
#[cfg(target_os = "windows")] + let (_, lib) = library.rsplit_once('\\').unwrap(); + let (lib, _) = lib.split_once('.').unwrap(); + log::info!("task name: {}", lib); + lib +} + +fn convert_canonicalized_path(path: &Path) -> String { + let p = path.display().to_string(); + // https://stackoverflow.com/questions/50322817/how-do-i-remove-the-prefix-from-a-canonical-windows-path + #[cfg(target_os = "windows")] + if let Some(next) = p.strip_prefix(r#"\\?\"#) { + return next.to_string(); + } + p +} + +pub fn cargo_install( + libraries: Vec, + header: &Path, + out: &Path, + config: &Config, + func_list: Vec, +) -> Result<()> { + let cargo = cargo_path(); + let harness_path = hopper_harness_path()?; + let mut envs = HashMap::new(); + let out_dir = convert_canonicalized_path(out); + // envs.insert("INSTRUMENT_TYPE", instrument_type.to_str()); + let library_list: Vec<&str> = libraries.iter().map(|l| l.to_str().unwrap()).collect(); + let library = library_list.join(","); + let header = header.to_str().context("Fail to convert header as str")?; + let func_list: Vec<&str> = func_list.iter().map(|f| f.name.as_str()).collect(); + let func_allow = func_list.join(","); + envs.insert("HOPPER_HEADER", header); + envs.insert("HOPPER_LIBRARY", &library); + envs.insert("HOPPER_TASK", task_name(&library)); + envs.insert("HOPPER_OUT_DIR", &out_dir); + if !func_allow.is_empty() { + envs.insert("HOPPER_FUNC_ALLOW_LIST", &func_allow); + } + let mut quiet_option = "--verbose"; + if config.quiet { + envs.insert("RUSTFLAGS", "-Awarnings"); + quiet_option = "--quiet"; + } + let mut features = format!("{}_mode", config.instrument.as_str()); + if env::var("HOPPER_TESTSUITE").is_ok() { + features.push_str(",testsuite"); + } + let args = [ + "install", + quiet_option, + "--features", + &features, + "--force", + "--path", + &harness_path.to_string_lossy(), + "--target-dir", + &out_dir, + "--root", + &out_dir, + ]; + log::info!( + "cargo cmd: {:?} , args: {:?}, envs: {:?}", + &cargo, + args.join(" "), 
+ envs + ); + let mut child = Command::new(&cargo) + .args(args) + .envs(envs) + .spawn() + .context("Fail to invoke cargo install")?; + + log::info!("start compiling harness by cargo .."); + + let status = child.wait()?; + ensure!(status.success(), "cargo install error"); + + log::info!("compiling harness done"); + + // mac should set link library by `install_name_tool` + #[cfg(target_os = "macos")] + { + let (_, library_file) = library + .rsplit_once("/") + .context("fail to get library file")?; + let harness_bin = out.join("bin/hopper-harness"); + let fuzzer_bin = out.join("bin/hopper-fuzzer"); + fn change_link_lib(lib_origin: &str, lib_new: &str, executable: &str) -> Result<()> { + let output = std::process::Command::new("install_name_tool") + .args(["-change", &lib_origin, &lib_new, executable]) + .output() + .context("Failed to start install_name_tool")?; + ensure!( + output.status.success(), + "install_name_tool failed: {:#?}", + output + ); + Ok(()) + } + change_link_lib( + library_file, + library, + harness_bin.to_str().context("fail to convert as str")?, + )?; + change_link_lib( + library_file, + library, + fuzzer_bin.to_str().context("fail to convert as str")?, + )?; + } + + Ok(()) +} diff --git a/hopper-compiler/src/check.rs b/hopper-compiler/src/check.rs new file mode 100644 index 0000000..8d143c1 --- /dev/null +++ b/hopper-compiler/src/check.rs @@ -0,0 +1,66 @@ +use eyre::{ensure, ContextCompat, Result}; +use std::{fs::File, path::Path}; + +#[cfg(target_os = "linux")] +static DYNAMIC_LIB_SUFFIX: &str = ".so"; +#[cfg(target_os = "macos")] +static DYNAMIC_LIB_SUFFIX: &'static str = ".dylib"; +#[cfg(target_os = "windows")] +static DYNAMIC_LIB_SUFFIX: &str = ".dll"; +static STATIC_LIB_SUFFIX: &str = ".a"; + +static DYNAMIC_LIB_SUFFIX_PE: &str = ".dll"; + +pub fn check_header(header: &Path) -> Result<()> { + ensure!(header.is_file(), "Header is not a file"); + ensure!( + header.extension().context("Can't find header extension")? 
== "h", + "Header is not end with .h" + ); + Ok(()) +} + +pub fn check_library(library: &Path) -> Result<()> { + ensure!(library.is_file(), "Library is not a file"); + + let file_name = library + .file_name() + .context("Can't find library file name")? + .to_str() + .context("fail to convert to str")?; + ensure!( + file_name.starts_with("lib"), + "Library is not start with lib" + ); + ensure!( + file_name.contains(DYNAMIC_LIB_SUFFIX) + || file_name.ends_with(STATIC_LIB_SUFFIX) + || file_name.ends_with(DYNAMIC_LIB_SUFFIX_PE), + "Library does not contain `.so` or `.a` or `.dylib`" + ); + Ok(()) +} + +pub fn output_lib_name(file: &str) -> String { + if let Some(index) = file.find(DYNAMIC_LIB_SUFFIX) { + let lib = &file[..index]; + return format!("{lib}_fuzz{DYNAMIC_LIB_SUFFIX}"); + } + file.to_string() +} + +pub fn check_llvm_runtime(libraries: &[String]) -> bool { + libraries + .iter() + .any(|l| check_file_contains(l, "HOOPER_LLVM_MARK")) +} + +pub fn check_file_contains(target: &str, s: &str) -> bool { + let file = File::open(target).unwrap_or_else(|_| panic!("Unable to open file: {target}")); + let f = unsafe { + memmap::MmapOptions::new() + .map(&file) + .expect("unable to mmap file") + }; + twoway::find_bytes(&f[..], s.as_bytes()).is_some() +} diff --git a/hopper-compiler/src/config.rs b/hopper-compiler/src/config.rs new file mode 100644 index 0000000..844a4ae --- /dev/null +++ b/hopper-compiler/src/config.rs @@ -0,0 +1,58 @@ +use std::str::FromStr; + +use clap::{ValueEnum, Parser}; + +#[derive(Parser, Debug, Copy, Clone, ValueEnum)] +pub enum InstrumentType { + E9, + Llvm, + Cov, +} + +/// Hopper - fuzz libraries fully automatically +#[derive(Parser, Debug)] +#[clap(name = "hopper-compiler")] +#[clap(version = "1.0.0", author = "Tencent")] +pub struct Config { + /// Path of target dynamic library + #[clap(long, value_parser, num_args(1..))] + pub library: Vec, + + /// Path of header file of library + #[clap(long, value_parser, num_args(1..))] + pub header: Vec, 
+ + /// Output directory of harness + #[clap(long, value_parser, default_value = "./")] + pub output: String, + + /// Intrument type + #[clap(long, value_enum, value_parser, default_value = "e9")] + pub instrument: InstrumentType, + + /// Show detailed compiling information or not + #[clap(long, value_parser)] + pub quiet: bool, +} + +impl FromStr for InstrumentType { + type Err = eyre::Error; + fn from_str(input: &str) -> Result { + match input { + "e9" => Ok(Self::E9), + "llvm" => Ok(Self::Llvm), + "cov" => Ok(Self::Cov), + _ => Err(eyre::eyre!("fail to parse instrument type")), + } + } +} + +impl InstrumentType { + pub fn as_str(&self) -> &str { + match self { + Self::E9 => "e9", + Self::Llvm => "llvm", + Self::Cov => "cov", + } + } +} diff --git a/hopper-compiler/src/dwarf/analyzer.rs b/hopper-compiler/src/dwarf/analyzer.rs new file mode 100644 index 0000000..151eda2 --- /dev/null +++ b/hopper-compiler/src/dwarf/analyzer.rs @@ -0,0 +1,343 @@ +extern crate gimli; +extern crate object; + +use crate::dwarf; + +use gimli::ReaderOffset; +use std::collections::BTreeMap; + +pub struct DwarfAnalyzer +where + R: gimli::Reader, +{ + dwarf: gimli::Dwarf, +} + +impl DwarfAnalyzer { + pub fn new(dwarf: gimli::Dwarf) -> Self { + Self { dwarf } + } + + pub fn parse(&mut self) -> Result { + let mut program_units = vec![]; + + // Iterate over the compilation units. + let mut iter = self.dwarf.units(); + while let Some(header) = iter.next()? { + println!( + "Unit at <.debug_info+0x{:x}>", + header.offset().as_debug_info_offset().unwrap().0.into_u64() + ); + + let mut type_table = BTreeMap::new(); + let mut fn_list = vec![]; + let mut var_list = vec![]; + let mut last_struct = None; + let mut last_array = None; + let mut unit_name = "None".to_string(); + let mut producer = "None".to_string(); + + let unit = self.dwarf.unit(header)?; + // Iterate over the Debugging Information Entries (DIEs) in the unit. 
+ let mut entries = unit.entries(); + // delta_depth: + // 1 -> move to previous' children + // 0 -> move to previous' sibling + // -k -> move to previous' parent (depth = k) + while let Some((delta_depth, entry)) = entries.next_dfs()? { + let offset = entry.offset().0.into_u64(); + println!("<{}><{}> {}", delta_depth, offset, entry.tag()); + + if delta_depth < 0 { + last_struct = None; + last_array = None; + } + match entry.tag() { + gimli::DW_TAG_compile_unit => { + unit_name = self + .dwarf + .attr_string( + &unit, + entry.attr_value(gimli::constants::DW_AT_name)?.unwrap(), + )? + .to_string_lossy()? + .to_string(); + + producer = self + .dwarf + .attr_string( + &unit, + entry.attr_value(gimli::constants::DW_AT_producer)?.unwrap(), + )? + .to_string_lossy()? + .to_string(); + } + gimli::DW_TAG_file_type => { + unimplemented!(); + } + gimli::DW_TAG_base_type => { + // self.print_attrs(&entry); + let name = self.dwarf.attr_string( + &unit, + entry.attr_value(gimli::constants::DW_AT_name)?.unwrap(), + )?; + let byte_size = entry + .attr_value(gimli::constants::DW_AT_byte_size)? + .unwrap() + .udata_value() + .unwrap() as usize; + let ty = dwarf::ArgType::from(name.to_string_lossy()?.as_ref(), byte_size); + type_table.insert(offset, ty); + } + gimli::DW_TAG_typedef => { + let name = self.dwarf.attr_string( + &unit, + entry.attr_value(gimli::constants::DW_AT_name)?.unwrap(), + )?; + // If it doesn't contain type attribute, it is a declaration instead of definition + let alias_type = + self.parse_type(entry)?.unwrap_or(dwarf::ArgType::Undefined); + let ty = + dwarf::ArgType::alias(name.to_string_lossy()?.as_ref(), alias_type); + type_table.insert(offset, ty); + } + gimli::DW_TAG_pointer_type => { + let dst_type = self.parse_type(entry)?.unwrap_or(dwarf::ArgType::Undefined); + let byte_size = entry + .attr_value(gimli::constants::DW_AT_byte_size)? 
+ .unwrap() + .udata_value() + .unwrap() as usize; + let ty = dwarf::ArgType::pointer(dst_type, byte_size); + type_table.insert(offset, ty); + } + gimli::DW_TAG_const_type => { + let inner_type = + self.parse_type(entry)?.unwrap_or(dwarf::ArgType::Undefined); + let ty = dwarf::ArgType::constt(inner_type); + type_table.insert(offset, ty); + } + gimli::DW_TAG_structure_type => { + let name = self.dwarf.attr_string( + &unit, + entry.attr_value(gimli::constants::DW_AT_name)?.unwrap(), + )?; + if entry + .attr_value(gimli::constants::DW_AT_declaration)? + .is_some() + { + let ty = dwarf::ArgType::Decla(dwarf::DeclaType { + name: name.to_string_lossy()?.to_string(), + }); + type_table.insert(offset, ty); + } else { + let byte_size = entry + .attr_value(gimli::constants::DW_AT_byte_size)? + .unwrap() + .udata_value() + .unwrap() as usize; + let ty = dwarf::ArgType::structt( + name.to_string_lossy()?.as_ref(), + byte_size, + ); + type_table.insert(offset, ty); + last_struct = Some(offset); + } + // self.print_attrs(&entry)?; + } + gimli::DW_TAG_member => { + let name = self.dwarf.attr_string( + &unit, + entry.attr_value(gimli::constants::DW_AT_name)?.unwrap(), + )?; + let ty = self.parse_type(entry)?.unwrap_or(dwarf::ArgType::Void); + let location = entry + .attr_value(gimli::constants::DW_AT_data_member_location)? 
+ .unwrap() + .udata_value() + .unwrap() as usize; + let member = dwarf::StructField { + name: name.to_string_lossy()?.to_string(), + ty: Box::new(ty), + location, + }; + if let Some(index) = last_struct { + if let Some(dwarf::ArgType::Struct(structt)) = + type_table.get_mut(&index) + { + structt.fields.push(member); + } + } + // self.print_attrs(&entry); + } + gimli::DW_TAG_array_type => { + let ele_type = self.parse_type(entry)?.unwrap_or(dwarf::ArgType::Undefined); + let ty = dwarf::ArgType::array(ele_type); + type_table.insert(offset, ty); + last_array = Some(offset); + } + gimli::DW_TAG_subrange_type => { + if let Some(index) = last_array { + if let Some(t) = type_table.get_mut(&index) { + if let Some(attr) = + entry.attr_value(gimli::constants::DW_AT_upper_bound)? + { + if let Some(val) = attr.udata_value() { + if let dwarf::ArgType::Array(arr) = t { + arr.sub_range.push(val as usize + 1); + } + } else { + unimplemented!(); + } + }; + } + } else { + unimplemented!(); + } + } + gimli::DW_TAG_variable => { + var_list.push(self.parse_variable(entry, &unit)?); + } + gimli::DW_TAG_subprogram + | gimli::DW_TAG_entry_point + | gimli::DW_TAG_inlined_subroutine => { + fn_list.push(self.parse_function(entry, &unit)?); + } + gimli::DW_TAG_formal_parameter => { + let arg_type = self.parse_type(entry)?.unwrap_or(dwarf::ArgType::Void); + if let Some(f) = fn_list.last_mut() { + f.arg_types.push(arg_type); + } + } + _ => { + self.print_attrs(entry)?; + } + } + } + + let program_unit = dwarf::ProgramUnit { + name: unit_name, + producer, + type_table, + fn_list, + var_list, + }; + program_units.push(program_unit); + } + + Ok(dwarf::Program { + units: program_units, + }) + } + + fn parse_function( + &self, + entry: &gimli::read::DebuggingInformationEntry, + unit: &gimli::Unit, + ) -> Result { + let name = self.dwarf.attr_string( + unit, + entry.attr_value(gimli::constants::DW_AT_name)?.unwrap(), + )?; + let ret_type = self.parse_type(entry)?.unwrap_or(dwarf::ArgType::Void); + 
let external = match entry.attr_value(gimli::constants::DW_AT_external)? { + Some(gimli::AttributeValue::Flag(flag)) => flag, + _ => false, + }; + + let func = dwarf::Function { + name: name.to_string_lossy()?.to_string(), + ret_type, + arg_types: vec![], + external, + position: self.parse_position(entry)?, + }; + + Ok(func) + } + + fn parse_variable( + &self, + entry: &gimli::read::DebuggingInformationEntry, + unit: &gimli::Unit, + ) -> Result { + let name = self.dwarf.attr_string( + unit, + entry.attr_value(gimli::constants::DW_AT_name)?.unwrap(), + )?; + let ty = self.parse_type(entry)?.unwrap_or(dwarf::ArgType::Undefined); + let external = match entry.attr_value(gimli::constants::DW_AT_external)? { + Some(gimli::AttributeValue::Flag(flag)) => flag, + _ => false, + }; + + let v = dwarf::Variable { + name: name.to_string_lossy()?.to_string(), + ty, + external, + position: self.parse_position(entry)?, + }; + + Ok(v) + } + + fn parse_position( + &self, + entry: &gimli::read::DebuggingInformationEntry, + ) -> Result { + let file_index = match entry + .attr_value(gimli::constants::DW_AT_decl_file)? + .unwrap() + { + gimli::AttributeValue::FileIndex(index) => index as usize, + _ => 0, + }; + let line = entry + .attr_value(gimli::constants::DW_AT_decl_line)? + .unwrap() + .udata_value() + .unwrap() as usize; + let column = entry + .attr_value(gimli::constants::DW_AT_decl_column)? 
/// Type of a function argument, variable or struct member as recovered from
/// debugging information.
#[derive(Debug, Clone)]
pub enum ArgType {
    Char(CharType),
    Integer(IntegerType),
    Void,
    TypeDef(DefType),
    Point(PointType),
    Const(ConstType),
    Struct(StructType),
    Array(ArrayType),
    Decla(DeclaType),
    Ref(RefType),
    Undefined,
}

/// Integer type: signedness plus the byte size passed to [`ArgType::from`].
#[derive(Debug, Clone)]
pub struct IntegerType {
    pub sign: bool,
    pub size: usize,
}

/// Character type; `sign` is `true` for the signed flavours.
#[derive(Debug, Clone)]
pub struct CharType {
    pub sign: bool,
}

/// Named alias (`typedef`) of another type.
#[derive(Debug, Clone)]
pub struct DefType {
    pub name: String,
    pub alias: Box<ArgType>,
}

/// Pointer to `dst_type`; `size` is the value passed to [`ArgType::pointer`].
#[derive(Debug, Clone)]
pub struct PointType {
    pub dst_type: Box<ArgType>,
    pub size: usize,
}

/// `const`-qualified wrapper around another type.
#[derive(Debug, Clone)]
pub struct ConstType {
    pub inner_type: Box<ArgType>,
}

/// Structure definition; `fields` starts empty and is filled in by the caller.
#[derive(Debug, Clone)]
pub struct StructType {
    pub name: String,
    pub fields: Vec<StructField>,
    pub size: usize,
    // position
}

/// One member of a [`StructType`].
#[derive(Debug, Clone)]
pub struct StructField {
    pub name: String,
    pub ty: Box<ArgType>,
    pub location: usize,
}

/// Array of `ele_type`; `sub_range` starts empty and is filled in by the caller.
#[derive(Debug, Clone)]
pub struct ArrayType {
    pub ele_type: Box<ArgType>,
    pub sub_range: Vec<usize>,
}

/// Declaration of a type that has a name but no definition.
#[derive(Debug, Clone)]
pub struct DeclaType {
    pub name: String,
}

/// Reference to another type by its offset.
#[derive(Debug, Clone)]
pub struct RefType {
    pub offset: u64,
}

impl ArgType {
    /// Map a base-type name and its byte size to an [`ArgType`].
    ///
    /// Besides the `short int` / `int` / `long int` family, this also
    /// recognises the `long long` variants and the short spellings without a
    /// trailing `int` (`short`, `long`, `unsigned long`, ...), which were
    /// previously reported as [`ArgType::Undefined`].
    ///
    /// Unknown names are printed to stdout and mapped to
    /// [`ArgType::Undefined`].
    pub fn from(name: &str, size: usize) -> Self {
        match name {
            "void" => ArgType::Void,
            "short int" | "int" | "long int" | "long long int" | "short" | "long"
            | "long long" => ArgType::Integer(IntegerType { sign: true, size }),
            "short unsigned int"
            | "unsigned int"
            | "long unsigned int"
            | "long long unsigned int"
            | "unsigned short"
            | "unsigned long"
            | "unsigned long long" => ArgType::Integer(IntegerType { sign: false, size }),
            // FIXME: should make sure char signed or unsigned
            "char" | "signed char" => ArgType::Char(CharType { sign: true }),
            "unsigned char" => ArgType::Char(CharType { sign: false }),
            _ => {
                println!("name: {name}, size: {size}");
                ArgType::Undefined
            }
        }
    }

    /// Build a typedef called `name` that stands for `alias_type`.
    pub fn alias(name: &str, alias_type: ArgType) -> Self {
        ArgType::TypeDef(DefType {
            name: name.to_string(),
            alias: Box::new(alias_type),
        })
    }

    /// Build a pointer to `dst_type` with the given size.
    pub fn pointer(dst_type: ArgType, size: usize) -> Self {
        ArgType::Point(PointType {
            dst_type: Box::new(dst_type),
            size,
        })
    }

    /// Wrap `inner_type` in a `const` qualifier.
    pub fn constt(inner_type: ArgType) -> Self {
        ArgType::Const(ConstType {
            inner_type: Box::new(inner_type),
        })
    }

    /// Build a struct called `name` with the given size and no fields yet.
    pub fn structt(name: &str, size: usize) -> Self {
        ArgType::Struct(StructType {
            name: name.to_string(),
            size,
            fields: vec![],
        })
    }

    /// Build an array of `ele_type` with no dimensions recorded yet.
    pub fn array(ele_type: ArgType) -> Self {
        ArgType::Array(ArrayType {
            ele_type: Box::new(ele_type),
            sub_range: vec![],
        })
    }

    /// Build a reference to the type stored at `offset`.
    pub fn ref_as(offset: u64) -> Self {
        ArgType::Ref(RefType { offset })
    }

    /*
    use std::collections::BTreeMap;
    pub fn expand_ref<'a>(&'a self, type_table: &'a BTreeMap<u64, ArgType>) -> &'a ArgType {
        match self {
            ArgType::Ref(t) => t.find(type_table).unwrap_or_else(|| &ArgType::Undefined),
            _ => self,
        }
    }
    */
}
+} */ diff --git a/hopper-compiler/src/dwarf/function.rs b/hopper-compiler/src/dwarf/function.rs new file mode 100644 index 0000000..9fb513b --- /dev/null +++ b/hopper-compiler/src/dwarf/function.rs @@ -0,0 +1,12 @@ +use super::{ArgType, Position}; + +#[derive(Debug, Clone)] +pub struct Function { + pub name: String, + pub ret_type: ArgType, + pub arg_types: Vec, + pub external: bool, + pub position: Position, +} + +impl Function {} diff --git a/hopper-compiler/src/dwarf/mod.rs b/hopper-compiler/src/dwarf/mod.rs new file mode 100644 index 0000000..c8a34e9 --- /dev/null +++ b/hopper-compiler/src/dwarf/mod.rs @@ -0,0 +1,60 @@ +//! TODO: Extract information from debugging information +//! It needs to reverse function signatures and custom data structures. +//! We just put some simple codes and do not implement it, +//! so it is not used in Hopper, too. + +#![allow(dead_code)] + +mod analyzer; +mod arg_type; +mod function; +mod position; +mod program; +mod variable; + +use object::{Object, ObjectSection}; +use std::{borrow, fs::File, path::Path}; + +pub use analyzer::*; +pub use arg_type::*; +pub use function::*; +pub use position::*; +pub use program::*; +pub use variable::*; + +pub fn analyze_dwarf(library: &Path) -> Result { + let file = File::open(library).expect("fail to open library file"); + let mmap = unsafe { memmap::Mmap::map(&file).unwrap() }; + let object = &object::File::parse(&*mmap).expect("fail to parse object"); + + let endian = if object.is_little_endian() { + gimli::RunTimeEndian::Little + } else { + gimli::RunTimeEndian::Big + }; + + // Load a section and return as `Cow<[u8]>`. + let load_section = |id: gimli::SectionId| -> Result, gimli::Error> { + match object.section_by_name(id.name()) { + Some(ref section) => Ok(section + .uncompressed_data() + .unwrap_or_else(|_| borrow::Cow::Borrowed(&[][..]))), + None => Ok(borrow::Cow::Borrowed(&[][..])), + } + }; + + // Load all of the sections. 
+ let dwarf_cow = gimli::Dwarf::load(&load_section)?; + + // Borrow a `Cow<[u8]>` to create an `EndianSlice`. + let borrow_section: &dyn for<'a> Fn( + &'a borrow::Cow<[u8]>, + ) -> gimli::EndianSlice<'a, gimli::RunTimeEndian> = + &|section| gimli::EndianSlice::new(section, endian); + + // Create `EndianSlice`s for all of the sections. + let dwarf = dwarf_cow.borrow(&borrow_section); + let mut analyzer = DwarfAnalyzer::new(dwarf); + + analyzer.parse() +} diff --git a/hopper-compiler/src/dwarf/position.rs b/hopper-compiler/src/dwarf/position.rs new file mode 100644 index 0000000..551f20b --- /dev/null +++ b/hopper-compiler/src/dwarf/position.rs @@ -0,0 +1,8 @@ +#[derive(Debug, Clone)] +pub struct Position { + // TODO: support file name + // https://github.com/eliben/pyelftools/issues/250 + pub file: usize, + pub line: usize, + pub column: usize, +} diff --git a/hopper-compiler/src/dwarf/program.rs b/hopper-compiler/src/dwarf/program.rs new file mode 100644 index 0000000..082a3d7 --- /dev/null +++ b/hopper-compiler/src/dwarf/program.rs @@ -0,0 +1,17 @@ +use std::collections::BTreeMap; + +use super::*; + +#[derive(Debug, Clone)] +pub struct Program { + pub units: Vec, +} + +#[derive(Debug, Clone)] +pub struct ProgramUnit { + pub name: String, + pub producer: String, + pub type_table: BTreeMap, + pub fn_list: Vec, + pub var_list: Vec, +} diff --git a/hopper-compiler/src/dwarf/variable.rs b/hopper-compiler/src/dwarf/variable.rs new file mode 100644 index 0000000..4af08e0 --- /dev/null +++ b/hopper-compiler/src/dwarf/variable.rs @@ -0,0 +1,11 @@ +use super::*; + +#[derive(Debug, Clone)] +pub struct Variable { + pub name: String, + pub ty: ArgType, + pub external: bool, + pub position: Position, +} + +impl Variable {} diff --git a/hopper-compiler/src/main.rs b/hopper-compiler/src/main.rs new file mode 100644 index 0000000..92db34f --- /dev/null +++ b/hopper-compiler/src/main.rs @@ -0,0 +1,126 @@ +use binary_info::BinaryInfo; +use eyre::{Context, ContextCompat, Result}; 
+use std::{ + fs, + io::Write, + path::{Path, PathBuf}, +}; + +use clap::Parser; + +mod cargo; +mod check; +mod config; +mod dwarf; +#[cfg(target_os = "linux")] +mod patch; +mod binary_info; + +use config::*; + +pub fn compile(config: &Config) -> Result<()> { + log::info!("config: {:?}", config); + fs::create_dir_all(&config.output).expect("fail to create output directory"); + let output = PathBuf::from(&config.output) + .canonicalize() + .expect("cononicalize output path fail"); + eyre::ensure!(!config.header.is_empty(), "require at least one header"); + eyre::ensure!(!config.library.is_empty(), "require at least one library"); + let header = if config.header.len() == 1 { + let header = PathBuf::from(&config.header[0]) + .canonicalize() + .expect("cononicalize header path fail"); + check::check_header(&header)?; + header + } else { + concat_headers(&output, &config.header)? + }; + + let mut libraries = vec![]; + let mut func_list = vec![]; + for lib in &config.library { + let lib = PathBuf::from(lib) + .canonicalize() + .expect("cononicalize library path fail"); + check::check_library(&lib)?; + let lib_info = crate::binary_info::BinaryInfo::parse(&lib)?; + let instrumented_lib = instrument(&lib, &output, config, &lib_info)?; + libraries.push(instrumented_lib); + func_list.extend(lib_info.func_list); + } + binary_info::save_func_list(&func_list, &output)?; + cargo::cargo_install(libraries, &header, &output, config, func_list)?; + Ok(()) +} + +fn instrument(library: &Path, output: &Path, config: &Config, lib_info: &BinaryInfo) -> Result { + let lib_name = library.file_name().context("fail to parse library name")?; + let lib_name = check::output_lib_name(lib_name.to_str().context("fail cast as str")?); + let output_lib = output.join(&lib_name); + match config.instrument { + InstrumentType::E9 => { + #[cfg(target_os = "windows")] + { + eyre::ensure!( + check::check_file_contains(library, "E9PATCH"), + "The library should be instrumented by E9 in linux" + ); + 
fs::copy(library, &output_lib).context("fail to copy library")?; + } + #[cfg(target_os = "linux")] + patch::e9_instrument(library, &output_lib, lib_info)?; + } + InstrumentType::Llvm | InstrumentType::Cov => { + fs::copy(library, &output_lib).context("fail to copy library")?; + } + } + #[cfg(target_os = "linux")] + patch::patchelf_set_so_name(&lib_name, output_lib.to_str().context("fail to be str")?)?; + Ok(output_lib) +} + + +fn main() -> Result<()> { + init_logger(); + let mut config = Config::parse(); + if check::check_llvm_runtime(&config.library) { + config.instrument = InstrumentType::Llvm; + } + let ret = compile(&config); + if let Err(e) = ret { + log::error!("Meets error: {}", e); + return Err(e); + } + + Ok(()) +} + + + +fn init_logger() { + let mut config_builder = simplelog::ConfigBuilder::new(); + config_builder.set_time_offset_to_local().unwrap(); + simplelog::CombinedLogger::init(vec![simplelog::TermLogger::new( + simplelog::LevelFilter::Info, + config_builder.build(), + simplelog::TerminalMode::Mixed, + simplelog::ColorChoice::Auto, + )]) + .unwrap(); +} + +fn concat_headers(output: &Path, headers: &Vec) -> Result { + let tmp_header = output.join("tmp.h"); + let mut content = String::new(); + for header in headers { + let header = PathBuf::from(header) + .canonicalize() + .expect("cononicalize header path fail"); + check::check_header(&header)?; + content.push_str(&format!("#include \"{}\"\n", header.to_str().unwrap())); + } + let mut f = std::fs::File::create(&tmp_header)?; + f.write_all(content.as_bytes())?; + + Ok(tmp_header) +} diff --git a/hopper-compiler/src/patch/e9.rs b/hopper-compiler/src/patch/e9.rs new file mode 100644 index 0000000..6c5d91c --- /dev/null +++ b/hopper-compiler/src/patch/e9.rs @@ -0,0 +1,267 @@ +#![allow(dead_code)] +//! Run e9patch instrumentation command +//! [OPTIONS] library [e9tool-OPTIONS] +//! OPTIONS: +//! -Oblock=never,default,always +//! Apply bad block optimization. +//! -Oselect=never,default,always +//! 
Apply selection optimization. +//! -d, --debug +//! Enable debugging output. +//! --counter=classic,neverzero,saturated +//! +//! E9Patch is not support static library +//! + +use std::{ + collections::HashMap, + env, + path::{Path, PathBuf}, + process::Command, +}; + +use eyre::{ensure, Context, Result}; + +use crate::binary_info::BinaryInfo; + +pub fn e9_instrument(library: &Path, output_lib: &Path, lib_info: &BinaryInfo) -> Result<()> { + let e9_dir = e9_dir()?; + let e9_tool = e9_dir.join("e9tool"); + ensure!( + e9_tool.exists(), + format!("e9tool is not found in {:?}", &e9_tool) + ); + let lib_type = lib_info.lib_type; + let cov_plugin_path = e9_dir.join(format!("hopper-e9-plugin-{lib_type}.so")); + let cov_plugin = cov_plugin_path.to_string_lossy(); + let instr_plugin_path = e9_dir.join(format!("hopper-instr-plugin-{lib_type}.so")); + let instr_plugin = instr_plugin_path.to_string_lossy(); + let hopper_rt_path = e9_dir.join(format!("hopper-e9-rt-{lib_type}")); + let hopper_rt = hopper_rt_path.to_string_lossy(); + + let mut envs = HashMap::new(); + envs.insert("E9AFL_PATH", e9_dir.to_string_lossy().to_string()); + envs.insert("E9AFL_COUNTER", "saturated".to_string()); + if let Ok(str) = env::var("HOPPER_MAP_SIZE_POW2") { + envs.insert("HOPPER_MAP_SIZE_POW2", str); + } + if let Ok(str) = env::var("HOPPER_INST_RATIO") { + envs.insert("HOPPER_INST_RATIO", str); + } + // envs.insert("E9AFL_DEBUG", "default".to_string()); + // envs.insert("E9AFL_OBLOCK", "default".to_string()); + // envs.insert("E9AFL_OSELECT", "default".to_string()); + // envs.insert("E9AFL_COUNTER", "default".to_string()); + let mut args = vec![]; + let conf_args = [ + "-E", + "\".plt\"", + "-E", + "\".plt.got\"", + "-O2", + "--option", + "--mem-granularity=4096", + // "--debug", + // --seed + "-o", + &format!("{}", &output_lib.to_string_lossy()), + ]; + args.extend_from_slice(&conf_args); + + let plugin_pattern = [ + "-M", + &format!("plugin(\"{cov_plugin}\").match()"), + "-P", + 
&format!("plugin(\"{cov_plugin}\").patch()"), + "-M", + &format!("plugin(\"{instr_plugin}\").match()"), + "-P", + &format!("plugin(\"{instr_plugin}\").patch()"), + ]; + args.extend_from_slice(&plugin_pattern); + let indiret_call_pat = [ + "-M", + "call and op[0].type != imm", + "-P", + &format!("before entry_indirect(offset, op[0], &rdi, rsi)@{hopper_rt}"), + "-M", + "jump and op[0].type != imm", + "-P", + &format!("before entry_indirect(offset, op[0], &rdi, rsi)@{hopper_rt}"), + "-M", + "I[-1].call and I[-1].op[0].type != imm", + "-P", + &format!("before exit_indirect(offset, rax)@{hopper_rt}"), + ]; + let free_pat = [ + "-M", + "call and target == &free", + "-P", + &format!("before entry_free(offset, &rdi)@{hopper_rt}"), + ]; + let malloc_pat = [ + "-M", + "call and target == &malloc", + "-P", + &format!("replace hook_malloc(offset, rdi, &rax)@{hopper_rt}"), + ]; + let calloc_pat = [ + "-M", + "call and target == &calloc", + "-P", + &format!("replace hook_calloc(offset, rdi, rsi, &rax)@{hopper_rt}"), + ]; + let realloc_pat = [ + "-M", + "call and target == &realloc", + "-P", + &format!("replace hook_realloc(offset, rdi, rsi, &rax)@{hopper_rt}"), + ]; + let fopen_pat = [ + "-M", + "call and target == &fopen", + "-P", + &format!("before entry_fopen(offset, rdi, rsi)@{hopper_rt}"), + ]; + let open_pat = [ + "-M", + "call and target == &open", + "-P", + &format!("before entry_open(offset, rdi, rsi)@{hopper_rt}"), + ]; + let open64_pat = [ + "-M", + "call and target == &open64", + "-P", + &format!("before entry_open(offset, rdi, rsi)@{hopper_rt}"), + ]; + let close_pat = [ + "-M", + "call and target == &close", + "-P", + &format!("before entry_close(offset, &rdi)@{hopper_rt}"), + ]; + let creat_pat = [ + "-M", + "call and target == &creat", + "-P", + &format!("before entry_creat(offset, rdi)@{hopper_rt}"), + ]; + let fdopen_pat = [ + "-M", + "call and target == &fdopen", + "-P", + &format!("before entry_fdopen(offset, &rdi, rsi)@{hopper_rt}"), + ]; + let lseek_pat = [ 
+ "-M", + "call and target == &lseek", + "-P", + &format!("before entry_lseek(offset, &rdi)@{hopper_rt}"), + ]; + let lseek64_pat = [ + "-M", + "call and target == &lseek64", + "-P", + &format!("before entry_lseek(offset, &rdi)@{hopper_rt}"), + ]; + let read_pat = [ + "-M", + "call and target == &\"read\"", + "-P", + &format!("before entry_read(offset, &rdi)@{hopper_rt}"), + ]; + let write_pat = [ + "-M", + "call and target == &\"write\"", + "-P", + &format!("before entry_write(offset, &rdi)@{hopper_rt}"), + ]; + // log::info!("lib funcs: {:?}", funcs); + args.extend_from_slice(&indiret_call_pat); + if lib_info.contain_func("free") { + args.extend_from_slice(&free_pat); + } + if lib_info.contain_func("malloc") { + args.extend_from_slice(&malloc_pat); + } + if lib_info.contain_func("calloc") { + args.extend_from_slice(&calloc_pat); + } + if lib_info.contain_func("realloc") { + args.extend_from_slice(&realloc_pat); + } + if lib_info.contain_func("fopen") { + args.extend_from_slice(&fopen_pat); + } + if lib_info.contain_func("open") { + args.extend_from_slice(&open_pat); + } + if lib_info.contain_func("close") { + args.extend_from_slice(&close_pat); + } + if lib_info.contain_func("open64") { + args.extend_from_slice(&open64_pat); + } + if lib_info.contain_func("creat") { + args.extend_from_slice(&creat_pat); + } + if lib_info.contain_func("fdopen") { + args.extend_from_slice(&fdopen_pat); + } + if lib_info.contain_func("lseek") { + args.extend_from_slice(&lseek_pat); + } + if lib_info.contain_func("lseek64") { + args.extend_from_slice(&lseek64_pat); + } + if lib_info.contain_func("read") { + args.extend_from_slice(&read_pat); + } + if lib_info.contain_func("write") { + args.extend_from_slice(&write_pat); + } + + let e9_exclude: Vec = lib_info + .list_exclude_patch_range() + .iter() + .map(|range| format!("-E 0x{:02X}..0x{:02x}", range.0, range.1)) + .collect(); + for exclude in e9_exclude.iter() { + args.push(exclude); + } + + // add_instrument_patterns(&mut args, 
library, &e9_dir)?; + let lib_args = ["--", &format!("{}", &library.to_string_lossy())]; + args.extend_from_slice(&lib_args); + + log::info!( + "e9 cmd: {:?}, args: {}, envs: {:?}", + e9_tool, + args.join(" "), + envs + ); + let mut child = Command::new(&e9_tool) + .args(args) + .envs(envs) + .spawn() + .context("Fail to invoke e9")?; + + log::info!("start instrument library by e9 .."); + let status = child.wait()?; + ensure!(status.success(), "e9 instrumenit error"); + log::info!("e9 instrument done"); + if lib_type == "pe" { + log::warn!("copy it to windows and fuzz!"); + std::process::exit(0); + } + Ok(()) +} + +fn e9_dir() -> Result { + if let Ok(path) = env::var("HOPPER_PATH") { + return Ok(path.into()); + } + let exe_dir = env::current_dir()?; + Ok(exe_dir) +} diff --git a/hopper-compiler/src/patch/mod.rs b/hopper-compiler/src/patch/mod.rs new file mode 100644 index 0000000..5824c12 --- /dev/null +++ b/hopper-compiler/src/patch/mod.rs @@ -0,0 +1,7 @@ +#![allow(dead_code)] + +mod patchelf; +mod e9; + +pub use patchelf::*; +pub use e9::*; diff --git a/hopper-compiler/src/patch/patchelf.rs b/hopper-compiler/src/patch/patchelf.rs new file mode 100644 index 0000000..70c7600 --- /dev/null +++ b/hopper-compiler/src/patch/patchelf.rs @@ -0,0 +1,37 @@ +//! Linux specification +//! 
set shared libary's soname + +use std::{ + env, + path::{PathBuf}, + process::Command, +}; + +use eyre::{ensure, Context, Result}; + +fn patchelf_path() -> Result { + if let Ok(path) = env::var("HOPPER_PATH") { + return Ok(format!("{}/patchelf", &path).into()); + } + let exe_dir = env::current_dir()?; + Ok(exe_dir) +} + +pub fn patchelf_set_so_name(lib_name: &str, path: &str) -> Result<()> { + let patchelf = patchelf_path()?; + log::info!("patchelf cmd: {:?}, lib_name: {:?}, path: {:}", patchelf, lib_name, path); + let mut child = Command::new(&patchelf) + .arg("--set-soname") + .arg(lib_name) + .arg(path) + .spawn() + .context("Fail to invoke patchelf")?; + + log::info!("start set soname .."); + + let status = child.wait()?; + ensure!(status.success(), "patchelf set soname error"); + log::info!("patchelf set soname done"); + + Ok(()) +} \ No newline at end of file diff --git a/hopper-core/Cargo.toml b/hopper-core/Cargo.toml new file mode 100644 index 0000000..c219ae5 --- /dev/null +++ b/hopper-core/Cargo.toml @@ -0,0 +1,67 @@ +[package] +name = "hopper" +version = "1.0.0" +edition = "2021" +authors = ["Peng Chen "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +hopper-derive = { path = "../hopper-derive", features = ["use_crate"] } +rand = "0.8" +rand_core = "0.6" +log = "0.4" +thiserror = "1.0" +downcast-rs = "1.2" +nix = "0.24" +libc = "0.2" +once_cell = "1.1" +wait-timeout = "0.2" +clap = { version = "4.2", features = ["derive"] } +linkme = { version = "0.3", optional = true } +ctrlc = { version = "3.2", features = ["termination"] } +num-traits = "0.2" +num = "0.4" +dyn-clone = "1.0" +paste = "1.0" +findshlibs = "0.10" +region = "3.0" +eyre = "0.6" +twoway = "0.2" +base64 = "0.13" +regex = "1" +serde_json = "1.0" + +#[cfg(target_os = "windows")] +uds_windows = "1.0.1" +#[cfg(target_os = "windows")] +ntapi = "0.3.7" +#[cfg(target_os = "windows")] +winapi = { version = "0.3", features = [ + 
"profileapi", + "sysinfoapi", + "errhandlingapi", + "processthreadsapi", + "libloaderapi", + "consoleapi", + "winbase", + "processenv", + "wow64apiset", + "synchapi", + "memoryapi" + ] } + +[build-dependencies] +cc = "1.0" + +[features] +default = ["ctor_hook", "fat_bucket"] # "slices" +ctor_hook = [] +link_hook = ["linkme"] +e9_mode = [] +llvm_mode = [] +select_timeout = [] +fat_bucket = [] +slices = [] +verbose = [] +testsuite = [] \ No newline at end of file diff --git a/hopper-core/asm-win.S b/hopper-core/asm-win.S new file mode 100644 index 0000000..431cbbd --- /dev/null +++ b/hopper-core/asm-win.S @@ -0,0 +1,26 @@ +.text +.globl __hopper_inc_stmt_index +.globl __hopper_reset_stmt_index +.globl __hopper_last_stmt_index +.globl __hopper_enable_cov +.globl __hopper_disable_cov +.globl __hopper_set_context +__hopper_inc_stmt_index: + incw 0x47ff1000 + incl %ds:0x47fb0008 + ret +__hopper_reset_stmt_index: + movw $0,0x47ff1000 + movl $0, %ds:0x47fb0008 + ret +__hopper_last_stmt_index: + movw $0xFFFF, 0x47ff1000 + ret +__hopper_enable_cov: + movl $0, 0x47ff2000 + ret +__hopper_disable_cov: + movl $0xFFFFFFFF, 0x47ff2000 + ret +__hopper_set_context: + ret \ No newline at end of file diff --git a/hopper-core/asm.S b/hopper-core/asm.S new file mode 100644 index 0000000..cde5e72 --- /dev/null +++ b/hopper-core/asm.S @@ -0,0 +1,27 @@ +.text +.globl __hopper_inc_stmt_index +.globl __hopper_reset_stmt_index +.globl __hopper_last_stmt_index +.globl __hopper_enable_cov +.globl __hopper_disable_cov +.globl __hopper_set_context +__hopper_inc_stmt_index: + incw %ds:0x3B0108 + incl %ds:0x3B0008 + ret +__hopper_reset_stmt_index: + movw $0, %ds:0x3B0108 + movl $0, %ds:0x3B0008 + ret +__hopper_last_stmt_index: + movw $0xFFFF, %ds:0x3B0108 + ret +__hopper_enable_cov: + movl $0, %ds:0x3B0100 + ret +__hopper_disable_cov: + movl $0xFFFFFFFF, %ds:0x3B0100 + ret +__hopper_set_context: + movq %rdi, %ds:0x3B0110 + ret \ No newline at end of file diff --git a/hopper-core/build.rs 
b/hopper-core/build.rs new file mode 100644 index 0000000..5979fd8 --- /dev/null +++ b/hopper-core/build.rs @@ -0,0 +1,10 @@ +extern crate cc; + +#[cfg(target_family = "unix")] +static ASM_FILE: &str = "asm.S"; +#[cfg(target_os = "windows")] +static ASM_FILE: &str = "asm-win.S"; + +fn main() { + cc::Build::new().file(ASM_FILE).compile("asm"); +} diff --git a/hopper-core/src/config.rs b/hopper-core/src/config.rs new file mode 100644 index 0000000..25fad0f --- /dev/null +++ b/hopper-core/src/config.rs @@ -0,0 +1,430 @@ +// --- Project setting --- +pub const TASK_NAME: &str = task_env_var(); +pub const OUTPUT_DIR: &str = out_dir_env_var(); +// Use canary or not +pub const USE_CANARY: bool = use_canary(); +// Enable set function pointer +pub const ENABLE_SET_FN_POINTER: bool = enable_fn_pointer(); +pub const FN_POINTER_PREFIX: &str = fn_pointer_name_prefix(); +// Enable use infered contraints +pub const ENABLE_REFINE: bool = true; +// Enable infer abort crash +pub const ENABLE_INFER_ABORT: bool = true; +// enable mutate +pub const ENABLE_MUTATE: bool = true; +// enable effective arg +pub const ENABLE_EFF_ARG: bool = true; +// enable inter api infer +pub const ENABLE_INTER_API_LEARN: bool = true; + +// --- SHM and branch config --- +#[cfg(feature = "fat_bucket")] +pub type BucketType = u16; +#[cfg(not(feature = "fat_bucket"))] +pub type BucketType = u8; +// Branch coverage map size +pub const BRANCHES_POW2: usize = map_size_pow2_var(); +pub const BRANCHES_SIZE: usize = 1 << BRANCHES_POW2; +// Fixed pointer for collected path, instructions +#[cfg(target_family = "unix")] +pub const SHM_PATH_BASE: u64 = 0x200000; +#[cfg(target_os = "windows")] +pub const SHM_PATH_BASE: u64 = 0x47e00000; +pub const SHM_INSTR_BASE: u64 = SHM_PATH_BASE + 0x100000; +#[cfg(target_os = "windows")] +pub const RAW_DATA_PTR: u64 = 0x46f00000; + +// Size of area(a list) for collected cmp & memory related instructions +// or functions. If we modify these, please check asm.S and asm-win.S. 
+pub const CMP_LIST_AREA: usize = 0x80000; +pub const MEM_LIST_AREA: usize = 0x30000; +// Fixed pointer of arena memory for allocated objectes wrapped with canaries +pub const CANARY_PTR: *const u8 = (SHM_INSTR_BASE + 0x100000) as *const u8; +// Size of area of the arena memory +pub const CANARY_AREA_SIZE: usize = 0x100000; + +// Name for shared memory +pub static PATH_SHMID_VAR: &str = "HOPPER_PATH_SHMID"; +pub static INSTR_SHMID_VAR: &str = "HOPPER_INSTR_SHMID"; + +// --- Fork server --- +pub static FORK_SOCKET_PATH: &str = "HOPPER_SOCKET_PATH"; + +// Custom exit code +pub const FORK_ERROR_EXIT_CODE: i32 = 0x61; +pub const EXEC_ERROR_EXIT_CODE: i32 = 0x62; +pub const ASSERT_SILENT_EXIT_CODE: i32 = 0x63; +pub const ASSERT_ERROR_EXIT_CODE: i32 = 0x64; +pub const UAF_ERROR_EXIT_CODE: i32 = 0x65; +pub const TIMEOUT_CODE: i32 = 0x67; +pub const DOUBLE_FREE_ERROR_EXIT_CODE: i32 = 0x68; +pub const TEST_SUCCESS_EXIT_CODE: i32 = 0x69; + +// --- Other --- +pub static TIMEOUT_LIMIT_VAR: &str = "HOPPER_TIMEOUT_LIMIT"; +pub static CONSTRAINT_CONFIG: &str = "misc/constraint.config"; +pub static SLICES_PATH: &str = "HOPPER_SLICES_PATH"; +pub static ONLY_USE_SLICES_VAR: &str = "HOPPER_ONLY_SLICES"; +// Hopper set all the malloced memory to a fixed content. 
+pub const UNINITIALIZED_MEMORY_MAGIC: usize = 0xFAFA_FAFA_FAFA_FAFA; +pub const DEFAULT_RIP_ADDR: u64 = 0xDEAD_BEEF_DEAD_BEEF; +pub const DEFAULT_SEGV_ADDR: u64 = 0xDEAD_BEEF_DEAD_BEEF; +pub const CMP_MAX_COUNTER: usize = 8; + +// --- Depot --- +pub static CRASHES_DIR: &str = "crashes"; +pub static MINIMIZED_CRASHES_DIR: &str = "minimized_crashes"; +pub static HANGS_DIR: &str = "hangs"; +pub static INPUTS_DIR: &str = "queue"; +pub static MISC_DIR: &str = "misc"; +pub static TMP_DIR: &str = "misc/tmp"; +pub static REVIEW_DIR: &str = "misc/review"; +pub static HARNESS_WORK_DIR: &str = "working"; +pub const MAX_INPUT_SZIE: usize = 5000; +pub const MAX_QUEUE_SIZE: usize = 6000; + +// --- Mutation --- +pub const ROUND_PILOT_NUM: usize = 256; +pub const ROUND_GENERATE_NUM: usize = 96; +pub const ROUND_MUTATE_NUM: usize = 384; +pub const ROUND_WARM_UP_NUM: usize = 256; +pub const MAX_STMTS_LEN: usize = 84; +pub const MAX_DEPTH: usize = 8; +pub const PILOT_MAX_DEPTH: usize = 3; +// Maximal length for vector we generated +pub const MIN_VEC_LEN: usize = 16; +pub const MAX_VEC_LEN: usize = 64; +// Times of re-running program after we find a new one +pub const RE_RUN_TIMES: usize = 5; +// Maximal number of failures in one round +pub const MAX_ROUND_FAIL_NUM: usize = 20; +// The maximal number of rounds if we has found nothing with single call +pub const ROUND_STUCK_NUM: usize = 50; +pub const ENABLE_APPEND_NEW_TARGET: bool = true; +/// --- Constraint --- +pub const MAX_RANGE_NUM: u64 = 4096; +pub const RESERVED_FD_MIN: i32 = 3; +pub const RESERVED_FD_MAX: i32 = 32; +pub const RESERVED_FD_HUGE: i32 = 1000; + +// ----------------------------------------------------- + +use clap::{Parser, ValueEnum}; +/// Configuration parsed from command line +#[derive(Debug, Clone, Parser, Default)] +#[clap(name = "hopper")] +#[clap(version = "1.0.0", author = "Tencent")] +pub struct Config { + /// Function we want to fuzz. 
The pattern can be a function name, + /// or a simple pattern, such as cJSON_*. + /// If you has multiple pattern use `,` to join them, e.g cJSON_*,HTTP_* + /// You can use @ prefix to limit it to only fuzz specific function. + /// e.g. @JSON_parse, cJSON_* + #[clap(long, value_parser)] + pub func_pattern: Option, + /// Limitation of timeout, whose unit is seconds. + #[clap(long, value_parser, default_value_t = 1)] + pub timeout_limit: u64, + /// Limitation of memory, whose unit is `MB`, and should > 10GB + #[clap(long, value_parser)] + pub mem_limit: Option, + /// Select strategy + #[clap(long, value_enum, value_parser, default_value = "sa")] + pub select: SelectType, + /// Custom rules for constraints or patterns + #[clap(long, value_parser)] + pub custom_rules: Option, + /// Taget function + #[clap(skip)] + pub func_target: Option<&'static str>, + /// include function patterns + #[clap(skip)] + pub func_include: Vec, + /// Exclude function patterns + #[clap(skip)] + pub func_exclude: Vec, + /// Key functions + #[clap(skip)] + pub func_key: Vec, +} + +use eyre::Context; +use once_cell::sync::OnceCell; +use std::io::BufRead; + +pub static mut CONFIG_INSTANCE: Option = None; + +pub fn get_config() -> &'static Config { + if let Some(c) = unsafe { &CONFIG_INSTANCE } { + return c; + } + unsafe { + CONFIG_INSTANCE = Some(Config::default()); + } + unsafe { CONFIG_INSTANCE.as_ref().unwrap() } +} + +pub fn get_config_mut() -> &'static mut Config { + if let Some(c) = unsafe { &mut CONFIG_INSTANCE } { + return c; + } + unsafe { + CONFIG_INSTANCE = Some(Config::default()); + } + unsafe { CONFIG_INSTANCE.as_mut().unwrap() } +} + +pub fn parse_config() -> eyre::Result<()> { + let mut config = Config::parse(); + config.set_func_pattern()?; + if let Some(size) = config.mem_limit { + if size < 10000 { + eyre::bail!("the limitation for memory it too small! 
(< 10GB), since we enable canary and huge shm in our harness, we need much memory!"); + } + if size == 0 { + config.mem_limit = None; + } + } + *get_config_mut() = config; + Ok(()) +} + +impl Config { + /// Match a function to check if it can be our candidates or not. + pub fn match_func(&self, f_name: &str) -> bool { + if let Some(f) = self.func_target { + if f == f_name { + return true; + } + } + for exclude in &self.func_exclude { + if let Some(pat) = exclude.strip_suffix('*') { + if f_name.ends_with(pat) { + return false; + } + } else if f_name == exclude { + return false; + } + } + for include in &self.func_include { + if let Some(pat) = include.strip_suffix('*') { + if f_name.starts_with(pat) { + return true; + } + } else if f_name == include { + return true; + } + } + false + } + + /// Set function pattern for `match_func` + /// if can read from command line `--func-pattern`, or entries in file defined by `--custom-rule` + pub fn set_func_pattern(&mut self) -> eyre::Result<()> { + // set by `--custom-rule` + // e.g + // func_target xx + // func_exclude bad_one + // func_include test_* + if let Some(f) = &self.custom_rules { + let buf = std::fs::read(f).context("the path to custom rules is wrong")?; + for line in buf.lines() { + let line = line.context("fail to read rule line")?; + if let Some(next) = line.strip_prefix("func_target") { + let f_name = next.trim(); + self.set_func_target(f_name)?; + } + if let Some(next) = line.strip_prefix("func_exclude") { + for f in next.split(',') { + self.func_exclude.push(f.trim().to_string()); + } + } + if let Some(next) = line.strip_prefix("func_include") { + for f in next.split(',') { + self.func_include.push(f.trim().to_string()); + } + } + if let Some(next) = line.strip_prefix("func_key") { + for f in next.split(',') { + self.func_key.push(f.trim().to_string()); + } + } + } + } + // set by `--func-pattern` + // e.g. 
@test_target, !exclude_func, ?key_func, test_*, other_* + if let Some(patterns) = self.func_pattern.take() { + for pattern in patterns.split(',') { + let pattern = pattern.trim(); + if let Some(pat) = pattern.strip_prefix('@') { + let f_name = pat.trim(); + self.set_func_target(f_name)?; + } else if let Some(pat) = pattern.strip_prefix('!') { + self.func_exclude.push(pat.to_string()); + } else if let Some(pat) = pattern.strip_prefix('?') { + self.func_key.push(pat.to_string()) + } else if !pattern.is_empty() { + self.func_include.push(pattern.to_string()); + } + } + } + // read from output/func_list as default + if self.func_include.is_empty() && self.func_target.is_none() { + let f = output_file_path("func_list"); + if f.is_file() { + let buf = std::fs::read(f).context("the path to custom rules is wrong")?; + for line in buf.lines() { + let line = line.context("fail to read rule line")?; + self.func_include.push(line.trim().to_string()); + } + } else { + eyre::bail!( + "You should specific API list by either --func-pattern or --custom-rule !" 
+ ); + } + } + Ok(()) + } + + pub fn set_func_target(&mut self, f_name: &str) -> eyre::Result<()> { + let fg = crate::global_gadgets::get_instance() + .get_func_gadget(f_name) + .with_context(|| format!("function name `{f_name}` is not in gagdget"))?; + self.func_target = Some(fg.f_name); + Ok(()) + } +} + +/// Strategy for select seed +#[derive(Parser, Debug, Copy, Clone, ValueEnum)] +pub enum SelectType { + /// Round robin + Rr, + /// Simulated annealing + Sa, +} + +impl Default for SelectType { + fn default() -> Self { + Self::Sa + } +} + +impl std::str::FromStr for SelectType { + type Err = eyre::Error; + + fn from_str(input: &str) -> Result { + match input { + "rr" | "RR" | "Rr" => Ok(Self::Rr), + "sa" | "SA" | "Sa" => Ok(Self::Sa), + _ => Err(eyre::eyre!("fail to parse select type")), + } + } +} + +/// Const function for get task env +const fn task_env_var() -> &'static str { + if let Some(v) = option_env!("HOPPER_TASK") { + v + } else { + "test" + } +} + +/// Const function for get out_dir env +const fn out_dir_env_var() -> &'static str { + if let Some(v) = option_env!("HOPPER_OUT_DIR") { + v + } else { + "./" + } +} + +const fn enable_fn_pointer() -> bool { + option_env!("HOPPER_DISABLE_FN_POINTER").is_none() +} + +const fn fn_pointer_name_prefix() -> &'static str { + if let Some(v) = option_env!("HOPPER_FUNCTION_POINTER_PREFIX") { + v + } else { + "GENERATED_hopper_callback_" + } +} + +/// Const function for get map_size_pow2 +const fn map_size_pow2_var() -> usize { + if let Some(v) = option_env!("HOPPER_MAP_SIZE_POW2") { + if v.len() == 2 { + let bytes = v.as_bytes(); + if bytes[0] == b'1' && bytes[1] == b'7' { + return 17; + } + if bytes[0] == b'1' && bytes[1] == b'8' { + return 18; + } + if bytes[0] == b'1' && bytes[1] == b'9' { + return 19; + } + if bytes[0] == b'2' && bytes[1] == b'0' { + return 20; + } + } + } + 16 +} + +/// use canary +const fn use_canary() -> bool { + cfg!(feature = "e9_mode") +} + +/// Get file path in output dir +pub fn 
output_file_path(file: &str) -> std::path::PathBuf { + let path = std::path::PathBuf::from(OUTPUT_DIR); + path.join(file) +} + +/// Crate the direcotry if it does not exist +pub fn create_dir_in_output_if_not_exist(dir_name: &str) -> eyre::Result<()> { + let dir = output_file_path(dir_name); + if !dir.exists() { + std::fs::create_dir(&dir)?; + } + Ok(()) +} + +/// Get constraint path in output dir +pub fn constraint_file_path() -> std::path::PathBuf { + output_file_path(CONSTRAINT_CONFIG) +} + +/// Get path in tmp directory +/// fuzzer&harness is always run at `output`'s directory in shell +pub fn tmp_file_path(file: &str) -> std::path::PathBuf { + let mut path = std::path::PathBuf::from(crate::config::OUTPUT_DIR); + path.push(crate::config::TMP_DIR); + path.push(file); + path +} + +/// Set AOI_SENSITIVE_CALL accoring to environment variables. +pub static API_INSENSITIVE_COV: &str = "HOPPER_API_INSENSITIVE_COV"; +pub fn get_api_sensitive_cov() -> bool { + pub static API_SENSITIVE_COV: OnceCell = OnceCell::new(); + *API_SENSITIVE_COV + .get_or_init(|| !matches!(std::env::var("HOPPER_API_INSENSITIVE_COV"), Ok(..))) +} + +/// Enable generate failed target or not after pilot +pub fn enable_gen_fail() -> bool { + pub static ENABLE_GEN_FAIL: OnceCell = OnceCell::new(); + *ENABLE_GEN_FAIL.get_or_init(|| !matches!(std::env::var("DISABLE_GEN_FAIL"), Ok(..))) +} + +/// Get fast execute loop number +pub static FAST_EXECUTE_LOOP: &str = "HOPPER_FAST_EXECUTE_LOOP"; +pub fn get_fast_execute_loop() -> usize { + pub static ENABLE_FAST: OnceCell = OnceCell::new(); + *ENABLE_FAST.get_or_init(|| std::env::var(FAST_EXECUTE_LOOP).map_or(10, |s| s.parse().unwrap())) +} diff --git a/hopper-core/src/depot/io.rs b/hopper-core/src/depot/io.rs new file mode 100644 index 0000000..c22e775 --- /dev/null +++ b/hopper-core/src/depot/io.rs @@ -0,0 +1,145 @@ +use std::fmt::Write as _; +use std::{ + fs, + io::prelude::*, + path::PathBuf, + sync::atomic::{AtomicUsize, Ordering}, +}; + +use 
crate::{config, execute::StatusType, read_program, FuzzProgram, Serialize}; + +/// Directory of depot, stored all input files. +pub struct DepotDir { + path: PathBuf, + size: AtomicUsize, +} + +impl DepotDir { + /// Create new directory of depot + pub fn new(path: PathBuf) -> eyre::Result { + if !path.exists() { + fs::create_dir(&path)?; + } + Ok(Self { + path, + size: AtomicUsize::new(0), + }) + } + + /// Return size of files in this direcotry + pub fn size(&self) -> usize { + self.size.load(Ordering::Relaxed) + } + + /// Get file name of n-th input, where n is ID of the input. + fn file_name(&self, id: usize) -> PathBuf { + let file_name = format!("id_{id:06}"); + self.path.join(file_name) + } + + /// count ID + pub fn inc_id(&self) -> usize { + self.size.fetch_add(1, Ordering::Relaxed) + } + + /// Save program into depot directory + pub fn save_program(&self, program: &FuzzProgram, status: StatusType) -> eyre::Result<()> { + let mut buf = program.serialize_all()?; + if let StatusType::Crash { signal } = status { + let _ = writeln!(buf, " {}", &signal.serialize()?); + } + self.save_file(program.id, buf.as_bytes())?; + Ok(()) + } + + pub fn add_appendix(&self, id: usize, appendix: &str) -> eyre::Result<()> { + let file_name = self.file_name(id); + let mut f = fs::OpenOptions::new().append(true).open(file_name)?; + f.write_all(appendix.as_bytes())?; + Ok(()) + } + + /// Save program with custom file_name and appendix + pub fn save_program_custom( + &self, + file_name: &str, + program: &FuzzProgram, + status: StatusType, + appendix: Option, + ) -> eyre::Result<()> { + let mut buf = program.serialize_all()?; + if let StatusType::Crash { signal } = status { + let _ = writeln!(buf, " {}", &signal.serialize()?); + } + if let Some(ap) = appendix { + buf.push_str(&ap); + } + let path = self.path.join(file_name); + let mut f = fs::File::create(path)?; + f.write_all(buf.as_bytes())?; + f.flush()?; + Ok(()) + } + + /// Save file into depot directory + fn save_file(&self, id: 
usize, buf: &[u8]) -> eyre::Result<()> { + let file_name = self.file_name(id); + crate::log!(debug, "save program at file: {:?}", &file_name); + if !file_name.exists() { + let mut f = fs::File::create(file_name.as_path())?; + f.write_all(buf)?; + f.flush()?; + } + Ok(()) + } + + /// List all the files in directory + pub fn read_dir(&self) -> eyre::Result> { + let mut files = vec![]; + let mut entries: Vec = + self.path.read_dir()?.collect::, _>>()?; + entries.sort_by_key(|p| p.metadata().unwrap().created().unwrap()); + for entry in entries { + let path = entry.path(); + if path.is_file() { + files.push(path); + } + } + Ok(files) + } + + /// Update size of files in depot's directory (in memory), used for rerun the fuzzer. + pub fn update_size(&self) -> eyre::Result<()> { + let size = self.path.read_dir()?.count(); + crate::log!(info, "{:?} has {} files", self.path, size); + self.size.store(size, Ordering::Relaxed); + Ok(()) + } +} + +/// Initilize depot's directories +pub fn init_depot_dirs() -> eyre::Result<(DepotDir, DepotDir, DepotDir)> { + crate::log!(info, "init depot dir.."); + let out_dir = PathBuf::from(config::OUTPUT_DIR); + let inputs_dir = out_dir.join(config::INPUTS_DIR); + let hangs_dir = out_dir.join(config::HANGS_DIR); + let crashes_dir = out_dir.join(config::CRASHES_DIR); + config::create_dir_in_output_if_not_exist(config::MISC_DIR)?; + config::create_dir_in_output_if_not_exist(config::TMP_DIR)?; + config::create_dir_in_output_if_not_exist(config::REVIEW_DIR)?; + Ok(( + DepotDir::new(inputs_dir)?, + DepotDir::new(hangs_dir)?, + DepotDir::new(crashes_dir)?, + )) +} + +/// Read program from queue +pub fn read_input_in_queue(id: usize) -> eyre::Result { + let out_dir = PathBuf::from(config::OUTPUT_DIR); + let file_name = format!("id_{id:06}"); + let f = out_dir.join(config::INPUTS_DIR).join(file_name); + let buf = std::fs::read_to_string(f)?; + let program = read_program(&buf, false)?; + Ok(program) +} diff --git a/hopper-core/src/depot/mod.rs 
b/hopper-core/src/depot/mod.rs new file mode 100644 index 0000000..fb38cab --- /dev/null +++ b/hopper-core/src/depot/mod.rs @@ -0,0 +1,117 @@ +mod io; +mod priority; +mod select; + +use std::{collections::BinaryHeap, fmt}; + +use self::select::*; +use crate::{execute::StatusType, FuzzProgram, FeedbackSummary}; +pub use io::*; +use priority::PriorityWrap; +pub use priority::*; + +/// Depot for saving all inputs, hangs, and crashes. +pub struct Depot { + pub queue: BinaryHeap>, + pub inputs: DepotDir, + pub hangs: DepotDir, + pub crashes: DepotDir, + pub selector: Box, +} + +impl Depot { + /// Create new depot. + pub fn new() -> eyre::Result { + let (inputs, hangs, crashes) = io::init_depot_dirs()?; + Ok(Self { + queue: BinaryHeap::new(), + inputs, + hangs, + crashes, + selector: init_selector(&crate::config::get_config().select), + }) + } + + /// Fetch new ID + pub fn fetch_id(&mut self, status: StatusType) -> usize { + match status { + StatusType::Normal { .. } => self.inputs.inc_id(), + StatusType::Timeout => self.hangs.inc_id(), + StatusType::Crash { .. } => self.crashes.inc_id(), + _ => 0, + } + } + /// Save new interesting input into depot. + pub fn save( + &mut self, + status: StatusType, + program: &FuzzProgram, + sync: bool, + ) -> eyre::Result<()> { + if sync { + return Ok(()); + } + match status { + StatusType::Normal { .. } => self.inputs.save_program(program, status), + StatusType::Timeout => self.hangs.save_program(program, status), + StatusType::Crash { .. } => self.crashes.save_program(program, status), + _ => { + eyre::bail!("unknown status type"); + } + } + } + + pub fn add_appendix( + &mut self, + status: StatusType, + id: usize, + appendix: &str, + ) -> eyre::Result<()> { + match status { + StatusType::Normal { .. } => self.inputs.add_appendix(id, appendix), + StatusType::Timeout => self.hangs.add_appendix(id, appendix), + StatusType::Crash { .. 
} => self.crashes.add_appendix(id, appendix), + _ => { + eyre::bail!("unknown status type"); + } + } + } + + /// put program in the queue, + pub fn push_queue(&mut self, program: FuzzProgram, feedback: &FeedbackSummary) -> eyre::Result<()> { + let id = program.id; + let score = self.selector.init_score(&program, feedback); + self.queue.push(PriorityWrap::new(program, score)); + crate::log!( + debug, + "put new program on queue, id: {id}, priority score: {score:?}" + ); + if self.queue.len() > crate::config::MAX_QUEUE_SIZE { + // To make the memory usage to be low, + // we find the seeds with small IDs, and kick out them + let archived = id - self.queue.len() + 5; + self.queue.retain(|item| item.data.id > archived); + } + Ok(()) + } + + /// Get program in queue by id + pub fn get_program_by_id(&self, id: usize) -> Option<&FuzzProgram> { + self.queue + .iter() + .find(|p| p.data.id == id) + .map(|qw| &qw.data) + } +} + +impl fmt::Display for Depot { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "#queue: {}, #crashes: {}, #hangs: {}", + self.inputs.size(), + self.crashes.size(), + self.hangs.size(), + ) + } +} diff --git a/hopper-core/src/depot/priority.rs b/hopper-core/src/depot/priority.rs new file mode 100644 index 0000000..a940444 --- /dev/null +++ b/hopper-core/src/depot/priority.rs @@ -0,0 +1,72 @@ +use std::{ + cmp::Ordering, + fmt::{self, Display}, +}; + +#[derive(Debug)] +pub struct PriorityWrap { + pub data: T, + pub score: (u64, f64), +} + +impl PriorityWrap { + pub fn new(data: T, score: (u64, f64)) -> Self { + Self { data, score } + } +} + +impl PartialEq for PriorityWrap { + fn eq(&self, other: &PriorityWrap) -> bool { + self.score.0 == other.score.0 && self.score.1 == other.score.1 + } +} + +impl Eq for PriorityWrap {} + +// Make the queue get largestscore first. 
+impl Ord for PriorityWrap { + fn cmp(&self, other: &PriorityWrap) -> Ordering { + // score.0 is more important than score.1 + // only if score.0 is useless, then we use score.1 + if self.score.0 == 0 && other.score.0 == 0 { + // use score.1 + match self.score.1.partial_cmp(&other.score.1) { + Some(o) => match o { + Ordering::Greater => Ordering::Greater, + Ordering::Less => Ordering::Less, + Ordering::Equal => Ordering::Equal, + }, + None => { + panic!("The priority cannot be NaN!"); + } + } + } else { + match self.score.0.partial_cmp(&other.score.0) { + Some(o) => match o { + Ordering::Greater => Ordering::Greater, + Ordering::Less => Ordering::Less, + Ordering::Equal => Ordering::Equal + }, + None => { + panic!("The priority cannot be NaN!"); + } + } + } + } +} + +impl PartialOrd for PriorityWrap { + fn partial_cmp(&self, other: &PriorityWrap) -> Option { + Some(self.cmp(other)) + } +} + +impl fmt::Display for PriorityWrap { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "data: {}, priority: ({}, {})", + self.data, self.score.0, self.score.1 + ) + } +} diff --git a/hopper-core/src/depot/select.rs b/hopper-core/src/depot/select.rs new file mode 100644 index 0000000..760f1b5 --- /dev/null +++ b/hopper-core/src/depot/select.rs @@ -0,0 +1,144 @@ +use std::cell::Cell; + +use crate::{FuzzProgram, SelectType, FeedbackSummary}; + +use super::Depot; + + + +/// Select seed input form depot +pub trait Selector { + fn init_score(&self, program: &FuzzProgram, feedback: &FeedbackSummary) -> (u64, f64); + fn next_score(&self, score: (u64, f64)) -> (u64, f64); +} + +/// Round Robin +pub struct RrSelector; + +/// Simulated annealing +pub struct SaSelector; + +const RR_BASE: u64 = 100000; +const RR_STEP: u64 = 1; + +impl Selector for RrSelector { + fn init_score(&self, program: &FuzzProgram, _feedback: &FeedbackSummary) -> (u64, f64) { + let bonus = get_bonus(program); + (RR_BASE + bonus, 0_f64) + } + + fn next_score(&self, score: (u64, f64)) -> (u64, 
f64) { + if score.0 == 0 { + return score; + } + (score.0 - RR_STEP, score.1) + } +} + +// base score +const SA_BASE: f64 = 60_f64; +// 20**(-N/500) +const SA_COE: f64 = 0.9_f64; +// for fresh seeds +const SA_UNIQ_NEW: u64 = 3; +// bonus for key function +const KEY_BONUS: u64 = 2; + +thread_local! { + static AVG_SCORE: Cell<(f64, u64)> = Cell::new((0_f64, 0_u64)); +} + +impl Selector for SaSelector { + fn init_score(&self, program: &FuzzProgram, feedback: &FeedbackSummary) -> (u64, f64) { + let edge_num = feedback.path_len as u128; + let t_used = feedback.time_used as f64; + let call_num = program.stmts.iter().filter(|s| s.stmt.is_call()).count() as u128; + assert!(call_num > 0, "The number of call statements cannot be zero!"); + let r = (edge_num / call_num) as f64 / t_used; + // make the range of `r` to be [0, 5]; + let avg = AVG_SCORE.with(|c| { + let (mut avg, mut num) = c.get(); + let sum = avg.mul_add(num as f64, r); + num += 1; + avg = sum / (num as f64); + c.replace((avg, num)); + avg + }); + let mut coef = r / avg; + if coef > 5_f64 { + coef = 5_f64; + } + let score = SA_BASE * (1_f64 + coef); + let mut bonus = get_bonus(program); + crate::log!( + debug, + "#edge: {edge_num}, #t: {t_used}, #call: {call_num}, score: {score}" + ); + if feedback.has_new_uniq_path { + bonus += SA_UNIQ_NEW; + } + (bonus, score) + } + + fn next_score(&self, score: (u64, f64)) -> (u64, f64) { + if score.0 > 0 { + (score.0 - 1, score.1) + } else { + (0, score.1 * SA_COE) + } + } +} + +impl Depot { + pub fn select_seed(&mut self) -> Option { + if let Some(mut entry) = self.queue.peek_mut() { + crate::log!(debug, "select program {} as seed, score: {:?}", entry.data.id, entry.score); + entry.score = self.selector.next_score(entry.score); + return Some(entry.data.clone()); + } + None + } +} + +pub fn init_selector(ty: &SelectType) -> Box { + match ty { + SelectType::Rr => Box::new(RrSelector), + SelectType::Sa => Box::new(SaSelector), + } +} + +/// Bonus for specific programs +fn 
get_bonus(program: &FuzzProgram) -> u64 { + if let Some(call) = program.get_target_stmt() { + // bonus for key function + if crate::config::get_config().func_key.contains(&call.name) { + return KEY_BONUS; + } + } + 0 +} + +#[test] +fn test_priority_in_queue() { + use super::PriorityWrap; + use std::collections::BinaryHeap; + let selector = RrSelector; + let mut heap = BinaryHeap::new(); + heap.push(PriorityWrap::new(1, (200, 100_f64))); + heap.push(PriorityWrap::new(2, (200, 100_f64))); + for _ in 0..10 { + let v1 = { + let mut entry = heap.peek_mut().unwrap(); + println!("v1: {}", *entry); + entry.score = selector.next_score(entry.score); + entry.data + }; + let v2 = { + let mut entry = heap.peek_mut().unwrap(); + println!("v2: {}", *entry); + entry.score = selector.next_score(entry.score); + entry.data + }; + assert_ne!(v1, v2); + } +} diff --git a/hopper-core/src/error.rs b/hopper-core/src/error.rs new file mode 100644 index 0000000..294998a --- /dev/null +++ b/hopper-core/src/error.rs @@ -0,0 +1,73 @@ +use std::any::Any; + +use crate::execute::{Pid, Signal}; + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum HopperError { + #[error("fail to fork, errno `{0}`")] + ForkError(String), + + #[error("null function pointer")] + NullFuncionPointer, + + #[error("OS Error: {errno}, caused by: {info}")] + OSError { errno: std::io::Error, info: String }, + + #[error("Catch unwind panic")] + UnwindPanic(Box), + + #[error("Crash in child process, pid: `{pid}`, signal: `{signal}`")] + ProcessCrash { pid: Pid, signal: Signal }, + + #[error("Timeout in child process, pid: `{pid}`")] + ProcessTimeout { pid: Pid }, + + #[error("Panic at rust runtime")] + RuntimeError, + + #[error("Assert error: `{msg}`")] + AssertError{ msg: String, silent: bool }, + + #[error("Use resource `{ptr:?}` after free")] + UseAfterFree { ptr: *mut u8 }, + + #[error("Free resource `{ptr:?}` more than once")] + DoubleFree { ptr: *mut u8 }, + + #[error("Test success")] + TestSuccess, + + 
#[error("Fail to spawn thread")] + SpawnThreadPanic(Box), + + #[error("Timeout in spawn thread")] + SpawnTimeout, + + #[error("union field not found")] + UnionErr, + + #[error("`{0}`")] + FieldNotFound(String), + + #[error("Index does not exist in sequence")] + IndexNotExist, + + #[error("Fail to read line: EOF")] + ReadLineEOF, +} + +unsafe impl Sync for HopperError {} +unsafe impl Send for HopperError {} + +/// Get error number (errno) if return value less than zero, used for libc function calls. +pub fn check_os_error(num: T, msg: &str) -> Result<(), HopperError> { + if num < T::default() { + return Err(HopperError::OSError { + errno: std::io::Error::last_os_error(), + info: msg.to_string(), + }); + } + Ok(()) +} diff --git a/hopper-core/src/execute/executor.rs b/hopper-core/src/execute/executor.rs new file mode 100644 index 0000000..4b1bb0e --- /dev/null +++ b/hopper-core/src/execute/executor.rs @@ -0,0 +1,299 @@ +//! Wrapper for executing program +//! - wrap executing by fork, spwan .. +//! 
- handle errors +use std::{ + panic, + sync::atomic::{compiler_fence, Ordering}, + time::Duration, +}; + +use super::StatusType; +use crate::{config, error::HopperError}; + +/// Executor for programs generaetd by Hopper +pub struct Executor { + cnt: usize, + timeout: Duration, +} + +impl Default for Executor { + fn default() -> Self { + Self { + cnt: 0, + timeout: Duration::from_secs(1), + } + } +} + +impl Executor { + /// Set timeout + pub fn set_timeout(&mut self, tmout: Duration) { + self.timeout = tmout; + } + + /// Return count + pub fn count(&self) -> usize { + self.cnt + } + + /// Execute program and return status + pub fn execute(&mut self, fun: F) -> StatusType + where + F: FnOnce() -> eyre::Result, + { + self.cnt += 1; + // let _counter = self.usage.count(); + compiler_fence(Ordering::SeqCst); + let res = self.fork_execute(fun); + compiler_fence(Ordering::SeqCst); + if let Err(err) = res { + crate::log!(error, "{}-execute error: {:?}", self.cnt, err); + match err { + HopperError::ProcessCrash { pid: _, signal } => StatusType::Crash { signal }, + HopperError::ProcessTimeout { pid: _ } => StatusType::Timeout, + _ => StatusType::Ignore, + } + } else { + StatusType::default() + } + } + + /// Fork a new process and execute program + /// + /// the process can catch timeout and out of limited memory + #[cfg(target_family = "unix")] + fn fork_execute(&self, fun: F) -> Result<(), HopperError> + where + F: FnOnce() -> eyre::Result, + { + use nix::sys::{ + signal, + wait::{waitpid, WaitStatus}, + }; + + let start_at = std::time::Instant::now(); + match unsafe { nix::unistd::fork() } { + Ok(nix::unistd::ForkResult::Parent { child, .. 
}) => { + #[cfg(not(feature = "select_timeout"))] + let res = { + let (sender, receiver) = std::sync::mpsc::channel(); + let _ = std::thread::Builder::new().spawn(move || { + let res = waitpid(child, None).unwrap(); + let _ = sender.send(res); + }); + receiver.recv_timeout(self.timeout) + }; + #[cfg(feature = "select_timeout")] + let res = { + unsafe { + let _ = signal::signal(signal::SIGCHLD, signal::SigHandler::SigDfl); + } + let mut timeout_timeval = nix::sys::time::TimeVal::from(nix::libc::timeval { + tv_sec: self.timeout.as_secs() as i64, + tv_usec: 0, + }); + let sres = nix::sys::select::select(0, None, None, None, &mut timeout_timeval); + match sres { + Ok(_) => Err(()), + Err(_) => Ok(waitpid(child, None).unwrap()), + } + }; + match res { + Ok(status) => { + match status { + WaitStatus::Signaled(pid, signal, _) => { + Err(HopperError::ProcessCrash { pid, signal }) + } + WaitStatus::Exited(pid, code) => { + // crate::log!(debug, "exited with code: {}", code); + // TODO: ASAN, MSAN + match code { + config::ASSERT_ERROR_EXIT_CODE => { + crate::log!(error, "assert error"); + Err(HopperError::ProcessCrash { + pid, + signal: signal::Signal::SIGABRT, + }) + } + config::DOUBLE_FREE_ERROR_EXIT_CODE => { + crate::log!(error, "double free happened"); + Err(HopperError::ProcessCrash { + pid, + signal: signal::Signal::SIGABRT, + }) + } + config::EXEC_ERROR_EXIT_CODE => { + crate::log!(error, "The program panic at rust side!"); + Err(HopperError::RuntimeError) + } + config::TIMEOUT_CODE => { + Err(HopperError::ProcessTimeout { pid: child }) + } + _ => Ok(()), + } + } + _ => Ok(()), + } + } + Err(_) => { + if signal::kill(child, signal::Signal::SIGKILL).is_err() { + crate::log!(error, "fail to kill child {}", child); + } + // if a wait is not performed, then the terminated child remains in a "zombie" state + // ATTN: the result can't be unwrap, if pid is nonexistent (it was finish), + // `waitpid` will return `Err(ECHILD)` + let _ = waitpid(child, None); + 
Err(HopperError::ProcessTimeout { pid: child }) + } + } + } + Ok(nix::unistd::ForkResult::Child) => { + crate::log!( + trace, + "fork time: {} micro seconds", + start_at.elapsed().as_micros() + ); + let ret = Self::execute_fn(fun); + // return special signal if meet some error + if let Err(e) = ret { + if let Some(he) = e.downcast_ref::() { + std::process::exit(error_to_exit_code(he)); + } + } + std::process::exit(0); + } + Err(err) => Err(HopperError::ForkError(err.to_string())), + } + } + + #[cfg(target_os = "windows")] + fn fork_execute(&self, fun: F) -> Result<(), HopperError> + where + F: FnOnce() -> eyre::Result, + { + let mut pi: crate::execute::ProcessInformation = crate::execute::ProcessInformation { + hProcess: crate::execute::NULL, + hThread: crate::execute::NULL, + dwProcessId: 0, + dwThreadId: 0, + }; + match crate::execute::hopper_fork(&mut pi) { + Ok(crate::execute::WinForkResult::Parent { child, .. }) => { + crate::log!( + trace, + "fork child pid: {}, hProcess: {:?}, hThread: {:?}, dwProcessId: {}, dwThreadId: {}", + child, + pi.hProcess, + pi.hThread, + pi.dwProcessId, + pi.dwThreadId + ); + match crate::execute::hopper_waitpid(&mut pi, config::WAIT_PID_TIMEOUT) { + Ok(status) => match status { + crate::execute::WinWaitStatus::Exited(code) => { + if code == config::EXEC_ERROR_EXIT_CODE as u32 { + crate::log!(error, "The program panic at rust side!"); + } + crate::execute::close_child(pi); + Ok(()) + } + crate::execute::WinWaitStatus::Crash(code) => { + crate::execute::close_child(pi); + Err(HopperError::ProcessCrash { + pid: child, + signal: code, + }) + } + crate::execute::WinWaitStatus::Timeout(_code) => { + crate::execute::terminate_close_child(pi); + Err(HopperError::ProcessTimeout { pid: child }) + } + }, + Err(_) => { + crate::execute::terminate_close_child(pi); + Err(HopperError::ProcessTimeout { pid: child }) + } + } + } + Ok(crate::execute::WinForkResult::Child) => { + crate::execute::register_execption_handler(); + 
crate::execute::register_signal_handler(); + let ret = Self::execute_fn(fun); + // return special signal if meet some error + if ret.is_err() { + if let Some(he) = ret.err().unwrap().downcast_ref::() { + std::process::exit(error_to_exit_code(he)); + }; + } else { + std::process::exit(0); + } + Ok(()) + } + Err(_) => Err(HopperError::ForkError( + std::io::Error::last_os_error().to_string(), + )), + } + } + + /// Spawn and execute program + /// + /// FIXME: It can't catch crash in FFI code + #[cfg(feature = "spawn_execute")] + fn spawn_execute(&self, fun: F) -> eyre::Result + where + F: FnOnce() -> crate::Result + std::marker::Send, + T: Send, + { + let res = crossbeam_utils::thread::scope(|s| { + let (sender, receiver) = mpsc::channel(); + let handle = s.spawn(|_| { + let res = Self::execute_fn(fun); + let _ = sender.send(res); + res + }); + let res = receiver.recv_timeout(self.timeout).map_or( + Err(HopperError::SpawnTimeout), + |status| match status { + _ => Ok(()), + }, + ); + // kill thread if timeout + match handle.join() { + Ok(_) => res, + Err(err) => Err(HopperError::SpawnThreadPanic(err)), + } + }); + + res.unwrap() + } + + /// Execute programs generated by hopper directly + /// + /// It will ignore and print errors we defined in `eval`, and + /// catch panics that is not caused by the foreighn functions in library. + pub fn execute_fn(fun: F) -> eyre::Result + where + F: FnOnce() -> eyre::Result, + { + match panic::catch_unwind(panic::AssertUnwindSafe(fun)) { + Ok(ret) => ret, + Err(err) => Err(eyre::eyre!(HopperError::UnwindPanic(err))), + } + } +} + +fn error_to_exit_code(err: &HopperError) -> i32 { + match err { + HopperError::DoubleFree { .. } => config::DOUBLE_FREE_ERROR_EXIT_CODE, + HopperError::AssertError { msg: _, silent } => { + if *silent { + config::ASSERT_SILENT_EXIT_CODE + } else { + config::ASSERT_ERROR_EXIT_CODE + } + } + // ignore + HopperError::UseAfterFree { .. 
} => config::UAF_ERROR_EXIT_CODE, + _ => config::EXEC_ERROR_EXIT_CODE, + } +} diff --git a/hopper-core/src/execute/forkcli.rs b/hopper-core/src/execute/forkcli.rs new file mode 100644 index 0000000..cb8bd92 --- /dev/null +++ b/hopper-core/src/execute/forkcli.rs @@ -0,0 +1,277 @@ +use std::{ + collections::HashMap, + io::{prelude::*, BufReader, BufWriter}, + path::PathBuf, + process::{Command, Stdio}, + sync::atomic::{compiler_fence, Ordering}, +}; + +use super::{limit::SetLimit, *}; +use crate::{config, feedback::Feedback, FuzzProgram, Serialize, TimeUsage}; +use eyre::Context; + +pub struct ForkCli { + socket_path: PathBuf, + reader: BufReader, + writer: BufWriter, + fast_io: Option<(BufReader, BufWriter)>, + pub history: Vec, + pub usage: TimeUsage, +} + +impl ForkCli { + pub fn new(feedback: &Feedback) -> eyre::Result { + let config = config::get_config(); + let harness = PathBuf::from(&config::OUTPUT_DIR) + .join("bin") + .join("hopper-harness"); + let socket_path = socket_path(); + crate::log!(info, "path: {:?}", socket_path); + let listener = UnixListener::bind(&socket_path).unwrap(); + let mut envs = HashMap::new(); + if let Ok(log_type) = std::env::var("RUST_LOG") { + if log_type == "trace" { + // avoid RUST_LOG to be trace + envs.insert("RUST_LOG", "debug".to_string()); + } + } + if std::env::var("ENABLE_HARNESS_TRACE_LOG").is_ok() { + envs.insert("RUST_LOG", "trace".to_string()); + } + envs.insert(config::PATH_SHMID_VAR, feedback.path.get_env_var()); + envs.insert(config::INSTR_SHMID_VAR, feedback.instrs.get_env_var()); + envs.insert(config::TIMEOUT_LIMIT_VAR, config.timeout_limit.to_string()); + envs.insert( + config::FORK_SOCKET_PATH, + socket_path.to_string_lossy().to_string(), + ); + if let Ok(context) = std::env::var(config::API_INSENSITIVE_COV) { + envs.insert(config::API_INSENSITIVE_COV, context); + } + crate::log!(info, "Run harness: {:?}, envs: {:?}", &harness, envs); + + config::create_dir_in_output_if_not_exist(config::HARNESS_WORK_DIR)?; + let 
tmout = config.timeout_limit + 5; + Command::new(&harness) + .arg("--server") + .envs(&envs) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .current_dir(config::output_file_path(config::HARNESS_WORK_DIR)) + .mem_limit(config.mem_limit) + .core_limit() + .setsid() + .spawn() + .context("fail to spwan fork server in fuzzer")?; + // May block here if the client doesn't exist. + let (socket, _) = listener.accept()?; + socket.set_read_timeout(Some(std::time::Duration::from_secs(tmout)))?; + socket.set_write_timeout(Some(std::time::Duration::from_secs(tmout)))?; + crate::log!(info, "fork server is initialized successfully !"); + // for fast + let mut fast_io = None; + let num_fast_loop = config::get_fast_execute_loop(); + if num_fast_loop > 1 { + envs.insert(config::FAST_EXECUTE_LOOP, num_fast_loop.to_string()); + Command::new(&harness) + .arg("--server") + .arg("--fast") + .envs(&envs) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .current_dir(config::output_file_path(config::HARNESS_WORK_DIR)) + .mem_limit(config.mem_limit) + .core_limit() + .setsid() + .spawn() + .context("fail to spwan fork server in fuzzer")?; + // May block here if the client doesn't exist. 
+ let (fast_socket, _) = listener.accept()?; + fast_socket.set_read_timeout(Some(std::time::Duration::from_secs(tmout)))?; + fast_socket.set_write_timeout(Some(std::time::Duration::from_secs(tmout)))?; + fast_io = Some(( + BufReader::new(fast_socket.try_clone()?), + BufWriter::new(fast_socket), + )); + crate::log!(info, "fast fork server is initialized successfully !"); + } + let cli = Self { + socket_path, + reader: BufReader::new(socket.try_clone()?), + writer: BufWriter::new(socket), + fast_io, + history: vec![], + usage: TimeUsage::default(), + }; + + Ok(cli) + } + + pub fn execute_program_fast(&mut self, program: &FuzzProgram) -> eyre::Result { + if self.history.len() >= crate::config::get_fast_execute_loop() { + self.history.clear(); + } + if let Some((reader, writer)) = &mut self.fast_io { + let t = std::time::Instant::now(); + crate::log!(trace, "start execute program (fast).."); + compiler_fence(Ordering::SeqCst); + writeln!(writer, "{}", ForkCmd::Loop.serialize()?) + .context("fail to send cmd (fast)")?; + writer + .write_all(program.serialize()?.as_bytes()) + .context("fail to send program (fast)")?; + writer.flush().context("fail to flush send (fast)")?; + let mut status: StatusType = io_utils::receive_line(reader).with_context(|| { + format!( + "program: {program}\n history: {}", + self.history.serialize().unwrap() + ) + })?; + compiler_fence(Ordering::SeqCst); + crate::log!(trace, "receive status {:?} from fork server (fast)", status); + self.usage.add_time(&t); + if status.is_loop_end() { + status = io_utils::receive_line(reader).with_context(|| { + format!( + "program: {program}\n history: {}", + self.history.serialize().unwrap() + ) + })?; + // wait for outer ping for finish process + let _: StatusType = + io_utils::receive_line(reader).context("stop process status")?; + self.history.clear(); + } + if status.is_normal() { + self.history.push(program.clone()); + } else { + self.history.clear(); + } + Ok(status) + } else { + 
self.execute_program(program) + } + } + + pub fn execute_program(&mut self, program: &FuzzProgram) -> eyre::Result { + let t = std::time::Instant::now(); + crate::log!(trace, "start execute program.."); + compiler_fence(Ordering::SeqCst); + self.send_cmd(ForkCmd::Execute)?; + self.send_program(program)?; + let status = self + .receive_status() + .with_context(|| format!("program: {program}"))?; + compiler_fence(Ordering::SeqCst); + crate::log!(trace, "receive status {:?} from fork server", status); + self.usage.add_time(&t); + Ok(status) + } + + pub fn review_program(&mut self, program: &FuzzProgram) -> eyre::Result { + let t = std::time::Instant::now(); + crate::log!(trace, "start review program.."); + compiler_fence(Ordering::SeqCst); + self.send_cmd(ForkCmd::Review)?; + self.send_program(program)?; + let status = self.receive_status()?; + compiler_fence(Ordering::SeqCst); + crate::log!(trace, "review status {:?} from fork server", status); + self.usage.add_time(&t); + Ok(status) + } + + pub fn sanitize_program(&mut self, program: &FuzzProgram) -> eyre::Result { + let t = std::time::Instant::now(); + crate::log!(trace, "start sanitizing program.."); + compiler_fence(Ordering::SeqCst); + self.send_cmd(ForkCmd::Sanitize)?; + self.send_program(program)?; + let status = self.receive_status()?; + compiler_fence(Ordering::SeqCst); + crate::log!(trace, "sanitize status {:?} from fork server", status); + self.usage.add_time(&t); + Ok(status) + } + + pub fn set_config(&mut self, key: &str, value: &str) -> eyre::Result<()> { + self.send_cmd(ForkCmd::Config(format!("{key}={value}")))?; + self.writer.flush().context("fail to flush set config")?; + let _status: StatusType = self.receive_status()?; + if let Some((reader, writer)) = &mut self.fast_io { + writeln!( + writer, + "{}", + ForkCmd::Config("nop".to_string()).serialize()? 
+ ) + .context("fail to send nop cmd")?; + writer.flush().context("fail to flush set config (fast)")?; + let _: StatusType = io_utils::receive_line(reader).context("receive config ping")?; + writeln!( + writer, + "{}", + ForkCmd::Config(format!("{key}={value}")).serialize()? + ) + .context("fail to send nop cmd")?; + writer.flush().context("fail to flush set config (fast)")?; + let _: StatusType = io_utils::receive_line(reader).context("receive config ping")?; + self.history.clear(); + } + Ok(()) + } + + pub fn sync_all_configs(&mut self) -> eyre::Result<()> { + let opaque_list: Vec<&str> = crate::global_gadgets::get_instance() + .opaque_types + .iter() + .map(|t| t.as_str()) + .collect(); + let opaque_list = opaque_list.join(","); + crate::log!(info, "sync opaque config: {opaque_list:?}"); + self.set_config(OPAQUE_CONFIG_KEY, &opaque_list)?; + Ok(()) + } + + fn send_cmd(&mut self, cmd: ForkCmd) -> eyre::Result<()> { + writeln!(self.writer, "{}", cmd.serialize()?).context("fail to send cmd")?; + Ok(()) + } + + fn send_program(&mut self, program: &FuzzProgram) -> eyre::Result<()> { + self.writer + .write_all(program.serialize()?.as_bytes()) + .context("fail to send program ")?; + self.writer.flush().context("fail to flush send") + } + + fn receive_status(&mut self) -> eyre::Result { + match io_utils::receive_line(&mut self.reader) { + Ok(val) => Ok(val), + Err(err) => { + eyre::bail!("fail to receive status : {:?}", err); + } + } + } +} + +impl Drop for ForkCli { + fn drop(&mut self) { + write!(self.writer, "{}", ForkCmd::Finish.serialize().unwrap()).unwrap(); + if let Some((_, writer)) = &mut self.fast_io { + write!(writer, "{}", ForkCmd::Finish.serialize().unwrap()).unwrap(); + } + if self.socket_path.exists() && std::fs::remove_file(&self.socket_path).is_err() { + crate::log!(warn, "fail to remove {:?}", self.socket_path); + } + } +} + +fn socket_path() -> PathBuf { + use std::time; + let dir = std::env::temp_dir(); + let since_the_epoch = time::SystemTime::now() 
+ .duration_since(time::UNIX_EPOCH) + .expect("Time went backwards"); + // dir.join("hopper") + dir.join(format!("hopper_socket_{}", since_the_epoch.as_millis())) +} diff --git a/hopper-core/src/execute/forklib_win.rs b/hopper-core/src/execute/forklib_win.rs new file mode 100644 index 0000000..9356e01 --- /dev/null +++ b/hopper-core/src/execute/forklib_win.rs @@ -0,0 +1,1154 @@ +use crate::{config, error, execute::*, feedback::*, runtime::*, TimeUsage}; +use eyre::Context; +use forksrv::ForkSrv; +use ntapi; +use std::io::prelude::*; +use winapi; + +pub static HOPPER_USE_THREAD: &str = "HOPPER_USE_THREAD"; +pub static HOPPER_USE_THREAD_NUM: &str = "HOPPER_USE_THREAD_NUM"; +pub const WAIT_PID_TIMEOUT: u32 = 3000; +pub static HOPPER_ENABLE_CPU_BINDING_VAR: &str = "HOPPER_ENABLE_CPU_BINDING_VAR"; + +extern "system" { + fn CsrClientConnectToServer( + ObjectDirectory: winapi::shared::ntdef::PWSTR, + ServerId: winapi::shared::ntdef::ULONG, + ConnectionInfo: winapi::shared::ntdef::PVOID, + ConnectionInfoSize: winapi::shared::ntdef::ULONG, + ServerToServerCall: winapi::shared::ntdef::PBOOLEAN, + ) -> winapi::shared::ntdef::NTSTATUS; +} + +pub fn check_hopper_use_thread_win() -> bool { + if let Ok(enable) = std::env::var(crate::config::HOPPER_USE_THREAD) { + if enable == "1" { + return true; + } + false + } else { + false + } +} + +pub fn get_hopper_use_thread_num() -> i32 { + if let Ok(num) = std::env::var(crate::config::HOPPER_USE_THREAD_NUM) { + num.parse::().unwrap() + } else { + 100 + } +} + +impl ForkSrv { + pub fn thread_loop_win(&mut self) -> eyre::Result<()> { + let mut executor = super::Executor::new(); + executor.set_timeout(self.timeout_limit); + let mut exec_usage = TimeUsage::default(); + let start_at = std::time::Instant::now(); + disable_coverage_feedback(); + let raw_data_base = format!("{}_RAW_DATA_BASE\x00", crate::config::TASK_NAME); + let id = + crate::execute::hopper_create_file_mapping(0, 0x100000, raw_data_base.as_ptr() as u32) + .unwrap(); + let ptr 
= match crate::execute::hopper_map_view_of_file_ex( + id, + 0, + 0, + 0, + crate::config::RAW_DATA_PTR as *mut std::os::raw::c_void, + ) { + Ok(ptr) => { + crate::log!( + info, + "{:?} thread_loop shm {:?}", + ptr, + std::io::Error::last_os_error() + ); + ptr + } + Err(_) => { + crate::log!( + error, + "thread_loop shm error {:?}", + std::io::Error::last_os_error() + ); + crate::execute::NULL + } + }; + crate::log!(info, "start thread loop ! shm {:?}", ptr); + let event_str = format!("{}_CHILD_FINISH\x00", crate::config::TASK_NAME); + let event_child_finish = crate::execute::creat_event(event_str); + let event_str = format!("{}_PARENT_READY\x00", crate::config::TASK_NAME); + let event_parent_ready = crate::execute::creat_event(event_str); + let event_str = format!("{}_CHILD_READY\x00", crate::config::TASK_NAME); + let event_child_ready = crate::execute::creat_event(event_str); + let event_str = format!("{}_THREAD_READY\x00", crate::config::TASK_NAME); + let event_thread_ready = crate::execute::creat_event(event_str); + 'outer: loop { + let before_fork = std::time::Instant::now(); + let mut pi: crate::execute::ProcessInformation = crate::execute::ProcessInformation { + hProcess: crate::execute::NULL, + hThread: crate::execute::NULL, + dwProcessId: 0, + dwThreadId: 0, + }; + let pid = crate::execute::fork(&mut pi); + if pid > 0 { + crate::log!(info, "pid {}!,handle {:?}", pid, pi.hProcess); + let wait_res = unsafe { + winapi::um::synchapi::WaitForSingleObject( + event_child_ready, + config::WAIT_PID_TIMEOUT - 1000, + ) + }; + unsafe { + winapi::um::synchapi::ResetEvent(event_child_ready); + } + if wait_res != 0 { + crate::log!(warn, "fork error"); + crate::execute::terminate_close_child(pi); + continue; + } + close_child(pi); + crate::log!( + debug, + "fork ==> duration:{:?}", + before_fork.elapsed().as_micros() + ); + let mut raw_buf = crate::config::RAW_DATA_PTR as *mut crate::execute::RawData; + let mut child_procrss_thread_cnt = 0; + loop { + if 
child_procrss_thread_cnt > crate::config::get_hopper_use_thread_num() { + unsafe { + (*raw_buf).cmd = 0; + winapi::um::synchapi::SetEvent(event_parent_ready); + winapi::um::synchapi::WaitForSingleObject( + event_child_finish, + config::WAIT_PID_TIMEOUT + 2000, + ); + winapi::um::synchapi::ResetEvent(event_child_finish); + } + break; + } + unsafe { + libc::memset(crate::config::RAW_DATA_PTR as *mut libc::c_void, 0, 0x20000); + (*raw_buf).event_child_finish = event_child_finish; + (*raw_buf).event_thread_ready = event_thread_ready; + } + let cmd: ForkCmd = + io_utils::receive_line(&mut self.reader).context("fail to receive cmd")?; + match cmd { + ForkCmd::Execute => { + child_procrss_thread_cnt += 1; + crate::log!(debug, "receive {}-th program..", executor.count()); + + let buf = self.read_buf()?; + crate::log!(debug, "program: {}", buf); + self.feedback.clear(); + unsafe { + (*raw_buf).program_size = buf.len(); + for (i, &item) in buf.as_bytes().iter().enumerate() { + (*raw_buf).program[i] = item as u8; + } + (*raw_buf).cmd = 1; + + winapi::um::synchapi::SetEvent(event_parent_ready); + } + unsafe { + let wait_res = winapi::um::synchapi::WaitForSingleObject( + event_child_finish, + config::WAIT_PID_TIMEOUT + 2000, + ); + winapi::um::synchapi::ResetEvent(event_child_finish); + if wait_res != 0 { + crate::log!( + error, + "child error, break loop, wait_res: {}", + wait_res + ); + let status = StatusType::Normal; + writeln!(self.writer, "{}", status.serialize()?)?; + break; + } + }; + crate::log!( + debug, + "wait_res {}, thread_cnt {}", + wait_res, + child_procrss_thread_cnt + ); + let exit_code = unsafe { (*raw_buf).exit_code }; + let status = if exit_code == 0 { + StatusType::Normal + } else if exit_code == 259 { + if unsafe { (*raw_buf).not_in_eval } == 0 { + StatusType::Timeout + } else { + StatusType::Normal + } + } else if unsafe { (*raw_buf).not_in_eval } == 0 { + StatusType::Crash { + signal: exit_code as u32, + } + } else { + StatusType::Normal + }; + + 
writeln!(self.writer, "{}", status.serialize()?)?; + self.writer.flush()?; + if exit_code != 0 { + crate::log!( + debug, + "exec_break_loop ==> execcode: {}, not in eval: {}", + exit_code, + unsafe { (*raw_buf).not_in_eval } + ); + break; + } + crate::log!(debug, "exec_normal ==> exitcode: {}", exit_code); + } + ForkCmd::Review => { + crate::log!( + debug, + "receive {}-th program for review..", + executor.count() + ); + let buf = self.read_buf()?; + crate::log!(debug, "program: {}", buf); + self.feedback.clear(); + // make timeout longer + executor.set_timeout(self.timeout_limit * 3); + let status = { + let _counter = exec_usage.count(); + executor.execute(|| { + let mut program = self.read_program(&buf)?; + program.review() + }) + }; + executor.set_timeout(self.timeout_limit); + crate::log!(debug, "review status: {:?}", status); + writeln!(self.writer, "{}", status.serialize()?)?; + self.writer.flush()?; + } + ForkCmd::Finish => { + crate::log!(warn, "break server loop"); + let all_secs = start_at.elapsed().as_secs(); + crate::log!( + info, + "Time uasge : exec {} - {} ", + exec_usage.percent(all_secs), + exec_usage.avg_ms() + ); + unsafe { + (*raw_buf).cmd = 0; + winapi::um::synchapi::SetEvent(event_parent_ready); + } + break 'outer; + } + ForkCmd::Sanitize(f) => { + crate::log!( + debug, + "receive {}-th program for sanitize..", + executor.count() + ); + let buf = self.read_buf()?; + self.feedback.clear(); + let status = { + let _counter = exec_usage.count(); + executor.execute(|| { + let mut program = self.read_program(&buf)?; + program.sanitize(f); + }) + }; + let last_stmt = self.feedback.last_stmt_index(); + // SanitizeChecker::check_illegal_free(f, last_stmt)?; + executor.set_timeout(self.timeout_limit); + crate::log!(debug, "sanitize status: {:?}", status); + writeln!(self.writer, "{}", status.serialize()?)?; + self.writer.flush()?; + } + } + } + } else { + unsafe { + winapi::um::synchapi::SetEvent(event_child_ready); + } + 
crate::execute::register_execption_handler_thread(); + crate::execute::register_signal_handler_thread(); + let mut raw_buf = crate::config::RAW_DATA_PTR as *mut crate::execute::RawData; + loop { + unsafe { + let _ = winapi::um::synchapi::WaitForSingleObject( + event_parent_ready, + 0xffffffff, + ); + winapi::um::synchapi::ResetEvent(event_parent_ready); + } + let cmd = unsafe { (*raw_buf).cmd }; + match cmd { + 1 => { + let (t_handle, _thread_id) = unsafe { + (*raw_buf).not_in_eval = 1; + crate::execute::create_thread( + Some(thread_exec), + thread_fun as *mut winapi::ctypes::c_void, + ) + }; + if t_handle == crate::execute::NULL { + let mut child_log = std::fs::OpenOptions::new() + .write(true) + .append(true) + .create(true) + .open("child_log.txt") + .unwrap(); + unsafe { + writeln!( + child_log, + "create thread error: {}", + std::io::Error::last_os_error() + ) + .unwrap(); + (*raw_buf).not_in_eval = 1; + (*raw_buf).exit_code = 0xcafecafe_u64; + winapi::um::synchapi::SetEvent(event_child_finish); + winapi::um::processthreadsapi::ExitProcess(0); + } + } + + unsafe { winapi::um::processthreadsapi::ResumeThread(t_handle) }; + unsafe { + let wait_res = winapi::um::synchapi::WaitForSingleObject( + event_thread_ready, + config::WAIT_PID_TIMEOUT, + ); + winapi::um::synchapi::ResetEvent(event_thread_ready); + if wait_res != 0 { + (*raw_buf).not_in_eval = 1; + (*raw_buf).exit_code = 0xcafecafe_u64; + winapi::um::synchapi::SetEvent(event_child_finish); + crate::execute::do_close_handle(t_handle); + winapi::um::processthreadsapi::ExitProcess(0); + } + } + let wait_res = unsafe { + winapi::um::synchapi::WaitForSingleObject( + t_handle, + config::WAIT_PID_TIMEOUT, + ) + }; + crate::execute::do_close_handle(t_handle); + if wait_res == 0x102 { + unsafe { + (*raw_buf).exit_code = 259; + }; + } + + let exit_code = unsafe { (*raw_buf).exit_code }; + if exit_code == 0 { + unsafe { + winapi::um::synchapi::SetEvent(event_child_finish); + }; + } else { + unsafe { + 
winapi::um::synchapi::SetEvent(event_child_finish); + winapi::um::processthreadsapi::ExitProcess(0); + } + } + } + _ => unsafe { + winapi::um::synchapi::SetEvent(event_child_finish); + winapi::um::processthreadsapi::ExitProcess(0); + }, + } + } + } + unsafe { + winapi::um::synchapi::ResetEvent(event_child_finish); + winapi::um::synchapi::ResetEvent(event_parent_ready); + winapi::um::synchapi::ResetEvent(event_child_ready); + winapi::um::synchapi::ResetEvent(event_thread_ready); + } + } + Ok(()) + } +} + +pub fn thread_fun() { + let mut _thread_log = std::fs::OpenOptions::new() + .write(true) + .create(true) + .open("thread_log.txt") + .unwrap(); + + let mut raw_buf_thread = crate::config::RAW_DATA_PTR as *mut crate::execute::RawData; + let buf = unsafe { + String::from_raw_parts( + (*raw_buf_thread).program.as_mut_ptr(), + (*raw_buf_thread).program_size, + (*raw_buf_thread).program_size, + ) + }; + let buf = buf.replace("\x00", ""); + let mut program = read_program(&buf, true).unwrap(); + unsafe { winapi::um::synchapi::SetEvent((*raw_buf_thread).event_thread_ready) }; + unsafe { (*raw_buf_thread).not_in_eval = 0 } + let _ = program.eval(); + unsafe { + (*raw_buf_thread).not_in_eval = 1; + (*raw_buf_thread).exit_code = 0; + (*raw_buf_thread).cmd = 0; + let size = crate::canary::MEM_OFFSET.load(std::sync::atomic::Ordering::SeqCst); + let _ = region::protect( + crate::config::CANARY_PTR as *mut std::ffi::c_void, + size, + region::Protection::READ_WRITE, + ); + libc::memset(crate::config::CANARY_PTR as *mut libc::c_void, 0, size); + } + crate::canary::MEM_OFFSET.store(0, std::sync::atomic::Ordering::SeqCst); + unsafe { + winapi::um::processthreadsapi::ExitThread(0); + }; +} + +unsafe extern "system" fn thread_exec(args: winapi::shared::minwindef::LPVOID) -> u32 { + crate::execute::register_execption_handler_thread(); + crate::execute::register_signal_handler_thread(); + let fun = std::intrinsics::transmute::(args); + fun(); + 0 +} + +const E_HANDLE: u32 = 0x80070006; 
+unsafe extern "system" fn execption_handler( + exception_info: winapi::um::winnt::PEXCEPTION_POINTERS, +) -> i32 { + let rec = &(*(*exception_info).ExceptionRecord); + let code = rec.ExceptionCode; + match code { + //SEGV + winapi::um::minwinbase::EXCEPTION_ACCESS_VIOLATION | + winapi::um::minwinbase::EXCEPTION_ARRAY_BOUNDS_EXCEEDED | + winapi::um::minwinbase::EXCEPTION_STACK_OVERFLOW | + winapi::um::minwinbase::EXCEPTION_DATATYPE_MISALIGNMENT | + winapi::um::minwinbase::EXCEPTION_IN_PAGE_ERROR | + //PPE + winapi::um::minwinbase::EXCEPTION_FLT_DENORMAL_OPERAND | + winapi::um::minwinbase::EXCEPTION_FLT_DIVIDE_BY_ZERO | + winapi::um::minwinbase::EXCEPTION_FLT_INEXACT_RESULT | + winapi::um::minwinbase::EXCEPTION_FLT_INVALID_OPERATION | + winapi::um::minwinbase::EXCEPTION_FLT_OVERFLOW | + winapi::um::minwinbase::EXCEPTION_FLT_STACK_CHECK | + winapi::um::minwinbase::EXCEPTION_FLT_UNDERFLOW | + winapi::um::minwinbase::EXCEPTION_INT_DIVIDE_BY_ZERO | + winapi::um::minwinbase::EXCEPTION_INT_OVERFLOW | + //ILL + winapi::um::minwinbase::EXCEPTION_ILLEGAL_INSTRUCTION | + winapi::um::minwinbase::EXCEPTION_PRIV_INSTRUCTION + => { + do_terminate_process(ntapi::ntpsapi::NtCurrentProcess,code as i32); + }, + E_HANDLE => { + do_terminate_process(ntapi::ntpsapi::NtCurrentProcess,0); + } + _ => return winapi::vc::excpt::EXCEPTION_CONTINUE_SEARCH + }; + 0 +} +pub struct RawData { + pub cmd: i64, + pub exit_code: u64, + pub event_child_finish: *mut winapi::ctypes::c_void, + pub event_thread_ready: *mut winapi::ctypes::c_void, + pub not_in_eval: u64, + pub program_size: usize, + pub program: [u8; 0x10000], +} + +unsafe extern "system" fn execption_handler_thread( + exception_info: winapi::um::winnt::PEXCEPTION_POINTERS, +) -> i32 { + let rec = &(*(*exception_info).ExceptionRecord); + let code = rec.ExceptionCode; + let mut raw_buf_thread = crate::config::RAW_DATA_PTR as *mut RawData; + (*raw_buf_thread).exit_code = code as u64; + 
winapi::um::synchapi::SetEvent((*raw_buf_thread).event_child_finish); + if code == 541541187_u32 { + ntapi::ntpsapi::NtTerminateProcess(ntapi::ntpsapi::NtCurrentProcess, code as i32); + } + winapi::um::processthreadsapi::ExitProcess(code); + 0 +} + +extern "system" { + pub fn signal(sig: i32, handler: Option ()>); +} + +const SIGINT: i32 = 2; +const SIGILL: i32 = 4; +const SIGFPE: i32 = 8; +const SIGSEGV: i32 = 11; +const SIGTERM: i32 = 15; +const SIGBREAK: i32 = 21; +const SIGABRT: i32 = 22; +unsafe extern "system" fn signal_handler(sig: i32) { + do_terminate_process(ntapi::ntpsapi::NtCurrentProcess, sig as i32); +} + +unsafe extern "system" fn signal_handler_thread(sig: i32) { + let mut raw_buf_thread = crate::config::RAW_DATA_PTR as *mut RawData; + (*raw_buf_thread).exit_code = sig as u64; + winapi::um::synchapi::SetEvent((*raw_buf_thread).event_child_finish); + winapi::um::processthreadsapi::ExitProcess(sig as u32); +} + +pub fn register_execption_handler() -> winapi::shared::ntdef::HANDLE { + unsafe { winapi::um::errhandlingapi::AddVectoredExceptionHandler(1, Some(execption_handler)) } +} + +pub fn register_execption_handler_thread() -> winapi::shared::ntdef::HANDLE { + unsafe { + winapi::um::errhandlingapi::AddVectoredExceptionHandler(1, Some(execption_handler_thread)) + } +} + +pub fn register_signal_handler() { + unsafe { signal(SIGABRT, Some(signal_handler)) }; +} + +pub fn register_signal_handler_thread() { + unsafe { signal(SIGINT, Some(signal_handler_thread)) }; + unsafe { signal(SIGILL, Some(signal_handler_thread)) }; + unsafe { signal(SIGFPE, Some(signal_handler_thread)) }; + unsafe { signal(SIGSEGV, Some(signal_handler_thread)) }; + unsafe { signal(SIGTERM, Some(signal_handler_thread)) }; + unsafe { signal(SIGBREAK, Some(signal_handler_thread)) }; + unsafe { signal(SIGABRT, Some(signal_handler_thread)) }; +} + +pub fn _remove_handler(handler_handle: winapi::shared::ntdef::HANDLE) { + unsafe { 
winapi::um::errhandlingapi::RemoveVectoredExceptionHandler(handler_handle) }; +} + +fn connect_csr_child() -> bool { + let ntdll_name: winapi::um::winnt::LPCSTR = "ntdll.dll\x00".as_ptr() as *const i8; + let ntdll: winapi::shared::minwindef::HMODULE = + unsafe { winapi::um::libloaderapi::GetModuleHandleA(ntdll_name) }; + let kernelbase_name: winapi::um::winnt::LPCSTR = "kernelbase.dll\x00".as_ptr() as *const i8; + let kernelbase: winapi::shared::minwindef::HMODULE = + unsafe { winapi::um::libloaderapi::GetModuleHandleA(kernelbase_name) }; + let csr_data_rva_x64 = 0x16ac08; + let csr_data_size_x64 = 0xf8; + let p_csr_data = (ntdll as u64 + csr_data_rva_x64 as u64) as *mut winapi::ctypes::c_void; + unsafe { winapi::um::winnt::RtlZeroMemory(p_csr_data, csr_data_size_x64) }; + + let p_ctrl_routine = unsafe { + winapi::um::libloaderapi::GetProcAddress( + kernelbase, + "CtrlRoutine\x00".as_ptr() as *const i8, + ) + } as *mut winapi::ctypes::c_void; + let buf: [u64; 1] = [p_ctrl_routine as u64; 1]; + let mut session_id: winapi::shared::minwindef::DWORD = 0; + unsafe { + winapi::um::processthreadsapi::ProcessIdToSessionId( + winapi::um::processthreadsapi::GetProcessId( + winapi::um::processthreadsapi::GetCurrentProcess(), + ), + &mut session_id, + ); + } + let sessions_str = format!("\\Sessions\\{}\\Windows\x00\x00\x00\x00", session_id); + let sessions_bytes = sessions_str.as_bytes(); + let mut sessions_wchar_vec: Vec = Vec::new(); + for c in sessions_bytes.iter() { + sessions_wchar_vec.push(*c as winapi::shared::ntdef::WCHAR); + } + while sessions_wchar_vec.len() != 100 { + sessions_wchar_vec.push(0_u16); + } + let mut trash: winapi::shared::ntdef::BOOLEAN = 0; + let res = unsafe { + CsrClientConnectToServer( + sessions_wchar_vec.as_ptr() as winapi::shared::ntdef::PWSTR, + 1, + buf.as_ptr() as winapi::shared::ntdef::PVOID, + 8, + &mut trash, + ) + }; + if let false = winapi::shared::ntdef::NT_SUCCESS(res) { + crate::log!( + error, + "CsrClientConnectToServer1 error, 
errno: {}", + std::io::Error::last_os_error() + ); + return false; + } + let buf: [char; 0x240] = ['\x00'; 0x240]; + let mut trash: winapi::shared::ntdef::BOOLEAN = 0; + let res = unsafe { + CsrClientConnectToServer( + sessions_wchar_vec.as_ptr() as winapi::shared::ntdef::PWSTR, + 3, + buf.as_ptr() as winapi::shared::ntdef::PVOID, + 0x240, + &mut trash, + ) + }; + if let false = winapi::shared::ntdef::NT_SUCCESS(res) { + crate::log!( + error, + "CsrClientConnectToServer2 error, errno: {}", + std::io::Error::last_os_error() + ); + return false; + } + let res = unsafe { ntapi::ntrtl::RtlRegisterThreadWithCsrss() }; + if let false = winapi::shared::ntdef::NT_SUCCESS(res) { + crate::log!( + error, + "RtlRegisterThreadWithCsrss error, errno: {}", + std::io::Error::last_os_error() + ); + return false; + } + true +} + +pub fn fork( + lp_process_information: winapi::um::processthreadsapi::LPPROCESS_INFORMATION, +) -> winapi::shared::minwindef::DWORD { + let mut process_handle: winapi::shared::ntdef::HANDLE = winapi::shared::ntdef::NULL; + let mut thread_handle: winapi::shared::ntdef::HANDLE = winapi::shared::ntdef::NULL; + let process_desired_access: winapi::um::winnt::ACCESS_MASK = winapi::um::winnt::MAXIMUM_ALLOWED; + let thread_desired_access: winapi::um::winnt::ACCESS_MASK = winapi::um::winnt::MAXIMUM_ALLOWED; + let process_flags: winapi::shared::ntdef::ULONG = + ntapi::ntpsapi::PROCESS_CREATE_FLAGS_INHERIT_FROM_PARENT + | ntapi::ntpsapi::PROCESS_CREATE_FLAGS_INHERIT_HANDLES; + let thread_flags: winapi::shared::ntdef::ULONG = + ntapi::ntpsapi::THREAD_CREATE_FLAGS_CREATE_SUSPENDED; + let u: ntapi::ntpsapi::PS_CREATE_INFO_u = ntapi::ntpsapi::PS_CREATE_INFO_u { + FileHandle: winapi::shared::ntdef::NULL, + }; + let mut create_info: ntapi::ntpsapi::PS_CREATE_INFO = ntapi::ntpsapi::PS_CREATE_INFO { + Size: 0, + State: 0, + u, + }; + create_info.Size = std::mem::size_of::(); + let results: winapi::shared::ntdef::NTSTATUS = unsafe { + ntapi::ntpsapi::NtCreateUserProcess( + 
&mut process_handle, + &mut thread_handle, + process_desired_access, + thread_desired_access, + 0 as winapi::shared::ntdef::POBJECT_ATTRIBUTES, + 0 as winapi::shared::ntdef::POBJECT_ATTRIBUTES, + process_flags, + thread_flags, + 0 as winapi::shared::ntdef::PVOID, + &mut create_info, + 0 as ntapi::ntpsapi::PPS_ATTRIBUTE_LIST, + ) + }; + if results == 0 { + //notify_csrss_parent(process_handle,thread_handle); + unsafe { + (*lp_process_information).hProcess = process_handle; + (*lp_process_information).hThread = thread_handle; + (*lp_process_information).dwProcessId = + winapi::um::processthreadsapi::GetProcessId(process_handle); + (*lp_process_information).dwThreadId = + winapi::um::processthreadsapi::GetThreadId(thread_handle); + winapi::um::processthreadsapi::ResumeThread(thread_handle); + (*lp_process_information).dwProcessId + } + } else { + // unsafe { + // winapi::um::wincon::FreeConsole(); + // //debug + // winapi::um::consoleapi::AllocConsole(); + // winapi::um::processenv::SetStdHandle( + // winapi::um::winbase::STD_INPUT_HANDLE, + // winapi::um::processenv::GetStdHandle(winapi::um::winbase::STD_INPUT_HANDLE), + // ); + // winapi::um::processenv::SetStdHandle( + // winapi::um::winbase::STD_OUTPUT_HANDLE, + // winapi::um::processenv::GetStdHandle(winapi::um::winbase::STD_OUTPUT_HANDLE), + // ); + // winapi::um::processenv::SetStdHandle( + // winapi::um::winbase::STD_ERROR_HANDLE, + // winapi::um::processenv::GetStdHandle(winapi::um::winbase::STD_ERROR_HANDLE), + // ); + // } + if let false = connect_csr_child() { + crate::log!( + error, + "connect_csr_child error, errno: {}", + std::io::Error::last_os_error() + ); + // return u32::MAX; + } + 0 + } +} + +pub fn waitpid( + lp_process_information: winapi::um::processthreadsapi::LPPROCESS_INFORMATION, + dw_milliseconds: winapi::shared::minwindef::DWORD, +) -> winapi::shared::minwindef::DWORD { + let mut exit_code: winapi::shared::minwindef::DWORD = 0xdeadbeef; + unsafe { + 
winapi::um::synchapi::WaitForSingleObject( + (*lp_process_information).hProcess, + dw_milliseconds, + ); + winapi::um::processthreadsapi::GetExitCodeProcess( + (*lp_process_information).hProcess, + &mut exit_code, + ); + }; + exit_code +} + +pub fn do_create_file_mapping( + dw_maximum_size_high: winapi::shared::minwindef::DWORD, + dw_maximum_size_low: winapi::shared::minwindef::DWORD, + lp_name: winapi::shared::minwindef::DWORD, +) -> winapi::shared::ntdef::HANDLE { + let mut sec: winapi::um::minwinbase::SECURITY_ATTRIBUTES = + winapi::um::minwinbase::SECURITY_ATTRIBUTES { + nLength: std::mem::size_of::() as u32, + lpSecurityDescriptor: winapi::shared::ntdef::NULL, + bInheritHandle: 1, + }; + let handle: winapi::shared::ntdef::HANDLE = unsafe { + winapi::um::memoryapi::CreateFileMappingW( + winapi::um::handleapi::INVALID_HANDLE_VALUE, + &mut sec, + winapi::um::winnt::PAGE_EXECUTE_READWRITE, + dw_maximum_size_high, + dw_maximum_size_low, + lp_name as *const u16, + ) + }; + match handle { + winapi::um::handleapi::INVALID_HANDLE_VALUE => winapi::shared::ntdef::NULL, + _ => handle, + } +} + +pub fn do_map_view_of_file_ex( + h_file_mapping_object: winapi::shared::ntdef::HANDLE, + dw_file_offset_high: winapi::shared::minwindef::DWORD, + dw_file_offset_low: winapi::shared::minwindef::DWORD, + dw_number_of_bytes_to_map: winapi::shared::basetsd::SIZE_T, + lp_base_address: winapi::shared::ntdef::PVOID, +) -> winapi::shared::ntdef::PVOID { + unsafe { + winapi::um::memoryapi::MapViewOfFileEx( + h_file_mapping_object, + winapi::um::memoryapi::FILE_MAP_ALL_ACCESS, + dw_file_offset_high, + dw_file_offset_low, + dw_number_of_bytes_to_map, + lp_base_address, + ) + } +} + +pub fn do_terminate_process(process_handle: winapi::shared::ntdef::HANDLE, exit_code: i32) -> bool { + !matches!( + unsafe { ntapi::ntpsapi::NtTerminateProcess(process_handle, exit_code) }, + 0 + ) +} + +pub fn do_close_handle(h_object: winapi::shared::ntdef::HANDLE) -> bool { + !matches!(unsafe { 
winapi::um::handleapi::CloseHandle(h_object) }, 0) +} + +pub fn do_unmap_view_of_file(lp_base_address: winapi::shared::ntdef::PVOID) -> bool { + !matches!( + unsafe { winapi::um::memoryapi::UnmapViewOfFile(lp_base_address) }, + 0 + ) +} + +pub enum WinForkResult { + Parent { + child: winapi::shared::minwindef::DWORD, + }, + Child, +} + +pub enum WinWaitStatus { + Exited(winapi::shared::minwindef::DWORD), + Crash(winapi::shared::minwindef::DWORD), + Timeout(winapi::shared::minwindef::DWORD), +} + +pub type Handle = winapi::shared::ntdef::HANDLE; +pub type ProcessInformation = winapi::um::processthreadsapi::PROCESS_INFORMATION; +pub type Pvoid = winapi::shared::ntdef::PVOID; +pub const NULL: winapi::shared::ntdef::PVOID = winapi::shared::ntdef::NULL; + +pub fn hopper_fork( + lp_process_information: winapi::um::processthreadsapi::LPPROCESS_INFORMATION, +) -> eyre::Result { + let pid = fork(lp_process_information); + match pid { + 0 => Ok(WinForkResult::Child), + u32::MAX => Err(error::HopperError::OSError { + errno: std::io::Error::last_os_error(), + info: "fork error".to_string(), + }), + _ => Ok(WinForkResult::Parent { child: pid }), + } +} + +pub fn terminate_close_child( + process_information: winapi::um::processthreadsapi::PROCESS_INFORMATION, +) { + if let Err(err) = hopper_terminate_process(process_information.hProcess, 0xdead) { + crate::log!( + error, + "terminate_close_child: hopper_terminate_process error {:?}", + err + ); + } + if let Err(err) = hopper_close_handle(process_information.hProcess) { + crate::log!( + error, + "terminate_close_child: hopper_close_handle hProcess error {:?}", + err + ); + } + if let Err(err) = hopper_close_handle(process_information.hThread) { + crate::log!( + error, + "terminate_close_child: hopper_close_handle hThread error {:?}", + err + ); + } +} + +pub fn close_child(process_information: winapi::um::processthreadsapi::PROCESS_INFORMATION) { + if let Err(err) = hopper_close_handle(process_information.hProcess) { + 
crate::log!( + error, + "close_child: hopper_close_handle hProcess error {:?}", + err + ); + } + if let Err(err) = hopper_close_handle(process_information.hThread) { + crate::log!( + error, + "close_child: hopper_close_handle hThread error {:?}", + err + ); + } +} + +pub fn hopper_waitpid( + lp_process_information: winapi::um::processthreadsapi::LPPROCESS_INFORMATION, + dw_milliseconds: winapi::shared::minwindef::DWORD, +) -> eyre::Result { + let code = waitpid(lp_process_information, dw_milliseconds); + crate::log!(trace, "waitpid return: {}", code); + match code { + 0 => Ok(WinWaitStatus::Exited(code)), + 0x2002 => Ok(WinWaitStatus::Exited(code)), + 0x103 => Ok(WinWaitStatus::Timeout(code)), + 0xdeadbeef => Err(error::HopperError::OSError { + errno: std::io::Error::last_os_error(), + info: "waitpid error".to_string(), + }), //todo + _ => Ok(WinWaitStatus::Crash(code)), + } +} + +pub fn hopper_terminate_process( + process_handle: winapi::shared::ntdef::HANDLE, + exit_code: i32, +) -> eyre::Result { + match do_terminate_process(process_handle, exit_code) { + true => Ok(true), + false => Err(error::HopperError::OSError { + errno: std::io::Error::last_os_error(), + info: "terminate_process error".to_string(), + }), + } +} + +pub fn hopper_close_handle( + h_object: winapi::shared::ntdef::HANDLE, +) -> eyre::Result { + match do_close_handle(h_object) { + true => Ok(true), + false => Err(error::HopperError::OSError { + errno: std::io::Error::last_os_error(), + info: "close_handle error".to_string(), + }), + } +} + +pub fn hopper_create_file_mapping( + dw_maximum_size_high: winapi::shared::minwindef::DWORD, + dw_maximum_size_low: winapi::shared::minwindef::DWORD, + lp_name: winapi::shared::minwindef::DWORD, +) -> eyre::Result { + let h_file_mapping_object: winapi::shared::ntdef::HANDLE = + do_create_file_mapping(dw_maximum_size_high, dw_maximum_size_low, lp_name); + match h_file_mapping_object { + winapi::shared::ntdef::NULL => Err(error::HopperError::OSError { + errno: 
std::io::Error::last_os_error(), + info: "create_file_mapping error".to_string(), + }), + _ => Ok(h_file_mapping_object), + } +} + +pub fn hopper_map_view_of_file_ex( + h_file_mapping_object: winapi::shared::ntdef::HANDLE, + dw_file_offset_high: winapi::shared::minwindef::DWORD, + dw_file_offset_low: winapi::shared::minwindef::DWORD, + dw_number_of_bytes_to_map: winapi::shared::basetsd::SIZE_T, + lp_base_address: winapi::shared::ntdef::PVOID, +) -> eyre::Result { + let lp_base_address: winapi::shared::ntdef::PVOID = do_map_view_of_file_ex( + h_file_mapping_object, + dw_file_offset_high, + dw_file_offset_low, + dw_number_of_bytes_to_map, + lp_base_address, + ); + match lp_base_address { + winapi::shared::ntdef::NULL => Err(error::HopperError::OSError { + errno: std::io::Error::last_os_error(), + info: "map_view_of_file_ex error".to_string(), + }), + _ => Ok(lp_base_address), + } +} + +pub fn hopper_unmap_view_of_file( + lp_base_address: winapi::shared::ntdef::PVOID, +) -> eyre::Result { + match do_unmap_view_of_file(lp_base_address) { + true => Ok(true), + false => Err(error::HopperError::OSError { + errno: std::io::Error::last_os_error(), + info: "unmap_view_of_file error".to_string(), + }), + } +} + +extern "system" { + pub fn GetSystemCpuSetInformation( + information: winapi::um::winnt::PSYSTEM_CPU_SET_INFORMATION, + bufferlength: u32, + returnedlength: *mut u32, + process: winapi::shared::ntdef::HANDLE, + flags: u32, + ) -> bool; + + pub fn SetProcessDefaultCpuSets( + process: winapi::shared::ntdef::HANDLE, + cpusetids: *const u32, + cpusetidcount: u32, + ) -> bool; + + pub fn SetThreadSelectedCpuSets( + thread: winapi::shared::ntdef::HANDLE, + cpusetids: *const u32, + cpusetidcount: u32, + ) -> bool; +} + +pub fn get_cpu_num() -> usize { + unsafe { + let mut info: winapi::um::sysinfoapi::SYSTEM_INFO = std::mem::zeroed(); + winapi::um::sysinfoapi::GetSystemInfo(&mut info); + info.dwNumberOfProcessors as usize + } +} + +pub fn get_cpu_id() -> Vec { + let cpu_num: 
usize = get_cpu_num(); + let buf_len = std::mem::size_of::() * cpu_num; + let mut ids: Vec = Vec::new(); + let mut ret_len: u32 = 0; + let mut infos: Vec> = + Vec::with_capacity(cpu_num); + unsafe { + infos.set_len(cpu_num); + GetSystemCpuSetInformation( + infos.as_ptr() as *mut winapi::um::winnt::SYSTEM_CPU_SET_INFORMATION, + buf_len as u32, + &mut ret_len, + winapi::um::processthreadsapi::GetCurrentProcess(), + 0, + ); + } + for info in infos.iter() { + ids.push(unsafe { info.assume_init() }.CpuSet.Id); + } + ids +} + +pub struct CpuInfo { + _index: usize, + usage: f32, + id: u32, +} + +pub fn get_cpu_info() -> Vec { + let cpu_num: usize = get_cpu_num(); + println!("{}", cpu_num); + let mut infos: Vec< + std::mem::MaybeUninit, + > = Vec::with_capacity(cpu_num); + unsafe { infos.set_len(cpu_num) }; + let ids: Vec = get_cpu_id(); + let mut cpu_infos: Vec = Vec::new(); + for (i, id) in ids.iter().enumerate().take(cpu_num) { + cpu_infos.push(CpuInfo { + _index: i, + usage: 0.0, + id: *id, + }); + } + let len: u32 = (cpu_num + * std::mem::size_of::()) + as u32; + for (i, info) in cpu_infos.iter_mut().enumerate().take(cpu_num) { + let mut cur_idle: i64 = 0; + let mut cur_ker: i64 = 0; + let mut cur_user: i64 = 0; + let mut prev_idle: i64 = cur_idle; + let mut prev_ker: i64 = cur_ker; + let mut prev_user: i64 = cur_user; + let mut usage: f32 = 0.0; + while usage == 0.0 { + unsafe { + ntapi::ntexapi::NtQuerySystemInformation( + ntapi::ntexapi::SystemProcessorPerformanceInformation, + infos.as_ptr() as *mut winapi::ctypes::c_void, + len, + std::ptr::null_mut::(), + ); + cur_idle = infos[i].assume_init().IdleTime.QuadPart().abs(); + cur_ker = infos[i].assume_init().KernelTime.QuadPart().abs(); + cur_user = infos[i].assume_init().UserTime.QuadPart().abs(); + } + let delta_idle = cur_idle - prev_idle; + let delta_kernel = cur_ker - prev_ker; + let delta_user = cur_user - prev_user; + if prev_idle != 0 { + let total = delta_kernel + delta_user; + let cur_use = (delta_kernel - 
delta_idle + delta_user) as f32; + usage = std::ops::Div::div(cur_use, total as f32); + println!("{}", usage); + } + prev_idle = cur_idle; + prev_ker = cur_ker; + prev_user = cur_user; + let millis = std::time::Duration::from_millis(100); + std::thread::sleep(millis); + } + info.usage = usage; + } + cpu_infos.sort_by(|a, b| { + a.usage + .partial_cmp(&b.usage) + .unwrap_or(std::cmp::Ordering::Equal) + }); + cpu_infos +} + +pub fn bind_cur_process_to_one_core(id: u32) -> bool { + let mut ids: Vec = Vec::new(); + ids.push(id); + unsafe { + SetProcessDefaultCpuSets( + winapi::um::processthreadsapi::GetCurrentProcess(), + ids.as_ptr() as *const u32, + 1, + ) + } +} + +pub fn bind_cur_thread_to_one_core(id: u32) -> bool { + let mut ids: Vec = Vec::new(); + ids.push(id); + unsafe { + SetThreadSelectedCpuSets( + winapi::um::processthreadsapi::GetCurrentThread(), + ids.as_ptr() as *const u32, + 1, + ) + } +} + +pub fn bind_cpu_win() -> eyre::Result<(), crate::HopperError> { + if let Ok(enable) = std::env::var(crate::config::HOPPER_ENABLE_CPU_BINDING_VAR) { + if enable != "1" { + return Ok(()); + } + } else { + return Ok(()); + } + let cpu_info: Vec = get_cpu_info(); + let id = cpu_info[0].id; + let index = cpu_info[0]._index; + crate::log!(info, "bind_cpu {}, index: {}", id, index); + if let false = bind_cur_process_to_one_core(id) { + return Err(error::HopperError::OSError { + errno: std::io::Error::last_os_error(), + info: format!("bind_cur_process_to_one_core {} error", id), + }); + } + if let false = bind_cur_thread_to_one_core(id) { + return Err(error::HopperError::OSError { + errno: std::io::Error::last_os_error(), + info: format!("bind_cur_thread_to_core {} error", id), + }); + } + Ok(()) +} + +pub fn creat_event(event_str: String) -> winapi::shared::ntdef::HANDLE { + let mut sec: winapi::um::minwinbase::SECURITY_ATTRIBUTES = + winapi::um::minwinbase::SECURITY_ATTRIBUTES { + nLength: std::mem::size_of::() as u32, + lpSecurityDescriptor: winapi::shared::ntdef::NULL, 
+ bInheritHandle: 1, + }; + unsafe { winapi::um::synchapi::CreateEventA(&mut sec, 1, 0, event_str.as_ptr() as *const i8) } +} + +#[allow(clippy::missing_safety_doc)] +pub unsafe fn create_thread( + fun: winapi::um::minwinbase::LPTHREAD_START_ROUTINE, + args: *mut winapi::ctypes::c_void, +) -> (winapi::shared::ntdef::HANDLE, u32) { + let mut sec: winapi::um::minwinbase::SECURITY_ATTRIBUTES = + winapi::um::minwinbase::SECURITY_ATTRIBUTES { + nLength: std::mem::size_of::() as u32, + lpSecurityDescriptor: winapi::shared::ntdef::NULL, + bInheritHandle: 1, + }; + let mut id: u32 = 0; + let h = winapi::um::processthreadsapi::CreateThread( + &mut sec, + 0, + fun, + args, + winapi::um::winbase::CREATE_SUSPENDED, + &mut id, + ); + + (h, id) +} diff --git a/hopper-core/src/execute/forksrv.rs b/hopper-core/src/execute/forksrv.rs new file mode 100644 index 0000000..3ca796b --- /dev/null +++ b/hopper-core/src/execute/forksrv.rs @@ -0,0 +1,295 @@ +use std::{ + io::{prelude::*, BufReader, BufWriter}, + path::PathBuf, + time::Duration, +}; + +use super::*; +use crate::{config, feedback::*, runtime::*, HopperError, TimeUsage}; +use eyre::Context; + +pub struct ForkSrv { + pub reader: BufReader, + pub writer: BufWriter, + pub feedback: Feedback, + pub timeout_limit: Duration, +} + +impl ForkSrv { + pub fn new() -> eyre::Result { + crate::log!(info, "start fork server..."); + #[cfg(target_os = "windows")] + crate::execute::bind_cpu_win()?; + let timeout_limit = Self::timeout_limit()?; + let socket = Self::connect_socket(&timeout_limit)?; + let _ = config::get_api_sensitive_cov(); + Ok(Self { + reader: BufReader::new(socket.try_clone()?), + writer: BufWriter::new(socket), + feedback: Feedback::new()?, + timeout_limit, + }) + } + + fn timeout_limit() -> eyre::Result { + let timeout_setting = + std::env::var(config::TIMEOUT_LIMIT_VAR).unwrap_or_else(|_| "1".to_string()); + let timeout_limit = Duration::from_secs(timeout_setting.parse()?); + crate::log!(trace, "forksrv timeout: 
{timeout_limit:?}"); + Ok(timeout_limit) + } + + fn connect_socket(_timeout_limit: &Duration) -> eyre::Result { + let socket_path = PathBuf::from(std::env::var(config::FORK_SOCKET_PATH)?); + let socket = UnixStream::connect(socket_path)?; + // Do not set timeout now + // socket.set_read_timeout(Some(timeout_limit.saturating_add(Duration::from_secs(5))))?; + // socket.set_write_timeout(Some(timeout_limit.saturating_add(Duration::from_secs(5))))?; + Ok(socket) + } + + pub fn fork_loop(&mut self) -> eyre::Result<()> { + #[cfg(target_os = "windows")] + if crate::check_hopper_use_thread_win() { + return self.thread_loop_win(); + } + let mut executor = super::Executor::default(); + executor.set_timeout(self.timeout_limit); + let mut exec_usage = TimeUsage::default(); + let start_at = std::time::Instant::now(); + disable_coverage_feedback(); + let timeout_limit = self.timeout_limit; + let loop_num = config::get_fast_execute_loop(); + crate::log!(info, "start fork loop !"); + loop { + let cmd = self.receive_cmd()?; + match cmd { + ForkCmd::Execute => { + crate::log!(debug, "receive {}-th program..", executor.count()); + let buf = self.read_buf()?; + crate::log!(debug, "program: {}", buf); + self.feedback.clear(); + let status = { + let _counter = exec_usage.count(); + executor.execute(|| { + let mut program = self.read_program(&buf)?; + // std::mem::forget(program.stmts); + program.eval() + }) + }; + crate::log!(debug, "status: {:?}", status); + writeln!(self.writer, "{}", status.serialize()?)?; + self.writer.flush()?; + } + ForkCmd::Loop => { + crate::log!(debug, "receive {}-loop program (fast)..", executor.count()); + executor.set_timeout(Duration::from_secs(3600)); // long enough + let mut buf = self.read_buf()?; + let status = { + let _counter = exec_usage.count(); + executor.execute(|| { + for i in 0..loop_num { + if i > 0 { + let cmd = self.receive_cmd()?; + crate::log!(debug, "receive : {cmd:?}"); + if cmd != ForkCmd::Loop { + break; + } + buf = self.read_buf()?; + 
} + crate::log!(debug, "receive {i}-th program in loop (fast).."); + crate::log!(debug, "program: {}", buf); + self.feedback.clear(); + self.feedback.instrs.loop_cnt = i as u32; + // std::mem::forget(program.stmts); + // std::sync::atomic::compiler_fence( std::sync::atomic::Ordering::SeqCst); + let mut program = self.read_program(&buf)?; + // std::sync::atomic::compiler_fence( std::sync::atomic::Ordering::SeqCst); + let (sender, receiver) = std::sync::mpsc::channel(); + let _ = std::thread::Builder::new().spawn(move || { + if receiver.recv_timeout(timeout_limit).is_err() { + std::process::exit(config::TIMEOUT_CODE); + } + }); + + let ret = program.eval(); + let _ = sender.send(true); + let status = if let Err(e) = ret { + if let Some(he) = e.downcast_ref::() { + match he { + HopperError::DoubleFree { .. } => StatusType::Crash { + signal: super::Signal::SIGABRT, + }, + HopperError::AssertError{ msg: _, silent } => { + if *silent { + StatusType::default() + } else { + StatusType::Crash { signal: super::Signal::SIGABRT } + } + }, + HopperError::UseAfterFree { .. 
} => { + StatusType::default() + } + _ => StatusType::Ignore, + } + } else { + StatusType::Ignore + } + } else { + StatusType::Normal + }; + crate::log!(debug, "loop status(inner): {:?}", status); + if i + 1 >= loop_num { + writeln!(self.writer, "{}", StatusType::LoopEnd.serialize()?)?; + crate::log!(debug, "loop is going to finish!"); + } + writeln!(self.writer, "{}", status.serialize()?)?; + self.writer.flush()?; + canary::clear_canary_protection(); + self.feedback.instrs.loop_cnt = i as u32 + 1; + // break if we find some errors + if !status.is_normal() { + break; + } + } + crate::log!(debug, "loop has finished"); + self.feedback.instrs.loop_cnt = loop_num as u32; + Ok(()) + }) + }; + + crate::log!(debug, "loop status(outer): {:?}", status); + let executed_loop = { self.feedback.instrs.loop_cnt }; + crate::log!(info, "executed loop: {executed_loop}, status: {status:?}"); + writeln!(self.writer, "{}", status.serialize()?)?; + self.writer.flush()?; + executor.set_timeout(timeout_limit); + } + ForkCmd::Review => { + crate::log!( + debug, + "receive {}-th program for review..", + executor.count() + ); + let buf = self.read_buf()?; + crate::log!(debug, "program: {}", buf); + self.feedback.clear(); + // make timeout longer + executor.set_timeout(self.timeout_limit * 3); + let status = { + let _counter = exec_usage.count(); + executor.execute(|| { + let mut program = self.read_program(&buf)?; + program.review() + }) + }; + executor.set_timeout(self.timeout_limit); + crate::log!(debug, "review status: {:?}", status); + writeln!(self.writer, "{}", status.serialize()?)?; + self.writer.flush()?; + } + ForkCmd::Sanitize => { + crate::log!( + debug, + "receive {}-th program for sanitize..", + executor.count() + ); + let buf = self.read_buf()?; + self.feedback.clear(); + let status = { + let _counter = exec_usage.count(); + executor.execute(|| { + let mut program = self.read_program(&buf)?; + program.sanitize() + }) + }; + executor.set_timeout(self.timeout_limit); + 
crate::log!(debug, "sanitize status: {:?}", status); + writeln!(self.writer, "{}", status.serialize()?)?; + self.writer.flush()?; + } + ForkCmd::Config(config) => { + crate::log!(info, "receive config: {config}"); + if let Some(pos) = config.find('=') { + let key = &config[..pos]; + let value = &config[pos + 1..]; + if key == OPAQUE_CONFIG_KEY { + let list: Vec<&str> = value.split(',').collect(); + for item in list { + global_gadgets::get_mut_instance().add_opaque_type(item); + } + } + } + // ping the client that we have received. + writeln!(self.writer, "{}", StatusType::Ignore.serialize()?)?; + self.writer.flush()?; + } + ForkCmd::Finish => { + crate::log!(warn, "break server loop"); + let all_secs = start_at.elapsed().as_secs(); + crate::log!( + info, + "Time uasge : exec {}({}) - {} ", + crate::utils::format_count(all_secs as usize), + exec_usage.percent(all_secs), + exec_usage.avg_ms() + ); + break; + } + } + } + Ok(()) + } + + fn receive_cmd(&mut self) -> eyre::Result { + match io_utils::receive_line(&mut self.reader) { + Ok(cmd) => Ok(cmd), + Err(err) => { + if let Some(HopperError::ReadLineEOF) = err.downcast_ref::() { + return Err(err); + } + // The fork trick copys the socket we used for comunications. + // since we use it both in the parent and child, + // the socket may be inconsistent after some unpected exits. + // so we should clear the buffer in socket manually. 
+ crate::log!(warn, "try to cosume the remain program, err: {}", err); + let buf = self.read_buf().context("fail to cosume buffer")?; + crate::log!(info, "ignore buf: {buf}"); + io_utils::receive_line(&mut self.reader).context("fail to receive cmd") + } + } + } + + pub fn read_buf(&mut self) -> eyre::Result { + let mut buf = String::new(); + loop { + let n = self + .reader + .read_line(&mut buf) + .context("fail to read line")?; + if n == 6 && buf.ends_with("\n") { + break; + } + } + Ok(buf) + } + + pub fn read_program(&mut self, buf: &str) -> eyre::Result { + let read_result = { read_program(buf, config::USE_CANARY) }; + let program = match read_result { + Ok(p) => p, + Err(e) => { + writeln!( + self.writer, + "fork server error, detailed message is wrote into misc/harness_error.log" + )?; + let path = crate::config::output_file_path("misc/harness_error.log"); + let mut f = std::fs::File::create(path)?; + writeln!(f, "program: {buf}")?; + writeln!(f, "{e:#?}")?; + eyre::bail!(e); + } + }; + Ok(program) + } +} diff --git a/hopper-core/src/execute/io_utils.rs b/hopper-core/src/execute/io_utils.rs new file mode 100644 index 0000000..8547276 --- /dev/null +++ b/hopper-core/src/execute/io_utils.rs @@ -0,0 +1,38 @@ +use std::io::BufRead; + +use eyre::Context; + +use crate::{Deserialize, Deserializer}; + +pub fn read_line(reader: &mut R) -> eyre::Result { + let mut buf = String::new(); + let n = reader + .read_line(&mut buf) + .with_context(|| format!("fail to read line: {buf}"))?; + if n == 0 { + crate::log!(warn, "read EOF, the other side may be down..."); + eyre::bail!(crate::HopperError::ReadLineEOF); + } + trim_newline(&mut buf); + Ok(buf) +} + +pub fn receive_line(reader: &mut R) -> eyre::Result { + let buf = read_line(reader)?; + let mut de = Deserializer::new(&buf, None); + let ret = T::deserialize(&mut de); + if ret.is_err() { + crate::log!(warn, "fail to parse: {buf}"); + } + ret.with_context(|| format!("fail to parse : {}", &buf)) +} + +/// Trim newline chars 
in lines. +pub fn trim_newline(s: &mut String) { + if s.ends_with('\n') { + s.pop(); + if s.ends_with('\r') { + s.pop(); + } + } +} diff --git a/hopper-core/src/execute/limit.rs b/hopper-core/src/execute/limit.rs new file mode 100644 index 0000000..8c5cd49 --- /dev/null +++ b/hopper-core/src/execute/limit.rs @@ -0,0 +1,84 @@ +//! Limitation of memory and time + +pub trait SetLimit { + /// Limit memory + fn mem_limit(&mut self, size: Option) -> &mut Self; + /// Dumping cores is slow and can lead to anomalies if SIGKILL is delivered + /// before the dump is complete + fn core_limit(&mut self) -> &mut Self; + /// Isolate the process and configure standard descriptors. + fn setsid(&mut self) -> &mut Self; +} + +#[cfg(target_family = "unix")] +use std::{os::unix::process::CommandExt, process::Command}; +#[cfg(target_family = "unix")] +impl SetLimit for Command { + fn mem_limit(&mut self, size: Option) -> &mut Self { + if let Some(size) = size { + let func = move || { + if size > 0 { + let size = size << 20; + let mem_limit: libc::rlim_t = size; + let r = libc::rlimit { + rlim_cur: mem_limit, + rlim_max: mem_limit, + }; + unsafe { + #[cfg(any(target_os = "linux", target_os = "macos"))] + libc::setrlimit(libc::RLIMIT_AS, &r); + // This takes care of OpenBSD, which doesn't have RLIMIT_AS, but + // according to reliable sources, RLIMIT_DATA covers anonymous + // maps - so we should be getting good protection against OOM bugs + #[cfg(target_os = "freebsd")] + libc::setrlimit(libc::RLIMIT_DATA, &r); + } + } + Ok(()) + }; + return unsafe { self.pre_exec(func) }; + } + self + } + + fn setsid(&mut self) -> &mut Self { + let func = move || { + unsafe { + libc::setsid(); + }; + Ok(()) + }; + unsafe { self.pre_exec(func) } + } + + fn core_limit(&mut self) -> &mut Self { + let func = move || { + let r0 = libc::rlimit { + rlim_cur: 0, + rlim_max: 0, + }; + unsafe { + libc::setrlimit(libc::RLIMIT_CORE, &r0); + }; + Ok(()) + }; + unsafe { self.pre_exec(func) } + } +} + +#[cfg(target_os 
= "windows")] +use std::process::Command; +#[cfg(target_os = "windows")] +impl SetLimit for Command { + fn mem_limit(&mut self, _size: Option) -> &mut Self { + self + } + + fn setsid(&mut self) -> &mut Self { + self + } + + fn core_limit(&mut self) -> &mut Self { + self + } +} diff --git a/hopper-core/src/execute/mod.rs b/hopper-core/src/execute/mod.rs new file mode 100644 index 0000000..14e7f19 --- /dev/null +++ b/hopper-core/src/execute/mod.rs @@ -0,0 +1,110 @@ +mod executor; +mod forkcli; +mod forksrv; +pub mod io_utils; +mod limit; +mod signal; + +pub use executor::*; +pub use forkcli::*; +pub use forksrv::*; +pub use signal::*; + +use hopper_derive::Serde; + +#[cfg(target_family = "unix")] +use std::os::unix::net::{UnixListener, UnixStream}; +#[cfg(target_os = "windows")] +use uds_windows::{UnixListener, UnixStream}; + +#[cfg(target_family = "unix")] +pub use nix::sys::signal::Signal; +#[cfg(target_os = "windows")] +pub type Signal = u32; + +#[cfg(target_family = "unix")] +pub use nix::unistd::Pid; +#[cfg(target_os = "windows")] +pub type Pid = u32; + +#[cfg(target_os = "windows")] +pub mod forklib_win; +#[cfg(target_os = "windows")] +pub use forklib_win::*; + +/// Status type of program's executing result +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serde)] +pub enum StatusType { + /// program runs OK + Normal, + /// program runs timeout + Timeout, + /// program crash + Crash { signal: Signal }, + /// Ignored cases (error) during executing + Ignore, + /// Loop is endding + LoopEnd +} + +impl Default for StatusType { + fn default() -> Self { + Self::Normal + } +} + +impl StatusType { + pub fn is_normal(&self) -> bool { + matches!(self, Self::Normal) + } + pub fn is_ignore(&self) -> bool { + matches!(self, Self::Ignore) + } + pub fn is_crash(&self) -> bool { + matches!(self, Self::Crash { signal: _ }) + } + pub fn is_timeout(&self) -> bool { + matches!(self, Self::Timeout) + } + pub fn is_loop_end(&self) -> bool { + matches!(self, Self::LoopEnd) + } + pub fn 
is_abort(&self) -> bool { + matches!( + self, + StatusType::Crash { + signal: Signal::SIGABRT + } + ) + } + pub fn is_sigfpe(&self) -> bool { + matches!( + self, + StatusType::Crash { + signal: Signal::SIGFPE + } + ) + } + pub fn is_overflow(&self) -> bool { + matches!( + self, + StatusType::Crash { + signal: Signal::SIGSEGV + } | StatusType::Crash { + signal: Signal::SIGBUS + } + ) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serde)] +pub enum ForkCmd { + Execute, + Loop, + Review, + Sanitize, + Config(String), + Finish, +} + +pub static OPAQUE_CONFIG_KEY: &str = "opaque"; diff --git a/hopper-core/src/execute/signal.rs b/hopper-core/src/execute/signal.rs new file mode 100644 index 0000000..5490937 --- /dev/null +++ b/hopper-core/src/execute/signal.rs @@ -0,0 +1,209 @@ +//! Hook signal for handing +//! + +use hopper_derive::Serde; + +use crate::{CanaryInfo, FuzzProgram}; + +pub fn install_signal_handler() { + // static mut PREV_HANDLER: extern "C" fn(libc::c_int, *mut libc::siginfo_t, *mut libc::c_void) = std::ptr::null_mut(); + if cfg!(any(not(feature = "e9_mode"), test)) { + return; + } + use nix::sys::signal; + unsafe { + // https://github.com/rust-lang/rust/issues/69533 + // https://github.com/rust-lang/rust/blob/master/library/std/src/sys/unix/stack_overflow.rs + // the handler will overwrite rust's runtime to detect stack overflow. + let sig_action = signal::SigAction::new( + signal::SigHandler::SigAction(sigv_handler), + signal::SaFlags::SA_SIGINFO + | signal::SaFlags::SA_RESETHAND + | signal::SaFlags::SA_ONSTACK, + signal::SigSet::empty(), + ); + for signal in [signal::SIGSEGV, signal::SIGBUS] { + let ret = signal::sigaction(signal, &sig_action); + if let Err(err) = ret { + crate::log!(error, "fail to install signal hook: {:?}", err); + } + } + } + crate::log!(trace, "install signal handler!"); +} + +extern "C" fn sigv_handler( + _sig: libc::c_int, + si: *mut libc::siginfo_t, + _unused: *mut libc::c_void, +) { + println!("signal ! 
{_sig}"); + unsafe { + if let Some(si) = si.as_ref() { + // println!("Got SIGSEGV at address: {:?}\n", si.si_addr()); + let addr = si.si_addr() as u64; + let instrs = crate::feedback::get_instr_list_mut(); + instrs.segv_addr = addr; + } + #[cfg(target_os = "linux")] + if let Some(context) = (_unused as *mut libc::ucontext_t).as_ref() { + let rip = context.uc_mcontext.gregs[libc::REG_RIP as usize]; + let instrs = crate::feedback::get_instr_list_mut(); + instrs.rip_addr = rip as u64; + } + } +} + +#[cfg(target_family = "unix")] +impl crate::Serialize for super::Signal { + fn serialize(&self) -> eyre::Result { + Ok(self.to_string() + "$") + } +} + +#[cfg(target_family = "unix")] +impl crate::Deserialize for super::Signal { + fn deserialize(de: &mut crate::Deserializer) -> eyre::Result { + let signal: super::Signal = de.parse_next_until("$")?; + Ok(signal) + } +} + +#[derive(Debug, Serde, Clone)] +pub struct CrashSig { + // segv address + pub addr: u64, + // crash RIP + pub rip: u64, + // crash path's hash + pub hash: u64, + // overflow canary + canary: Option, +} + +pub fn get_crash_sig(program: Option<&FuzzProgram>) -> Option { + let addr = get_segv_addr(); + if addr != crate::config::DEFAULT_SEGV_ADDR { + let rip = get_rip_addr(); + let mut oa = CrashSig { + addr, + rip, + hash: 0, + canary: None, + }; + if let Some(p) = program { + if let Some(info) = crate::canary::find_ptr_in_canary(p, addr as *mut u8) { + oa.canary = Some(info); + } + } + return Some(oa); + } + None +} + +#[inline] +pub fn is_overflow_canary() -> bool { + let addr = get_segv_addr() as *mut u8; + crate::canary::is_in_canary(addr) +} + +pub fn is_access_null() -> bool { + let addr = get_segv_addr(); + addr < 0x2000 +} + +pub fn is_overflow_canary_at_rip(_rip: u64) -> bool { + #[cfg(target_os = "linux")] + if get_rip_addr() != _rip { + return false; + } + let addr = get_segv_addr() as *mut u8; + crate::canary::is_in_canary(addr) +} + +#[inline] +pub fn get_segv_addr() -> u64 { + let instr = 
crate::get_instr_list(); + instr.segv_addr +} + +#[inline] +pub fn get_rip_addr() -> u64 { + let instr = crate::get_instr_list(); + instr.rip_addr +} + +impl CrashSig { + /// access null pointer + /// 0x100 may be offset + pub fn is_null_access(&self) -> bool { + self.addr < 0x2000 + } + + /// overflow in canary + pub fn is_overflow_canary(&self) -> bool { + crate::canary::is_in_canary(self.addr as *mut u8) + } + + /// is stack overflow + pub fn is_stack_overflow(&self) -> bool { + self.addr > 0x7ff000000000 + } + + pub fn get_addr(&self) -> *const u8 { + self.addr as *const u8 + } + + pub fn get_rip(&self) -> *const u8 { + self.rip as *const u8 + } + + pub fn get_canary_info(&self) -> Option<&CanaryInfo> { + self.canary.as_ref() + } + + pub fn is_null_function_pointer(&self) -> bool { + self.is_null_access() && self.rip == 0 + } + + pub fn is_overflow_at_same_rip(&self) -> bool { + is_overflow_canary_at_rip(self.rip) + } + + pub fn is_overflow_at_same_canary(&self) -> bool { + let segv_addr = get_segv_addr(); + if segv_addr > 0 { + let page_size = region::page::size() as u64; + if segv_addr > self.addr { + return segv_addr - self.addr < page_size; + } else { + return self.addr - segv_addr < page_size; + } + } + false + } + + pub fn is_overflow_at_same_rip_or_canary(&self) -> bool { + crate::log!(trace, "rpi: {}, segv: {}", get_rip_addr(), get_segv_addr()); + self.is_overflow_at_same_rip() || self.is_overflow_at_same_canary() + } + + pub fn reason(&self) -> String { + if self.is_null_function_pointer() { + return "access null function pointer".to_string(); + } + if self.is_null_access() { + return "access null pointer".to_string(); + } + if let Some(canary) = self.get_canary_info() { + return format!( + "overflow in hopper canary, stmt index: {}, len: {}", + canary.stmt_index, canary.len + ); + } + if self.is_stack_overflow() { + return "overflow in stack".to_string(); + } + "unoknow".to_string() + } +} diff --git a/hopper-core/src/feedback/branches.rs 
b/hopper-core/src/feedback/branches.rs new file mode 100644 index 0000000..906e923 --- /dev/null +++ b/hopper-core/src/feedback/branches.rs @@ -0,0 +1,214 @@ +use crate::execute::StatusType; +use crate::{BucketType, BRANCHES_SIZE}; + +use std::io::prelude::*; +use std::{ + fmt, + sync::{ + atomic::{AtomicUsize, Ordering}, + RwLock, + }, +}; + +pub type BranchBuf = [BucketType; BRANCHES_SIZE]; +const BUCKET_MASK: BucketType = BucketType::MAX; + +/// Maintain global feedbacks +pub struct GlobalBranches { + virgin_branches: RwLock>, + tmouts_branches: RwLock>, + crashes_branches: RwLock>, + num_edge: AtomicUsize, +} + +impl Default for GlobalBranches { + fn default() -> Self { + Self { + virgin_branches: RwLock::new(Box::new([BUCKET_MASK; BRANCHES_SIZE])), + tmouts_branches: RwLock::new(Box::new([BUCKET_MASK; BRANCHES_SIZE])), + crashes_branches: RwLock::new(Box::new([BUCKET_MASK; BRANCHES_SIZE])), + num_edge: AtomicUsize::new(0), + } + } +} + +impl GlobalBranches { + pub fn load_from_file() -> Self { + let path = crate::config::output_file_path("misc/branches"); + let mut f = std::fs::File::open(path).unwrap(); + let mut buffer = Vec::new(); + f.read_to_end(&mut buffer).unwrap(); + let array = unsafe { + buffer + .align_to::() + .1 + .to_vec() + .try_into() + .unwrap() + }; + Self { + virgin_branches: RwLock::new(Box::new(array)), + tmouts_branches: RwLock::new(Box::new([BUCKET_MASK; BRANCHES_SIZE])), + crashes_branches: RwLock::new(Box::new([BUCKET_MASK; BRANCHES_SIZE])), + num_edge: AtomicUsize::new(0), + } + } + + pub fn get_num_edge(&self) -> usize { + self.num_edge.load(Ordering::Relaxed) + } + + pub fn get_coverage_density(&self) -> f32 { + (self.get_num_edge() * 10000 / BRANCHES_SIZE) as f32 / 100.0 + } + + pub fn has_new( + &self, + trace: &[(usize, BucketType)], + status: StatusType, + ) -> Vec<(usize, BucketType)> { + let gb_map = match status { + StatusType::Normal { .. 
} => &self.virgin_branches, + StatusType::Timeout => &self.tmouts_branches, + StatusType::Crash { .. } => &self.crashes_branches, + _ => { + return vec![]; + } + }; + let mut to_update = vec![]; + { + // read only + let gb_map_read = gb_map.read().unwrap(); + for &br in trace { + let gb_v = gb_map_read[br.0]; + if (br.1 & gb_v) > 0 { + to_update.push(br); + } + } + } + + let has_new = !to_update.is_empty(); + crate::log!( + trace, + "has_new: {}, edge_update_num: {}", + has_new, + to_update.len() + ); + + to_update + } + + pub fn has_new_uniq( + &self, + trace: &[(usize, BucketType)], + status: StatusType, + ) -> Vec<(usize, BucketType)> { + let gb_map = match status { + StatusType::Normal { .. } => &self.virgin_branches, + StatusType::Timeout => &self.tmouts_branches, + StatusType::Crash { .. } => &self.crashes_branches, + _ => { + return vec![]; + } + }; + + let mut to_update = vec![]; + { + // read only + let gb_map_read = gb_map.read().unwrap(); + for &br in trace { + let gb_v = gb_map_read[br.0]; + if gb_v == BUCKET_MASK { + to_update.push(br); + } + } + } + + to_update + } + + pub fn merge_coverage(&mut self, update_list: &[(usize, BucketType)], status: StatusType) { + let gb_map = match status { + StatusType::Normal { .. } => &self.virgin_branches, + StatusType::Timeout => &self.tmouts_branches, + StatusType::Crash { .. 
} => &self.crashes_branches, + _ => { + return; + } + }; + let mut num_new_edge = 0; + let mut gb_map_write = gb_map.write().unwrap(); + for &br in update_list { + let gb_v = gb_map_write[br.0]; + if gb_map_write[br.0] == BUCKET_MASK { + num_new_edge += 1; + } + gb_map_write[br.0] = gb_v & (!br.1); + } + + if num_new_edge > 0 && status.is_normal() { + // only count virgin branches + self.num_edge.fetch_add(num_new_edge, Ordering::Relaxed); + } + } + + pub fn clean(&mut self) { + let mut gb_map_write = self.crashes_branches.write().unwrap(); + gb_map_write.iter_mut().for_each(|m| *m = BUCKET_MASK); + let mut gb_map_write = self.virgin_branches.write().unwrap(); + gb_map_write.iter_mut().for_each(|m| *m = BUCKET_MASK); + let mut gb_map_write = self.tmouts_branches.write().unwrap(); + gb_map_write.iter_mut().for_each(|m| *m = BUCKET_MASK); + } +} + +impl fmt::Display for GlobalBranches { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "#edge: {}, density: {:.2}%", + self.get_num_edge(), + self.get_coverage_density() + ) + } +} + +impl Drop for GlobalBranches { + fn drop(&mut self) { + if cfg!(test) { + return; + } + crate::log!(info, "dump branches.."); + let path = crate::config::output_file_path("misc/branches"); + let mut f = std::fs::File::create(path).unwrap(); + let buf = self.virgin_branches.read().unwrap(); + let slice = unsafe { buf.align_to::().1 }; + f.write_all(slice).unwrap(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn branch_empty() { + let global_branches = GlobalBranches::default(); + let trace = vec![]; + let new_edges = global_branches.has_new(&trace, StatusType::default()); + assert!(new_edges.is_empty()); + let new_edges = global_branches.has_new(&trace, StatusType::Timeout); + assert!(new_edges.is_empty()); + } + + #[test] + fn branch_find_new() { + let mut global_branches = GlobalBranches::default(); + let trace = vec![(4, 1), (5, 1), (8, 3)]; + let new_edges = 
global_branches.has_new(&trace, StatusType::default()); + assert_eq!(new_edges.len(), 3); + global_branches.merge_coverage(&new_edges, StatusType::default()); + let new_edges = global_branches.has_new(&trace, StatusType::default()); + assert!(new_edges.is_empty()); + } +} diff --git a/hopper-core/src/feedback/cmp.rs b/hopper-core/src/feedback/cmp.rs new file mode 100644 index 0000000..ba1bd0e --- /dev/null +++ b/hopper-core/src/feedback/cmp.rs @@ -0,0 +1,474 @@ +//! Compare feedback +//! including: cmp instructions, compare functions such as strcmp + +use std::{cell::RefCell, cmp::Ordering, collections::HashMap, ffi::CStr, rc::Rc}; + +use super::*; +use crate::{runtime::*, utils}; + +thread_local! { + // key: cmp_id, value: (count, is_variable) + pub static CMP_STAT: RefCell> = RefCell::new(HashMap::new()); +} + +/// Compare types +#[derive(Debug, PartialEq, Eq)] +pub enum CmpType { + Instcmp = 1, + Strcmp = 17, + Strncmp = 18, + Memcmp = 19, + Ignore = 100, +} + +#[derive(Clone, Copy, Debug)] +#[repr(packed)] +pub struct CmpOperation { + /// first operand: 0 + pub operand1: u64, + /// second operand: 8 + pub operand2: u64, + /// ID of the instruction : 16 + pub id: u32, + /// size of operand: 20 + pub size: u32, + /// type of cmp: 24 + pub ty: u16, + /// Invoke at which statement index: 26 + pub stmt_index: u16, + /// State of the cmp: 28 + pub state: u32, +} + +#[derive(Debug, Clone)] +pub struct CmpState { + /// Id of this cmp + pub id: u32, + /// refer to cmp in program's cmp_list + pub op: Rc>, + /// is left side the mutate op affect? + pub affect_left: bool, + /// has deterministic steps for cmp done? 
+ pub det: bool, +} + +#[derive(Debug, Clone)] +pub struct CmpBuf { + pub id: u32, + pub offset: usize, + pub buf: Vec, + pub det: bool, +} + +impl CmpOperation { + /// Calculate a state for the cmp + /// == : 0x04 + /// > : 0x01 + /// < : 0x02 + pub fn calculate_state(&self) -> u32 { + if self.is_instcmp() { + let operand1 = { self.operand1 }; + let operand2 = { self.operand2 }; + match operand1.cmp(&operand2) { + Ordering::Greater => 0x01, + Ordering::Less => 0x02, + Ordering::Equal => 0x04, + } + } else { + // ignore strcmp\memcmp.. + 0 + } + } + + /// are the oprands in the comparison eqaul or nbot + pub fn is_equal(&self) -> bool { + self.is_instcmp() && self.operand1 == self.operand2 + } + + /// Get the compare type + pub fn get_type(&self) -> CmpType { + match self.ty { + 1 => CmpType::Instcmp, + 17 => CmpType::Strcmp, + 18 => CmpType::Strncmp, + 19 => CmpType::Memcmp, + _ => CmpType::Ignore, + } + } + + /// is a compare instruction or not + pub fn is_instcmp(&self) -> bool { + self.get_type() == CmpType::Instcmp + } + + /// Merge state of cmp + /// simply use `or` for states + /// TODO: how to use these state + pub fn merge_state(&mut self, other: &Self) { + self.state |= other.calculate_state(); + } + + /// Check if cmp is solved + /// == and != : > 0x04 + /// > and < : 0x03 + /// there is no >= or <= in asm level + pub fn is_solved(&self) -> bool { + self.state > 0x04 || self.state == 0x03 + } + + pub fn log_cmp(&self) { + crate::log!( + info, + "CMP id: {}, ty: {}, size: {}, stmt: {}, operands: {:?}, {:?}, state: {:#04x}", + { self.id }, + { self.ty }, + { self.size }, + { self.stmt_index }, + { self.operand1 }, + { self.operand2 }, + { self.state } + ); + } +} + +impl ShmIteratorItem for CmpOperation { + fn check(&self) -> bool { + self.stmt_index < 0xFFFF + } + fn get_key(&self) -> u32 { + self.id + } +} + +impl InstrList { + /// Convert to list of cmps that wrapped by Rc/RefCell + /// Only track inst cmp + pub fn get_cmp_ref_list(&self) -> Vec>> { + 
crate::log!(trace, "cmp_len: {}", self.cmp_len()); + self.cmp_iter(Some(crate::config::CMP_MAX_COUNTER)) + .filter_map(|c| { + if !c.is_instcmp() { + return None; + } + let mut c = *c; + c.state = c.calculate_state(); + // only get the first N + CMP_STAT.with(|s| { + s.borrow_mut() + .entry(c.id) + .and_modify(|v| v.0 += 1) + .or_insert((1, false)); + }); + Some(Rc::new(RefCell::new(c))) + }) + .collect() + } + + pub fn get_cmp_ids(&self) -> Vec { + self.cmp_iter(Some(8)) + .filter(|c| c.is_instcmp()) + .map(|c| c.id) + .collect() + } + + pub fn contain_cmp_chunks(&self, chunks: &[u32]) -> bool { + let chunk_len = chunks.len(); + if chunk_len == 0 { + return true; + } + let ids = self.get_cmp_ids(); + if ids.is_empty() || ids.len() < chunk_len { + return false; + } + ids.windows(chunk_len).any(|w| w == chunks) + } + + /// Can infer cmp instrutions or not + /// Only single operation can be inferred + fn can_associate_loc(program: &FuzzProgram) -> bool { + if program.ops.len() == 1 { + let op = &program.ops[0]; + if !op.key.is_null() && op.op.is_arithmetical() { + return true; + } + } + false + } + + /// Associate location (mutation operator) with cmps + pub fn associate_loc_with_cmp_instructions( + &mut self, + program: &FuzzProgram, + ) -> eyre::Result<()> { + let can_associate = Self::can_associate_loc(program); + // Now we disablel diff cmp if the mutation is complex. 
+ if !can_associate { + return Ok(()); + } + let cmp_diff = self.diff_cmp_operands(program, can_associate); + if !can_associate || cmp_diff.is_empty() { + return Ok(()); + } + let op = &program.ops[0]; + if let Ok(is) = program.get_stmt_by_loc(&op.key) { + match &is.stmt { + FuzzStmt::Load(load) => { + crate::log!(trace, "try infer cmp instruction at : {:?}", op.key); + if let Ok(state) = load.state.get_child_by_fields(op.key.fields.as_slice()) { + // avoid add too many cmp to state + if (state.mutate.borrow()).related_cmps.len() > 25 { + return Ok(()); + } + for cmp in cmp_diff { + crate::log!( + trace, + "loc <{}>{} affects cmp {:?}", + is.index.get(), + state.get_location_fields().serialize()?, + &cmp + ); + // crate::log!(info, "try add {cmp:?} for {:?}", op.key); + state.mutate.borrow_mut().affect_cmp(cmp); + } + } + } + FuzzStmt::Call(_call) => { + // ignore it + } + _ => {} + } + } else { + eyre::bail!("get stmt by loc failed."); + } + Ok(()) + } + + /// Find differneces of cmp operands + /// + /// Use list to do strict compare. + /// why not use map?? or skip match? 
+ fn diff_cmp_operands(&self, program: &FuzzProgram, can_associate: bool) -> Vec { + let mut diff: Vec = vec![]; + let original = program.cmps.as_slice(); + crate::log!( + trace, + "cmp_len: {}, original_len: {}", + self.cmp_len(), + original.len() + ); + for (cur_op, ori_op) in self + .cmp_iter(Some(crate::config::CMP_MAX_COUNTER)) + .filter(|cmp| cmp.is_instcmp()) + .zip(original.iter()) + { + // ignore function compare + if !cur_op.is_instcmp() { + continue; + } + let mut op = ori_op.borrow_mut(); + // crate::log!(trace, "cmp: {cur_op:?} vs {op:?}"); + if op.is_equal() { + continue; + } + if cur_op.id != op.id { + break; + } + let left_diff = op.operand1 != cur_op.operand1; + let right_diff = op.operand2 != cur_op.operand2; + if left_diff && right_diff { + continue; + } + if left_diff || right_diff { + // merge state + op.merge_state(cur_op); + // cmp is variable by input + CMP_STAT.with(|s| { + s.borrow_mut() + .entry(cur_op.id) + .and_modify(|v| v.1 = true) + .or_insert((0, true)); + }); + if can_associate { + let state = CmpState { + id: cur_op.id, + op: ori_op.clone(), + affect_left: left_diff, + det: true, + }; + diff.push(state) + } + } + } + crate::log!(trace, "find cmp diff : {diff:?}"); + diff + } + + /// Find out locations that affect compare function: the pointer of locations is equals to + /// the pointer used in compare functions. + /// This function is invoked at executor/harness, so we should not compare it at fuzzer side. 
+ pub fn associate_loc_with_cmp_fn( + &self, + index: usize, + stmts: &[IndexedStmt], + resource_states: &ResourceStates, + ) -> Vec { + let mut cmp_fn_expectations = vec![]; + let index = index as u16; + for c in self.cmp_iter(None) { + if c.stmt_index != index { + continue; + } + match c.get_type() { + CmpType::Strcmp => { + let ret = find_cmp_fn_ptr_in_program(c, stmts, resource_states); + if let Some((loc, ptr)) = ret { + let buf = unsafe { CStr::from_ptr(ptr as *const i8) }; + let mut buf = Vec::from(buf.to_bytes()); + if let Some(last) = buf.last() { + if *last != 0_u8 { + buf.push(0); + } + } + crate::log_c!(trace, "field {:?} affect strcmp {:?}", loc, buf); + let rela = CmpRecord { + id: c.id, + loc, + buf, + call_index: { c.stmt_index } as usize, + }; + cmp_fn_expectations.push(rela); + } + } + CmpType::Strncmp | CmpType::Memcmp => { + let ret = find_cmp_fn_ptr_in_program(c, stmts, resource_states); + if let Some((loc, ptr)) = ret { + let len = c.size as usize; + let buf = unsafe { std::slice::from_raw_parts(ptr, len) }; + crate::log_c!( + trace, + "field {} affect strncmp/memcmp {:?}", + loc.serialize().unwrap(), + buf + ); + let rela = CmpRecord { + id: c.id, + loc, + buf: buf.to_vec(), + call_index: { c.stmt_index } as usize, + }; + cmp_fn_expectations.push(rela); + } + } + _ => {} + } + } + crate::log_c!(trace, "finish associate cmp"); + cmp_fn_expectations + } +} + +/// Find pointer address used in compare function in the program +fn find_cmp_fn_ptr_in_program( + cmp: &CmpOperation, + stmts: &[IndexedStmt], + resource_states: &ResourceStates, +) -> Option<(Location, *mut u8)> { + let ptr1 = cmp.operand1 as *mut u8; + let ptr2 = cmp.operand2 as *mut u8; + let stmt_index = cmp.stmt_index as usize; + if cmp.operand1 > 0 && utils::is_in_shlib(ptr2) { + crate::log_c!( + trace, + "cmp function ptr1: {:?} before stmt: {}, state: {:#04x} ", + ptr1, + stmt_index, + { cmp.state } + ); + let ret = find_location_at_ptr(stmts, ptr1, resource_states); + if let 
Some(loc) = ret { + return Some((loc, ptr2)); + } + // since the string may by copy to other memory adress, so we search it by its prefix. + // FIXME: we assume it is little endian + let prefix = &cmp.state.to_le_bytes()[0..2]; + if let Some(loc) = find_string_in_stmts(ptr1, prefix, stmts) { + return Some((loc, ptr2)); + } + } + if cmp.operand2 > 0 && utils::is_in_shlib(ptr1) { + crate::log_c!( + trace, + "cmp function ptr2: {:?} before stmt: {}, state: {:#04x} ", + ptr2, + stmt_index, + { cmp.state } + ); + let ret = find_location_at_ptr(stmts, ptr2, resource_states); + if let Some(loc) = ret { + return Some((loc, ptr1)); + } + let prefix = &cmp.state.to_le_bytes()[2..4]; + if let Some(loc) = find_string_in_stmts(ptr2, prefix, stmts) { + return Some((loc, ptr1)); + } + } + None +} + +/// Search string in stmts +/// +/// pointers may be released and overwrote after function called. +/// so we store its prefix or suffix as an slice for comparison. +pub fn find_string_in_stmts( + _ptr: *mut u8, + slice: &[u8], + stmts: &[IndexedStmt], +) -> Option { + if slice[0] == 0 { + return None; + } + let mut slice = slice; + if let Some(pos) = slice.iter().position(|c| *c == 0) { + slice = &slice[..pos]; + } + for indexed_stmt in stmts.iter().rev() { + let index = &indexed_stmt.index; + if let FuzzStmt::Load(load) = &indexed_stmt.stmt { + if let Some(buf) = load.value.downcast_ref::>() { + let buf = buf.as_slice(); + crate::log_c!(trace, "search {slice:?} in {buf:?}"); + if let Some(i) = twoway::find_bytes(buf, slice) { + let fields = LocFields::new(vec![FieldKey::Index(i)]); + return Some(Location::new(index.use_index(), fields)); + } + } + if let Some(buf) = load.value.downcast_ref::>() { + crate::log_c!(trace, "search {slice:?} in {buf:?}"); + let buf = unsafe { std::slice::from_raw_parts(buf.ptr as *const u8, buf.len) }; + if let Some(i) = twoway::find_bytes(buf, slice) { + let fields = LocFields::new(vec![FieldKey::Index(i)]); + return 
Some(Location::new(index.use_index(), fields)); + } + } + } + } + None +} + +pub fn dump_cmp_log() { + use std::io::Write; + CMP_STAT.with(|s| { + let path = crate::config::output_file_path("misc/stat_cmp.csv"); + let mut f = std::fs::File::create(path).unwrap(); + writeln!(f, "id, cnt, diff").unwrap(); + let blk_path = crate::config::output_file_path("misc/stat_cmp.blacklist"); + let mut blk_f = std::fs::File::create(blk_path).unwrap(); + let s = s.borrow(); + for c in s.iter() { + writeln!(f, "{}, {}, {}", c.0, c.1 .0, c.1 .1).unwrap(); + if c.1 .0 > 256 && !c.1 .1 { + writeln!(blk_f, "{}", c.0).unwrap(); + } + } + }); +} diff --git a/hopper-core/src/feedback/instr.rs b/hopper-core/src/feedback/instr.rs new file mode 100644 index 0000000..70f8580 --- /dev/null +++ b/hopper-core/src/feedback/instr.rs @@ -0,0 +1,238 @@ +use std::collections::HashMap; + +use crate::{config, runtime::*}; + +use super::*; + +const CMP_ENTRY_SIZE: usize = std::mem::size_of::(); +const CMP_LIST_LEN: usize = config::CMP_LIST_AREA / CMP_ENTRY_SIZE; +const MEM_ENTRY_SIZE: usize = std::mem::size_of::(); +const MEM_LIST_LEN: usize = config::MEM_LIST_AREA / MEM_ENTRY_SIZE; + +#[repr(packed)] +pub struct InstrList { + // cmp instructions + pub cmps: [CmpOperation; CMP_LIST_LEN], + // memory related functions + pub mems: [MemOperation; MEM_LIST_LEN], + // offset of cmp + pub cmp_offset: u32, + // offset of mem + pub mem_offset: u32, + // current index of stmt + pub stmt_index: u32, + pub loop_cnt: u32, + // function pointer address of libc's memory related functions + pub free_addr: u64, + pub malloc_addr: u64, + pub calloc_addr: u64, + pub realloc_addr: u64, + pub rip_addr: u64, + pub segv_addr: u64, +} + +// Cmplist can be a shared memory +impl SHMable for InstrList { + fn name() -> &'static str { + "instr" + } + fn shmid_env_var() -> &'static str { + config::INSTR_SHMID_VAR + } + fn ptr_base() -> *const libc::c_void { + config::SHM_INSTR_BASE as *const libc::c_void + } + fn buf_size() -> 
usize { + config::CMP_LIST_AREA + config::MEM_LIST_AREA + 64 + } +} + +impl InstrList { + /// Get last stmt index + #[inline] + pub fn last_stmt_index(&self) -> usize { + self.stmt_index as usize + } + + /// Current pointer offset that cmpop will be written + /// which indicate the size of cmps we saw + #[inline] + fn cmp_offset(&self) -> usize { + self.cmp_offset as usize + } + + /// Length of CmpOp + #[inline] + pub fn cmp_len(&self) -> usize { + self.cmp_offset() / CMP_ENTRY_SIZE + } + + /// Iterator for cmps + #[inline] + pub fn cmp_iter(&self, max_counter: Option) -> ShmBufIter<'_, CmpOperation> { + ShmBufIter { + list: &self.cmps, + len: self.cmp_len(), + offset: 0, + counter: max_counter.map(ItemCounter::new), + } + } + + /// Current pointer offset that memop will be written + /// which indicate the size of mem we saw + #[inline] + fn mem_offset(&self) -> usize { + self.mem_offset as usize + } + + /// Length of CmpOp + #[inline] + pub fn mem_len(&self) -> usize { + // length is offset + self.mem_offset() // MEM_ENTRY_SIZE + } + + /// Iterator for frees + #[inline] + pub fn mem_iter(&self) -> ShmBufIter<'_, MemOperation> { + ShmBufIter { + list: &self.mems, + len: self.mem_len(), + offset: 0, + counter: None, + } + } + + pub fn inner_clear(&mut self) { + unsafe { + libc::memset( + self.cmps.as_ptr() as *mut libc::c_void, + 0, + config::CMP_LIST_AREA, + ) + }; + // all entry will overwrite + /* + unsafe { + libc::memset( + self.mems.as_ptr() as *mut libc::c_void, + 0, + self.mem_offset as usize * MEM_ENTRY_SIZE, + ) + }; + */ + self.cmp_offset = 0; + self.mem_offset = 0; + self.stmt_index = 0; + // self.padding1 = 0; + self.free_addr = 0; + self.malloc_addr = 0; + self.calloc_addr = 0; + self.realloc_addr = 0; + self.rip_addr = crate::config::DEFAULT_RIP_ADDR; + self.segv_addr = crate::config::DEFAULT_SEGV_ADDR; + } +} + +/// Iterator for list in shared memoery +pub struct ShmBufIter<'a, T: ShmIteratorItem> { + pub list: &'a [T], + pub len: usize, + pub 
offset: usize, + pub counter: Option, +} + +pub struct ItemCounter { + pub map: HashMap, + pub max: usize, +} + +/// Check if the item is valid or not, otherwise break the iter. +pub trait ShmIteratorItem { + fn check(&self) -> bool; + fn get_key(&self) -> u32; +} + +impl<'a, T: ShmIteratorItem> Iterator for ShmBufIter<'a, T> { + type Item = &'a T; + fn next(&mut self) -> Option { + if self.offset >= self.len { + return None; + } + let ele = &self.list[self.offset]; + if !ele.check() { + return None; + } + self.offset += 1; + if let Some(counter) = self.counter.as_mut() { + if counter.exceed(ele.get_key()) { + return self.next(); + } + } + Some(ele) + } +} + +impl ItemCounter { + pub fn new(max: usize) -> Self { + Self { + map: HashMap::new(), + max, + } + } + pub fn exceed(&mut self, key: u32) -> bool { + let cnt = self.map.entry(key).and_modify(|counter| *counter += 1).or_insert(1); + *cnt >= self.max + } +} + +/// Find location at specific ptr +pub fn find_location_at_ptr( + stmts: &[IndexedStmt], + ptr: *mut u8, + resource_states: &ResourceStates, +) -> Option { + if ptr.is_null() { + return None; + } + let is_in_canary = is_in_canary(ptr); + for indexed_stmt in stmts.iter().rev() { + let index = &indexed_stmt.index; + match &indexed_stmt.stmt { + FuzzStmt::Load(load) => { + if is_in_canary { + let mut layout = load.value.get_layout(true); + // do not load pointer's layout + layout.lazy_loader = None; + if let Some(fields) = layout.find_ptr(ptr, resource_states) { + return Some(Location::new(index.use_index(), fields)); + } + } + } + FuzzStmt::Call(call) => { + if let Some(ret) = &call.ret { + let layout = ret.get_layout(false); + // crate::log!(trace, "layout: {:?}", layout); + if let Some(fields) = layout.find_ptr(ptr, resource_states) { + return Some(Location::new(index.use_index(), fields)); + } + } + } + FuzzStmt::File(file) => { + if let Some(f) = file.get_value() { + if let Some(f_ptr) = f.downcast_ref::>() { + if f_ptr.get_inner() as *mut u8 == ptr { + 
return Some(Location::new(index.use_index(), LocFields::default())); + } + } else if let Some(f_ptr) = f.downcast_ref::>() { + if f_ptr.get_inner() as *mut u8 == ptr { + return Some(Location::new(index.use_index(), LocFields::default())); + } + } + } + } + _ => {} + } + } + None +} diff --git a/hopper-core/src/feedback/mem.rs b/hopper-core/src/feedback/mem.rs new file mode 100644 index 0000000..64f9156 --- /dev/null +++ b/hopper-core/src/feedback/mem.rs @@ -0,0 +1,207 @@ +//! Memory related feedbacks + +use super::*; +use crate::runtime::*; + +/// Memory-related feedbacks +#[derive(Debug, PartialEq, Eq)] +pub enum MemType { + Free = 1, + Malloc, + Calloc, + Realloc, + ReallocMalloc, + ReallocFree, + ReallocResize, + Open = 90, + Fdopen, + Lseek, + Read, + Write, + Close, + Ignore = 100, +} + +/// Memeory-related operation +#[derive(Clone, Copy, Debug)] +#[repr(packed)] +pub struct MemOperation { + /// Address or size + pub addr: u64, + /// ID of the instruction + pub id: u32, + /// Type + pub ty: u16, + /// Invoke at which statement index + pub stmt_index: u16, + /// Size + pub size: u32, + // slice of the value in the address. 
(suffix) + pub slice: [u8; 4], +} + +impl ShmIteratorItem for MemOperation { + fn check(&self) -> bool { + self.stmt_index < 0xFFFF + } + fn get_key(&self) -> u32 { + self.id + } +} + +impl MemOperation { + pub fn get_type(&self) -> MemType { + get_mem_type(self.ty) + } +} + +pub fn get_mem_type(ty: u16) -> MemType { + match ty { + 1 => MemType::Free, + 2 => MemType::Malloc, + 3 => MemType::Calloc, + 4 => MemType::Realloc, + 5 => MemType::ReallocMalloc, + 6 => MemType::ReallocFree, + 7 => MemType::ReallocResize, + 90 => MemType::Open, + 91 => MemType::Fdopen, + 92 => MemType::Lseek, + 93 => MemType::Read, + 94 => MemType::Write, + 95 => MemType::Close, + _ => MemType::Ignore, + } +} + +impl std::fmt::Display for MemOperation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "({}, {:X}, {}, {}, {}, {:?})", + { self.id }, + { self.addr }, + { self.ty }, + { self.stmt_index }, + { self.size }, + { self.slice } + ) + } +} + +impl InstrList { + pub fn set_mem_fn(&mut self) { + // crate::log!(trace, "free: {:?}", libc::free as *const () ); + self.free_addr = libc::free as *const () as u64; + self.malloc_addr = libc::malloc as *const () as u64; + self.calloc_addr = libc::calloc as *const () as u64; + self.realloc_addr = libc::realloc as *const () as u64; + } + + /// Count the allocated resources: memory and files + pub fn count_allocated_resources(&self) -> (usize, usize) { + let mut num_files = 0; + let mut mem_bytes = 0; + for op in self.mem_iter() { + match op.get_type() { + MemType::Open => { + num_files += 1; + }, + MemType::Malloc + | MemType::Calloc + | MemType::ReallocMalloc + | MemType::ReallocResize => { + mem_bytes += op.size as usize; + } + _ => {} + } + } + (num_files, mem_bytes) + } + + /// Try to associate location with memory-related operation + pub fn associate_loc_with_mem_op( + &self, + index: usize, + stmts: &[IndexedStmt], + resource_states: &ResourceStates, + ) -> Vec { + let mut mem_records = vec![]; + for op 
in self.mem_iter() { + // crate::log_c!(trace, "mem: {:?}", op); + let stmt_index = op.stmt_index as usize; + if stmt_index != index { + // crate::log_c!(trace, "ignore mem: {:?}", op); + continue; + } + let id = op.id; + let addr = op.addr as *mut u8; + let mut size = { op.size } as usize; + let mut mode = 0; + let mut is_file = false; + match op.get_type() { + MemType::Free | MemType::ReallocFree => { + crate::log_c!(trace, "addr {:?} is freed at call {}", addr, stmt_index); + } + MemType::Open => { + is_file = true; + // we reused size for mode + mode = size; + size = 0; + crate::log_c!(trace, "addr {:?} is a file path in {}", addr, stmt_index); + } + MemType::Malloc + | MemType::Calloc + | MemType::ReallocMalloc + | MemType::ReallocResize => { + // Do not send them to fuzzer's side + continue; + } + _ => { + continue; + } + } + // crate::log!(trace, "try associate mem {:?}", op); + let mut loc = find_location_at_ptr(stmts, addr, resource_states); + // try to find the filename in string + if loc.is_none() && is_file { + loc = find_string_in_stmts(addr, op.slice.as_slice(), stmts); + } + if let Some(mut loc) = loc { + // remove index from `find_string` + if let Some(FieldKey::Index(_)) = loc.fields.list.last() { + loc.fields.list.pop(); + } + let record = MemRecord { + id, + size, + mode, + loc, + call_index: stmt_index, + ty: op.ty, + }; + mem_records.push(record); + } + } + crate::log_c!(trace, "finish associate mem"); + mem_records + } + + /// List fd information + pub fn get_fd_list(&self) -> Vec<(i32, bool)> { + let mut fd_list = vec![]; + for op in self.mem_iter() { + match op.get_type() { + MemType::Fdopen | MemType::Lseek | MemType::Read | MemType::Write | MemType::Close => { + let fd = op.addr as i32; + let mode = op.size; + let is_read = mode == 1; + fd_list.push((fd, is_read)); + }, + _ => {} + } + } + fd_list + } + +} diff --git a/hopper-core/src/feedback/mod.rs b/hopper-core/src/feedback/mod.rs new file mode 100644 index 0000000..4f78be7 --- 
/dev/null +++ b/hopper-core/src/feedback/mod.rs @@ -0,0 +1,133 @@ +mod branches; +mod cmp; +mod instr; +mod mem; +mod observer; +mod ops; +mod path; +mod res; +mod review; +mod sanitize; + +pub use branches::*; +pub use cmp::*; +pub use instr::*; +pub use mem::*; +pub use observer::*; +pub use ops::*; +pub use path::*; +pub use res::*; +pub use review::*; +pub use sanitize::*; + +#[cfg(target_family = "unix")] +mod shm; +#[cfg(target_os = "windows")] +mod shm_win; +#[cfg(target_family = "unix")] +pub use shm::*; +#[cfg(target_os = "windows")] +pub use shm_win::*; + +/// Feedback of program execution, including branch/coverage feedback +pub struct Feedback { + // executed edges + pub path: SharedMemory, + // executed instructions, e.g. cmp, malloc.. + pub instrs: SharedMemory, +} + +#[derive(Default, Debug)] +pub struct FeedbackSummary { + // micro secs + pub time_used: u128, + // path's length + pub path_len: usize, + // is it reach uniq new path + pub has_new_uniq_path: bool, +} + +pub static mut INSTR_LIST: *mut InstrList = std::ptr::null_mut(); + +impl Feedback { + pub fn new() -> eyre::Result { + let feedback = Self { + // t_used: 0, + // path_len: 0, + // has_new_bb: false, + path: setup_shm()?, + instrs: setup_shm()?, + }; + unsafe { + INSTR_LIST = feedback.instrs.ptr; + } + Ok(feedback) + } + + pub fn clear(&mut self) { + self.path.clear(); + self.instrs.inner_clear(); + // mark share memory works! 
+ self.path.buf[0] = 1; + // set func addr + self.instrs.set_mem_fn(); + } + + /// Get last stmt index + pub fn last_stmt_index(&self) -> usize { + self.instrs.last_stmt_index() + } + + // Our path tracking find nothing (none edge) + // the program exit before invoking the target function that we want to track + pub fn track_nothing(&self) -> bool { + self.path.get_list().len() <= 1 + } +} + +pub trait SHMable { + fn name() -> &'static str; + fn shmid_env_var() -> &'static str; + fn ptr_base() -> *const libc::c_void; + fn buf_size() -> usize; + fn post_hander() {} +} + +extern "C" { + // defined in asm.S + fn __hopper_enable_cov(); + fn __hopper_disable_cov(); + fn __hopper_set_context(ctx: u32); +} + +#[inline] +pub fn disable_coverage_feedback() { + #[cfg(all(feature = "e9_mode", not(test)))] + unsafe { + __hopper_disable_cov(); + } +} + +#[inline] +pub fn enable_coverage_feedback() { + #[cfg(all(feature = "e9_mode", not(test)))] + unsafe { + __hopper_enable_cov(); + } +} + +pub fn get_instr_list<'a>() -> &'a InstrList { + unsafe { &*INSTR_LIST } +} + +pub fn get_instr_list_mut<'a>() -> &'a mut InstrList { + unsafe { &mut *INSTR_LIST } +} + +#[inline] +pub fn set_coverage_context(_ctx: u32) { + #[cfg(all(feature = "e9_mode", not(test)))] + unsafe { + __hopper_set_context(_ctx); + } +} diff --git a/hopper-core/src/feedback/observer.rs b/hopper-core/src/feedback/observer.rs new file mode 100644 index 0000000..f755bcd --- /dev/null +++ b/hopper-core/src/feedback/observer.rs @@ -0,0 +1,99 @@ +//! Observer, used to check feedback collected from program +//! 
inlucing branch coverage, compare instructions/functions + +use eyre::Context; + +use crate::{execute::StatusType, BucketType, FuzzProgram, TimeUsage}; + +use super::*; + +pub struct Observer { + // Feedback for current execution + pub feedback: Feedback, + // All branches our testcases visited + pub branches_state: GlobalBranches, + // Stat for operation + pub op_stat: OperationStat, + // Time usage + pub usage: TimeUsage, +} + +impl Observer { + pub fn new() -> eyre::Result { + Ok(Self { + feedback: Feedback::new()?, + branches_state: GlobalBranches::default(), + op_stat: OperationStat::default(), + usage: TimeUsage::default(), + }) + } + + /// Check if current execution has trigger new feedback or not? + pub fn has_new_path(&mut self, status: StatusType) -> eyre::Result> { + let _counter = self.usage.count(); + let trace = self.feedback.path.get_list(); + // crate::log!(trace, "find cov: {trace:?}"); + let ret = self.branches_state.has_new(&trace, status); + Ok(ret) + } + + /// Check if current execution has trigger new unique path or not? 
+ pub fn get_new_uniq_path(&mut self, status: StatusType) -> Vec<(usize, BucketType)> { + let _counter = self.usage.count(); + let trace = self.feedback.path.get_list(); + self.branches_state.has_new_uniq(&trace, status) + } + + pub fn has_new_uniq_path(&self, trace: &[(usize, BucketType)], status: StatusType) -> bool { + !self.branches_state.has_new_uniq(trace, status).is_empty() + } + + /// Merge the update list to global coverage + pub fn merge_coverage(&mut self, update_list: &[(usize, BucketType)], status: StatusType) { + let _counter = self.usage.count(); + self.branches_state.merge_coverage(update_list, status); + crate::log!(trace, "merge cov: {:?}", update_list); + } + + /// Update cmp state and infer relationship between mutation operator and cmps + pub fn infer_cmp(&mut self, program: &FuzzProgram) -> eyre::Result<()> { + let _counter = self.usage.count(); + self.feedback + .instrs + .associate_loc_with_cmp_instructions(program) + .with_context(|| { + format!( + "fail to asscociate cmp, program:\n {}", + program.serialize_all().unwrap() + ) + }) + } + + /// Check if the program using `val` as fd + pub fn contain_fd(&self, val: i32) -> (bool, bool) { + let fd_list = self.feedback.instrs.get_fd_list(); + let mut is_fd = false; + let mut is_fd_read = false; + for (fd, read) in fd_list { + if fd == val { + is_fd = true; + if read { + is_fd_read = true; + } + } + } + (is_fd, is_fd_read) + } + + pub fn summary_feedback(&self, status: StatusType) -> FeedbackSummary { + let mut sf = FeedbackSummary::default(); + self.update_summary(&mut sf, status); + sf + } + + pub fn update_summary(&self, feedback: &mut FeedbackSummary, status: StatusType) { + let path = self.feedback.path.get_list(); + feedback.path_len = path.len(); + feedback.has_new_uniq_path |= self.has_new_uniq_path(&path, status); + } +} diff --git a/hopper-core/src/feedback/ops.rs b/hopper-core/src/feedback/ops.rs new file mode 100644 index 0000000..a6b10be --- /dev/null +++ 
b/hopper-core/src/feedback/ops.rs @@ -0,0 +1,262 @@ +use std::collections::HashMap; + +use crate::{execute::StatusType, EnumKind, FuzzProgram, FuzzStmt, MutateOperation}; + +/// Statistics of different operations, +/// measuring how many success or failure feedbacks each of them got. +#[derive(Default, Debug)] +pub struct OperationStat { + /// statistics of all operations + pub op_stat: HashMap, + /// statistics of inserting different functions + pub call_insert: HashMap, + /// stat for deterministic operations + pub det_stat: HashMap, + /// number of each target functions executed + pub exec_stat: HashMap, + /// times of mutation of each seed + pub seed_stat: HashMap, +} + +/// Use execution status as metrics +#[derive(Default, Debug)] +pub struct StatusMetrics { + pub success: usize, + pub failure: usize, + pub suc_new: usize, + pub fail_new: usize, +} + +impl OperationStat { + pub fn count_ops(&mut self, program: &FuzzProgram, status: StatusType, has_new: bool) { + // count times of mutation + if let Some(parent) = program.parent { + if let Some(metrics) = self.seed_stat.get_mut(&parent) { + metrics.count(status, has_new); + } else { + let mut metrics = StatusMetrics::default(); + metrics.count(status, has_new); + self.seed_stat.insert(parent, metrics); + } + } + // count times of execution of each api + for is in program.stmts.iter() { + if let FuzzStmt::Call(call) = &is.stmt { + self.count_exec(&call.name, status, has_new) + } + } + let ops = &program.ops; + if ops.len() == 1 { + let first = &ops[0]; + if first.det { + count_op(&mut self.det_stat, first.op.kind(), status, has_new); + } + } + if let Some(f_name) = get_op_fname(program) { + self.count_func(f_name, status, has_new); + } + if ops.is_empty() { + let kind = "Generate"; + count_op(&mut self.op_stat, kind, status, has_new); + } + for op in ops { + if let MutateOperation::BufHavoc { + use_bytes: _, + swap: _, + op, + } = &op.op + { + count_op(&mut self.op_stat, op.op.kind(), status, has_new); + } else { + 
count_op(&mut self.op_stat, op.op.kind(), status, has_new); + } + } + } + + fn count_func(&mut self, f_name: &str, status: StatusType, has_new: bool) -> bool { + if let Some(metrics) = self.call_insert.get_mut(f_name) { + metrics.count(status, has_new); + if metrics.likely_to_fail() { + crate::log!( + warn, + "insert function `{}` is likely to cause crash later!", + f_name + ); + crate::set_function_constraint_with(f_name, |fc| fc.insert_fail = true).unwrap(); + return true; + } + } else { + let mut metrics = StatusMetrics::default(); + metrics.count(status, has_new); + self.call_insert.insert(f_name.to_string(), metrics); + } + false + } + + pub fn count_func_infer(&mut self, f_name: &str, program: &FuzzProgram) -> bool { + if let Some(op_f_name) = get_op_fname(program) { + if f_name == op_f_name { + return false; + } + } + self.count_func(f_name, StatusType::Timeout, true); + true + } + + fn count_exec(&mut self, f_name: &str, status: StatusType, has_new: bool) { + if let Some(metrics) = self.exec_stat.get_mut(f_name) { + metrics.count(status, has_new); + } else { + let mut metrics = StatusMetrics::default(); + metrics.count(status, has_new); + self.exec_stat.insert(f_name.to_string(), metrics); + } + } + + pub fn get_rarely_fuzz_targets(&self) -> Option> { + let mut list: Vec<(&String, &StatusMetrics)> = self.exec_stat.iter().collect(); + if list.len() < 10 { + return None; + } + list.sort_by(|a, b| a.1.success.cmp(&b.1.success)); + let n = 5.max(list.len() / 10); + let keys: Vec = list[..n].iter().map(|v| v.0.to_string()).collect(); + if keys.is_empty() { + return None; + } + Some(keys) + } +} + +impl StatusMetrics { + pub fn count(&mut self, status: StatusType, has_new: bool) { + if status.is_normal() { + self.success += 1; + if has_new { + self.suc_new += 1; + } + } else { + self.failure += 1; + if has_new { + self.fail_new += 1; + } + } + } + + pub fn likely_to_fail(&self) -> bool { + if self.failure > 10 && (self.success == 0 || self.failure / self.success 
> 5) + || self.fail_new >= 3 + { + return true; + } + false + } + + pub fn log(&self) -> String { + format!( + "{},{},{},{}", + self.success, self.failure, self.suc_new, self.fail_new + ) + } +} + +fn get_op_fname(program: &FuzzProgram) -> Option<&str> { + if let Some(op) = program.ops.first() { + match &op.op { + MutateOperation::CallImplicitInsert { + f_name, + rng_state: _, + } => { + return Some(f_name); + } + MutateOperation::CallRelatedInsert { + f_name, + arg_pos: _, + rng_state: _, + } => { + return Some(f_name); + } + _ => {} + } + } + None +} + +fn count_op( + map: &mut HashMap, + kind: &str, + status: StatusType, + has_new: bool, +) { + if let Some(metrics) = map.get_mut(kind) { + metrics.count(status, has_new); + } else { + let mut metrics = StatusMetrics::default(); + metrics.count(status, has_new); + map.insert(kind.to_string(), metrics); + } +} + +impl Drop for OperationStat { + fn drop(&mut self) { + if cfg!(test) { + return; + } + use std::io::Write; + + crate::log!(info, "save op stat.."); + // crate::log!(info, "{self:?}"); + let path = crate::config::output_file_path("misc/stat_op.csv"); + let mut f = std::fs::File::create(path).unwrap(); + writeln!(f, "op,suc,fail,suc_new,fail_new").unwrap(); + for (op, metrics) in &self.op_stat { + writeln!(f, "{},{}", op, metrics.log()).unwrap(); + } + + let path = crate::config::output_file_path("misc/stat_det.csv"); + let mut f = std::fs::File::create(path).unwrap(); + writeln!(f, "op,suc,fail,suc_new,fail_new").unwrap(); + for (op, metrics) in &self.det_stat { + writeln!(f, "{},{}", op, metrics.log()).unwrap(); + } + + let path = crate::config::output_file_path("misc/stat_call.csv"); + let mut f = std::fs::File::create(path).unwrap(); + writeln!(f, "call,suc,fail,suc_new,fail_new").unwrap(); + for (call, metrics) in &self.call_insert { + writeln!(f, "{},{}", call, metrics.log()).unwrap(); + } + + let path = crate::config::output_file_path("misc/stat_exec.csv"); + let mut f = 
std::fs::File::create(path).unwrap(); + writeln!(f, "target,suc,fail,suc_new,fail_new").unwrap(); + for (target, metrics) in &self.exec_stat { + writeln!(f, "{},{}", target, metrics.log()).unwrap(); + } + + let path = crate::config::output_file_path("misc/stat_seed.csv"); + let mut f = std::fs::File::create(path).unwrap(); + writeln!(f, "seed,suc,fail,suc_new,fail_new").unwrap(); + for (seed, metrics) in &self.seed_stat { + writeln!(f, "{},{}", seed, metrics.log()).unwrap(); + } + + crate::dump_cmp_log(); + } +} + +#[test] +fn test_get_rarely() { + let mut stats = OperationStat::default(); + for i in 1..20 { + for _ in 0..i { + let name = format!("f_{i}"); + stats.count_exec(&name, StatusType::default(), false); + } + } + + let rare = stats.get_rarely_fuzz_targets(); + println!("rare: {rare:?}"); + assert!(rare.is_some()); + assert_eq!(rare.unwrap()[0], "f_1"); +} diff --git a/hopper-core/src/feedback/path.rs b/hopper-core/src/feedback/path.rs new file mode 100644 index 0000000..c0830ae --- /dev/null +++ b/hopper-core/src/feedback/path.rs @@ -0,0 +1,183 @@ +use crate::{config, BucketType, BRANCHES_SIZE}; + +use super::SHMable; + +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; + +#[repr(transparent)] +pub struct Path { + pub buf: [u8; BRANCHES_SIZE], +} + +impl SHMable for Path { + fn name() -> &'static str { + "trace" + } + fn shmid_env_var() -> &'static str { + config::PATH_SHMID_VAR + } + fn ptr_base() -> *const libc::c_void { + config::SHM_PATH_BASE as *const libc::c_void + } + fn buf_size() -> usize { + config::BRANCHES_SIZE + } + fn post_hander() { + #[cfg(feature = "llvm_mode")] + { + unsafe { __hopper_update_shm_addr(&*shm as *const u8) }; + crate::log!(info, "update {} shm pointer in llvm runtime !", T::name()); + } + } +} + +impl Path { + pub fn get_list(&self) -> Vec<(usize, BucketType)> { + let mut path = Vec::<(usize, BucketType)>::new(); + let flat_buf: &BranchFlatBuf = unsafe { std::mem::transmute(&self.buf) }; + for 
(i, &v) in flat_buf.iter().enumerate() { + macro_rules! run_loop { + () => {{ + let base = i * ENTRY_SIZE; + for j in 0..ENTRY_SIZE { + let idx = base + j; + let new_val = self.buf[idx]; + if new_val > 0 { + // crate::log!(trace, "id: {}, val: {}", idx, new_val); + path.push((idx, COUNT_LOOKUP[new_val as usize])); + } + } + }}; + } + #[cfg(feature = "unstable")] + { + if unsafe { unlikely(v > 0) } { + run_loop!() + } + } + #[cfg(not(feature = "unstable"))] + { + if v > 0 { + cold(); + run_loop!() + } + } + } + path + } + + pub fn contain_any(&self, edges: &[(usize, BucketType)]) -> bool { + edges.iter().any(|(idx, _k)| self.buf[*idx] > 0) + } + + pub fn is_inclued_by(&self, path: &[(usize, BucketType)]) -> bool { + let crash_path = self.get_list(); + is_sub_set(path, &crash_path) + } + + pub fn hash_trace(&self) -> u64 { + let list = self.get_list(); + let mut hasher = DefaultHasher::new(); + list.hash(&mut hasher); + hasher.finish() + } +} + +fn is_sub_set(path: &[(usize, BucketType)], sub: &[(usize, BucketType)]) -> bool { + let mut i = 0; + let mut j = 0; + let path_len = path.len(); + let sub_len = sub.len(); + while i < sub_len { + while j < path_len { + if sub[i].0 == path[j].0 { + i += 1; + j += 1; + break; + } + j += 1; + } + if j == path_len { + break; + } + } + i == sub_len +} + +#[cfg(feature = "unstable")] +use std::intrinsics::unlikely; +/// `cold` is used to mark sth is unlikely to be invoked +#[inline] +#[cold] +fn cold() {} + +#[cfg(target_pointer_width = "32")] +type BranchEntry = u32; +#[cfg(target_pointer_width = "64")] +type BranchEntry = u64; +#[cfg(target_pointer_width = "32")] +const ENTRY_SIZE: usize = 4; +#[cfg(target_pointer_width = "64")] +const ENTRY_SIZE: usize = 8; +type BranchFlatBuf = [BranchEntry; config::BRANCHES_SIZE / ENTRY_SIZE]; + +// Map of bit bucket (8bit) +// [1], [2], [3], [4, 7], [8, 15], [16, 31], [32, 127], [128, infinity] +#[cfg(not(feature = "fat_bucket"))] +static COUNT_LOOKUP: [u8; 256] = [ + 0, 1, 2, 4, 8, 8, 8, 8, 
16, 16, 16, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, +]; + +// Map of bit bucket (16bit); every bucket value must be a distinct single bit (power of two) +// [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], +// [13, 15], [16, 31], [32, 127], [128, infinity] +#[cfg(feature = "fat_bucket")] +static COUNT_LOOKUP: [u16; 256] = [ + 0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 4096, 4096, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 
16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, + 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, + 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, + 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, + 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, + 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, + 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, + 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, + 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, + 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, 32768, +]; + +#[cfg(feature = "llvm_mode")] +extern "C" { + fn __hopper_update_shm_addr(addr: *const u8); +} + +#[test] +fn test_include() { + let a = [(1, 1), (2, 1)]; + let b = [(1, 1), (2, 1), (3, 1)]; + assert!(is_sub_set(&b, &a)); + + let a = [(1, 1), (2, 1), (4, 1)]; + let b = [(1, 1), (2, 1), (3, 1)]; + assert!(!is_sub_set(&b, &a)); +} \ No newline at end of file diff --git a/hopper-core/src/feedback/res.rs b/hopper-core/src/feedback/res.rs new file mode 100644 index 0000000..6fe9c87 --- /dev/null +++ b/hopper-core/src/feedback/res.rs @@ -0,0 +1,126 @@ +//! Current resource state +//! 
used in harness + +use super::*; +use crate::runtime::*; + +#[cfg(target_os = "windows")] +use std::collections::{BTreeMap as PtrMap, BTreeSet as PtrSet}; +#[cfg(target_family = "unix")] +use std::collections::{BTreeSet as PtrSet, HashMap as PtrMap}; + +/// States of memory resource +#[derive(Default)] +pub struct ResourceStates { + /// used for review or not + review_mode: bool, + /// list of freed ptrs + freed_ptrs: PtrSet<*mut u8>, + /// memory object's size + ptr_size_map: PtrMap<*mut u8, usize>, +} + +impl ResourceStates { + /// It is used for review + pub fn set_review(&mut self) { + self.review_mode = true; + } + + /// Get size of ptr + pub fn get_ptr_size(&self, ptr: *mut u8) -> Option { + self.ptr_size_map.get(&ptr).copied() + } + + /// Insert size for specific ptr + pub fn insert_ptr_size(&mut self, ptr: *mut u8, size: usize) { + self.ptr_size_map.insert(ptr, size); + } + + /// Check pointers before filling them + /// - check if pointer is freed + #[inline] + pub fn check_pointer(&self, ptr: *mut u8) -> eyre::Result<()> { + if !ptr.is_null() { + eyre::ensure!( + !self.freed_ptrs.contains(&ptr), + crate::HopperError::UseAfterFree { ptr } + ); + } + Ok(()) + } + + /// Check arguments + /// - Check if any argument is freed + pub fn check_arguments( + &self, + arg_indices: &[StmtIndex], + stmts: &[IndexedStmt], + ) -> eyre::Result<()> { + for arg_i in arg_indices { + let is = &stmts[arg_i.get()]; + if let Some(value) = is.stmt.get_value() { + let layout = value.get_layout(false); + layout.check_ptr(self, 0)?; + } + } + Ok(()) + } + + /// Update pointers after call functions + /// - Record which pointers are freed + pub fn update_pointers_after_call(&mut self) -> eyre::Result<()> { + let instrs = get_instr_list(); + let stmt_index = instrs.last_stmt_index() as u16; + for op in instrs.mem_iter() { + if op.stmt_index == stmt_index { + crate::log!(trace, "mem op: {} ", op); + let ty = op.get_type(); + // find out freed pointer + match ty { + MemType::Free | 
MemType::ReallocFree => { + if op.addr == 0 { + continue; + } + let ptr = op.addr as *mut u8; + if !self.freed_ptrs.insert(ptr) && canary::is_in_canary(ptr) { + // double free + eyre::bail!(crate::HopperError::DoubleFree { ptr }); + } + self.ptr_size_map.insert(ptr, 0); + } + _ => {} + } + + match ty { + MemType::Malloc + | MemType::Calloc + | MemType::ReallocMalloc + | MemType::ReallocResize => { + let size = op.size as usize; + if size > 0 { + let ptr = op.addr as *mut u8; + // store memory size (only for review mode) + // if self.review_mode { + self.ptr_size_map.insert(ptr, size); + if self.freed_ptrs.contains(&ptr) { + self.freed_ptrs.remove(&ptr); + } + } else { + crate::log!(debug, "zero size memory record!"); + } + } + _ => {} + } + } + } + Ok(()) + } +} + +#[test] +fn test_check_resource() { + let mut resource_states = ResourceStates::default(); + let ptr = 123 as *mut u8; + resource_states.freed_ptrs.insert(ptr); + assert!(resource_states.check_pointer(ptr).is_err()); +} diff --git a/hopper-core/src/feedback/review.rs b/hopper-core/src/feedback/review.rs new file mode 100644 index 0000000..412d2b0 --- /dev/null +++ b/hopper-core/src/feedback/review.rs @@ -0,0 +1,658 @@ +use eyre::{Context, ContextCompat}; +use hopper_derive::Serde; +use std::fmt::Write as _; + +use crate::{ + feedback::*, + runtime::*, + utils::{self, FileAppender}, +}; + +/// Review program executation, records some useful information for fuzzing +#[derive(Debug, Default)] +pub struct ReviewResult { + /// List of compare functions + pub cmp_records: Vec, + /// List of memory related functions + pub mem_records: Vec, + /// List of call returns + pub call_rets: Vec, +} + +pub struct ReviewCollector { + pub cmp_appender: FileAppender, + pub mem_appender: FileAppender, + pub ret_appender: FileAppender, +} + +/// Record of Compare function, e.g. strcmp, memcmp .. 
+/// if strcmp(p1, p2) and we find that p2 is a static value, +/// then it indicates that p1(loc) should be mutated to p2's value (buf) +#[derive(Debug, Serde)] +pub struct CmpRecord { + /// Call statement index + pub call_index: usize, + /// Id of instrumentation + pub id: u32, + /// Mutate location + pub loc: Location, + /// Expect value of the function + pub buf: Vec, +} + +/// Record of memory related function, e.g. malloc, free .. +/// It records the latest memory size of location `loc`. +/// If visit `free(p)`, p is a loc, then p's memory size is 0 and we record (p, 0); +/// If visit `p = malloc(1024)`, then p's memory size is 1024, and we record (p, 1024). +#[derive(Debug, Serde)] +pub struct MemRecord { + /// Call statement index + pub call_index: usize, + /// Id of instrumentation + pub id: u32, + /// Memory location + pub loc: Location, + /// Mutate size + pub size: usize, + /// Is read mode + pub mode: usize, + /// Record type + pub ty: u16, +} + +impl MemRecord { + pub fn is_mem_op(&self) -> bool { + self.ty < 90 + } +} + +/// Call's return +/// It has two representations, we use `raw` during evaluation, +/// and `ir` during mutation. +#[derive(Debug, Default)] +pub struct CallRet { + /// Call statement index + pub call_index: usize, + // is from static, e.g. global variables or static string + pub static_ret: bool, + // is unwritable, functions may return a pointer that points to a constant value sometimes + pub unwritable_ret: bool, + // init argument + pub init_arg: bool, + // is partial opaque, some of the fields in the return are hidden. 
+ pub partial_opaque_ret: bool, + // raw value: serialize after evaluation + pub raw: Option, + // load value: used for mutating + pub ir: Vec, +} + +fn review_file_path(id: usize, kind: &str) -> std::path::PathBuf { + let mut path = std::path::PathBuf::from(crate::config::OUTPUT_DIR); + path.push(crate::config::REVIEW_DIR); + path.push(format!("{id}_{kind}")); + path +} + +impl ReviewCollector { + pub fn new(id: usize) -> eyre::Result { + Ok(Self { + cmp_appender: FileAppender::create(review_file_path(id, "cmp"))?, + mem_appender: FileAppender::create(review_file_path(id, "mem"))?, + ret_appender: FileAppender::create(review_file_path(id, "ret"))?, + }) + } + + /// Collect review after function call + pub fn collect_call_review( + &mut self, + call: &CallStmt, + index: usize, + prev_stmts: &[IndexedStmt], + resource_states: &ResourceStates, + ) -> eyre::Result<()> { + crate::log_c!(trace, "start call review"); + let instrs = super::get_instr_list(); + let cmp_records = instrs.associate_loc_with_cmp_fn(index, prev_stmts, resource_states); + self.cmp_appender + .append_list(&cmp_records) + .context("fail to write cmp records")?; + let mem_records = instrs.associate_loc_with_mem_op(index, prev_stmts, resource_states); + self.mem_appender + .append_list(&mem_records) + .context("fail to write mem records")?; + let init_arg = is_init_func(call, prev_stmts)?; + // Collect call ret + if let Some(ret_type) = call.fg.ret_type { + if let Some(ret) = &call.ret { + let is_opaque = utils::is_opaque_pointer(ret_type); + crate::log_c!(trace, "ret type: {}, is_opaque: {}", ret_type, is_opaque); + if !is_opaque { + let call_ret = CallRet { + call_index: index, + static_ret: is_static_ret(ret, ret_type)?, + unwritable_ret: is_unwritable_ret(ret, ret_type)?, + init_arg, + partial_opaque_ret: is_partial_opaque_ret(ret, ret_type, resource_states)?, + raw: Some(serialize_call_ret(ret, resource_states)?), + ir: vec![], + }; + self.ret_appender.append(&call_ret)?; + return Ok(()); + } + 
} + } + if init_arg { + let call_ret = CallRet { + call_index: index, + init_arg, + ..Default::default() + }; + self.ret_appender.append(&call_ret)?; + } + Ok(()) + } +} + +impl ReviewResult { + /// Attach information during review to program + pub fn attach_into_program(self, program: &mut FuzzProgram) -> eyre::Result<()> { + // crate::log!(debug, "review result: {:?}", self); + self.add_into_constraints(program)?; + set_program_call_returns(program, self.call_rets).context("fail to set call returns")?; + set_program_cmp_records(program, self.cmp_records).context("fail to set cmp records")?; + set_program_mem_records(program, self.mem_records).context("fail to set mem records")?; + program + .check_update() + .context("fail to check update for return")?; + Ok(()) + } + + /// Add review's result into constraints + pub fn add_into_constraints(&self, program: &FuzzProgram) -> eyre::Result<()> { + for ret in &self.call_rets { + let call_is = &program.stmts[ret.call_index]; + if let FuzzStmt::Call(call) = &call_is.stmt { + let call_name = call.fg.f_name; + let id = program.id; + // Find calls that init opaque objects. 
+ if ret.init_arg { + crate::set_function_constraint_with(call.fg.f_name, |fc| { + if !fc.role.init_arg { + fc.role.init_arg = true; + crate::log_new_constraint(&format!( + "found {call_name} will init its arg in seed {id}", + )); + } + })?; + } + // Find calls return pointer to static variable + if ret.static_ret { + crate::set_function_constraint_with(call_name, |fc| { + if !fc.ret.is_static { + fc.ret.is_static = true; + crate::log_new_constraint(&format!( + "found {call_name} 's return is static in seed {id}", + )); + } + })?; + } + // Find calls return pointer to unwritable memory + if ret.unwritable_ret { + crate::log!(trace, "{call_name} returns unwritable calls"); + crate::set_function_constraint_with(call_name, |fc| { + if !fc.ret.is_unwriteable { + fc.ret.is_unwriteable = true; + crate::log_new_constraint(&format!( + "found {call_name} 's return is unwritable in seed {id}", + )); + } + })?; + } + if ret.partial_opaque_ret { + crate::log!( + trace, + "{call_name} returns a pointer that is partial opaque." 
+ ); + crate::set_function_constraint_with(call_name, |fc| { + if !fc.ret.is_partial_opaque { + fc.ret.is_partial_opaque = true; + crate::log_new_constraint(&format!( + "found {call_name} 's return is partial opaque in seed {id}", + )); + } + })?; + } + } + } + // file constraint + self.infer_file_name(program)?; + Ok(()) + } + + pub fn read_from_file(program: &mut FuzzProgram) -> eyre::Result { + let id = program.id; + let cmp_records = + utils::read_list_with_program_from_file(review_file_path(id, "cmp"), program) + .with_context(|| format!("read from file failed: {id}_cmp"))?; + let mem_records = + utils::read_list_with_program_from_file(review_file_path(id, "mem"), program) + .with_context(|| format!("read from file failed: {id}_mem"))?; + let call_rets = + utils::read_list_with_program_from_file(review_file_path(id, "ret"), program) + .with_context(|| format!("read from file failed: {id}_ret"))?; + crate::log!(trace, "read review done"); + Ok(Self { + cmp_records, + mem_records, + call_rets, + }) + } +} + +impl FuzzProgram { + pub fn attach_with_review_result(&mut self) -> eyre::Result<()> { + let review = ReviewResult::read_from_file(self)?; + review + .attach_into_program(self) + .with_context(|| format!("program failed: {self}"))?; + Ok(()) + } +} + +/// check if a return pointer to static variable +fn is_static_ret(ret: &FuzzObject, type_name: &str) -> eyre::Result { + if utils::is_pointer_type(type_name) { + let ptr = ret.get_ptr_by_keys(&[FieldKey::Pointer])?; + if !ptr.is_null() && utils::is_in_shlib(ptr) { + return Ok(true); + } + } + Ok(false) +} + +/// Check if the return pointer is unwritable +fn is_unwritable_ret(ret: &FuzzObject, type_name: &str) -> eyre::Result { + if utils::is_pointer_type(type_name) { + let ptr = ret.get_ptr_by_keys(&[FieldKey::Pointer])?; + if !ptr.is_null() && utils::is_unwritable(ptr) { + return Ok(true); + } + } + Ok(false) +} + +fn is_partial_opaque_ret( + ret: &FuzzObject, + type_name: &str, + resource_states: 
&ResourceStates, +) -> eyre::Result { + if let Some(inner_ty) = utils::get_pointer_inner(type_name) { + if utils::is_primitive_type(inner_ty) || utils::is_opaque_type(inner_ty) { + return Ok(false); + } + let ptr = ret.get_ptr_by_keys(&[FieldKey::Pointer])?; + if ptr.is_null() { + return Ok(false); + } + let size = resource_states.get_ptr_size(ptr); + if let Some(sz) = size { + let ele_size = global_gadgets::get_instance() + .get_object_builder(inner_ty)? + .mem_size(); + if sz > ele_size { + return Ok(true); + } + } + } + Ok(false) +} + +/// Check if the function is used for initilization +fn is_init_func(call: &CallStmt, prev_stmts: &[IndexedStmt]) -> eyre::Result { + for cur_arg in call.args.iter() { + if let FuzzStmt::Load(load) = &prev_stmts[cur_arg.get()].stmt { + if let Some(dst_index) = load.state.get_pointer_stmt_index() { + if let FuzzStmt::Load(load) = &prev_stmts[dst_index.get()].stmt { + let ty = load.state.ty; + if let Some(inner_ty) = utils::get_vec_inner(ty) { + if utils::is_opaque_pointer(inner_ty) + && load.state.children.first().map_or(false, |s| s.is_null()) + && !load + .value + .get_ptr_by_keys(&[FieldKey::Index(0), FieldKey::Pointer])? 
+ .is_null() + { + return Ok(true); + } + } else if utils::is_opaque_pointer(ty) + && load.state.is_null() + && !load.value.get_ptr_by_keys(&[FieldKey::Pointer])?.is_null() + { + return Ok(true); + } + } + } + } + } + Ok(false) +} + +fn set_program_call_returns( + program: &mut FuzzProgram, + call_rets: Vec, +) -> eyre::Result<()> { + for ret in call_rets { + let call_is = &mut program.stmts[ret.call_index]; + match &mut call_is.stmt { + FuzzStmt::Call(call) => { + call.ret_ir = ret.ir; + // ignore root (&[]) fields, which is a pointer + if let Some(first) = call.ret_ir.first_mut() { + if first.fields.is_empty() { + first.used = Some(call_is.index.downgrade()); + } + } + } + _ => { + eyre::bail!(format!("index {} is not a call stmt", ret.call_index)); + } + } + } + Ok(()) +} + +fn set_program_mem_records( + program: &mut FuzzProgram, + mem_records: Vec, +) -> eyre::Result<()> { + crate::log!(trace, "set free records .."); + for r in &mem_records { + eyre::ensure!( + r.call_index < program.stmts.len(), + "index is less than stmts' length" + ); + // free record + if r.is_mem_op() && r.size == 0 { + let fields = &r.loc.fields.list; + // load : empty fields + // call : pointer + if fields.is_empty() || (fields.len() == 1 && fields[0] == FieldKey::Pointer) { + let stmt_index = r.loc.stmt_index.as_ref().context("loc has index")?; + let stmt_i = stmt_index.get(); + crate::log!(trace, "stmt {} is freed by call {}", stmt_i, r.call_index); + // index that use the resource + let call_index = program.stmts[r.call_index].index.use_index(); + program.stmts[stmt_i].freed = Some(call_index.downgrade()); + // find all indices that used it. + for is in program.stmts.iter_mut() { + if let FuzzStmt::Load(load) = &mut is.stmt { + if load.state.find_any_stmt_in_state_with(|ptee| ptee.get() == stmt_i) { + is.freed = Some(call_index.downgrade()); + } + } + // Call? 
+ } + // add free constraint + if let FuzzStmt::Call(call) = &program.stmts[r.call_index].stmt { + let f_name = call.fg.f_name; + crate::log!(trace, "call {f_name} will free args"); + crate::set_function_constraint_with(f_name, |fc| fc.role.free_arg = true)?; + } + } + } + // size change? + } + Ok(()) +} + +fn set_program_cmp_records( + program: &mut FuzzProgram, + cmp_records: Vec, +) -> eyre::Result<()> { + crate::log!(trace, "set cmp records.."); + // cmp inst + let cmps = get_instr_list().get_cmp_ref_list(); + crate::log!(trace, "store inst cmp list, program: {}, length: {}", program.id,cmps.len()); + // eyre::ensure!(!cmps.is_empty(), "should not be empty"); + program.cmps = std::rc::Rc::new(cmps); + // cmp function + for r in cmp_records { + // crate::log!(trace, "r: {r:?}"); + let loc = &r.loc; + let indexed_stmt = program.get_stmt_by_loc(loc).context("cmp loc error")?; + let mut fields = loc.fields.as_slice(); + let mut offset = 0; + if let Some(FieldKey::Index(i)) = fields.last() { + offset = *i; + fields = &fields[0..fields.len() - 1]; + } + let cmp_buf = CmpBuf { + id: r.id, + offset, + buf: r.buf, + det: true, + }; + match &indexed_stmt.stmt { + FuzzStmt::Load(load) => { + // The layout of an object might be changed after relative function calls or UPDATE statements, + // while the state of that object remains the same ever since it's loaded or serialized from a call. + // The discrepancies here is unavoidable and thus we should exempt the attachment of cmp records from falling into errors + // if a field is not found in an outdated object state. + if let Ok(state) = load.state.get_child_by_fields(fields) { + crate::log!( + trace, + "load cmp buf {:?} is used in {}", + &cmp_buf, + loc.serialize()? + ); + // for special case + // the array/vec is the first field/element of other structure/vec.. 
+                    let mut first = state.children.first();
+                    while let Some(inner) = first {
+                        let cmp_buf = cmp_buf.clone();
+                        inner.mutate.borrow_mut().affect_cmp_buf(cmp_buf);
+                        first = inner.children.first();
+                    }
+                    state.mutate.borrow_mut().affect_cmp_buf(cmp_buf);
+                } else {
+                    crate::log!(
+                        warn,
+                        "attach cmp failed with cmp_buf: {:?} and location: {}",
+                        &cmp_buf,
+                        loc.serialize()?
+                    );
+                }
+            }
+            FuzzStmt::Call(call) => {
+                let loc_fields = &loc.fields.list;
+                let pos = loc_fields.iter().rposition(|k| k == &FieldKey::Pointer);
+                let i = pos.map_or(0, |i| i + 1);
+                let (prefix, rest) = loc_fields.split_at(i);
+                let call_ir = call.ret_ir.iter().find(|ir| ir.fields.list == prefix);
+                if let Some(call_ir) = call_ir {
+                    crate::log!(
+                        trace,
+                        "call cmp buf {:?} is used in {}",
+                        &cmp_buf,
+                        loc.serialize()?
+                    );
+                    if let Ok(state) = call_ir.state.get_child_by_fields(rest) {
+                        state.mutate.borrow_mut().affect_cmp_buf(cmp_buf);
+                    } else {
+                        crate::log!(
+                            warn,
+                            "attach cmp failed with cmp_buf: {:?} and location: {}",
+                            &cmp_buf,
+                            loc.serialize()?
+                        );
+                    }
+                }
+            }
+            FuzzStmt::File(_) => {
+                // ignore file
+            }
+            _ => {
+                eyre::bail!("stmt is not `load` or `call` type!");
+            }
+        };
+    }
+    Ok(())
+}
+
+/// Serialize the object returned by a call into a bracketed list.
+///
+/// The first entry is `([], <type name>, <serialized value>)` for the object
+/// itself; it is followed by every entry produced by
+/// `serialize_return_object_pointers`, each terminated with `", "`, and the
+/// list is closed with `]`.
+fn serialize_call_ret(
+    call_ret: &FuzzObject,
+    resource_states: &ResourceStates,
+) -> eyre::Result {
+    crate::log!(
+        trace,
+        "start serialize call ret, type: {}",
+        call_ret.type_name()
+    );
+    let layout = call_ret.get_layout(false);
+    crate::log!(trace, "layout: {:?}", layout);
+    // The returned object itself goes first, with an empty field path.
+    let mut buf = String::new();
+    let _ = write!(
+        buf,
+        "[ ([], {}, {}), ",
+        layout.type_name,
+        call_ret.serialize()?
+    );
+    for ir in &layout.serialize_return_object_pointers(resource_states)? {
+        buf.push_str(ir);
+        buf.push_str(", ")
+    }
+    buf.push(']');
+    crate::log!(trace, "call ret itself: {}", buf);
+    Ok(buf)
+}
+
+/// Text form: `(call_index, static_ret, unwritable_ret, init_arg, partial_opaque_ret, raw)`.
+impl Serialize for CallRet {
+    fn serialize(&self) -> eyre::Result {
+        // An absent raw list is written as an empty list.
+        let buf = self
+            .raw
+            .as_ref()
+            .cloned()
+            .unwrap_or_else(|| "[]".to_string());
+        let static_ret = self.static_ret.serialize()?;
+        let unwritable_ret = self.unwritable_ret.serialize()?;
+        let init_arg = self.init_arg.serialize()?;
+        let partial_opaque_ret = self.partial_opaque_ret.serialize()?;
+        Ok(format!(
+            "({}, {}, {}, {}, {}, {})",
+            self.call_index, static_ret, unwritable_ret, init_arg, partial_opaque_ret, buf,
+        ))
+    }
+}
+
+/// Parses the tuple written by `Serialize for CallRet`; the trailing list is
+/// parsed into `ir` entries and `raw` is left as `None`.
+impl Deserialize for CallRet {
+    fn deserialize(de: &mut Deserializer) -> eyre::Result {
+        de.eat_token("(")?;
+        let call_index = de.parse_number()?;
+        de.eat_token(",")?;
+        let static_ret = bool::deserialize(de)?;
+        de.eat_token(",")?;
+        let unwritable_ret = bool::deserialize(de)?;
+        de.eat_token(",")?;
+        let init_arg = bool::deserialize(de)?;
+        de.eat_token(",")?;
+        let partial_opaque_ret = bool::deserialize(de)?;
+        de.eat_token(",")?;
+        de.eat_token("[")?;
+        let mut ir = vec![];
+        // Every list entry must be followed by a comma, including the last one.
+        loop {
+            if de.strip_token("]") {
+                break;
+            }
+            ir.push(CallRetIR::deserialize(de)?);
+            de.eat_token(",")?;
+        }
+        de.eat_token(")")?;
+        Ok(Self {
+            call_index,
+            static_ret,
+            init_arg,
+            unwritable_ret,
+            partial_opaque_ret,
+            raw: None,
+            ir,
+        })
+    }
+}
+
+/// Parses one `(fields, type, value)` entry of a call-return list.
+impl Deserialize for CallRetIR {
+    fn deserialize(de: &mut Deserializer) -> eyre::Result {
+        de.eat_token("(")?;
+        let fields = LocFields::deserialize(de)?;
+        de.eat_token(",")?;
+        let ty_tmp = de.eat_ty()?;
+        let ty = utils::get_static_ty(ty_tmp);
+        // The state identifier is derived from the last field key, if any.
+        let ident = if let Some(last) = fields.list.last() {
+            format!("call_ret_{}", last.as_str()?)
+        } else {
+            "call_ret".to_string()
+        };
+        let mut state = Box::new(ObjectState::root(ident, ty));
+        let value = read_value(de, ty, &mut state)?;
+        de.eat_token(")")?;
+        Ok(Self {
+            fields,
+            value,
+            state,
+            used: None,
+        })
+    }
+}
+
+/// Test helper: serialize `ret` as a `CallRet` with all flags false and read
+/// it back through `receive_line`, so the result carries the parsed `ir` list.
+#[cfg(test)]
+pub fn convert_ret_to_ir(ret: &FuzzObject, resource_states: &ResourceStates) -> CallRet {
+    use crate::execute::io_utils::receive_line;
+    let call_ret = CallRet {
+        call_index: 0,
+        raw: Some(serialize_call_ret(ret, resource_states).unwrap()),
+        ir: vec![],
+        static_ret: false,
+        unwritable_ret: false,
+        init_arg: false,
+        partial_opaque_ret: false,
+    };
+    let out = call_ret.serialize().unwrap();
+    println!("ir: {out:?}");
+    let ret_ir: CallRet = receive_line(&mut out.as_bytes()).unwrap();
+    ret_ir
+}
+
+#[test]
+fn test_serde_callret() {
+    use crate::test;
+    let mut resource_states = ResourceStates::default();
+    // A plain integer round-trips as a single entry with an empty field path.
+    let val = Box::new(0_u64) as FuzzObject;
+    let ret = convert_ret_to_ir(&val, &resource_states);
+    println!("val: {:?}, ir: {:?}", val, ret.ir);
+    assert_eq!(ret.ir.len(), 1);
+    assert_eq!(ret.ir[0].fields.len(), 0);
+    assert_eq!(
+        ret.ir[0].value.serialize().unwrap(),
+        val.serialize().unwrap()
+    );
+
+    // A pointer object with two registered pointer sizes yields three entries.
+    let val = Box::new(test::create_test_ptr()) as FuzzObject;
+    let inner_fields = vec![
+        FieldKey::Pointer,
+        FieldKey::Field("p".to_string()),
+        FieldKey::Pointer,
+    ];
+    resource_states.insert_ptr_size(val.get_ptr_by_keys(&[]).unwrap(), 1);
+    resource_states.insert_ptr_size(val.get_ptr_by_keys(&inner_fields).unwrap(), 10);
+    let ret = convert_ret_to_ir(&val, &resource_states);
+    for (i, ir) in ret.ir.iter().enumerate() {
+        println!("ir-{}: {:?}", i, ir.fields)
+    }
+    println!("{:?}", ret.ir[0].value);
+    assert_eq!(ret.ir.len(), 3);
+    assert_eq!(
+        val.get_ptr_by_keys(&ret.ir[2].fields.list).unwrap(),
+        val.get_ptr_by_keys(&inner_fields).unwrap()
+    );
+}
diff --git a/hopper-core/src/feedback/sanitize.rs b/hopper-core/src/feedback/sanitize.rs
new file mode 100644
index 0000000..6373305
--- /dev/null
+++
b/hopper-core/src/feedback/sanitize.rs @@ -0,0 +1,479 @@ +use eyre::ContextCompat; +use hopper_derive::Serde; +use std::fmt::Display; +use std::fmt::Write as _; + +use crate::{ + config, + runtime::*, + utils::{self, FileAppender}, + CrashSig, +}; + +use super::{MemType, ResourceStates}; + +pub const SANITIZER_FLAG_NUM: u8 = 3; + +pub const REF_CIRCLE_FLAG: &str = "refcircle"; +pub const ILL_FREE_FLAG: &str = "illfree"; +pub const NOT_TARGET_FLAG: &str = "nottarget"; +pub const GENERATE_FLAG: &str = "generate"; +pub const SET_ALL_FLAG: &str = "all"; + +#[derive(Debug, Default, Serde)] +pub struct SanitizeResult { + pub cause: Vec, +} + +pub struct SanitizeChecker { + pub appender: FileAppender, +} + +#[derive(Debug, Serde, Clone)] +pub enum FailureCause { + RefCircle { + stmt: usize, + field1: LocFields, + field2: LocFields, + }, + DoubleFree { + freed_addr: u64, + }, + IllegalFree { + freed_addr: u64, + }, + NotTrackCall { + fail_at: usize, + }, + SegViolation { + addr: CrashSig, + }, + ViolateRule { + rule: String, + }, + Generate, +} + +impl SanitizeChecker { + pub fn new() -> eyre::Result { + Ok(Self { + appender: FileAppender::create(config::tmp_file_path("sanitize"))?, + }) + } + + pub fn check_before_eval_stmt( + &mut self, + is: &IndexedStmt, + used_stmts: &[IndexedStmt], + resource_states: &ResourceStates, + ) -> eyre::Result<()> { + if let FuzzStmt::Call(call) = &is.stmt { + self.check_reference_circle(call, used_stmts, resource_states)?; + self.check_not_target_call(call, is.index.get())?; + } + Ok(()) + } + + pub fn check_reference_circle( + &mut self, + call: &CallStmt, + used_stmts: &[IndexedStmt], + resource_states: &ResourceStates, + ) -> eyre::Result<()> { + if call.failure { + for arg_stmt in call.args.iter() { + let value = arg_stmt.get_stmt_value(used_stmts).context("has value")?; + // crate::log!(trace, "checking argument: {:#?}", value); + let layout = value.get_layout(false); + + let is_ref_circle = 
layout.check_reference_circle(resource_states); + let ref_circle_locs = layout.check_reference_circle_loc(resource_states); + if is_ref_circle != ref_circle_locs.is_some() { + crate::log!(warn, "sanitize result inconsitent"); + } + if let Some((field1, field2)) = ref_circle_locs { + self.appender.append(&FailureCause::RefCircle { + stmt: arg_stmt.get(), + field1, + field2, + })?; + break; + } + } + } + Ok(()) + } + + pub fn check_not_target_call(&mut self, call: &CallStmt, index: usize) -> eyre::Result<()> { + if call.failure && !call.track_cov { + self.appender + .append(&FailureCause::NotTrackCall { fail_at: index })?; + } + Ok(()) + } + + pub fn check_generate(&mut self, program: &FuzzProgram) -> eyre::Result<()> { + if program.parent.is_none() { + self.appender.append(&FailureCause::Generate)?; + } + Ok(()) + } + + pub fn check_illegal_free(index: usize) -> Vec { + let instrs = super::get_instr_list(); + let mut allocated = vec![]; + let mut freed = vec![]; + let mut hints = vec![]; + for mem in instrs.mem_iter() { + let addr = { mem.addr }; + match mem.get_type() { + MemType::Malloc | MemType::Calloc | MemType::ReallocMalloc => { + allocated.push(addr); + freed.retain(|v| v != &addr); + } + MemType::Free | MemType::ReallocFree => { + if mem.stmt_index as usize == index { + if freed.contains(&addr) { + hints.push(FailureCause::DoubleFree { freed_addr: addr }); + } else if !allocated.contains(&addr) { + hints.push(FailureCause::IllegalFree { freed_addr: addr }); + } + } + freed.push(addr); + } + _ => {} + } + } + hints + } +} + +impl SanitizeResult { + pub fn add_violated_constraints( + &mut self, + constraints: &[T], + ) -> eyre::Result<()> { + if constraints.is_empty() { + return Ok(()); + } + let mut buf = String::new(); + for c in constraints { + buf.push_str(&c.serialize()?); + buf.push_str(", "); + } + self.cause.push(FailureCause::ViolateRule { rule: buf }); + Ok(()) + } + + pub fn conclusion(program: &FuzzProgram) -> eyre::Result { + let mut cause = 
utils::read_list_from_file(config::tmp_file_path("sanitize"))?; + let instr = crate::get_instr_list(); + let last_stmt = instr.last_stmt_index(); + let result = SanitizeChecker::check_illegal_free(last_stmt); + cause.extend(result); + if let Some(addr) = crate::get_crash_sig(Some(program)) { + cause.push(FailureCause::SegViolation { addr }); + } + Ok(Self { cause }) + } +} + +impl ToString for SanitizeResult { + fn to_string(&self) -> String { + let mut buf = String::new(); + if self.cause.is_empty() { + return buf; + } + let _ = writeln!( + buf, + " Program crashes or hangs may be due to the following reasons: " + ); + for cause in &self.cause { + let _ = writeln!(buf, "\t* {cause}"); + } + buf.push('\n'); + buf + } +} + +impl Display for FailureCause { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::RefCircle { + stmt, + field1, + field2, + } => { + write!( + f, + "A reference circle in the arguments of the crashed call is detected. statement: {}, fields: {} and {}", + stmt, field1.serialize().unwrap(), field2.serialize().unwrap() + ) + } + Self::DoubleFree { freed_addr } => { + write!( + f, + "A double-free is detected (address: {freed_addr:p}), where the pointer has been freed in other place before it." + ) + } + Self::IllegalFree { freed_addr } => { + write!( + f, + "An illegal call of free() is detected (address: {freed_addr:p}), where the freed pointer is not properly allocatecd. This happens when the pointer is obtained from a field of another object." + ) + } + Self::NotTrackCall { fail_at } => { + write!( + f, + "The program failed at call <{fail_at}>, which is not in its tracking call." 
+ ) + } + Self::SegViolation { addr } => { + write!( + f, + "Segmentation violation at memory {:?} and RIP {:?} : {}", + addr.get_addr(), + addr.get_rip(), + addr.reason() + ) + } + Self::Generate => { + write!(f, "The program is generated from nothing.") + } + Self::ViolateRule { rule } => { + write!(f, "Violate constraint: {rule}") + } + } + } +} + +impl ObjectLayout { + /// Check reference circle, and return the location of pointer that crates the circle. + pub fn check_reference_circle_loc( + &self, + resource_states: &ResourceStates, + ) -> Option<(LocFields, LocFields)> { + let mut cur_path = vec![]; + let mut visited_ptr = vec![]; + if let Some(ptr) = self.check_reference_circle_loc_inner( + resource_states, + &mut cur_path, + &mut visited_ptr, + std::ptr::null_mut(), + 0, + ) { + let loc1 = self.find_ptr(ptr, resource_states).unwrap(); + let loc2 = LocFields::new(cur_path); + return Some((loc1, loc2)); + } + None + } + + pub fn check_reference_circle_loc_inner( + &self, + resource_states: &ResourceStates, + cur_path: &mut Vec, + visited_ptr: &mut Vec<*mut u8>, + last_ptr: *mut u8, + depth: usize, + ) -> Option<*mut u8> { + if depth > 64 || self.ptr.is_null() { + return None; + } + // avoid backtracing fields, e.g. prev, parent.. + // they will be circle in real worlds + if let FieldKey::Field(f) = &self.key { + if crate::literal::is_backtracing_field(f) { + return None; + } + } + crate::log!( + trace, + "visit: {:?} - {:?} - last: {:?}", + cur_path, + self.ptr, + last_ptr + ); + // Avoid first field or index in structure or array. 
+ // e.g a = A { f1, f2 } , &a = &(a.f1) + // they will hold the same address + let not_first_field = self.ptr != last_ptr; + // Pointer key is special, if &a = a.f1, it is a ref-circle + if (not_first_field || self.key == FieldKey::Pointer) && visited_ptr.contains(&self.ptr) { + crate::log!(trace, "ptr {:?} is ref-circle", self.ptr); + return Some(self.ptr); + } + if not_first_field { + visited_ptr.push(self.ptr); + } + let fields = self.get_fields_with_rs(resource_states); + for f in fields { + if let FieldKey::Index(_) = f.key { + if f.fields.is_empty() && f.lazy_loader.is_none() { + break; + } + } + /* + // only focus on types that : T { field : T* } + if let FieldKey::Field(_) = &f.key { + if !self.type_name.contains(f.type_name) { + return false; + } + } + */ + cur_path.push(f.key.clone()); + let ret = f.check_reference_circle_loc_inner( + resource_states, + cur_path, + visited_ptr, + self.ptr, + depth + 1, + ); + if ret.is_some() { + return ret; + } + cur_path.pop(); + } + None + } + + pub fn get_concrete_objs<'a>( + &'a self, + objs: &mut Vec<&'a ObjectLayout>, + resource_states: &ResourceStates, + ) { + match self.key { + FieldKey::Pointer => { + let fields = self.get_fields_with_rs(resource_states); + for f in fields { + f.get_concrete_objs(objs, resource_states); + } + } + FieldKey::Index(_) => { + let fields = self.get_fields_with_rs(resource_states); + for f in fields { + if let FieldKey::Field(_) = f.key { + objs.push(self); + return; + } + } + + // If this is a multi-level pointer + for f in fields { + f.get_concrete_objs(objs, resource_states); + } + } + + // Only falls in here when invoked in check_reference_circle_recurse + FieldKey::Field(_) | FieldKey::Root(_) => { + if utils::is_pointer_type(self.type_name) { + let fields = self.get_fields_with_rs(resource_states); + for f in fields { + f.get_concrete_objs(objs, resource_states); + } + } else if !utils::is_option_type(self.type_name) + && !utils::is_primitive_type(self.type_name) + { + 
objs.push(self) + } + } + _ => {} + } + } + + pub fn check_reference_circle(&self, resource_states: &ResourceStates) -> bool { + self.check_reference_circle_inner(resource_states, 0) + } + + fn check_reference_circle_inner(&self, resource_states: &ResourceStates, depth: usize) -> bool { + if depth > 1 { + return false; + } + let mut objs: Vec<&ObjectLayout> = vec![]; + self.get_concrete_objs(&mut objs, resource_states); + crate::log!(trace, "check_reference_circle: concrete objs: {:?}", objs); + for obj in objs { + let fields = obj.get_fields_with_rs(resource_states).iter().filter(|l| { + let mut ident_name_flag = false; + if let FieldKey::Field(key) = &l.key { + ident_name_flag = !crate::literal::is_backtracing_field(key); + } + let type_name_flag = if let Some(inner_ty) = utils::get_pointer_inner(l.type_name) { + inner_ty == obj.type_name + } else { + false + }; + ident_name_flag && type_name_flag + }); + let field_keys = fields.fold(vec![], |mut acc, l| { + if let FieldKey::Field(key) = &l.key { + acc.push(key.as_str()); + } + acc + }); + crate::log!(trace, "Elected Fields: {:?}", field_keys); + + let mut visited_ptrs = vec![]; + if obj.check_reference_circle_recurse( + &mut visited_ptrs, + &field_keys, + resource_states, + depth, + ) { + return true; + } + } + false + } + + fn check_reference_circle_recurse( + &self, + visited_ptrs: &mut Vec<*mut u8>, + field_keys: &Vec<&str>, + resource_states: &ResourceStates, + depth: usize, + ) -> bool { + for layout in self.get_fields_with_rs(resource_states) { + crate::log!(trace, "Current Layout: {:?}", layout); + if let FieldKey::Field(key) = &layout.key { + if !field_keys.contains(&key.as_str()) { + if layout.check_reference_circle_inner(resource_states, depth + 1) { + return true; + } + continue; + } + if visited_ptrs.contains(&layout.ptr) { + crate::log!(trace, "ptr {:?} is visited", layout.ptr); + return true; + } + visited_ptrs.push(layout.ptr); + } + let found = layout.check_reference_circle_recurse( + 
visited_ptrs, + field_keys, + resource_states, + depth, + ); + if found { + return found; + } + } + false + } +} + +#[test] +fn test_reference_circle_checker() { + for i in 1..=4 { + println!("run test case {i}"); + let mut call = crate::test::generate_call_stmt(&format!("reference_circle_{i}")); + let mut resource_states = ResourceStates::default(); + use crate::StmtView; + call.eval(&mut [], &mut resource_states).expect("..."); + let layout = call.ret.unwrap().get_layout(false); + assert!(layout.check_reference_circle(&resource_states)); + let path = layout.check_reference_circle_loc(&resource_states); + println!("locs: {path:?}"); + assert!(path.is_some()); + } +} diff --git a/hopper-core/src/feedback/shm.rs b/hopper-core/src/feedback/shm.rs new file mode 100644 index 0000000..79eee2f --- /dev/null +++ b/hopper-core/src/feedback/shm.rs @@ -0,0 +1,152 @@ +use crate::error; +use std::{ + self, + ops::{Deref, DerefMut}, +}; + +/// Shared memory, used for IPC communication between the fuzzer and testing targets. 
+pub struct SharedMemory {
+    /// SHMID
+    pub id: i32,
+    /// size of shared memory
+    pub size: usize,
+    /// content of shared memory
+    pub ptr: *mut T,
+}
+
+impl SharedMemory {
+    /// Create a shared memory at a proper location
+    /// (a null base address is passed on to `shmat`).
+    pub fn new() -> eyre::Result {
+        Self::new_at(std::ptr::null())
+    }
+
+    /// Create a shared memory at specific location of process memory
+    ///
+    /// A new private segment sized by `size_of` is created with mode `0o600`
+    /// and then attached at `ptr_base`.
+    pub fn new_at(ptr_base: *const libc::c_void) -> eyre::Result {
+        let size = std::mem::size_of::();
+        let id = unsafe {
+            libc::shmget(
+                libc::IPC_PRIVATE,
+                size,
+                libc::IPC_CREAT | libc::IPC_EXCL | 0o600,
+            )
+        };
+        error::check_os_error(id, "shmget fail")?;
+        Self::from_id_at(id, ptr_base)
+    }
+
+    /// Load shared memory by its SHMID
+    pub fn from_id(id: i32) -> eyre::Result {
+        Self::from_id_at(id, std::ptr::null())
+    }
+
+    /// Load shared memory by its SHMID at specific location of process memory
+    fn from_id_at(id: i32, ptr_base: *const libc::c_void) -> eyre::Result {
+        let size = std::mem::size_of::();
+        let ptr = unsafe { libc::shmat(id as libc::c_int, ptr_base, 0) as *mut T };
+        // The attach result is handed to the OS-error check as an integer.
+        error::check_os_error(ptr as i64, "shmat fail")?;
+        Ok(SharedMemory:: { id, size, ptr })
+    }
+
+    /// Clear content at shared memory
+    pub fn clear(&mut self) {
+        unsafe { libc::memset(self.ptr as *mut libc::c_void, 0, self.size) };
+    }
+
+    /// Get unique key for environment
+    /// (the SHMID rendered as a decimal string).
+    pub fn get_env_var(&self) -> String {
+        self.id.to_string()
+    }
+}
+
+/// Set up the shared memory for `T`.
+///
+/// When the environment variable named by `T::shmid_env_var()` is set, its
+/// value is parsed as a SHMID and that segment is attached; otherwise a new
+/// segment is created. With the `e9_mode` feature the region at
+/// `T::ptr_base()` of `T::buf_size()` bytes is unmapped first and the segment
+/// is attached at that fixed address.
+pub fn setup_shm() -> eyre::Result> {
+    let id = match std::env::var(T::shmid_env_var()) {
+        Ok(s) => Some(s.parse::()?),
+        Err(_) => None,
+    };
+    let shm = if cfg!(feature = "e9_mode") {
+        crate::log!(info, "setup {} shm for e9 runtime...", T::name());
+        let ret = unsafe { libc::munmap(T::ptr_base() as *mut libc::c_void, T::buf_size()) };
+        crate::error::check_os_error(ret, "munmap fail")?;
+        if let Some(id) = id {
+            SharedMemory::::from_id_at(id, T::ptr_base())?
+        } else {
+            SharedMemory::::new_at(T::ptr_base())?
+        }
+    } else {
+        // llvm or cov mode
+        if let Some(id) = id {
+            SharedMemory::::from_id(id)?
+        } else {
+            SharedMemory::::new()?
+        }
+    };
+    crate::log!(
+        info,
+        "setup {} shared memory success ! id: {:?}, shm: {:?}",
+        T::name(),
+        id,
+        shm
+    );
+    Ok(shm)
+}
+
+impl Deref for SharedMemory {
+    type Target = T;
+    fn deref(&self) -> &Self::Target {
+        unsafe { &*self.ptr }
+    }
+}
+
+impl DerefMut for SharedMemory {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe { &mut *self.ptr }
+    }
+}
+
+impl std::fmt::Debug for SharedMemory {
+    /// Prints `id, size (hex), pointer`.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "{}, {:#x}, {:p}", self.id, self.size, self.ptr)
+    }
+}
+
+impl Drop for SharedMemory {
+    /// Marks the segment for removal; a failure is logged, not propagated.
+    fn drop(&mut self) {
+        // NOTE(review): only `IPC_RMID` is issued here; the mapping created by
+        // `shmat` is not detached (no `shmdt`) anywhere in this block - confirm
+        // that this is intended.
+        let ret = unsafe { libc::shmctl(self.id, libc::IPC_RMID, std::ptr::null_mut()) };
+        if let Err(e) = error::check_os_error(ret, "fail to remove shm") {
+            crate::log!(error, "{}", e);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_u8() {
+        let mut one = SharedMemory::::new().unwrap();
+        *one = 1;
+        assert_eq!(1, *one);
+    }
+
+    #[test]
+    fn test_array() {
+        let mut arr = SharedMemory::<[u8; 10]>::new().unwrap();
+        arr.clear();
+        let sl = &mut arr;
+        assert_eq!(0, sl[4]);
+        sl[4] = 33;
+        assert_eq!(33, sl[4]);
+    }
+
+    #[test]
+    fn test_shm_fail() {
+        // Attaching to an id that does not exist must fail.
+        let arr = SharedMemory::<[u8; 10]>::from_id(88888888);
+        println!("arr: {arr:?}");
+        assert!(arr.is_err());
+        // Attaching to the id of a freshly created segment must succeed.
+        let arr = SharedMemory::<[u8; 10]>::new();
+        assert!(arr.is_ok());
+        let arr2 = SharedMemory::<[u8; 10]>::from_id(arr.unwrap().id);
+        assert!(arr2.is_ok());
+    }
+}
diff --git a/hopper-core/src/feedback/shm_win.rs b/hopper-core/src/feedback/shm_win.rs
new file mode 100644
index 0000000..b4ca598
--- /dev/null
+++ b/hopper-core/src/feedback/shm_win.rs
@@ -0,0 +1,92 @@
+use super::SHMable;
+use std::ops::{Deref, DerefMut};
+
+/// Shared memory, used for IPC communication between the fuzzer and testing targets.
+pub struct SharedMemory {
+    /// SHMID
+    pub handle: crate::execute::Handle,
+    /// size of shared memory
+    pub size: usize,
+    /// content of shared memory
+    pub ptr: *mut T,
+}
+
+impl SharedMemory {
+    /// Create a shared memory at a proper location
+    /// (test only: uses the fixed mapping name `TEST` and a null base).
+    #[cfg(test)]
+    pub fn new() -> eyre::Result {
+        let ptr_base = crate::execute::NULL;
+        let lp_name = "TEST".to_string();
+        Self::new_at(ptr_base, &lp_name)
+    }
+
+    /// Create a shared memory at specific location of process memory
+    ///
+    /// Creates a file mapping named `lp_name` and maps a view of it at `ptr_base`.
+    pub fn new_at(ptr_base: *mut std::os::raw::c_void, lp_name: &str) -> eyre::Result {
+        // NOTE(review): the name pointer is cast to `u32`, which drops the
+        // upper bits of the address on 64-bit targets - confirm against the
+        // signature of `hopper_create_file_mapping`.
+        let handle =
+            crate::execute::hopper_create_file_mapping(0, 0x100000, lp_name.as_ptr() as u32)?;
+        Self::from_id_at(handle, ptr_base)
+    }
+
+    /// Load shared memory by its SHMID at specific location of process memory
+    fn from_id_at(
+        handle: *mut std::os::raw::c_void,
+        lp_addr: *mut std::os::raw::c_void,
+    ) -> eyre::Result {
+        let size = std::mem::size_of::() as usize;
+        let ptr = match crate::execute::hopper_map_view_of_file_ex(handle, 0, 0, 0, lp_addr) {
+            Ok(ptr) => ptr as *mut T,
+            // NOTE(review): a mapping failure is swallowed and the requested
+            // address is used as if it were mapped - confirm this is intended.
+            Err(_) => {
+                // crate::execute::hopper_unmap_view_of_file(ptr_base as crate::execute::PVOID);
+                // crate::execute::hopper_map_view_of_file_ex(id,0,0,0,ptr_base as *mut std::os::raw::c_void).unwrap() as *mut T
+                // eyre::bail!("fail to setup shared memory!");
+                lp_addr as *mut T
+            }
+        };
+        Ok(SharedMemory:: { handle, size, ptr })
+    }
+
+    /// Clear content at shared memory
+    pub fn clear(&mut self) {
+        unsafe { libc::memset(self.ptr as *mut libc::c_void, 0, self.size) };
+    }
+
+    /// Get unique key for environment
+    /// (always the empty string in this implementation).
+    pub fn get_env_var(&self) -> String {
+        "".to_string()
+    }
+}
+
+/// Set up the shared memory for `T` at `T::ptr_base()`, using a NUL-terminated
+/// mapping name built from `T::shmid_env_var()` and `config::TASK_NAME`.
+pub fn setup_shm() -> eyre::Result> {
+    log::info!("setup {} shm for e9 runtime...", T::name());
+    // let area_base = format!("{}_AREA_BASE\x00", crate::config::TASK_NAME);
+    let lp_name = format!("{}_{}\x00", T::shmid_env_var(),crate::config::TASK_NAME);
+    let shm = SharedMemory::::new_at(T::ptr_base() as *mut std::os::raw::c_void, &lp_name)?;
+    log::info!("setup {} shared memory success ! shm: {:?}, lp_name: {}", T::name(), shm, lp_name);
+    Ok(shm)
+}
+
+impl Deref for SharedMemory {
+    type Target = T;
+    fn deref(&self) -> &Self::Target {
+        unsafe { &*self.ptr }
+    }
+}
+
+impl DerefMut for SharedMemory {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe { &mut *self.ptr }
+    }
+}
+
+impl std::fmt::Debug for SharedMemory {
+    /// Prints `handle, size, pointer`.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "{:?}, {}, {:p}", self.handle, self.size, self.ptr)
+    }
+}
+
+impl Drop for SharedMemory {
+    /// Unmaps the view and closes the handle; either failure panics via `unwrap`.
+    fn drop(&mut self) {
+        crate::execute::hopper_unmap_view_of_file(self.ptr as crate::execute::Pvoid).unwrap();
+        crate::execute::hopper_close_handle(self.handle).unwrap();
+    }
+}
diff --git a/hopper-core/src/fuzz/check.rs b/hopper-core/src/fuzz/check.rs
new file mode 100644
index 0000000..89ff5cf
--- /dev/null
+++ b/hopper-core/src/fuzz/check.rs
@@ -0,0 +1,306 @@
+//! Implement program's functions for checking and finding something
+
+use eyre::ContextCompat;
+
+use crate::{log, runtime::*};
+
+impl FuzzProgram {
+    /// Check refer-use relationships in statements
+    /// if a statement is not used by any other, it will be deleted.
+    pub fn check_ref_use(&mut self) -> eyre::Result<()> {
+        let len = self.stmts.len();
+        log!(
+            trace,
+            "start check ref use, len: {}, rng: {:?}",
+            self.stmts.len(),
+            self.rng
+        );
+        // log!(trace, "program before check-ref: {}", self.serialize()?);
+        // Walk from the last statement to the first, so deleting statement `i`
+        // does not disturb the indices still to be visited.
+        for i in (0..len).rev() {
+            // Drop a `freed` marker whose target has been released.
+            if let Some(freed) = &self.stmts[i].freed {
+                if freed.is_released() {
+                    self.stmts[i].freed = None;
+                }
+            }
+            let is = &self.stmts[i];
+            match &is.stmt {
+                FuzzStmt::Load(_) | FuzzStmt::File(_) => {
+                    if is.index.get_ref_used() <= 1 {
+                        self.delete_stmt(i);
+                    }
+                }
+                FuzzStmt::Call(call) => {
+                    if call.is_relative() {
+                        if call.has_reused_args(self).is_none() {
+                            self.delete_stmt(i);
+                        }
+                    } else if !call.is_target() && is.index.get_ref_used() <= 1 {
+                        // remove freed, we should remove weak index first
+                        for j in 0..i {
+                            let is = &mut self.stmts[j];
+                            if let Some(f_i) = &is.freed {
+                                if !f_i.is_released() && f_i.get() == i {
+                                    is.freed = None;
+                                }
+                            }
+                        }
+                        self.delete_stmt(i);
+                    }
+                }
+                FuzzStmt::Update(update) => {
+                    if let Some(dst_index) = &update.dst.stmt_index {
+                        if dst_index.get_ref_used() <= 1 {
+                            self.delete_stmt(i);
+                        }
+                    }
+                }
+                FuzzStmt::Assert(assert) => {
+                    // An assertion goes away once the statement it refers to is
+                    // released or no longer referenced elsewhere.
+                    let can_remove = match &assert.rule {
+                        AssertRule::NonNull { stmt } => {
+                            stmt.is_released() || stmt.get_ref_used() <= 1
+                        }
+                        AssertRule::Initialized { stmt, call } => {
+                            stmt.is_released()
+                                || call.is_released()
+                                || stmt.get_ref_used() <= 2
+                                || !self.check_call_initialize_stmt(stmt.get(), call.get())
+                        }
+                        AssertRule::Eq { stmt, expected: _ } => {
+                            stmt.is_released()
+                                || (stmt.get_ref_used() <= 1 && !self.is_target_index(stmt.get()))
+                        }
+                        AssertRule::Neq { stmt, expected: _ } => {
+                            stmt.is_released()
+                                || (stmt.get_ref_used() <= 1 && !self.is_target_index(stmt.get()))
+                        }
+                        _ => false,
+                    };
+                    if can_remove {
+                        self.delete_stmt(i);
+                    }
+                }
+                _ => {}
+            }
+        }
+        log!(trace, "check ref done");
+        self.eliminate_invalidatd_operators();
+        self.eliminate_invalidatd_contexts();
+
+        Ok(())
+    }
+
+    /// Check if `index` is target
+    /// (false when the statement at `index` is not a call).
+    fn is_target_index(&self, index: usize) -> bool {
+        if let FuzzStmt::Call(call) = &self.stmts[index].stmt {
+            return call.is_target();
+        }
+        false
+    }
+
+    /// Eliminate invalidated operators:
+    /// drops every operator whose key has been released.
+    fn eliminate_invalidatd_operators(&mut self) {
+        self.ops.retain(|op| !op.key.is_released());
+    }
+
+    /// Eliminate invalidated contexts:
+    /// drops call contexts and `freed` markers that refer to statements no
+    /// longer present in the program.
+    pub fn eliminate_invalidatd_contexts(&mut self) {
+        let stmt_uniqs: Vec = self.stmts.iter().map(|is| is.index.get_uniq()).collect();
+        for is in &mut self.stmts {
+            if let FuzzStmt::Call(call) = &mut is.stmt {
+                call.contexts
+                    .retain(|ctx| stmt_uniqs.contains(&ctx.get_uniq()));
+            }
+            if let Some(i) = &is.freed {
+                if i.is_released() || !stmt_uniqs.contains(&i.get_uniq()) {
+                    is.freed = None;
+                }
+            }
+        }
+    }
+
+    /// The ret of call may change, so we should remove the update once its dst is disappear
+    pub fn check_update(&mut self) -> eyre::Result<()> {
+        let mut to_delete = vec![];
+        for i in (0..self.stmts.len()).rev() {
+            // Split so the update (`rest[0]`) and its destination call (in `prev`)
+            // can be borrowed at the same time.
+            let (prev, rest) = self.stmts.split_at_mut(i);
+            if let FuzzStmt::Update(update) = &rest[0].stmt {
+                let dst_i = update.dst.get_index()?.get();
+                if let FuzzStmt::Call(call) = &mut prev[dst_i].stmt {
+                    // Match an IR entry with the same fields, or one whose fields are
+                    // a prefix of the destination and whose state has the remainder.
+                    let found = call.ret_ir.iter_mut().find(|ir| {
+                        if ir.fields == update.dst.fields {
+                            return true;
+                        }
+                        if let Some(f) = update.dst.fields.list.strip_prefix(&ir.fields.list[..]) {
+                            return ir.state.get_child_by_fields(f).is_ok();
+                        }
+                        false
+                    });
+                    if let Some(ir) = found {
+                        ir.used = Some(rest[0].index.downgrade());
+                    } else {
+                        // can't find the ir, need remove
+                        crate::log!(
+                            trace,
+                            "can't find {:?} in call ret_ir, remove update: {}",
+                            &update.dst,
+                            i
+                        );
+                        to_delete.push(i);
+                    }
+                }
+            }
+        }
+        if !to_delete.is_empty() {
+            // `to_delete` was filled in descending order, so each removal leaves
+            // the remaining (smaller) indices valid.
+            for i in to_delete {
+                let _stmt = self.stmts.remove(i);
+            }
+            self.resort_indices();
+            self.check_ref_use()?;
+        }
+        Ok(())
+    }
+
+    /// Get depth of stub stmt,
+    /// used for provide properties for mutating
+    pub fn get_stub_stmt_depth(&self) -> eyre::Result {
+        let index = self.get_stub_stmt_index().context("can't find stub")?;
+        let depth = get_stmt_depth(index.get(), self);
+        Ok(depth)
+    }
+
+    /// If the opaque pointer has been inited in the program
+    ///
+    /// Returns the index of the initializing call taken from the first
+    /// `Initialized` assertion for `opaque_ptr` found at or after it.
+    pub fn has_been_inited(&self, opaque_ptr: &StmtIndex) -> Option {
+        for is in self.stmts[opaque_ptr.get()..].iter() {
+            if let FuzzStmt::Assert(assert) = &is.stmt {
+                if let AssertRule::Initialized { stmt, call } = &assert.rule {
+                    if stmt.get() == opaque_ptr.get() {
+                        return Some(call.get());
+                    }
+                }
+            }
+        }
+        None
+    }
+
+    /// check if call is used to initialize the statement
+    pub fn check_call_initialize_stmt(&self, stmt: usize, call: usize) -> bool {
+        if let FuzzStmt::Call(cur_call) = &self.stmts[call].stmt {
+            if cur_call.is_related_call_for_ptee(stmt, self) {
+                return true;
+            }
+        }
+        false
+    }
+
+    /// If the location is a null pointer
+    ///
+    /// Only load statements are inspected; any other statement yields false.
+    /// Panics when `loc` carries no statement index.
+    pub fn is_loc_null(&self, loc: &Location) -> bool {
+        let index = loc.get_index().unwrap().get();
+        if let FuzzStmt::Load(load) = &self.stmts[index].stmt {
+            match load.state.get_child_by_fields(loc.fields.as_slice()) {
+                Ok(state) => {
+                    // Prefer the first child's null flag when the state has children.
+                    if let Some(c) = state.children.first() {
+                        return c.is_null();
+                    }
+                    return state.is_null();
+                }
+                Err(crate::HopperError::UnionErr) => {
+                    return false;
+                }
+                Err(he) => unreachable!(
+                    "unable to handle `{he:?}` in is_loc_null, p: \n{self}\nloc:{loc:?}",
+                ),
+            }
+        }
+        false
+    }
+
+    /// Check if it is file or not
+    ///
+    /// True when `loc` is a file statement itself, or a pointer inside a load
+    /// statement whose pointee is a file statement.
+    pub fn is_file_loc(&self, loc: &Location) -> bool {
+        if loc.is_null() {
+            return false;
+        }
+        let dst_stmt = &self.stmts[loc.get_index().unwrap().get()].stmt;
+        if loc.fields.is_empty() && matches!(dst_stmt, FuzzStmt::File(_)) {
+            return true;
+        }
+        if let FuzzStmt::Load(load) = dst_stmt {
+            match load.state.get_child_by_fields(loc.fields.as_slice()) {
+                Ok(state) => {
+                    if let Some(ps) = state.pointer.as_ref() {
+                        if let Some(stmt_index) = ps.pointer_location.stmt_index.as_ref() {
+                            return matches!(&self.stmts[stmt_index.get()].stmt, FuzzStmt::File(_));
+                        }
+                    }
+                }
+                Err(crate::HopperError::UnionErr) => {
+                    return false;
+                }
+                Err(he) => unreachable!(
+                    "unable to handle `{he:?}` in is_file_loc, p: \n{self}\nloc:{loc:?}",
+                ),
+            }
+        }
+        false
+    }
+}
+
+/// get depth of current statement
+///
+/// Scans the statements from `index` onwards while tracking a current
+/// statement `cur`. A call taking `cur` as an argument, or a load whose state
+/// refers to `cur`, adds one to the depth and becomes the new `cur`; file and
+/// update statements only move `cur` along.
+fn get_stmt_depth(index: usize, program: &FuzzProgram) -> usize {
+    let mut depth = 0;
+    let mut cur = index;
+    for is in &program.stmts[index..] {
+        match &is.stmt {
+            FuzzStmt::Call(call) => {
+                if call.args.iter().any(|i| i.get() == cur) {
+                    depth += 1;
+                    cur = is.index.get();
+                }
+            }
+            FuzzStmt::File(file) => {
+                if let Some(i) = &file.buf_stmt {
+                    if i.get() == cur {
+                        cur = is.index.get();
+                    }
+                }
+            }
+            FuzzStmt::Update(update) => {
+                if update.src.get() == cur {
+                    cur = update.dst.get_index().unwrap().get();
+                }
+            }
+            FuzzStmt::Load(load) => {
+                if load
+                    .state
+                    .find_any_stmt_in_state_with(|ptee| ptee.get() == cur)
+                {
+                    depth += 1;
+                    cur = is.index.get();
+                }
+            }
+            _ => {}
+        }
+    }
+    depth
+}
+
+#[test]
+fn test_program_detph() {
+    // Program: load a value, load a pointer to it, pass the pointer to a call.
+    let mut p = FuzzProgram::default();
+    let load_index = p.append_stmt(LoadStmt::new(
+        Box::new(0_u8),
+        LoadStmt::new_state("val", ""),
+    ));
+    let mut ptr_state = LoadStmt::new_state("ptr", "");
+    let ptr = crate::FuzzMutPointer::::loc_pointer(&mut ptr_state, Location::stmt(load_index));
+    let ptr_index = p.append_stmt(LoadStmt::new(Box::new(ptr), ptr_state));
+    let fg = global_gadgets::get_instance()
+        .get_func_gadget("func_add")
+        .unwrap()
+        .clone();
+    let mut call = CallStmt::new("call".to_string(), "func_add".to_string(), fg);
+    call.set_arg(0, ptr_index);
+    let _call_index = p.append_stmt(call);
+    assert_eq!(get_stmt_depth(0, &p), 2);
+    assert_eq!(get_stmt_depth(1, &p), 1);
+    assert_eq!(get_stmt_depth(2, &p), 0);
+}
diff --git a/hopper-core/src/fuzz/constraints/constraint.rs b/hopper-core/src/fuzz/constraints/constraint.rs
new file mode 100644
index 0000000..69c6142
--- /dev/null
+++ b/hopper-core/src/fuzz/constraints/constraint.rs
@@ -0,0 +1,531 @@
+use super::*;
+use hopper_derive::{EnumKind,
Serde};
+
+use crate::{config, runtime::*, utils, EnumKind};
+
+/// A single constraint that can be attached to an argument or to one of its fields.
+#[derive(Debug, Clone, Serde, PartialEq, Eq, EnumKind)]
+pub enum Constraint {
+    NonNull,
+    SetNull,
+    NeedInit,
+    File { read: bool, is_fd: bool },
+    SetVal { val: IrEntry },
+    Range { min: IrEntry, max: IrEntry },
+    CastFrom { cast_type: String },
+    ArrayLength { len: IrEntry },
+    RetFrom { ret_f: String },
+    UseUnionMember { member: String },
+    InitWith { f_name: String, arg_pos: usize },
+    LengthFactor { coef: u64 },
+    OpaqueType,
+    Context { context: CallContext },
+    NonZero,
+    None,
+}
+
+/// Function constraint
+#[derive(Debug, Default, Clone, Serde)]
+pub struct FuncConstraint {
+    /// can succeed with refined mutation
+    pub can_succeed: bool,
+    /// fail after insert it
+    pub insert_fail: bool,
+    /// Internal function
+    pub internal: bool,
+    /// constraints of argument
+    pub arg_constraints: Vec,
+    /// group for related args
+    pub arg_group: Vec,
+    /// contexts
+    pub contexts: Vec,
+    // the effect caused by the function
+    pub role: role::FuncRole,
+    // Return's type
+    pub ret: ret::RetType,
+}
+
+/// Type constraint
+#[derive(Debug, Default, Clone, Serde)]
+pub struct TypeConstraint {
+    pub list: Vec,
+}
+
+/// One constraint together with the field path (`key`) it applies to.
+#[derive(Debug, Clone, Serde)]
+pub struct TypeConstraintItem {
+    pub key: LocFields,
+    pub constraint: Constraint,
+}
+
+/// Signature of a constraint
+#[derive(Debug, Clone)]
+pub struct ConstraintSig {
+    pub f_name: String,
+    pub arg_pos: usize,
+    pub fields: LocFields,
+    pub constraint: Constraint,
+}
+
+impl Constraint {
+    /// True for `SetNull`, `NonNull` and `CastFrom`.
+    pub fn should_refine_first(&self) -> bool {
+        matches!(self, Self::SetNull | Self::NonNull | Self::CastFrom { .. })
+    }
+
+    /// True for `SetNull`, `SetVal`, `File` and `RetFrom`.
+    pub fn should_not_mutate(&self) -> bool {
+        matches!(
+            self,
+            Self::SetNull
+                | Self::SetVal { val: _ }
+                | Self::File { read: _, is_fd: _ }
+                | Self::RetFrom { ret_f: _ }
+        )
+    }
+
+    /// Range constraint from 0 to `config::MAX_RANGE_NUM`.
+    pub fn resource_related() -> Self {
+        Self::Range {
+            min: 0_u64.into(),
+            max: config::MAX_RANGE_NUM.into(),
+        }
+    }
+
+    /// Range constraint from 0 to `entry`.
+    pub fn less_than(entry: IrEntry) -> Self {
+        Self::Range {
+            min: 0_u64.into(),
+            max: entry,
+        }
+    }
+
+    /// Constraint that fixes the value to `entry`.
+    pub fn should_be(entry: IrEntry) -> Self {
+        Self::SetVal { val: entry }
+    }
+
+    /// True when this is a `CastFrom` whose type equals the literal compared below.
+    pub fn is_void_cast(&self) -> bool {
+        if let Constraint::CastFrom { cast_type } = self {
+            if cast_type == "hopper::runtime::FuzzMutPointer" {
+                return true;
+            }
+        }
+        false
+    }
+
+    /// For a `SetVal` value or a `Range` maximum that is an `IrEntry::Length`,
+    /// return its argument position (0 when unset) and field path.
+    pub fn get_length_loc(&self) -> Option<(usize, &LocFields)> {
+        match self {
+            Self::SetVal {
+                val:
+                    IrEntry::Length {
+                        arg_pos,
+                        fields,
+                        is_factor: _,
+                    },
+            } => Some((arg_pos.unwrap_or_default(), fields)),
+            Self::Range {
+                min: _,
+                max:
+                    IrEntry::Length {
+                        arg_pos,
+                        fields,
+                        is_factor: _,
+                    },
+            } => Some((arg_pos.unwrap_or_default(), fields)),
+            _ => None,
+        }
+    }
+
+    /// Halve the maximum of a `Range` whose maximum is a constant; no-op otherwise.
+    pub fn shrink_range(&mut self) {
+        if let Self::Range {
+            min: _,
+            max: IrEntry::Constant(val),
+        } = self
+        {
+            *val /= 2;
+        }
+    }
+}
+
+impl FuncConstraint {
+    /// Init function's constraints
+    ///
+    /// One default `TypeConstraint` and one singleton group are created per
+    /// argument (the last argument type is not counted for variadic functions),
+    /// then return-type and literal-based inference are applied.
+    pub fn init(f_name: &str) -> eyre::Result {
+        let arg_types = global_gadgets::get_instance()
+            .get_func_gadget(f_name)?
+            .arg_types;
+        let len = if utils::is_variadic_function(arg_types) {
+            arg_types.len() - 1
+        } else {
+            arg_types.len()
+        };
+        let internal = f_name.starts_with(FN_POINTER_PREFIX);
+        let mut fc = FuncConstraint {
+            arg_constraints: vec![TypeConstraint::default(); len],
+            arg_group: (0..len).collect(),
+            internal,
+            ..Default::default()
+        };
+        fc.ret.infer(f_name)?;
+        literal::infer_func_by_literal(&mut fc, f_name)?;
+        Ok(fc)
+    }
+
+    /// If it is successful in executing
+    /// (always true when compiled for tests).
+    pub fn is_success(&self) -> bool {
+        #[cfg(test)]
+        return true;
+        #[cfg(not(test))]
+        return self.can_succeed && !self.insert_fail && !self.internal;
+    }
+
+    /// check `f_name` and `arg_pos` is forbidden context or not
+    pub fn is_forbidden_ctx(&self, f_name: &str, arg_pos: Option) -> bool {
+        for ctx in &self.contexts {
+            if ctx.f_name == f_name && ctx.related_arg_pos == arg_pos {
+                return ctx.is_forbidden();
+            }
+        }
+        false
+    }
+
+    /// Set ith-arg 's constraint
+    /// (applies to the argument itself, i.e. an empty field path).
+    pub fn set_arg_constraint(
+        &mut self,
+        f_name: &str,
+        arg_pos: usize,
+        constraint: Constraint,
+    ) -> Option {
+        self.set_constraint(f_name, arg_pos, LocFields::default(), constraint)
+    }
+
+    /// Set ith-arg 's constraint with key
+    ///
+    /// Returns the signature of the constraint when it was newly added and
+    /// `None` when it already existed. Panics if `arg_pos` is out of range.
+    pub fn set_constraint(
+        &mut self,
+        f_name: &str,
+        arg_pos: usize,
+        fields: LocFields,
+        constraint: Constraint,
+    ) -> Option {
+        self.group_related_args(arg_pos, &constraint);
+        if self.arg_constraints[arg_pos].set_constraint(fields.clone(), constraint.clone()) {
+            crate::log!(
+                info,
+                "add constraint on function `{f_name}` 's {arg_pos}-th arg, fields: {}, constraint {constraint:?}",
+                fields.serialize().unwrap()
+            );
+            return Some(ConstraintSig {
+                f_name: f_name.to_string(),
+                arg_pos,
+                fields,
+                constraint,
+            });
+        }
+        crate::log!(trace, "constraint {constraint:?} exists");
+        None
+    }
+
+    /// Check if the function's return can be used for arguments or fields
+    /// we avoid use static and unwriteable pointers to be arguments
+    #[inline]
+    pub fn can_used_as_arg(&self) -> bool {
+        self.can_succeed
+            && !self.insert_fail
+            && self.role.can_used_as_arg()
+            && self.ret.can_used_as_arg()
+    }
+
+    /// Group arguments that related
+    /// (only `SetVal` and `Range` entries are considered).
+    pub fn group_related_args(&mut self, arg_pos: usize, c: &Constraint) {
+        match c {
+            Constraint::SetVal { val } => {
+                self.union_related_args(val, arg_pos);
+            }
+            Constraint::Range { min, max } => {
+                self.union_related_args(min, arg_pos);
+                self.union_related_args(max, arg_pos);
+            }
+            _ => {}
+        }
+    }
+
+    /// When `entry` is a length that refers to another argument, put `cur_i`
+    /// into that argument's group.
+    fn union_related_args(&mut self, entry: &IrEntry, cur_i: usize) {
+        if let IrEntry::Length {
+            arg_pos: Some(arg_pos_inner),
+            fields: _,
+            is_factor: _,
+        } = entry
+        {
+            if *arg_pos_inner != cur_i {
+                // NOTE(review): the group is set to the other argument's index,
+                // not to that argument's own group id, so chains of related
+                // arguments may end up in different groups - confirm intent.
+                self.arg_group[cur_i] = *arg_pos_inner;
+            }
+        }
+    }
+
+    /// Get related arguments
+    /// (all other positions that share the group id of `arg_pos`).
+    pub fn get_related_args(&self, arg_pos: usize) -> Vec {
+        let group = self.arg_group[arg_pos];
+        let mut args = vec![];
+        for (p, i) in self.arg_group.iter().enumerate() {
+            if *i == group && p != arg_pos {
+                args.push(p);
+            }
+        }
+        args
+    }
+
+    pub fn is_file(&self, arg_pos: usize) -> bool {
+        self.arg_constraints[arg_pos]
+            .list
+            .iter()
+            .any(|item| matches!(item.constraint, Constraint::File { ..
})) + } +} + +impl TypeConstraint { + pub fn init(type_name: &str) -> Self { + let mut tc = TypeConstraint::default(); + if let Err(e) = literal::infer_type_by_literal(&mut tc, type_name) { + crate::log!(warn, "infer {type_name} error: {}", e); + } + tc + } + + pub fn set_constraint(&mut self, key: LocFields, constraint: Constraint) -> bool { + // crate::log!(trace, "set constraint: {constraint:?}"); + for item in self.list.iter_mut() { + if item.key != key { + continue; + } + // skip the same constraint + if item.constraint == constraint { + return false; + } + // merge constraint + // choose the minimal bound + match &constraint { + Constraint::NonZero => { + if let Constraint::Range { min, max: _ } = &mut item.constraint { + if let IrEntry::Constant(val) = min { + if *val == 0 { + *min = 1.into(); + return true; + } + } + return false; + } + } + Constraint::Range { min, max } => { + // if let Constraint::SetVal { val: _ } = &item.constraint { + // } + if matches!(item.constraint, Constraint::NonZero) { + if let IrEntry::Constant(val) = min { + if *val == 0 { + item.constraint = Constraint::Range { + min: 1.into(), + max: max.clone(), + }; + return true; + } + } + return false; + } + } + _ => {} + } + // update constraint + if item.constraint.kind() == constraint.kind() { + match constraint { + Constraint::ArrayLength { len: new_len } => { + if let Constraint::ArrayLength { len } = &mut item.constraint { + if let IrEntry::Constant(len_val) = len { + if let IrEntry::Constant(new_len_val) = new_len { + if new_len_val > *len_val { + *len_val = new_len_val; + return true; + } + } + } + if new_len.is_length() { + *len = new_len; + return true; + } + } + return false; + } + Constraint::LengthFactor { coef: new_coef } => { + // update new one + if let Constraint::LengthFactor { coef } = &mut item.constraint { + if new_coef > *coef { + *coef = new_coef; + return true; + } + } + return false; + } + Constraint::File { + read: new_read, + is_fd: _, + } => { + if let 
Constraint::File { read, is_fd: _ } = &mut item.constraint { + if new_read && !*read { + *read = new_read; + return true; + } + } + return false; + } + Constraint::SetVal { ref val } => { + if let Constraint::SetVal { val: val2 } = &item.constraint { + if val.equal(val2) { + return false; + } + } + continue; + } + Constraint::Range { ref min, ref max } => { + if let Constraint::Range { + min: min2, + max: max2, + } = &item.constraint + { + if min.equal(min2) && max.equal(max2) { + return false; + } + if min.less(min2) && max.greater(max2) { + return false; + } + } + continue; + } + _ => {} + } + // exists + return false; + } + } + if constraint.should_not_mutate() { + self.list.retain(|item| item.key != key); + } + if constraint.should_refine_first() { + let field_len = key.len(); + let mut off = 0; + for c in self.list.iter() { + if !c.constraint.should_refine_first() || field_len <= c.key.len() { + break; + } + off += 1; + } + self.list + .insert(off, TypeConstraintItem { key, constraint }); + } else { + self.list.push(TypeConstraintItem { key, constraint }); + } + true + } +} + +impl Serialize for ConstraintSig { + fn serialize(&self) -> eyre::Result { + if self.fields.is_empty() { + return Ok(format!( + "{}[${}] = {}", + self.f_name, + self.arg_pos, + self.constraint.serialize()? + )); + } + Ok(format!( + "{}[${}][{}] = {}", + self.f_name, + self.arg_pos, + self.fields.serialize()?, + self.constraint.serialize()? 
+ )) + } +} + +impl Deserialize for ConstraintSig { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + let f_name = de.parse_string()?; + de.eat_token("[$")?; + let arg_pos = de.parse_number()?; + de.eat_token("]")?; + let fields = if de.strip_token("[") { + let loc = LocFields::deserialize(de)?; + de.eat_token("]")?; + loc + } else { + LocFields::default() + }; + de.eat_token("= ")?; + let constraint = constraint::Deserialize::deserialize(de)?; + Ok(Self { + f_name, + arg_pos, + fields, + constraint, + }) + } +} + +#[test] +fn test_constraint_sig_serde() -> eyre::Result<()> { + let constraint_sig = ConstraintSig { + f_name: "test_constraint_sig_serde".to_owned(), + arg_pos: 2, + fields: LocFields::default(), + constraint: Constraint::SetVal { + val: IrEntry::Constant(64), + }, + }; + let ser_str = constraint_sig.serialize()?; + println!("{ser_str}"); + let mut de = Deserializer::new(&ser_str, None); + let de_sig = ConstraintSig::deserialize(&mut de)?; + assert_eq!(de_sig.f_name, "test_constraint_sig_serde"); + assert_eq!(de_sig.arg_pos, 2); + assert_eq!(de_sig.fields, LocFields::default()); + assert_eq!( + de_sig.constraint, + Constraint::SetVal { + val: IrEntry::Constant(64) + } + ); + Ok(()) +} + +#[test] +fn test_parse_sig() -> eyre::Result<()> { + let sig = "png_malloc_default[$1] = Range${ min: Constant${ f0: 0, }, max: Constant${ f0: 1024, }, }, "; + let mut de = Deserializer::new(sig, None); + let de_sig = ConstraintSig::deserialize(&mut de)?; + println!("{de_sig:?}"); + Ok(()) +} + +#[test] +fn test_insert_constraint() { + let mut tc = TypeConstraint::default(); + let key = LocFields::new(vec![FieldKey::Index(0)]); + let length_constraint = Constraint::SetVal { + val: IrEntry::Length { + arg_pos: Some(0), + fields: LocFields::default(), + is_factor: false, + }, + }; + let suc = tc.set_constraint(key.clone(), length_constraint.clone()); + assert!(suc); + let suc = tc.set_constraint(key.clone(), length_constraint.clone()); + assert!(!suc); + let 
range_constraint = Constraint::Range { + min: 0.into(), + max: 100.into(), + }; + let suc = tc.set_constraint(key.clone(), range_constraint.clone()); + assert!(suc); + let small_range_constraint = Constraint::Range { + min: 0.into(), + max: 50.into(), + }; + let suc = tc.set_constraint(key.clone(), small_range_constraint.clone()); + assert!(suc); + let suc = tc.set_constraint(key.clone(), range_constraint.clone()); + assert!(!suc); + + +} diff --git a/hopper-core/src/fuzz/constraints/context.rs b/hopper-core/src/fuzz/constraints/context.rs new file mode 100644 index 0000000..ae4ecea --- /dev/null +++ b/hopper-core/src/fuzz/constraints/context.rs @@ -0,0 +1,55 @@ +use hopper_derive::Serde; + +#[derive(Debug, Clone, Serde, PartialEq, Eq)] +pub struct CallContext { + pub f_name: String, + pub related_arg_pos: Option, + pub kind: ContextKind +} + +#[derive(Debug, Clone, Serde, PartialEq, Eq)] +pub enum ContextKind { + Required, + Prefered, + Forbidden +} + +impl CallContext { + pub fn from_rule(de: &mut crate::Deserializer) -> eyre::Result{ + let related_arg_pos = if de.strip_token("-") || de.strip_token("*") { + None + } else { + de.eat_token("$")?; + let arg_i: usize = de.parse_number()?; + Some(arg_i) + }; + de.eat_token("]")?; + de.eat_token("<-")?; + let mut kind = ContextKind::Required; + let mut f_name = de.buf.trim().trim_end_matches(';'); + if let Some(f) = f_name.strip_prefix('!') { + f_name = f.trim(); + kind = ContextKind::Forbidden; + } else if let Some(f) = f_name.strip_suffix('?') { + f_name = f.trim(); + kind = ContextKind::Prefered; + } + Ok(Self { + f_name: f_name.to_string(), + related_arg_pos, + kind, + }) + } + + pub fn is_required(&self) -> bool { + matches!(self.kind, ContextKind::Required) + } + + pub fn is_preferred(&self) -> bool { + matches!(self.kind, ContextKind::Prefered) + } + + pub fn is_forbidden(&self) -> bool { + matches!(self.kind, ContextKind::Forbidden) + } +} diff --git a/hopper-core/src/fuzz/constraints/entry.rs 
b/hopper-core/src/fuzz/constraints/entry.rs new file mode 100644 index 0000000..ec41fd1 --- /dev/null +++ b/hopper-core/src/fuzz/constraints/entry.rs @@ -0,0 +1,251 @@ +use eyre::ContextCompat; +use hopper_derive::Serde; + +use crate::{FieldKey, LocFields}; + +#[derive(Debug, Clone, Serde, PartialEq, Eq)] +pub enum IrEntry { + Min(i32), + Max(i32), + Constant(u64), + String(String), + Length { + arg_pos: Option, + fields: LocFields, + is_factor: bool, + }, + Location { + arg_pos: Option, + fields: LocFields, + }, +} + +impl IrEntry { + pub fn arg_length(arg_pos: usize) -> IrEntry { + Self::Length { + arg_pos: Some(arg_pos), + fields: LocFields::default(), + is_factor: false, + } + } + + pub fn field_length(field_key: FieldKey) -> IrEntry { + Self::Length { + arg_pos: None, + fields: LocFields::new(vec![field_key]), + is_factor: false, + } + } + + pub fn is_fixed(&self) -> bool { + matches!(self, Self::Constant(_) | Self::String(_)) + } + + pub fn is_factor(&self) -> bool { + if let Self::Length { + arg_pos: _, + fields: _, + is_factor, + } = self + { + *is_factor + } else { + false + } + } + + pub fn is_length(&self) -> bool { + matches!( + self, + Self::Length { + arg_pos: _, + fields: _, + is_factor: _ + } + ) + } + + pub fn get_location_from_any(&self) -> Option<(&Option, &LocFields)> { + match self { + Self::Location { arg_pos, fields } => Some((arg_pos, fields)), + Self::Length { arg_pos, fields, is_factor: _ } => Some((arg_pos, fields)), + _ => None + } + } + + /// compare without consider `is_factor` + pub fn equal(&self, other: &Self) -> bool { + if let Self::Length { + arg_pos: arg_pos1, + fields: fields1, + is_factor: _, + } = self + { + if let Self::Length { + arg_pos: arg_pos2, + fields: fields2, + is_factor: _, + } = other + { + return arg_pos1 == arg_pos2 && fields1 == fields2; + } + } + self == other + } + + /// If `self` is less or equal than `other` + /// we assume the length is not less than constant + pub fn less(&self, other: &Self) -> bool { + 
if let Self::Constant(val) = self { + if let Self::Constant(val2) = other { + return val <= val2; + } + } + other.is_length() + } + + /// If `self` is greater or equal than `other` + /// we assume the length is not greater than constant + pub fn greater(&self, other: &Self) -> bool { + if let Self::Constant(val) = self { + if let Self::Constant(val2) = other { + return val >= val2; + } + } + other.is_length() + } + + pub fn from_rule(de: &mut crate::Deserializer) -> eyre::Result { + if de.strip_token("MIN") { + de.trim_start(); + let mut offset = 0; + if de.strip_token("+") { + offset = de.parse_number()?; + } + return Ok(Self::Min(offset)); + } else if de.strip_token("MAX") { + de.trim_start(); + let mut offset = 0; + if de.strip_token("-") { + offset = de.parse_number()?; + } + return Ok(Self::Max(offset)); + } + let c = de.peek_char().context("has char")?; + match c { + '$' => { + de.eat_token("$")?; + let is_len = de.strip_token("len("); + let mut arg_pos = None; + let mut fields = LocFields::default(); + if de.strip_token("$") || de.peek_char().filter(|c| c.is_ascii_digit()).is_some() { + arg_pos = Some(de.parse_number()?); + } + if arg_pos.is_none() && de.strip_token("[") && de.strip_token("$") { + arg_pos = Some(de.parse_number()?); + de.eat_token("]")?; + if de.strip_token("[") { + fields = LocFields::from_rule(de.next_token_until("]")?)?; + } + } + if arg_pos.is_none() && fields.is_empty() && !de.buf.is_empty() { + fields = LocFields::from_rule(de.buf)?; + } + if is_len { + Ok(Self::Length { + arg_pos, + fields, + is_factor: false, + }) + } else { + Ok(Self::Location { arg_pos, fields }) + } + } + '[' => { + de.eat_token("[")?; + de.eat_token("$")?; + let arg_pos = Some(de.parse_number()?); + de.eat_token("]")?; + let fields = if de.strip_token("[") { + LocFields::from_rule(de.next_token_until("]")?)? 
+ } else { + LocFields::default() + }; + Ok(Self::Location { arg_pos, fields }) + } + '"' => { + de.eat_token("\"")?; + let token = de.next_token_until("\"")?; + Ok(Self::String(token.to_string())) + } + '0'..='9' | '-' => { + let val = de.parse_number()?; + Ok(Self::Constant(val)) + } + _ => { + eyre::bail!("wrong rule fo ir entry: {}", de.buf); + } + } + } +} + +impl From for IrEntry { + fn from(i: u64) -> Self { + Self::Constant(i) + } +} + +impl From for IrEntry { + fn from(i: usize) -> Self { + Self::Constant(i as u64) + } +} + +impl From for IrEntry { + fn from(i: i32) -> Self { + Self::Constant(i as u64) + } +} + +#[test] +fn test_parse_entry() { + let ret = IrEntry::from_rule(&mut crate::Deserializer::new("$0", None)).unwrap(); + assert_eq!( + ret, + IrEntry::Location { + arg_pos: Some(0), + fields: LocFields::default() + } + ); + let ret = IrEntry::from_rule(&mut crate::Deserializer::new("[$1][&.$0.name]", None)).unwrap(); + assert_eq!( + ret, + IrEntry::Location { + arg_pos: Some(1), + fields: LocFields { + list: vec![FieldKey::Pointer, 0.into(), "name".into()] + } + } + ); + let ret = IrEntry::from_rule(&mut crate::Deserializer::new("$len($1)", None)).unwrap(); + assert_eq!( + ret, + IrEntry::Length { + arg_pos: Some(1), + fields: LocFields::default(), + is_factor: false + } + ); + let ret = + IrEntry::from_rule(&mut crate::Deserializer::new("$len([$1][&.$0.name])", None)).unwrap(); + assert_eq!( + ret, + IrEntry::Length { + arg_pos: Some(1), + fields: LocFields { + list: vec![FieldKey::Pointer, 0.into(), "name".into()] + }, + is_factor: false + } + ); +} diff --git a/hopper-core/src/fuzz/constraints/internal.rule b/hopper-core/src/fuzz/constraints/internal.rule new file mode 100644 index 0000000..a3f2596 --- /dev/null +++ b/hopper-core/src/fuzz/constraints/internal.rule @@ -0,0 +1,6 @@ +// used for generate FILE* +func_internal fopen +func fopen[$0] = $read_file; +func fopen[$1] = $non_null; +func fopen[$1] = "rb+"; +type _IO_FILE = $opaque \ No newline 
at end of file diff --git a/hopper-core/src/fuzz/constraints/literal.rs b/hopper-core/src/fuzz/constraints/literal.rs new file mode 100644 index 0000000..7108f6a --- /dev/null +++ b/hopper-core/src/fuzz/constraints/literal.rs @@ -0,0 +1,227 @@ +//! Literal meanings for function names, argument names .. +//! +//! Literal inference is used for extracting information for debugging and testing, +//! we do not use literal to infer constraints by default now. + +use super::*; +use crate::{runtime::*, utils}; + +// Enable infer constraints by literal +pub const ENABLE_LITERAL: bool = false || cfg!(test); + +fn is_len_ident(ident: &str, prev_ident_holder: Option<&str>) -> bool { + let related_ident = if let Some(prev_ident) = prev_ident_holder { + ident.starts_with(&format!("n{prev_ident}")) + || ident.starts_with(&format!("num{prev_ident}")) + } else { + false + }; + related_ident + || ident.ends_with("Len") + || ident.ends_with("len") + || ident.ends_with("length") + || ident.ends_with("Length") + || ident.ends_with("count") + || ident.ends_with("Count") + || ident.ends_with("size") + || ident.ends_with("Size") + || ident.ends_with("_sz") + || ident.starts_with("num_") +} + +fn is_index_ident(ident: &str) -> bool { + ident.ends_with("idx") + || ident.ends_with("index") + || ident.ends_with("Index") + || ident.ends_with("which") + || ident.ends_with("offset") + || ident.ends_with("pos") +} + +fn is_file_name(ident: &str) -> bool { + ident.ends_with("file_name") + || ident.ends_with("filename") + || ident.ends_with("FileName") + || ident.ends_with("Filename") + || ident.ends_with("file") + || ident.ends_with("f_name") + || ident.ends_with("fname") +} + +fn function_may_save_file(f_name: &str) -> bool { + f_name.contains("Write") + || f_name.contains("write") + || f_name.contains("Save") + || f_name.contains("save") +} + +pub fn is_init_function_by_name(f_name: &str) -> bool { + f_name.contains("init") + || f_name.contains("Init") + || f_name.contains("create") + || 
f_name.contains("Create") + || f_name.contains("open") + || f_name.contains("Open") +} + +fn is_dealloc_function(f_name: &str) -> bool { + f_name.contains("close") + || f_name.contains("Close") + || f_name.contains("free") + || f_name.contains("Free") + || f_name.contains("delete") + || f_name.contains("Delete") + || f_name.contains("destroy") + || f_name.contains("Destroy") +} + +pub fn is_backtracing_field(field_name: &str) -> bool { + field_name.contains("parent") || field_name == "pre" || field_name.starts_with("prev") +} + +pub fn infer_func_by_literal(fc: &mut FuncConstraint, f_name: &str) -> eyre::Result<()> { + #[cfg(not(test))] + if !crate::config::ENABLE_REFINE || !ENABLE_LITERAL { + return Ok(()); + } + let fg = global_gadgets::get_instance().get_func_gadget(f_name)?; + if is_dealloc_function(fg.f_name) { + fc.role.free_arg = true; + } + if is_init_function_by_name(fg.f_name) { + fc.role.init_arg = true; + } + let mut prev_ptr = false; + let mut prev_ident = None; + for (i, &ident) in fg.arg_idents.iter().enumerate() { + let arg_type_name = fg.arg_types[i]; + if is_file_name(ident) { + let (is_buf, _) = utils::is_c_str_type(arg_type_name); + if is_buf { + let may_read = !function_may_save_file(f_name); + let c = Constraint::File { read: may_read, is_fd: false }; + let ret = fc.set_arg_constraint(f_name, i, c); + if let Some(c) = &ret { + log_new_constraint(&format!("{c:?}, literal infer func arg is file")); + } + } + } + + if utils::is_primitive_type(arg_type_name) { + let is_len = is_len_ident(ident, prev_ident); + let is_index = is_index_ident(ident); + if is_len || is_index { + let c = if prev_ptr { + if is_len { + Constraint::should_be(IrEntry::arg_length(i - 1)) + } else { + Constraint::less_than(IrEntry::arg_length(i - 1)) + } + } else { + Constraint::resource_related() + }; + let ret = fc.set_arg_constraint(f_name, i, c); + if let Some(c) = &ret { + log_new_constraint(&format!("{c:?}, literal infer func arg is len/index")); + } + } + } + prev_ptr 
= utils::is_pointer_type(arg_type_name); + prev_ident = Some(ident); + } + Ok(()) +} + +pub fn infer_type_by_literal(tc: &mut TypeConstraint, type_name: &str) -> eyre::Result<()> { + #[cfg(not(test))] + if !crate::config::ENABLE_REFINE || !ENABLE_LITERAL { + return Ok(()); + } + if !utils::is_custom_type(type_name) || utils::is_opaque_type(type_name) { + return Ok(()); + } + let type_name = utils::get_static_ty(type_name); + crate::log!(info, "literal infer type `{}`..", type_name); + let mut prev_ptr: Option = None; + let mut prev_ident: Option<&str> = None; + let mut state = ObjectState::root("infer", type_name); + let builder = global_gadgets::get_instance().get_object_builder(type_name)?; + let value = builder.generate_new(&mut state)?; + let mut layout = value.get_layout(true); + let num_fields = layout.fields.len(); + if num_fields > 1 { + // to infer fields that use length first, e.g. { len, buf } + if num_fields == 2 { + if let Some(sub_layout) = layout.fields.first() { + if let FieldKey::Field(_) = &sub_layout.key { + if utils::is_index_or_length_number(sub_layout.type_name) { + layout.fields.reverse(); + } + } + } + } + for sub_layout in layout.fields.iter() { + if let FieldKey::Field(f) = &sub_layout.key { + if utils::is_index_or_length_number(sub_layout.type_name) { + let is_len = is_len_ident(f, prev_ident); + let is_index = is_index_ident(f); + if is_len || is_index { + let index_field = &sub_layout.key; + let c = if let Some(ptr_f) = &prev_ptr { + let ptr_field = ptr_f.clone(); + if is_len { + Constraint::should_be(IrEntry::field_length(ptr_field)) + } else { + Constraint::less_than(IrEntry::field_length(ptr_field)) + } + } else { + Constraint::resource_related() + }; + let mut fields = LocFields::default(); + fields.push(index_field.clone()); + let comment = format!("type: {type_name}, fields: {fields:?}, c: {c:?}, literal infer type is len/index"); + if tc.set_constraint(fields, c) { + log_new_constraint(&comment); + } + } + } + if 
utils::is_pointer_type(sub_layout.type_name) { + prev_ptr = Some(sub_layout.key.clone()); + prev_ident = Some(f); + } + } + } + } + Ok(()) +} + +#[test] +fn test_literal() { + println!("gadgets: {:?}", global_gadgets::get_instance()); + let target = "test_arr"; + let fg = global_gadgets::get_instance().get_func_gadget(target); + println!("fg: {fg:?}"); + CONSTRAINTS.with(|c| { + let mut c = c.borrow_mut(); + c.init_func_constraint(target).unwrap(); + let f_constraint = c.get_func_constraint(target).unwrap(); + assert_eq!( + f_constraint.arg_constraints[1].list[0].constraint, + Constraint::should_be(IrEntry::arg_length(0)) + ); + }); + + let type_name = "hopper::test::ArrayWrap"; + CONSTRAINTS.with(|c| { + let mut c = c.borrow_mut(); + let tc = c.get_type_constraint_mut(type_name); + assert_eq!( + tc.list[0].key, + LocFields::new(vec![FieldKey::Field("len".to_string())]) + ); + assert_eq!( + tc.list[0].constraint, + Constraint::should_be(IrEntry::field_length(FieldKey::Field("p".to_string()))) + ); + }); +} diff --git a/hopper-core/src/fuzz/constraints/mod.rs b/hopper-core/src/fuzz/constraints/mod.rs new file mode 100644 index 0000000..5a89554 --- /dev/null +++ b/hopper-core/src/fuzz/constraints/mod.rs @@ -0,0 +1,366 @@ +mod constraint; +mod context; +mod entry; +pub mod literal; +mod parse; +mod ret; +mod role; + +pub use constraint::*; +pub use context::*; +pub use entry::*; +pub use role::*; + +use std::cell::RefCell; +use std::collections::HashMap; + +use crate::{config, global_gadgets, log, LocFields, FN_POINTER_PREFIX}; + +pub const UNION_ROOT: &str = "__UNION"; + +/// Constraints for mutation +#[derive(Debug, Default)] +pub struct Constraints { + /// constraints for functions + pub func_constraints: HashMap, + /// Constraints for types (Generic) + pub type_constraints: HashMap, +} + +thread_local! 
{ + pub static CONSTRAINTS: RefCell = RefCell::new(Constraints::default()); +} + +pub fn init_constraints() -> eyre::Result { + log!(info, "init constraint..."); + // load configuration file if it exists + let constraint_file = config::constraint_file_path(); + CONSTRAINTS.with(|constraints| { + let mut c = constraints.borrow_mut(); + if constraint_file.exists() { + c.read_from_config(&constraint_file)?; + c.read_from_custom()?; + log!(info, "load constraints from config file"); + return Ok(true); + } + c.read_internal_config()?; + // select functions for inference + for (f_name, fg) in global_gadgets::get_instance().functions.iter() { + if config::get_config().match_func(f_name) { + log!(info, "init constraint for {f_name}"); + c.init_func_constraint(f_name)?; + // check arguments + for &arg_type in fg.arg_types { + let mut check_type = arg_type; + if let Some(inner) = crate::get_pointer_inner(arg_type) { + check_type = inner; + } + if crate::is_custom_type(check_type) { + c.init_type_constraint(check_type)?; + } + } + } + } + c.read_from_custom()?; + Ok(false) + }) +} + +pub fn save_constraints_to_file() -> eyre::Result<()> { + CONSTRAINTS.with(|c| c.borrow().save_to_file(&config::constraint_file_path())) +} + +/// Filter function that can be inserted into program +#[inline] +pub fn filter_function(f_name: &str) -> bool { + CONSTRAINTS.with(|c| { + c.borrow() + .get_func_constraint(f_name) + .map_or(false, |c| c.is_success()) + }) +} + +#[inline] +pub fn filter_target_function(f_name: &str) -> bool { + if let Some(target) = config::get_config().func_target { + if f_name != target { + return false; + } + } + CONSTRAINTS.with(|c| { + c.borrow() + .get_func_constraint(f_name) + .map_or(false, |c| c.is_success()) + }) +} + +#[inline] +pub fn filter_fn_pointer(f_name: &str) -> bool { + f_name.starts_with(FN_POINTER_PREFIX) +} + +#[inline] +pub fn filter_function_constraint_with bool>( + f_name: &str, + filter: F, +) -> bool { + CONSTRAINTS.with(|c| 
c.borrow().get_func_constraint(f_name).map_or(false, filter)) +} + +pub fn filter_function_field_constraint_with bool>( + f_name: &str, + arg_pos: usize, + fields: &LocFields, + mut filter: F, +) -> bool { + filter_function_constraint_with(f_name, |fc| { + fc.arg_constraints[arg_pos] + .list + .iter() + .any(|tc| &tc.key == fields && filter(&tc.constraint)) + }) +} + +pub fn inspect_function_constraint_with eyre::Result>( + f_name: &str, + mut callback: F, +) -> eyre::Result { + CONSTRAINTS.with(|c| { + if let Some(fc) = c.borrow().get_func_constraint(f_name) { + callback(fc) + } else { + log::trace!("fail to find function `{f_name}` in constraint!"); + let fc = FuncConstraint::init(f_name)?; + callback(&fc) + } + }) +} + +pub fn inspect_function_constraint_mut_with eyre::Result>( + f_name: &str, + mut callback: F, +) -> eyre::Result { + CONSTRAINTS.with(|c| { + if let Ok(fc) = c.borrow_mut().get_func_constraint_mut(f_name) { + callback(fc) + } else { + log::warn!("fail to find function `{f_name}` in constraint!"); + let mut fc = FuncConstraint::init(f_name)?; + callback(&mut fc) + } + }) +} + +pub fn iterate_type_constraint_with eyre::Result<()>>( + mut callback: F, +) -> eyre::Result<()> { + CONSTRAINTS.with(|c| { + for (ty, tc_entries) in c.borrow().type_constraints.iter() { + callback(ty, tc_entries)?; + } + Ok(()) + }) +} + +pub fn inspect_type_constraint_with eyre::Result<()>>( + type_name: &str, + mut callback: F, +) -> eyre::Result<()> { + CONSTRAINTS.with(|c| { + if let Some(tc) = c.borrow().type_constraints.get(type_name) { + callback(tc)?; + } + Ok(()) + }) +} + +pub fn set_function_constraint_with( + f_name: &str, + callback: F, +) -> eyre::Result<()> { + CONSTRAINTS.with(|c| { + callback(c.borrow_mut().get_func_constraint_mut(f_name)?); + Ok(()) + }) +} + +pub fn add_function_arg_constraint( + f_name: &str, + arg_pos: usize, + constraint: Constraint, + comment: &str, +) -> eyre::Result> { + CONSTRAINTS.with(|c| { + let mut c = c.borrow_mut(); + let fc 
= c.get_func_constraint_mut(f_name)?; + let ret = fc.set_constraint(f_name, arg_pos, LocFields::default(), constraint.clone()); + if let Some(c) = &ret { + log_new_constraint(&format!("{c:?}, comment: {comment}")); + } + Ok(ret) + }) +} + +pub fn add_function_constraint( + f_name: &str, + arg_pos: usize, + fields: LocFields, + constraint: Constraint, + comment: &str, +) -> eyre::Result> { + CONSTRAINTS.with(|c| { + let mut c = c.borrow_mut(); + let fc = c.get_func_constraint_mut(f_name)?; + let ret = fc.set_constraint(f_name, arg_pos, fields.clone(), constraint.clone()); + if let Some(c) = ret.as_ref() { + crate::log!(trace, "{comment}"); + log_new_constraint(&format!("{c:?}, comment: {comment}")); + } + Ok(ret) + }) +} + +#[inline] +pub fn filter_forbidden_context(target: &str, f_name: &str, arg_pos: Option) -> bool { + CONSTRAINTS.with(|c| { + c.borrow() + .get_func_constraint(target) + .map_or(false, |c| c.is_forbidden_ctx(f_name, arg_pos)) + }) +} + +impl Constraints { + /// Init function's constraints + pub fn init_func_constraint(&mut self, f_name: &str) -> eyre::Result<()> { + let fc = FuncConstraint::init(f_name)?; + self.func_constraints.insert(f_name.to_string(), fc); + Ok(()) + } + + /// Get function's constraint + #[inline] + pub fn get_func_constraint(&self, f_name: &str) -> Option<&FuncConstraint> { + self.func_constraints.get(f_name) + } + + /// Get function's mut constraint + #[inline] + pub fn get_func_constraint_mut(&mut self, f_name: &str) -> eyre::Result<&mut FuncConstraint> { + if self.func_constraints.get(f_name).is_none() { + self.init_func_constraint(f_name)?; + } + self.func_constraints + .get_mut(f_name) + .ok_or_else(|| eyre::eyre!("fail to get func's constraint: {f_name}")) + } + + pub fn add_func_context(&mut self, f_name: &str, ctx: CallContext) -> eyre::Result<()> { + if let Ok(fc) = self.get_func_constraint_mut(f_name) { + crate::log!(info, "add context on function `{f_name}`: {ctx:?}"); + fc.contexts.push(ctx); + } + Ok(()) + } + + 
pub fn init_type_constraint(&mut self, type_name: &str) -> eyre::Result<()> { + if self.type_constraints.get(type_name).is_some() { + return Ok(()); + } + let tc = TypeConstraint::init(type_name); + if !tc.list.is_empty() { + self.type_constraints.insert(type_name.to_string(), tc); + } + Ok(()) + } + + #[cfg(test)] + pub fn get_type_constraint_mut(&mut self, type_name: &str) -> &mut TypeConstraint { + self.type_constraints + .entry(type_name.to_string()) + .or_insert_with(|| TypeConstraint::init(type_name)) + } + + fn add_type_constraint(&mut self, type_name: &str, fields: LocFields, constraint: Constraint) { + let tc = self + .type_constraints + .entry(type_name.to_string()) + .or_insert_with(|| TypeConstraint::init(type_name)); + crate::log!( + info, + "add constraint on type `{type_name}`, field {fields:?}, c: {constraint:?}" + ); + tc.set_constraint(fields, constraint); + } +} + +// logging for constraint updates +pub fn log_new_constraint(content: &str) { + #[cfg(test)] + { + print!("log new constraint: {content}"); + } + #[cfg(not(test))] + { + use std::io::prelude::*; + let path = crate::config::output_file_path("misc/constraint.log"); + let mut f = std::fs::OpenOptions::new() + .append(true) + .create(true) + .open(path) + .unwrap(); + writeln!(f, "{content}").unwrap(); + } +} + +#[cfg(all(feature = "testsuite", not(test)))] +pub fn check_contraints_in_testsuite() -> (bool, bool) { + use once_cell::sync::OnceCell; + #[derive(Debug)] + enum ToCheck { + Constraint(ConstraintSig), + Opaque(String), + } + static TO_CHECK: OnceCell> = OnceCell::new(); + let check_list = TO_CHECK.get_or_init(|| { + let mut list = vec![]; + if let Ok(content) = std::env::var("TESTSUITE_INFER") { + for line in content.split(';') { + let line = line.trim_start(); + if line.is_empty() { + continue; + } + crate::log!(trace, "to check: {line:?}"); + let mut de = crate::Deserializer::new(line, None); + if line.starts_with('@') { + for c in parse::parse_func_constraint(&mut de).unwrap() 
{ + list.push(ToCheck::Constraint(c)); + } + } else { + let (type_name, _fields, _is_pointer) = + parse::parse_type_lvalue(&mut de).unwrap(); + de.trim_start(); + if de.strip_token("$opaque") { + list.push(ToCheck::Opaque(type_name)); + } + } + } + crate::log!(info, "to check constraints: {list:?}"); + } + list + }); + if check_list.is_empty() { + return (false, false); + } + if check_list.iter().all(|to_check| match to_check { + ToCheck::Constraint(sig) => { + filter_function_field_constraint_with(&sig.f_name, sig.arg_pos, &sig.fields, |c| { + c == &sig.constraint + }) + } + ToCheck::Opaque(ty_name) => crate::utils::is_opaque_type(ty_name), + }) { + log!(trace, "All constraint is passed"); + return (true, true); + } + (true, false) +} diff --git a/hopper-core/src/fuzz/constraints/parse.rs b/hopper-core/src/fuzz/constraints/parse.rs new file mode 100644 index 0000000..925440b --- /dev/null +++ b/hopper-core/src/fuzz/constraints/parse.rs @@ -0,0 +1,537 @@ +use super::*; +use crate::{config, utils, Deserialize, FieldKey, Serialize}; +use eyre::{Context, ContextCompat}; +use std::fmt::Write as _; +use std::io::{BufRead, Write}; +use std::path::Path; + +impl Constraints { + /// Save constraints to file + pub fn save_to_file(&self, path: &Path) -> eyre::Result<()> { + let mut buf = String::new(); + for (f, constraint) in self.func_constraints.iter() { + let _ = writeln!(buf, "func {} = {}", f, constraint.serialize()?); + } + for (t, constraint) in self.type_constraints.iter() { + let _ = writeln!(buf, "type {} = {}", t, constraint.serialize()?); + } + if !buf.is_empty() { + let mut f = std::fs::File::create(path)?; + crate::log!(info, "write constraints to file : {:?}", path); + f.write_all(buf.as_bytes())?; + f.flush()?; + } + Ok(()) + } + + /// Read constraints from file + pub fn read_from_config(&mut self, path: &Path) -> eyre::Result<()> { + let buf = std::fs::read(path)?; + for line in buf.lines() { + let line = line.context("fail to read config line")?; + let mut 
de = crate::Deserializer::new(&line, None); + let ty = de.next_token_until(" ")?; + match ty { + "func" => { + let f = de.next_token_until(" ")?; + de.eat_token("=")?; + let constraint = FuncConstraint::deserialize(&mut de)?; + self.func_constraints.insert(f.to_string(), constraint); + } + "type" => { + let t = de.next_token_until(" ")?; + de.eat_token("=")?; + let constraint = TypeConstraint::deserialize(&mut de)?; + self.type_constraints.insert(t.to_string(), constraint); + } + _ => {} + } + } + Ok(()) + } + + /// Read internal config + pub fn read_internal_config(&mut self) -> eyre::Result<()> { + let buf = include_bytes!("internal.rule"); + let mut func_internal = vec![]; + for line in buf.lines() { + let line = line.context("fail to read rule line")?; + if let Some(next) = line.strip_prefix("func_internal") { + for f in next.split(',') { + func_internal.push(f.trim().to_string()); + } + } + } + // internal functions: do not need for inference + for (f_name, _f) in global_gadgets::get_instance().functions.iter() { + if func_internal.contains(f_name) { + crate::log!(info, "load internal func: {f_name}"); + self.init_func_constraint(f_name)?; + let fc = self.get_func_constraint_mut(f_name)?; + fc.internal = true; + fc.can_succeed = true; + } + } + self.read_from_custom_buf(buf)?; + Ok(()) + } + + /// Read constraint rules from custom file + pub fn read_from_custom(&mut self) -> eyre::Result<()> { + let default_path = config::output_file_path("misc/custom_rule"); + if let Some(f) = &config::get_config().custom_rules { + let buf = std::fs::read(f).context("the path to custom rules is wrong")?; + self.read_from_custom_buf(&buf)?; + std::fs::copy(f, default_path)?; + } else if default_path.is_file() { + let buf = std::fs::read(default_path).context("the path to custom rules is wrong")?; + self.read_from_custom_buf(&buf)?; + } + Ok(()) + } + + fn read_from_custom_buf(&mut self, buf: &[u8]) -> eyre::Result<()> { + let mut need_build_graph = false; + for line in 
buf.lines() { + let line = line.context("fail to read rule line")?; + crate::log!(trace, "custom line: {line}"); + let mut de = crate::Deserializer::new(&line, None); + if de.peek_char().is_none() { + continue; + } + let ty = de.next_token_until(" ")?.trim(); + match ty { + "alias" => { + let alias_name = de.next_token_until("<-")?.trim(); + let alias_name = utils::get_static_ty(alias_name); + need_build_graph = true; + loop { + de.trim_start(); + let f_name = de.next_token_until("[")?; + let fg = global_gadgets::get_mut_instance() + .functions + .get_mut(f_name) + .with_context(|| format!("function {f_name} is not exited"))?; + de.eat_token("$")?; + if de.strip_token("ret") { + fg.alias_ret_type = Some(alias_name); + } else { + let arg_i: usize = de.parse_number()?; + // pointer + let arg_alias_type = if de.strip_token("*") { + let is_mut = utils::is_mut_pointer_type(fg.arg_types[arg_i]); + if is_mut { + utils::get_static_ty(&utils::mut_pointer_type(alias_name)) + } else { + utils::get_static_ty(&utils::const_pointer_type(alias_name)) + } + } else { + alias_name + }; + fg.alias_arg_types[arg_i] = arg_alias_type; + } + crate::log!(info, "update alias: {:?}", fg); + de.eat_token("]")?; + de.trim_start(); + if !de.strip_token(",") { + break; + } + } + } + "ctx" => { + let f_name = de.next_token_until("[")?; + if self.get_func_constraint(f_name).is_none() { + continue; + } + let ctx = CallContext::from_rule(&mut de)?; + self.add_func_context(f_name, ctx)?; + } + "func" => { + let sig_list = parse_func_constraint(&mut de)?; + for sig in sig_list { + #[cfg(not(test))] + if !crate::config::get_config().match_func(&sig.f_name) + && self.get_func_constraint(&sig.f_name).is_none() + { + continue; + } + if let Ok(fc) = self.get_func_constraint_mut(&sig.f_name) { + fc.set_constraint(&sig.f_name, sig.arg_pos, sig.fields, sig.constraint); + } + } + } + "type" => { + let (type_name, fields, is_pointer) = parse_type_lvalue(&mut de)?; + de.trim_start(); + if 
de.strip_token("$opaque") { + crate::log!(info, "add `{}` as opaque type", type_name); + global_gadgets::get_mut_instance().add_opaque_type(&type_name); + continue; + } + let c: Constraint = Constraint::from_rule(&mut de)?; + de.trim_start(); + if de.strip_token("<-") { + crate::log!(warn, "Invalid custom constraint, <- is not support now"); + } else if is_pointer { + let const_ty = utils::const_pointer_type(&type_name); + self.add_type_constraint(&const_ty, fields.clone(), c.clone()); + let mut_ty = utils::mut_pointer_type(&type_name); + self.add_type_constraint(&mut_ty, fields, c); + } else { + self.add_type_constraint(&type_name, fields, c); + } + } + "assert" => { + let assertion = crate::fuzz::stmt::parse_assertion(&mut de)?; + if self.get_func_constraint(&assertion.f_name).is_none() { + continue; + } + crate::fuzz::stmt::add_assertion(assertion); + } + _ => {} + } + } + if need_build_graph { + global_gadgets::get_mut_instance().build_arg_and_ret_graph(); + } + Ok(()) + } +} + +impl Constraint { + pub fn from_rule(de: &mut crate::Deserializer) -> eyre::Result { + if de.strip_token("$null") { + return Ok(Self::SetNull); + } + if de.strip_token("$non_null") { + return Ok(Self::NonNull); + } + if de.strip_token("$need_init") { + return Ok(Self::NeedInit); + } + if de.strip_token("$non_zero") { + return Ok(Self::NonZero); + } + if de.strip_token("$write_file") { + return Ok(Self::File { + read: false, + is_fd: false, + }); + } + if de.strip_token("$write_fd") { + return Ok(Self::File { + read: false, + is_fd: true, + }); + } + if de.strip_token("$read_file") { + return Ok(Self::File { + read: true, + is_fd: false, + }); + } + if de.strip_token("$read_fd") { + return Ok(Self::File { + read: true, + is_fd: true, + }); + } + if de.strip_token("$ret_from") { + de.eat_token("(")?; + let f_name = de.next_token_until(")")?.to_string(); + return Ok(Self::RetFrom { ret_f: f_name }); + } + if de.strip_token("$cast_from") { + de.eat_token("(")?; + let type_name = 
de.next_token_until(")")?; + let cast_type = if let Some(inner) = type_name.strip_prefix("*mut") { + utils::mut_pointer_type(&convert_type(inner.trim())) + } else if let Some(inner) = type_name.strip_prefix("*const") { + utils::const_pointer_type(&convert_type(inner.trim())) + } else { + convert_type(type_name) + }; + return Ok(Self::CastFrom { cast_type }); + } + if de.strip_token("$range") { + de.eat_token("(")?; + let min = IrEntry::from_rule(de)?; + de.eat_token(",")?; + let max = IrEntry::from_rule(de)?; + de.eat_token(")")?; + return Ok(Self::Range { min, max }); + } + if de.strip_token("$lt") { + de.eat_token("(")?; + let max = IrEntry::from_rule(de)?; + de.eat_token(")")?; + return Ok(Self::Range { min: 0.into(), max }); + } + if de.strip_token("$use") { + de.eat_token("(")?; + let member = de.next_token_until(")")?.to_string(); + return Ok(Self::UseUnionMember { member }); + } + if de.strip_token("$arr_len") { + de.eat_token("(")?; + let ir_member = IrEntry::from_rule(de)?; + de.eat_token(")")?; + if let IrEntry::Constant(len) = ir_member { + return Ok(Self::ArrayLength { + len: len.try_into().unwrap(), + }); + } else { + return Err(eyre::eyre!("Expect a length here.")); + } + } + if de.strip_token("$init_with") { + de.eat_token("(")?; + let f_name = de.next_token_until(",")?.to_string(); + de.trim_start(); + let arg_pos = de.parse_number()?; + de.eat_token(")")?; + return Ok(Self::InitWith { f_name, arg_pos }); + } + let val = IrEntry::from_rule(de)?; + Ok(Self::SetVal { val }) + } +} + +fn convert_type(ty: &str) -> String { + if !utils::is_primitive_type(ty) && !ty.starts_with("hopper") { + // all type in harness should starts with hopper_harness + format!("hopper_harness::{ty}") + } else { + ty.to_string() + } +} + +pub fn parse_type_lvalue(de: &mut crate::Deserializer) -> eyre::Result<(String, LocFields, bool)> { + let mut left = de.next_token_until("=")?.trim(); + let mut is_pointer = false; + if let Some(s) = left.strip_suffix('*') { + left = 
s.trim(); + is_pointer = true; + } + let mut fields = LocFields::default(); + let type_name = if let Some(pos) = left.find('[') { + let (type_name, field) = left.split_at(pos); + let field = field.trim_start_matches('[').trim_end_matches(']'); + let field = if field == "$" { + FieldKey::union_root() + } else { + field.to_string().into() + }; + fields.push(field); + type_name + } else { + left + }; + Ok((convert_type(type_name), fields, is_pointer)) +} + +pub fn parse_func_constraint(de: &mut crate::Deserializer) -> eyre::Result> { + let mut f_name = de.next_token_until("[")?; + if f_name == "@" { + if let Some(target) = crate::config::get_config().func_target { + f_name = target; + } + } + de.eat_token("$")?; + let key_arg_pos: usize = de.parse_number()?; + de.eat_token("]")?; + let key_fields = if de.peek_char() == Some('[') { + LocFields::deserialize(de)? + } else { + LocFields::default() + }; + de.eat_token("=")?; + de.trim_start(); + let mut list = parse_len_factors(de, f_name, key_arg_pos, &key_fields)?; + if list.is_empty() { + let c: Constraint = Constraint::from_rule(de)?; + list.push(ConstraintSig { + f_name: f_name.to_string(), + arg_pos: key_arg_pos, + fields: key_fields, + constraint: c, + }); + } + Ok(list) +} + +fn parse_len_factors( + de: &mut crate::Deserializer, + f_name: &str, + key_arg_pos: usize, + key_fields: &LocFields, +) -> eyre::Result> { + let mut list = vec![]; + if !de.strip_token("$len_factors") { + return Ok(list); + } + de.eat_token("(")?; + for item in de.next_token_until(")")?.split(',') { + let mut sub_de = crate::Deserializer::new(item, None); + let loc: IrEntry = IrEntry::from_rule(&mut sub_de) + .with_context(|| format!("fail to parse loc: `{item}` for len_factors"))?; + if let IrEntry::Constant(val) = loc { + if sub_de.strip_token("..") { + let loc_end = IrEntry::from_rule(&mut sub_de) + .with_context(|| format!("fail to parse loc: `{item}` for len_factors"))?; + if let Some((arg_pos, fields)) = loc_end.get_location_from_any() 
{ + let c = Constraint::Range { + min: val.into(), + max: IrEntry::Length { + arg_pos: Some(key_arg_pos), + fields: key_fields.clone(), + is_factor: true, + }, + }; + list.push(ConstraintSig { + f_name: f_name.to_string(), + arg_pos: arg_pos.unwrap_or_default(), + fields: fields.clone(), + constraint: c, + }); + } + continue; + } + let c = Constraint::LengthFactor { coef: val }; + list.push(ConstraintSig { + f_name: f_name.to_string(), + arg_pos: key_arg_pos, + fields: key_fields.clone(), + constraint: c, + }); + } else if let Some((arg_pos, fields)) = loc.get_location_from_any() { + let c = Constraint::SetVal { + val: IrEntry::Length { + arg_pos: Some(key_arg_pos), + fields: key_fields.clone(), + is_factor: true, + }, + }; + list.push(ConstraintSig { + f_name: f_name.to_string(), + arg_pos: arg_pos.unwrap_or_default(), + fields: fields.clone(), + constraint: c, + }); + } + } + Ok(list) +} + +#[test] +fn test_read_custom_rule() { + println!("gadgets: {:?}", global_gadgets::get_instance()); + let buf = " + func func_add[$0] = 128 + func func_add[$0] = $non_zero + func test_arr[$1] = $len($0) + func test_arr[$0] = $len_factors(2, $0, $1) + func test_arr[$0] = $arr_len(256) + func test_arr[$1] = $range(0, $len($0)) + func func_create[$0] = \"magic\" + func test_mutate_arr[$0] = $read_file + func test_arr[$0] = $ret_from(test_create) + func test_arr[$0] = $cast_from(*mut u32) + type hopper::test::ArrayWrap[len] = $len(p) + // type hopper::test::TestType = $opaque + type hopper::test::TestType* = $init_with(test_arr, 0) + func func_struct[$0][index] = 10 + func func_struct[$0][p] = $null + func func_struct[$0][len] = $len(p) + ctx test_arr[$0] <- test_create + ctx test_arr[$0] <- test_create ? 
+ ctx test_arr[*] <- test_create + "; + let mut constraints = Constraints::default(); + constraints.init_func_constraint("func_add").unwrap(); + constraints.init_func_constraint("test_arr").unwrap(); + constraints.init_func_constraint("func_create").unwrap(); + constraints.init_func_constraint("test_mutate_arr").unwrap(); + constraints.init_func_constraint("func_struct").unwrap(); + constraints.read_from_custom_buf(buf.as_bytes()).unwrap(); + println!("constraints: {constraints:#?}"); +} + +#[test] +fn test_parse_len_factors() { + let len_entry = IrEntry::Length { + arg_pos: Some(3), + fields: LocFields::default(), + is_factor: true, + }; + let factors = parse_len_factors( + &mut crate::Deserializer::new("$len_factors(3, $2)", None), + "fff", + 3, + &LocFields::default(), + ) + .unwrap(); + + println!("factors: {factors:?}"); + assert_eq!(factors[0].constraint, Constraint::LengthFactor { coef: 3 }); + assert_eq!(factors[1].arg_pos, 2); + assert_eq!( + factors[1].constraint, + Constraint::SetVal { + val: len_entry.clone() + } + ); + let factors = parse_len_factors( + &mut crate::Deserializer::new("$len_factors(3, $len($1))", None), + "fff", + 3, + &LocFields::default(), + ) + .unwrap(); + assert_eq!(factors[0].constraint, Constraint::LengthFactor { coef: 3 }); + assert_eq!(factors[1].arg_pos, 1); + assert_eq!( + factors[1].constraint, + Constraint::SetVal { + val: len_entry.clone() + } + ); + let factors = parse_len_factors( + &mut crate::Deserializer::new("$len_factors(3, 0..$len($1))", None), + "fff", + 3, + &LocFields::default(), + ) + .unwrap(); + println!("factors: {factors:?}"); + assert_eq!(factors[0].constraint, Constraint::LengthFactor { coef: 3 }); + assert_eq!(factors[1].arg_pos, 1); + assert_eq!( + factors[1].constraint, + Constraint::Range { + min: IrEntry::Constant(0), + max: len_entry.clone() + } + ); + let factors = parse_len_factors( + &mut crate::Deserializer::new("$len_factors($1, $2)", None), + "fff", + 3, + &LocFields::default(), + ) + 
.unwrap(); + assert_eq!(factors[0].arg_pos, 1); + assert_eq!( + factors[0].constraint, + Constraint::SetVal { + val: len_entry.clone() + } + ); + assert_eq!(factors[1].arg_pos, 2); + assert_eq!( + factors[1].constraint, + Constraint::SetVal { + val: len_entry.clone() + } + ); +} diff --git a/hopper-core/src/fuzz/constraints/ret.rs b/hopper-core/src/fuzz/constraints/ret.rs new file mode 100644 index 0000000..ee1a665 --- /dev/null +++ b/hopper-core/src/fuzz/constraints/ret.rs @@ -0,0 +1,61 @@ +use hopper_derive::Serde; + +use crate::{global_gadgets, utils}; + +#[derive(Debug, Default, Clone, Serde)] +pub struct RetType { + // return is opaque pointer + pub is_opaque: bool, + // return is malloc'ed statically + pub is_static: bool, + // return pointer is unwriteable + pub is_unwriteable: bool, + // return pointer is partial opaque + pub is_partial_opaque: bool, + // the API is both consumer and producer of certain pointers. e.g. A* = f(A*) + pub both_cosumer_and_producer: bool, +} + +impl RetType { + /// Check if the function's return can be used as arguments or fields + #[inline] + pub fn can_used_as_arg(&self) -> bool { + !self.is_unwriteable + && ((self.is_opaque && !self.both_cosumer_and_producer) + || (!self.is_opaque && !self.is_static)) + } + + /// Infer function return's kind: opaque or recursion + pub fn infer(&mut self, f_name: &str) -> eyre::Result<()> { + let fg = global_gadgets::get_instance().get_func_gadget(f_name)?; + if let Some(ret_type) = fg.ret_type { + if let Some(ret_inner) = utils::get_pointer_inner(ret_type) { + if utils::is_opaque_type(ret_inner) { + self.is_opaque = true; + } + let mut alias_ret_inner = ret_inner; + if let Some(alias_ret_type) = fg.alias_ret_type { + if let Some(inner) = utils::get_pointer_inner(alias_ret_type) { + alias_ret_inner = inner; + } + } + for (t, at) in fg.arg_types.iter().zip(fg.alias_arg_types.iter()) { + let mut t = t; + let mut r = ret_inner; + // avoid recursion + if utils::is_void_pointer(t) { + t = at; + r = 
alias_ret_inner; + } + if let Some(arg_inner) = utils::get_pointer_inner(t) { + if arg_inner == r { + self.both_cosumer_and_producer = true; + break; + } + } + } + } + } + Ok(()) + } +} diff --git a/hopper-core/src/fuzz/constraints/role.rs b/hopper-core/src/fuzz/constraints/role.rs new file mode 100644 index 0000000..973ae85 --- /dev/null +++ b/hopper-core/src/fuzz/constraints/role.rs @@ -0,0 +1,27 @@ +use hopper_derive::Serde; + +#[derive(Debug, Default, Clone, Serde)] +pub struct FuncRole { + // the function will init argument + pub init_arg: bool, + // the function will free argument + pub free_arg: bool, +} + +impl FuncRole { + /// Check if the function's return can be used as arguments or fields + #[inline] + pub fn can_used_as_arg(&self) -> bool { + !self.free_arg + } +} + +#[inline] +pub fn filter_init_func(f_name: &str) -> bool { + super::filter_function_constraint_with(f_name, |fc| fc.is_success() && fc.role.init_arg && !fc.role.free_arg) +} + +#[inline] +pub fn filter_free_func(f_name: &str) -> bool { + super::filter_function_constraint_with(f_name, |fc| fc.role.free_arg) +} \ No newline at end of file diff --git a/hopper-core/src/fuzz/det.rs b/hopper-core/src/fuzz/det.rs new file mode 100644 index 0000000..2f8705e --- /dev/null +++ b/hopper-core/src/fuzz/det.rs @@ -0,0 +1,141 @@ +//! 
Deterministic mutation + +use downcast_rs::Downcast; +use std::{collections::HashMap, fmt}; + +use crate::{MutateOperation, ObjectState}; + +type MutatingClosure = + Box (MutateOperation, DetAction) + Send + Sync>; + +/// A warpper/caller that describes how to mutating a value +pub struct DetMutateCaller { + /// name + pub name: &'static str, + /// Mutating closure + pub f: MutatingClosure, +} + +/// Deterministic Mutate Step +pub trait DetMutateStep: 'static + Downcast + Send + Sync + fmt::Debug {} + +/// Object that has deterministic steps in mutation +pub trait DetMutate { + /// Return its deterministic steps + fn det_mutateion_steps() -> Vec>; +} + +downcast_rs::impl_downcast!(DetMutateStep); + +// Caller has mutate step traits +impl DetMutateStep for DetMutateCaller {} + +pub enum DetAction { + Keep, + Next, + Last, + Finish, +} +/// Call det step for object +/// first, we donwcast step to caller, and then invoke it with object and state. +/// finnaly, we check its return and move the `det_iter`, or mark det is done. +pub fn call_det( + obj: &mut T, + state: &mut ObjectState, + // // f: &dyn DetMutateStep, +) -> eyre::Result> { + DET_CACHE.with(|cache| { + if let Some((step, len)) = cache.borrow_mut().get_det_mutate_step::(state) { + //return Ok(state.as_mutate_operator(op)); + if let Some(caller) = step.downcast_ref::>() { + let (op, next) = (caller.f)(obj, state); + match next { + DetAction::Next => { + (*state.mutate).borrow_mut().next_det_iter(); + } + DetAction::Last => { + crate::log!(trace, "move last det"); + (*state.mutate).borrow_mut().set_det_iter(len - 1); + } + DetAction::Finish => { + crate::log!(trace, "done det"); + state.done_deterministic(); + } + _ => {} + } + return Ok(Some(op)); + } + eyre::bail!("fail to call det function") + } + Ok(None) + }) +} + +/// Add det steps. e.g. +/// add_det_mutation!(steps, MutateOperation::BitFlip, |n: T| Mutator::bit_flip_at(n, i)); +#[macro_export] +macro_rules! 
add_det_mutation { + ($steps:ident, $name:literal, |$n:ident: $ty:ty, $s:ident| $f:expr) => { + $steps.push(Box::new($crate::fuzz::det::DetMutateCaller { + name: $name, + f: Box::new(move |$n: &mut $ty, $s: &mut ObjectState| $f), + })); + }; + ($steps:ident, $name:literal, |$n:ident: $ty:ty| $f:expr) => { + $steps.push(Box::new($crate::fuzz::det::DetMutateCaller { + name: $name, + f: Box::new(move |$n: &mut $ty, _state: &mut ObjectState| $f), + })); + }; +} + +impl fmt::Debug for DetMutateCaller { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("DetMutateCaller") + .field("op", &self.name) + .finish() + } +} + +use std::cell::RefCell; + +thread_local! { + static DET_CACHE:RefCell = RefCell::new(DetMutateStepsCache::default()); +} + +/// Cache of deterministic steps +#[derive(Default)] +pub struct DetMutateStepsCache { + /// Key is type of object + /// value is the list its steps + map: HashMap<&'static str, Vec>>, +} + +impl DetMutateStepsCache { + /// Get which step it should use + /// `det_iter` in state is used to determine which step in the list it should use, + pub fn get_det_mutate_step( + &mut self, + state: &mut ObjectState, + ) -> Option<(&dyn DetMutateStep, usize)> { + if !state.is_deterministic() { + return None; + } + let key = std::any::type_name::(); + let det_steps = self + .map + .entry(key) + .or_insert_with(|| T::det_mutateion_steps()); + let det_index = state.mutate.borrow().det_iter; + let len = det_steps.len(); + assert!(det_index <= len, "det_index is large than len"); + crate::log!( + trace, + "type: {}, det index: {}, len: {}", + std::any::type_name::(), + det_index, + len + ); + det_steps.get(det_index).map(|f| (f.as_ref(), len)) + } +} diff --git a/hopper-core/src/fuzz/effective.rs b/hopper-core/src/fuzz/effective.rs new file mode 100644 index 0000000..fefac45 --- /dev/null +++ b/hopper-core/src/fuzz/effective.rs @@ -0,0 +1,712 @@ +use std::{borrow::BorrowMut, collections::HashMap}; + +use eyre::ContextCompat; 
+ +use crate::{fuzz::*, fuzzer::Fuzzer, runtime::*, utils, BucketType}; + +/// Effective argument: a slice of statments +pub struct EffectiveArg { + pub program_id: usize, + pub stmt_index: usize, + pub stmts: Vec, + pub hash: u64, +} + +/// Buf's content and location +#[derive(Debug, Clone)] +pub struct EffectiveBuf { + pub program_id: usize, + pub stmt_index: usize, + pub buf: Vec, + pub hash: u64, +} + +#[derive(Default)] +pub struct EffectiveList { + pub arg_list: HashMap>, + pub buf_list: HashMap>, +} + +use std::cell::RefCell; + +thread_local! { + pub static EFFECT: RefCell = RefCell::new(EffectiveList::default()); +} + +impl Fuzzer { + /// Collect effective arguments from interesting seed programs + pub fn collect_effective_args( + &mut self, + program: &FuzzProgram, + new_edges: &[(usize, BucketType)], + ) -> eyre::Result<()> { + if !crate::config::ENABLE_EFF_ARG { + return Ok(()); + } + if program.ops.is_empty() { + // view target call is effective in generated program + let target_index = program.get_target_index().context("has target")?; + Self::collect_effective_args_in_call(program, target_index)?; + return Ok(()); + } + // For mutated program, we need to locate which call is effective, + let mut p = program.clone(); + // check ops again + p.ops = program.ops.clone_with_program(&mut p); + if p.ops.is_empty() { + return Ok(()); + } + let track_calls = p.get_track_calls(); + p.set_calls_track_cov(false); + // find which call is effective + for i in 0..p.stmts.len() { + if !p.stmts[i].stmt.is_call() { + continue; + } + if !track_calls.contains(&p.stmts[i].index.get_uniq()) { + continue; + } + crate::log!(trace, "call {i} is trackable, check if it is effective"); + // only set current call trackable + if let FuzzStmt::Call(call) = &mut p.stmts[i].stmt { + call.track_cov = true; + } + let status = self.executor.execute_program(&p)?; + // if it trigger the new edges, the call is effective + if status.is_normal() && 
self.observer.feedback.path.contain_any(new_edges) { + // effective call + Self::collect_effective_args_in_call(&p, i)?; + } + if let FuzzStmt::Call(call) = &mut p.stmts[i].stmt { + call.track_cov = false; + } + } + Ok(()) + } + + /// Collect argument in effective call + pub fn collect_effective_args_in_call( + program: &FuzzProgram, + call_i: usize, + ) -> eyre::Result<()> { + crate::log!(trace, "try to collect effective args in call {call_i}"); + if let FuzzStmt::Call(call) = &program.stmts[call_i].stmt { + for (arg_pos, arg) in call.args.iter().enumerate() { + let arg_index = arg.get(); + let p = slice_arg(program, call, call_i, arg_index)?; + // if the arg is not effective + // 1. the slice is not mutated by any ops or too long + // 2. the arg is not mutated by relative calls + if p.ops.is_empty() || p.stmts.len() > 12 { + continue; + } + if program.ops.iter().any(|op| { + // or call op that mutate arg + let ret = match &op.op { + MutateOperation::CallArg { + arg_pos: arg_i, + rng_state: _, + } => *arg_i == arg_pos, + MutateOperation::CallRelatedInsert { + f_name: _, + arg_pos: arg_i, + rng_state: _, + } => *arg_i == arg_pos, + _ => false, + }; + ret && (op.key.get_index().unwrap().get() == call_i) + }) { + crate::log!( + trace, + "ops do not mutate arg {arg_pos}, ops: {}!", + program.ops.serialize().unwrap() + ); + continue; + } + crate::log!(trace, "sliced: {}", p.serialize_all()?); + crate::log!(trace, "arg_pos {arg_pos} is effective!"); + let arg_ident = call.fg.arg_idents[arg_pos]; + let arg_type = call.fg.arg_types[arg_pos]; + let arg_alias_type = call.fg.alias_arg_types[arg_pos]; + // null pointer + if utils::is_pointer_type(arg_type) && p.stmts.len() == 1 { + continue; + } + EFFECT.with::<_, eyre::Result>(|eff| { + let mut eff = eff.borrow_mut(); + let _ = eff.collect_effective_buf(program, arg_index)? 
+ || eff.collect_effective_arg( + p.stmts, + program.id, + arg_index, + arg_ident, + arg_alias_type, + )?; + Ok(true) + })?; + } + } + Ok(()) + } +} + +impl CallStmt { + /// Set effective argument in call's i-th argument + pub fn set_effective_ith_call_arg( + &mut self, + program: &mut FuzzProgram, + arg_pos: usize, + depth: usize, + ) -> eyre::Result> { + if depth > crate::config::MAX_DEPTH || program.stmts.len() > crate::config::MAX_STMTS_LEN { + return Ok(None); + } + let arg_ident = self.fg.arg_idents[arg_pos]; + let arg_alias_type = self.fg.alias_arg_types[arg_pos]; + let key = format!("{arg_ident}_{arg_alias_type}"); + // insert offset + let mut last_insert = if let Some(stub_i) = program.get_stub_stmt_index() { + stub_i.get() + } else { + program.stmts.len() + }; + // slice previous arg + let prev_slice = slice_arg(program, self, last_insert, self.args[arg_pos].get())?; + let choosed_arg = EFFECT.with(|eff| { + eff.borrow() + .choose_effective_arg(program, &prev_slice.stmts, &key) + }); + if let Some((eff_i, stmts)) = choosed_arg { + // crate::log!(trace, "program: {}", program.serialize()?); + crate::log!( + trace, + "use effective arg-{eff_i}: {}", + stmts.serialize().unwrap() + ); + // hold stmt unit loop finish, to avoid the `skip` ones change the ref-count. 
+ for is in stmts.iter().rev() { + let p_is = is.clone_with_program(program); + crate::log!(trace, "p: {}", p_is.serialize()?); + // if they have the same uniq id and type + // we try to replace it + let mut existing = None; + let index_uniq = is.index.get_uniq(); + for pis in program.stmts.iter_mut() { + if pis.stmt.is_stub() || pis.index.get() >= last_insert { + break; + } + if pis.index.get_uniq() == index_uniq { + existing = Some(pis); + } + } + if let Some(existing) = existing { + crate::log!( + trace, + "replace {} with {}", + existing.index.get(), + is.index.get() + ); + let _ = std::mem::replace(&mut existing.stmt, p_is.stmt); + let new_insert = existing.index.get(); + eyre::ensure!(new_insert < last_insert, "insert order is wrong!"); + last_insert = new_insert; + continue; + } + // should clone again + match &is.stmt { + // argument + FuzzStmt::Load(_load) => { + if is.index.get_ref_used() == 1 { + crate::log!(trace, "set arg index: {}", is.index.get()); + self.args[arg_pos] = p_is.index.use_index(); + } + } + // if the context call exists, we skip it + FuzzStmt::Call(call) => { + // ref <=1 : the context is for our target call + if is.index.get_ref_used() <= 1 && call.is_implicit() { + if self.has_implicit_context(program, call.fg.f_name) { + crate::log!(trace, "ignore dup implicit context"); + // continue; + } + // otherwise, we add it to the ctx + self.contexts.push(is.index.use_index()); + } + // check if duplicated relative call before + if self.has_relative_context(program, call.fg.f_name) { + crate::log!(trace, "ignore dup relative context"); + // continue; + } + } + _ => {} + } + program.stmts.insert(last_insert, p_is); + program.resort_indices(); + } + program.tmp_indices.clear(); + crate::log!(trace, "new: {}", program.serialize()?); + program.check_ref_use()?; + crate::log!(trace, "use effective arg done"); + return Ok(Some(eff_i)); + } + crate::log!(trace, "fail to find eff arg for {key}"); + Ok(None) + } +} + +impl EffectiveList { + pub fn 
choose_effective_arg( + &self, + program: &mut FuzzProgram, + slice: &[IndexedStmt], + key: &str, + ) -> Option<(usize, Vec)> { + if let Some(list) = self.arg_list.get(key) { + crate::log!(trace, "found {} effective args for `{}`", list.len(), key); + // crate::log!(trace, "slice: {}", slice.serialize().unwrap()); + let slice_hash = utils::hash_buf(slice.serialize().unwrap().as_bytes()); + if let Some((i, arg)) = rng::choose_iter( + list.iter() + .enumerate() + .filter(|(_, ea)| ea.program_id != program.id && ea.hash != slice_hash), + ) { + let mut stmts: Vec = vec![]; + let mut slice_len = slice.len(); + let mut holder = FuzzProgram::default(); + // clone arg slice and avoid the same index uniq + for is in arg.stmts.iter().rev() { + let mut is = is.clone_with_program(&mut holder); + // avoid they have the same uniq + if program.get_mut_stmt_by_index_uniq(&is.index).is_some() { + is.index.reset_uniq(); + } + // crate::log!(trace, "arg stmt: {}", is.serialize().unwrap()); + // if they are likely to the same and used by multiple ref + // we set the same index uniq + if let FuzzStmt::Load(load) = &is.stmt { + if load.state.pointer.is_some() && slice_len > 0 { + let ident = load.get_ident(); + let ty = load.value.type_name(); + let iter = slice[..slice_len].iter().filter_map(|slice_is| { + if let FuzzStmt::Load(load) = &slice_is.stmt { + if ident == load.get_ident() && ty == load.value.type_name() { + let index = &slice_is.index; + if let Some(p_is) = program.get_stmt_by_index_uniq(index) { + // && stmts.iter().all(|s| s.index.get_uniq() != index.get_uniq()) + if p_is.index.get_ref_used() > 2 { + return Some(index.use_index()); + } + } + } + } + None + }); + if let Some(index) = rng::choose_iter(iter) { + crate::log!(trace, "update uniq for : {}", index.get()); + slice_len = index.get(); + is.index.borrow_mut().set_uniq(index.get_uniq()); + } + } + } + stmts.push(is); + } + stmts.reverse(); + return Some((i, stmts)); + } + } + None + } + + pub fn collect_effective_arg( 
+ &mut self, + slice_stmts: Vec, + program_id: usize, + stmt_index: usize, + arg_ident: &str, + arg_type: &str, + ) -> eyre::Result { + let buf = slice_stmts.serialize()?; + let hash = utils::hash_buf(buf.as_bytes()); + let arg = EffectiveArg { + program_id, + stmt_index, + stmts: slice_stmts, + hash, + }; + let key = format!("{arg_ident}_{arg_type}",); + crate::log!(trace, "new effective arg in `{}`: {}", key, buf); + if let Some(list) = self.arg_list.get_mut(&key) { + if list.iter().all(|s| s.hash != hash) { + // only keep 5 newest effective arguments + if list.len() >= 5 { + list.remove(0); + } + list.push(arg); + } + } else { + let list = vec![arg]; + self.arg_list.insert(key.to_string(), list); + } + Ok(true) + } + + fn collect_effective_buf(&mut self, program: &FuzzProgram, arg_i: usize) -> eyre::Result { + if let FuzzStmt::Load(load) = &program.stmts[arg_i].stmt { + if load.state.pointer.is_none() { + return Ok(false); + } + let ptr_loc = &load.state.pointer.as_ref().unwrap().pointer_location; + if !ptr_loc.is_null() + && (ptr_loc.fields.is_empty() + || (ptr_loc.fields.len() == 1 && ptr_loc.fields.list[0] == FieldKey::Pointer)) + { + let is = &program.stmts[ptr_loc.get_index()?.get()]; + if let Some((value, ident, is_u8)) = get_buf_value(&is.stmt) { + let key = if is_u8 { + format!("{ident}_u8") + } else { + format!("{ident}_i8") + }; + let buf = cast_bytes(value.as_ref(), is_u8)?.to_vec(); + let hash = utils::hash_buf(&buf); + let eff_buf = EffectiveBuf { + program_id: program.id, + stmt_index: is.index.get(), + buf, + hash, + }; + crate::log!( + trace, + "save buf at program {} stmt {} to {key}, hash: {hash}", + eff_buf.program_id, + eff_buf.stmt_index + ); + if let Some(list) = self.buf_list.get_mut(&key) { + if list.iter().all(|s| s.hash != hash) { + list.push(eff_buf); + } + } else { + let list = vec![eff_buf]; + self.buf_list.insert(key, list); + } + return Ok(true); + } + } + } + Ok(false) + } +} + +fn get_buf_value(stmt: &FuzzStmt) -> 
Option<(&FuzzObject, &str, bool)> { + match stmt { + FuzzStmt::Load(load) => { + let type_name = load.value.type_name(); + let u8_vec = type_name == "alloc::vec::Vec"; + let i8_vec = type_name == "alloc::vec::Vec"; + if u8_vec || i8_vec { + return Some((&load.value, load.get_ident(), u8_vec)); + } + } + // Now we do not use it because we move check_effective before review + FuzzStmt::Call(call) => { + if let Some(ret_type) = call.fg.ret_type { + if let Some(inner_type) = utils::get_pointer_inner(ret_type) { + let is_u8 = inner_type == "u8"; + let is_i8 = inner_type == "i8"; + if is_u8 || is_i8 { + // crate::log!(trace, "get buf from call: {call:?}"); + // ret's layout + // { {field: [], value: ptr}, {field: [Pointer], value: Vec} + if call.ret_ir.len() == 2 { + let ret_val = &call.ret_ir[1]; + // ret_val.fields.as_slice() == &[FieldKey::Pointer] + return Some((&ret_val.value, &call.ident, is_u8)); + } + } + } + } + } + _ => {} + } + None +} + +fn cast_bytes(value: &dyn ObjFuzzable, is_u8: bool) -> eyre::Result<&[u8]> { + if is_u8 { + Ok(value.downcast_ref::>().context("downcast buf")?) 
+ } else { + let buf = value.downcast_ref::>().context("downcast buf")?; + let u8_buf = unsafe { std::slice::from_raw_parts(buf.as_ptr() as *const u8, buf.len()) }; + Ok(u8_buf) + } +} + +pub fn slice_arg( + program: &FuzzProgram, + call: &CallStmt, + call_i: usize, + arg_i: usize, +) -> eyre::Result { + crate::log!(trace, "slice arg {arg_i} at call {call_i}"); + let mut p = program.clone(); + p.stmts.truncate(call_i); + let index = p.stmts[arg_i].index.use_index(); + // if the calls are related to `arg` + let mut ctxs = vec![]; + for ctx in &call.contexts { + if let FuzzStmt::Call(call) = &p.stmts[ctx.get()].stmt { + if call.args.contains(&index) { + ctxs.push(ctx.clone_with_program(&mut p)); + } + } + } + + // crate::log!(trace, "ctxs for slice: {:?}", ctxs); + // check ref use will remove statements that do not related to `index` + p.check_ref_use()?; + + // remove those unrelated calls + { + let mut removed = false; + let mut used_indices = get_relative_indices_recusively(&p, vec![index.use_index()]); + for i in (0..p.stmts.len()).rev() { + let is = &p.stmts[i]; + if let FuzzStmt::Call(call) = &is.stmt { + let used = get_relative_indices_recusively(&p, call.args.clone()); + if call.is_relative() && used.iter().all(|uniq| !used_indices.contains(uniq)) { + p.delete_stmt(i); + removed = true; + continue; + } + for uniq in used { + if !used_indices.contains(&uniq) { + used_indices.push(uniq); + } + } + } + } + if removed { + crate::log!(trace, "remove some unrelated calls, check ref again!"); + p.check_ref_use()?; + } + } + + // move ctxs to last + for ctx in ctxs { + let stmt = p.stmts.remove(ctx.get()); + p.stmts.push(stmt); + p.resort_indices(); + } + p.ops = program.ops.clone_with_program(&mut p); + // remove those useless ops + p.ops.retain(|o| !o.key.is_released()); + Ok(p) +} + +/// Get relative indices that used by the list of stmts recursively +fn get_relative_indices_recusively(program: &FuzzProgram, mut list: Vec) -> Vec { + let mut used: Vec = vec![]; + 
while let Some(i) = list.pop() { + if let Some(is) = program.get_stmt_by_index_uniq(&i) { + if let FuzzStmt::Load(load) = &is.stmt { + let _ = load.state.find_any_stmt_in_state_with(|index| { + list.push(index.use_index()); + // to visit all + false + }); + } + } + used.push(i.get_uniq()); + } + used +} + +pub fn save_effective_args() -> eyre::Result<()> { + use std::io::Write; + EFFECT.with(|eff| { + let eff = eff.borrow(); + let path = crate::config::output_file_path("misc/eff_arg.json"); + let mut f = std::fs::File::create(path)?; + for args in &eff.arg_list { + writeln!(f, "***********************************")?; + writeln!(f, "KEY: {}", args.0)?; + writeln!(f, "***********************************")?; + for arg in args.1 { + writeln!( + f, + "ID: {}, Index: {}, Hash: {}", + arg.program_id, arg.stmt_index, arg.hash + )?; + for stmt in &arg.stmts { + write!(f, "{}", stmt.serialize()?)?; + } + writeln!(f, "------------------------------------")?; + } + } + + let path = crate::config::output_file_path("misc/eff_buf.json"); + let mut f = std::fs::File::create(path)?; + for args in &eff.buf_list { + writeln!(f, "***********************************")?; + writeln!(f, "KEY: {}", args.0)?; + writeln!(f, "***********************************")?; + for arg in args.1 { + writeln!( + f, + "ID: {}, Index: {}, Hash: {}", + arg.program_id, arg.stmt_index, arg.hash + )?; + f.write_all(&arg.buf)?; + writeln!(f, "\n------------------------------------")?; + } + } + Ok(()) + }) +} + +pub fn load_effective_args() -> eyre::Result<()> { + use std::io::BufRead; + let path = crate::config::output_file_path("misc/eff_arg.json"); + if !path.exists() { + return Ok(()); + } + EFFECT.with(|eff| { + let mut eff = eff.borrow_mut(); + let buf = std::fs::read(path)?; + let mut key = "".to_string(); + let mut program = FuzzProgram::default(); + let mut id: usize = 0; + let mut index: usize = 0; + let mut hash: u64 = 0; + for line in buf.lines() { + let line = line?; + if line.is_empty() || 
line.starts_with("****") { + continue; + } + if let Some(found_key) = line.strip_prefix("KEY: ") { + key = found_key.trim().to_string(); + eff.arg_list.insert(key.clone(), vec![]); + continue; + } + let mut de = Deserializer::new(&line, Some(&mut program)); + if de.strip_token("ID: ") { + id = de.parse_number()?; + de.eat_token(", Index: ")?; + index = de.parse_number()?; + de.eat_token(", Hash: ")?; + hash = de.parse_number()?; + continue; + } + de.canary = false; + if de.strip_token("------") { + let stmts = std::mem::take(&mut program.stmts); + program.clear_tmp_indices()?; + program.stmts.clear(); + if let Some(list) = eff.arg_list.get_mut(&key) { + let arg = EffectiveArg { + program_id: id, + stmt_index: index, + stmts, + hash, + }; + list.push(arg); + } + // crate::log!(trace, "add new arg"); + continue; + } + // crate::log!(trace, "line: {}", de.buf); + if de.strip_token("vec(") { + let _ = de.next_token_until("["); + } + if de.strip_token(", ") { + continue; + } + let is = IndexedStmt::deserialize(&mut de)?; + program.stmts.push(is); + } + Ok(()) + }) +} + +#[test] +fn test_effective_num() { + use crate::test; + let arg_ident = "a"; + let arg_type = "u8"; + let mut tmp_p = FuzzProgram::default(); + let arg = test::generate_load_stmt::(arg_ident, arg_type); + let arg_val = arg.serialize().unwrap(); + tmp_p.append_stmt(arg); + println!("*arg: {}", tmp_p.serialize().unwrap()); + EFFECT.with(|eff| { + eff.borrow_mut() + .collect_effective_arg(tmp_p.stmts, 1, 0, arg_ident, arg_type) + .unwrap(); + }); + let mut call_p = FuzzProgram::generate_program_for_func("func_add").unwrap(); + println!("*before: {}", call_p.serialize().unwrap()); + if let FuzzStmt::Call(mut call) = call_p.stmts.last_mut().unwrap().stmt.lend() { + call.set_effective_ith_call_arg(&mut call_p, 0, 0).unwrap(); + } + + println!("*set: {}", call_p.serialize().unwrap()); + assert!( + call_p.stmts[0].stmt.serialize().unwrap() == arg_val + || call_p.stmts[1].stmt.serialize().unwrap() == arg_val + ); 
+} + +#[test] +fn test_effective_pointer() { + let arg_ident = "ptr"; + let arg_type = utils::pointer_type("u8", true); + let mut tmp_p = FuzzProgram::default(); + flag::set_pilot_det(true); + let arg = LoadStmt::generate_new(&mut tmp_p, &arg_type, arg_ident, 0).unwrap(); + flag::set_pilot_det(false); + let arg_val = tmp_p.stmts[0].stmt.serialize().unwrap(); + tmp_p.append_stmt(arg); + println!("arg: {}", tmp_p.serialize().unwrap()); + EFFECT.with(|eff| { + eff.borrow_mut() + .collect_effective_arg(tmp_p.stmts, 1, 0, arg_ident, &arg_type) + .unwrap(); + }); + let mut call_p = FuzzProgram::generate_program_for_func("test_arr").unwrap(); + println!("p: {}", call_p.serialize().unwrap()); + if let FuzzStmt::Call(mut call) = call_p.stmts.last_mut().unwrap().stmt.lend() { + println!("try effective arg"); + call.set_effective_ith_call_arg(&mut call_p, 0, 0).unwrap(); + } + + println!("set: {}", call_p.serialize().unwrap()); + + assert!( + call_p.stmts[0].stmt.serialize().unwrap() == arg_val + || call_p.stmts[1].stmt.serialize().unwrap() == arg_val + ); + + println!("test multiple ref case"); + // multiple ref argument + let mut call_p = FuzzProgram::generate_program_for_func("test_arr").unwrap(); + let mut call_p2 = FuzzProgram::generate_program_for_func("test_arr").unwrap(); + let index = call_p.stmts.len() - 1; + if let FuzzStmt::Call(call) = &mut call_p2.stmts.last_mut().unwrap().stmt { + call.args[0] = call_p.get_target_stmt().unwrap().args[0].use_index(); + } + for is in call_p2.stmts { + call_p.stmts.push(is); + } + call_p.resort_indices(); + // call_p.check_ref_use(); + println!("new call_p: {}", call_p.serialize().unwrap()); + if let FuzzStmt::Call(mut call) = call_p.stmts[index].stmt.lend() { + println!("try effective arg"); + call.set_effective_ith_call_arg(&mut call_p, 0, 0).unwrap(); + println!("call: {}", call.serialize().unwrap()); + let _index = call_p.withdraw_stmt(FuzzStmt::Call(call)).unwrap(); + } + println!("new set: {}", call_p.serialize().unwrap()); 
+ + assert_eq!(call_p.get_target_stmt().unwrap().args[0].get_ref_used(), 3) +} diff --git a/hopper-core/src/fuzz/find.rs b/hopper-core/src/fuzz/find.rs new file mode 100644 index 0000000..2c89f0e --- /dev/null +++ b/hopper-core/src/fuzz/find.rs @@ -0,0 +1,113 @@ +//! Implement program's functions for finding something + +use crate::{log, runtime::*, fuzz::*}; + +impl FuzzProgram { + /// Get number's value by loc + pub fn find_number_by_loc(&self, loc: Location) -> eyre::Result { + let op = MutateOperator::new(loc, MutateOperation::IntGet); + let mut ptmp = self.clone(); + ptmp.mutate_program_by_op(&op)?; + Ok(flag::get_tmp_u64()) + } + + /// Find length entry in all calls' arguments + pub fn find_stmt_loc_in_all_calls( + &self, + target_index: usize, + max: usize, + ) -> Option<(usize, &CallStmt, usize, LocFields)> { + for call_i in (0..=max).rev() { + if let FuzzStmt::Call(call_stmt) = &self.stmts[call_i].stmt { + if let Some((arg_pos, prefix)) = + self.find_stmt_loc_for_call(target_index, &call_stmt.args) + { + return Some((call_i, call_stmt, arg_pos, prefix)); + } + } + } + None + } + + /// Find stmt in call's arguments + pub fn find_stmt_loc_for_call( + &self, + target_index: usize, + call_args: &[StmtIndex], + ) -> Option<(usize, LocFields)> { + for (arg_pos, arg_stmt) in call_args.iter().enumerate() { + let field = LocFields::default(); + if arg_stmt.get() == target_index { + return Some((arg_pos, field)); + } + let ret = self.find_stmt_loc_for_stmt(target_index, arg_stmt, &field); + if let Some(f) = ret { + return Some((arg_pos, f)); + } + } + None + } + + /// Return the fields that how to get the `target_index` inside `root_stmt` + fn find_stmt_loc_for_stmt( + &self, + target_index: usize, + root_stmt: &StmtIndex, + prefix: &LocFields, + ) -> Option { + match &self.stmts[root_stmt.get()].stmt { + FuzzStmt::Load(load) => { + let mut st = vec![&load.state]; + while let Some(s) = st.pop() { + if let Some(ptee_stmt) = s.get_pointer_stmt_index() { + let mut 
sub_prefix = prefix.with_suffix(s.get_location_fields()); + sub_prefix.push(FieldKey::Pointer); + if ptee_stmt.get() == target_index { + return Some(sub_prefix); + } + let ret = self.find_stmt_loc_for_stmt( + target_index, + ptee_stmt, + &sub_prefix, + ); + if ret.is_some() { + return ret; + } + } + for sub_state in s.children.iter() { + st.push(sub_state); + // only consider first element for sequence + if sub_state.children.is_empty() { + if let FieldKey::Index(_) = sub_state.key { + break; + } + } + } + } + } + FuzzStmt::Call(_call_stmt) => { + for is in &self.stmts[root_stmt.get()..] { + if let FuzzStmt::Update(update_stmt) = &is.stmt { + if let Some(dst_index) = update_stmt.dst.stmt_index.as_ref() { + if update_stmt.src.get() == target_index + && dst_index.get_uniq() == root_stmt.get_uniq() + { + let mut dst_fields = update_stmt.dst.fields.clone(); + // remove duplicated pointer fieldkey + dst_fields.strip_pointer_suffix(); + if dst_fields.list.first() == Some(&FieldKey::Pointer) { + dst_fields.pop(); + } + let sub_prefix = prefix.with_suffix(dst_fields); + log!(trace, "update prefix: {sub_prefix:?}"); + return Some(sub_prefix); + } + } + } + } + } + _ => {} + } + None + } +} diff --git a/hopper-core/src/fuzz/flag.rs b/hopper-core/src/fuzz/flag.rs new file mode 100644 index 0000000..8eaa46b --- /dev/null +++ b/hopper-core/src/fuzz/flag.rs @@ -0,0 +1,173 @@ +use super::rng::*; +use std::cell::Cell; + +thread_local! { + // Deterministic generation in pilot phase + pub static PILOT_DET: Cell = Cell::new(false); + // Generate only single call + pub static SINGLE_CALL: Cell = Cell::new(true); + // Reuse statement in generation + pub static REUSE_STMT: Cell = Cell::new(false); + // Only mutate argument inputs. 
+ pub static INPUT_ONLY: Cell = Cell::new(false); + // Refine successful or not + pub static REFINE_SUC: Cell = Cell::new(false); + // Mutate pointer or not + pub static MUTATE_PTR: Cell = Cell::new(false); + // deterministic mutation for call + pub static CALL_DET: Cell = Cell::new(false); + // Generate an incomplete program + pub static INCOMPLETE_GEN: Cell = Cell::new(false); + // Running in pilot infer phase + pub static PILOT_INFER: Cell = Cell::new(false); + // u64 temp value + pub static TMP_U64: Cell = Cell::new(0); +} + +pub fn is_pilot_det() -> bool { + PILOT_DET.with(|c| c.get()) +} + +pub fn set_pilot_det(flag: bool) -> bool { + PILOT_DET.with(|c| c.replace(flag)) +} + +pub fn is_single_call() -> bool { + SINGLE_CALL.with(|c| c.get()) +} + +pub fn set_single_call(flag: bool) -> bool { + SINGLE_CALL.with(|c| c.replace(flag)) +} + +pub fn is_reuse_stmt() -> bool { + REUSE_STMT.with(|c| c.get()) +} + +pub fn set_reuse_stmt(flag: bool) -> bool { + REUSE_STMT.with(|c| c.replace(flag)) +} + +pub fn is_input_only() -> bool { + INPUT_ONLY.with(|c| c.get()) +} + +pub fn set_input_only(flag: bool) -> bool { + INPUT_ONLY.with(|c| c.replace(flag)) +} + +pub fn is_refine_suc() -> bool { + REFINE_SUC.with(|c| c.get()) +} + +pub fn set_refine_suc(flag: bool) -> bool { + REFINE_SUC.with(|c| c.replace(flag)) +} + +pub fn is_mutate_ptr() -> bool { + MUTATE_PTR.with(|c| c.get()) +} + +pub fn set_mutate_ptr(flag: bool) -> bool { + MUTATE_PTR.with(|c| c.replace(flag)) +} + +pub fn is_incomplete_gen() -> bool { + INCOMPLETE_GEN.with(|c| c.get()) +} + +pub fn set_incomplete_gen(flag: bool) -> bool { + INCOMPLETE_GEN.with(|c| c.replace(flag)) +} + +pub fn is_pilot_infer() -> bool { + PILOT_INFER.with(|c| c.get()) +} + +pub fn set_pilot_infer(flag: bool) -> bool { + PILOT_INFER.with(|c| c.replace(flag)) +} + +pub fn set_tmp_u64(val: u64) { + TMP_U64.with(|c| c.replace(val)); +} + +pub fn get_tmp_u64() -> u64 { + TMP_U64.with(|c| c.get()) +} + +/// Enable call's deterministic 
mutation
+///
+/// The flag is switched on only when it is currently off and the
+/// `DISABLE_CALL_DET` environment variable is not set.
+pub fn enable_call_det() {
+    CALL_DET.with(|c| {
+        // The guard used to be `c.get() && ..`, which could only re-set a flag that
+        // was already `true`; the function therefore never enabled anything.
+        if !c.get() && std::env::var("DISABLE_CALL_DET").is_err() {
+            crate::log!(info, "start enable call det!");
+            c.replace(true);
+        }
+    });
+}
+
+/// Turn the call's deterministic mutation off.
+pub fn disable_call_det() {
+    CALL_DET.with(|c| c.replace(false));
+}
+
+/// Whether the call's deterministic mutation is currently on.
+pub fn is_call_det() -> bool {
+    CALL_DET.with(|c| c.get())
+}
+
+/// Decide whether a pointer should be produced by a call or not.
+///
+/// `inner_type` is the inner type of the pointer, `is_opaque` tells whether that
+/// type is opaque, and `depth` is compared against `config::MAX_DEPTH`.
+pub fn use_call(inner_type: &str, is_opaque: bool, depth: usize) -> bool {
+    // never use a call once the depth limit is reached
+    if depth >= crate::config::MAX_DEPTH {
+        return false;
+    }
+    // opaque types: always in deterministic pilot mode, otherwise decided by `mostly()`
+    if is_opaque {
+        return is_pilot_det() || mostly();
+    }
+    if is_single_call() {
+        return false;
+    }
+    // de-prioritize primitive pointer
+    if crate::utils::is_primitive_type(inner_type) {
+        return rarely();
+    }
+    unlikely()
+}
+
+/// Pack the pilot-det, single-call and reuse-stmt flags into one byte
+/// (bit 0, bit 1 and bit 2 respectively).
+pub fn get_mutate_flag() -> u8 {
+    let mut flag: u8 = 0;
+    if is_pilot_det() {
+        flag |= 1;
+    }
+    if is_single_call() {
+        flag |= 2;
+    }
+    if is_reuse_stmt() {
+        flag |= 4;
+    }
+    flag
+}
+
+/// Restore the three flags from a byte produced by `get_mutate_flag`.
+pub fn set_mutate_flag(flag: u8) {
+    set_pilot_det(flag & 1 > 0);
+    set_single_call(flag & 2 > 0);
+    set_reuse_stmt(flag & 4 > 0);
+}
+
+/// Scope guard that remembers the previous value of `REUSE_STMT` and writes it
+/// back when dropped.
+pub struct ReuseStmtGuard {
+    cur: bool,
+}
+
+impl ReuseStmtGuard {
+    // disable reuse stmt in the scope
+    pub fn temp_disable() -> Self {
+        REUSE_STMT.with(|cell| {
+            Self { cur: cell.replace(false) }
+        })
+    }
+}
+
+impl Drop for ReuseStmtGuard {
+    fn drop(&mut self) {
+        set_reuse_stmt(self.cur);
+    }
+}
diff --git a/hopper-core/src/fuzz/generate.rs b/hopper-core/src/fuzz/generate.rs
new file mode 100644
index 0000000..3078a62
--- /dev/null
+++ b/hopper-core/src/fuzz/generate.rs
@@ -0,0 +1,143 @@
+use eyre::Context;
+
+use crate::{fuzz::rng, runtime::*, utils};
+
+impl FuzzProgram {
+    /// Generate a random program.
+    ///
+    /// The target is the configured `func_target` when there is one; otherwise a
+    /// function is chosen randomly from `candidates` (or from all function gadgets
+    /// when `candidates` is `None`). With `enable_fail` set, only functions whose
+    /// constraint is marked `internal` are filtered out.
+    // NOTE(review): the generic arguments of this signature were lost in the dump;
+    // `Vec<&str>` is an assumption to be confirmed against the upstream source.
+    pub fn generate_program(
+        candidates: Option<&Vec<&str>>,
+        enable_fail: bool,
+    ) -> eyre::Result<FuzzProgram> {
+        let f_name = if let Some(f_name) = crate::config::get_config().func_target {
+            f_name
+        } else {
+            // Choose function target from gadgets with the pattern defined in configuration.
+            // Even the functions are marked as can_success:false, we still generate program for them.
+            // We hope that those generation found some success programs.
+            let f = |f_name| {
+                if enable_fail {
+                    crate::filter_function_constraint_with(f_name, |fc| !fc.internal)
+                } else {
+                    crate::filter_function(f_name)
+                }
+            };
+            if let Some(list) = candidates {
+                rng::choose_iter(list.iter().filter(|f_name| f(f_name))).ok_or_else(|| {
+                    eyre::eyre!("function gadget is empty with candidates {:?}", list)
+                })?
+            } else {
+                rng::choose_iter(
+                    global_gadgets::get_instance()
+                        .functions
+                        .keys()
+                        .filter(|f_name| f(f_name)),
+                )
+                .map(|f_name| f_name.as_str())
+                .ok_or_else(|| eyre::eyre!("function gadget is empty"))?
+            }
+        };
+
+        Self::generate_program_for_func(f_name)
+    }
+
+    /// Generate a random program whose target call is function `f_name`.
+    ///
+    /// The program is built from scratch, checked with `check_ref_use` and refined
+    /// before it is returned.
+    pub fn generate_program_for_func_randomly(f_name: &str) -> eyre::Result<FuzzProgram> {
+        // create an empty program
+        let mut program: FuzzProgram = Default::default();
+        program.save_mutate_state();
+        let mut call = CallStmt::generate_new(&mut program, CallStmt::TARGET, f_name, 0)
+            .with_context(|| format!("fail to generate call `{f_name}`"))?;
+        // only track target function
+        call.track_cov = true;
+        let _stmt = program.append_stmt(call);
+        program.check_ref_use()?;
+        program
+            .refine_program()
+            .with_context(|| program.serialize_all().unwrap())?;
+        Ok(program)
+    }
+
+    /// Generate a program for function `f_name`.
+    ///
+    /// With the `slices` feature the slice-based generator is tried first; the
+    /// random generator is the fallback.
+    pub fn generate_program_for_func(f_name: &str) -> eyre::Result<FuzzProgram> {
+        #[cfg(feature = "slices")]
+        if let Some(p) = Self::generate_program_for_func_by_slices(f_name)? {
+            return Ok(p);
+        }
+        Self::generate_program_for_func_randomly(f_name)
+    }
+}
+
+/// Choose any function that provides the specific type.
+///
+/// `alias_type_name` is tried before `type_name`. Returns `None` when no
+/// suitable provider is found.
+pub fn find_func_with_return_type(type_name: &str, alias_type_name: &str) -> Option<&'static str> {
+    let gadgets = global_gadgets::get_instance();
+    crate::log!(
+        trace,
+        "find return for type `{type_name} / {alias_type_name}`"
+    );
+
+    // Pick a provider among the functions recorded in `ret_graph` for either the
+    // mutable or the const pointer to `inner`.
+    let choose_for_pointee = |inner: &str| {
+        let mut_ptr = utils::mut_pointer_type(inner);
+        let mut_iter: &[&str] = gadgets
+            .ret_graph
+            .get(mut_ptr.as_str())
+            .map_or(&[], |l| l.as_slice());
+        let const_ptr = utils::const_pointer_type(inner);
+        let const_iter: &[&str] = gadgets
+            .ret_graph
+            .get(const_ptr.as_str())
+            .map_or(&[], |l| l.as_slice());
+        choose_provider(mut_iter.iter().chain(const_iter))
+    };
+
+    let is_void_ptr = utils::is_void_pointer(type_name);
+    // stupid case: alias void type to another type named void
+    if is_void_ptr && (alias_type_name.contains("void") || alias_type_name.contains("Void")) {
+        return None;
+    }
+    // try alias type first
+    // try different pointers for inner type
+    if let Some(inner) = utils::get_pointer_inner(alias_type_name) {
+        return choose_for_pointee(inner);
+    }
+
+    // The alias type may not start with `FuzzMutPointer` or `FuzzConstPointer`:
+    // the alias type is a pointer itself.
+    if let Some(fs) = gadgets.ret_graph.get(alias_type_name) {
+        return choose_provider(fs.iter());
+    }
+
+    // try type_name if it is not void pointer
+    if is_void_ptr {
+        return None;
+    }
+    utils::get_pointer_inner(type_name).and_then(choose_for_pointee)
+}
+
+/// Choose one item randomly among the providers.
+///
+/// Providers whose function constraint reports `can_used_as_arg() == false` are
+/// filtered out first; `None` is returned when nothing is left.
+fn choose_provider<'a, I>(iter: I) -> Option<&'static str>
+where
+    I: Iterator<Item = &'a &'static str> + Clone + std::fmt::Debug,
+{
+    let iter = iter.filter(|f_name| {
+        crate::filter_function_constraint_with(f_name, |fc| fc.can_used_as_arg())
+    });
+    if let Some(provider) = rng::choose_iter(iter) {
+        crate::log!(trace, "choose provider: {provider}");
+        return Some(provider);
+    }
+    crate::log!(trace, "can not find any provider");
+    None
+}
diff --git a/hopper-core/src/fuzz/infer/array.rs b/hopper-core/src/fuzz/infer/array.rs
new file mode 100644
index 0000000..dfed3ae
--- /dev/null
+++ b/hopper-core/src/fuzz/infer/array.rs
@@ -0,0 +1,117 @@
+//! Infer array's minimal length.
+//!
+//! Some APIs assume that the arrays referenced by the pointers have sufficient elements rather than asking for
+//! arguments indicating boundaries.
+//!
+//! We attempt to pad the arrays in the arguments to a specific length K (e.g., 64) to see whether it resolves the crash.
+//! If so, an ARRAY-LEN constraint is added to ensure that this array is at least K bytes.
+
+use crate::{fuzz::*, fuzzer::*, runtime::*, CrashSig};
+
+impl Fuzzer {
+    /// Infer array's implicit length by padding.
+    ///
+    /// `program` crashed at statement `fail_at` with signature `crash_sig`. The
+    /// array reported by the crash's canary info is padded to increasing sizes;
+    /// the first size after which the overflow no longer shows up (also after
+    /// enlarging index/length numbers and 256 rounds of random input mutation)
+    /// is recorded as an `ArrayLength` constraint on the call argument.
+    ///
+    /// Returns `Ok(None)` when the array cannot be located or no padding helps.
+    // NOTE(review): the generic arguments of the return type were lost in the dump;
+    // `Option<ConstraintSig>` is assumed from the sibling `infer_*` functions and
+    // must match what `add_function_constraint` returns — confirm.
+    pub fn infer_array_length(
+        &mut self,
+        program: &FuzzProgram,
+        fail_at: usize,
+        crash_sig: &CrashSig,
+    ) -> eyre::Result<Option<ConstraintSig>> {
+        let Some(canary_info) = crash_sig.get_canary_info() else {
+            return Ok(None);
+        };
+        let Some((_call_i, call_stmt, arg_pos, fields)) =
+            program.find_stmt_loc_in_all_calls(canary_info.stmt_index, fail_at)
+        else {
+            return Ok(None);
+        };
+        let stmt_index = &call_stmt.args[arg_pos];
+        crate::log_trace!(
+            "try to infer array length, array loc is arg_pos: {arg_pos} fields: {fields:?}"
+        );
+        let Some(loc) = fields.to_loc_for_refining(program, stmt_index, &LocFields::default())
+        else {
+            crate::log!(trace, "can't find loc!");
+            return Ok(None);
+        };
+        let mut paddings = vec![4, 64, 128, 256, 512, 1024, 4096];
+        // if it is not an array of primitive types
+        if let FuzzStmt::Load(load) = &program.stmts[canary_info.stmt_index].stmt {
+            if let Some(ch) = load.state.children.first() {
+                if !ch.children.is_empty() {
+                    paddings = vec![4, 8, 16, 32];
+                }
+            }
+        }
+        for pad_size in paddings {
+            // the array is already at least this long
+            if canary_info.len >= pad_size {
+                continue;
+            }
+            let rng_state = rng::gen_rng_state();
+            let pad_op = MutateOperator::new(
+                loc.clone(),
+                MutateOperation::VecPad {
+                    len: pad_size,
+                    zero: false,
+                    rng_state,
+                },
+            );
+            let _status = self.execute_with_op(program, &pad_op, false)?;
+            if crash_sig.is_overflow_at_same_rip_or_canary() {
+                continue;
+            }
+            // we should verify it!
+            // check all numbers, try to set them to big values
+            let mut refined_p = program.clone();
+            refined_p.mutate_program_by_op(&pad_op)?;
+            for is in &program.stmts {
+                if is.index.get() == fail_at {
+                    break;
+                }
+                let FuzzStmt::Load(load) = &is.stmt else {
+                    continue;
+                };
+                let num_fields = load
+                    .state
+                    .find_fields_with(|s| crate::utils::is_index_or_length_number(s.ty), true);
+                for f in num_fields {
+                    // `f` is a field path inside statement `is`, so the location has to
+                    // be rooted at `is.index`; it used to be rooted at the array
+                    // argument (`stmt_index`), which addressed the wrong statement.
+                    let num_loc = Location::new(is.index.use_index(), f.clone());
+                    let num_op = MutateOperator::new(
+                        num_loc,
+                        MutateOperation::IntSet { val: 10000.into() },
+                    );
+                    let _status = self.execute_with_op(&refined_p, &num_op, false)?;
+                    if crash_sig.is_overflow_at_same_rip_or_canary() {
+                        return Ok(None);
+                    }
+                }
+            }
+            // random mutate other data..
+            for _ in 0..256 {
+                let mut rand_p = refined_p.clone();
+                rand_p.mutate_program_inputs()?;
+                let _status = self.executor.execute_program(&rand_p)?;
+                if crash_sig.is_overflow_at_same_rip_or_canary() {
+                    return Ok(None);
+                }
+            }
+            let f_name = call_stmt.fg.f_name;
+            let constraint = Constraint::ArrayLength {
+                len: pad_size.into(),
+            };
+            // padding may luckily avoid the crash but not solve the constraints!
+            let new_c = add_function_constraint(
+                f_name,
+                arg_pos,
+                fields,
+                constraint,
+                &format!(
+                    "array should have enough length from crash {} #bug",
+                    program.id
+                ),
+            )?;
+            return Ok(new_c);
+        }
+        Ok(None)
+    }
+}
diff --git a/hopper-core/src/fuzz/infer/cast.rs b/hopper-core/src/fuzz/infer/cast.rs
new file mode 100644
index 0000000..5da236d
--- /dev/null
+++ b/hopper-core/src/fuzz/infer/cast.rs
@@ -0,0 +1,103 @@
+//! Infer cast constraint.
+//!
+//! Due to missing layout information of the void type, developers have to generate objects with
+//! concrete types and cast their references to the void pointers.
+//!
+//! We assume that their pointees do not contain any pointer.
+//! Therefore, they can be cast from a large enough random byte array, and we add CAST constraints
+//!
that treat them as char* type.
+//!
+//! For other illegal accesses, if there are CAST constraints for the arguments, we try to mutate
+//! the byte array pointed to by the arguments. If the illegal address varies with the mutated bytes,
+//! the void pointer may be interpreted as a structure containing pointers.
+//! Thus, we remove the CAST constraints with char* type.
+
+use crate::{fuzz::*, fuzzer::*, runtime::*, CrashSig};
+
+impl Fuzzer {
+    /// Infer if the void type is cast to a concrete type that contains pointers.
+    /// If so, we can't use a huge byte array to interpret the void object.
+    ///
+    /// For every void-cast constraint of the call at `fail_at`, two-byte words of
+    /// the pointed-to buffer are zeroed one index at a time and the program is
+    /// re-executed. When a crash happens at the same `rip` as `crash_sig` but at a
+    /// different address, the constraint is removed and `Ok(true)` is returned.
+    pub fn infer_void_cast(
+        &mut self,
+        program: &FuzzProgram,
+        fail_at: usize,
+        crash_sig: &CrashSig,
+    ) -> eyre::Result<bool> {
+        let Some(call) = program.get_call_stmt(fail_at) else {
+            return Ok(false);
+        };
+        let f_name = call.fg.f_name;
+
+        // find all existing casts
+        let mut casts = vec![];
+        let prefix = LocFields::default();
+        inspect_function_constraint_with(f_name, |fc| {
+            for (arg_pos, tc) in fc.arg_constraints.iter().enumerate() {
+                for c in tc.list.iter() {
+                    if c.constraint.is_void_cast() {
+                        casts.push((arg_pos, c.key.clone()));
+                    }
+                }
+            }
+            Ok(())
+        })?;
+
+        for (arg_pos, fields) in casts {
+            crate::log_trace!("try to remove void cast: `{f_name}`, {arg_pos}, {fields:?}");
+            let stmt_index = &call.args[arg_pos];
+            // the constraint key addresses the pointer; its pointee is one level deeper
+            let mut ptr_fields = fields.clone();
+            ptr_fields.push(FieldKey::Pointer);
+            let loc = ptr_fields.to_loc_for_refining(program, stmt_index, &prefix);
+            if loc.is_none() {
+                crate::log!(trace, "can't find loc: {loc:?}");
+                continue;
+            }
+            let loc = loc.unwrap();
+            let FuzzStmt::Load(load) = &program.stmts[loc.get_index()?.get()].stmt else {
+                continue;
+            };
+            let len = load.value.get_length();
+            crate::log!(trace, "load : {}, length: {len}", load.value.serialize()?);
+
+            for i in 0..len {
+                // if the pointer address > maximal address in x86, si_addr becomes 0.
+                // so we set word to zero instead of single byte.
+                let mut key = Location::null();
+                key.fields.push(FieldKey::Index(i));
+                let set_word_zero = MutateOperation::BufHavoc {
+                    use_bytes: 2,
+                    swap: false,
+                    op: Box::new(MutateOperator::new(
+                        key,
+                        MutateOperation::IntSet { val: 0.into() },
+                    )),
+                };
+                let op = MutateOperator::new(loc.clone(), set_word_zero);
+                let _status = self.execute_with_op(program, &op, false)?;
+                // no crash signature after this run: nothing to compare
+                let Some(cur_crash_sig) = crate::get_crash_sig(None) else {
+                    continue;
+                };
+                crate::log!(trace, "sig: {crash_sig:?} vs {cur_crash_sig:?}");
+                if cur_crash_sig.rip == crash_sig.rip && cur_crash_sig.addr != crash_sig.addr {
+                    // address changes, remove void constraint
+                    log_new_constraint(&format!(
+                        "remove void constraint for `{f_name}`, {arg_pos}, {fields:?}"
+                    ));
+                    inspect_function_constraint_mut_with(f_name, |fc| {
+                        let tc = &mut fc.arg_constraints[arg_pos];
+                        tc.list
+                            .retain(|c| !(c.key == fields && c.constraint.is_void_cast()));
+                        Ok(())
+                    })?;
+                    return Ok(true);
+                }
+            }
+        }
+        Ok(false)
+    }
+}
diff --git a/hopper-core/src/fuzz/infer/context.rs b/hopper-core/src/fuzz/infer/context.rs
new file mode 100644
index 0000000..91c3304
--- /dev/null
+++ b/hopper-core/src/fuzz/infer/context.rs
@@ -0,0 +1,75 @@
+//! Infer context that must not be used,
+//! e.g. API A must not be called before API B.
+
+use eyre::ContextCompat;
+
+use crate::{fuzz::*, fuzzer::*, runtime::*};
+
+impl Fuzzer {
+    /// Infer that the crash is due to certain relative/implicit calls.
+    ///
+    /// Walking backwards from the failing statement, each implicit call is deleted
+    /// in a copy of `program` and the copy is executed. The first deletion that
+    /// makes the execution normal yields a `Forbidden` call context for the
+    /// failing function, which is returned as a `ConstraintSig`.
+    ///
+    /// Returns `Ok(None)` when the failing statement is not a call or when no
+    /// deletion helps; fails when the program has no fail statement index.
+    pub fn infer_broken_contexts(
+        &mut self,
+        program: &FuzzProgram,
+    ) -> eyre::Result<Option<ConstraintSig>> {
+        let fail_at = program
+            .get_fail_stmt_index()
+            .context("fail to get fail index")?
+            .get();
+        let Some(target_call) = program.get_call_stmt(fail_at) else {
+            return Ok(None);
+        };
+        crate::log!(trace, "start infer broken contexts");
+        for index in (0..fail_at).rev() {
+            let is = &program.stmts[index];
+            let FuzzStmt::Call(call) = &is.stmt else {
+                continue;
+            };
+            // we only consider implicit contexts
+            if !call.is_implicit() {
+                // call.is_relative()
+                continue;
+            }
+            let mut p = program.clone();
+            p.delete_stmt(index);
+            p.eliminate_invalidatd_contexts();
+            crate::log!(
+                trace,
+                "remove {}, program is: {}",
+                is.index.get(),
+                p.serialize()?
+            );
+            let status = self.executor.execute_program(&p)?;
+            if !status.is_normal() {
+                continue;
+            }
+            let target_f_name = target_call.fg.f_name;
+            let call_f_name = call.fg.f_name;
+            let context = CallContext {
+                f_name: call_f_name.to_string(),
+                related_arg_pos: target_call.has_overlop_arg(program, call),
+                kind: ContextKind::Forbidden,
+            };
+            crate::log!(trace, "function {call_f_name} is likely to broken context");
+            // the context is stored only when `count_func_infer` agrees
+            if self.observer.op_stat.count_func_infer(call_f_name, program) {
+                crate::inspect_function_constraint_mut_with(target_f_name, |fc| {
+                    fc.contexts.push(context.clone());
+                    log_new_constraint(&format!(
+                        "add context on function `{target_f_name}`: {context:?}"
+                    ));
+                    Ok(())
+                })?;
+            }
+            // just for hints
+            let sig = ConstraintSig {
+                f_name: target_f_name.to_string(),
+                arg_pos: 0,
+                fields: LocFields::default(),
+                constraint: Constraint::Context { context },
+            };
+            return Ok(Some(sig));
+        }
+        Ok(None)
+    }
+}
diff --git a/hopper-core/src/fuzz/infer/file.rs b/hopper-core/src/fuzz/infer/file.rs
new file mode 100644
index 0000000..d32f59c
--- /dev/null
+++ b/hopper-core/src/fuzz/infer/file.rs
@@ -0,0 +1,144 @@
+//! Infer file constraint.
+//!
+//! When an API function reads from or writes to a file, the file name (and FD) provided as an argument must be valid.
+//!
If the file name is randomly generated, the API call may terminate early, or it could mess up the disk if +//! used as an output stream. +//! +//! When new paths are explored by inputs, we check to see if any file open function (e.g., \verb|fopen|) +//! has been triggered, and compares the file name with the arguments used to invoke the API. +//! If there is a match, a FILE constraint is created for the corresponding argument. +//! +//! We also infer if an integer is a FD. + +use std::collections::HashMap; + +use eyre::ContextCompat; + +use crate::{config, fuzz::*, fuzzer::*, runtime::*, utils, ReviewResult}; + +impl ReviewResult { + /// Infer file name by review's memory records + pub fn infer_file_name(&self, program: &FuzzProgram) -> eyre::Result { + let mut found_file = false; + for rec in &self.mem_records { + let ty = crate::get_mem_type(rec.ty); + if ty != crate::MemType::Open { + continue; + } + let Some(stmt_i) = rec.loc.stmt_index.as_ref() else { + continue; + }; + crate::log!(trace, "find file record: {:?}", rec); + for call_i in (0..=rec.call_index).rev() { + let FuzzStmt::Call(call_stmt) = &program.stmts[call_i].stmt else { + continue; + }; + let Some((arg_pos, mut prefix)) = + program.find_stmt_loc_for_call(stmt_i.get(), &call_stmt.args) + else { + continue; + }; + crate::log_trace!( + "find target stmt in call: {call_i}, arg_pos: {arg_pos}, field: {prefix:?}" + ); + if !rec.loc.fields.is_empty() { + prefix.list.extend(rec.loc.fields.list.clone()); + } + if !prefix.strip_pointer_suffix() && !prefix.list.is_empty() { + crate::log!( + warn, + "file is not a pointer, stmt: {stmt_i}, loc: {}", + rec.loc.serialize()? 
+ ); + continue; + } + found_file = true; + let call_name = call_stmt.fg.f_name; + crate::add_function_constraint( + call_name, + arg_pos, + prefix, + crate::Constraint::File { + read: rec.mode == 1, + is_fd: false, + }, + &format!( + "found {call_name}'s arg is file by review in seed {}", + program.id + ), + )?; + break; + } + } + Ok(found_file) + } +} + +impl Fuzzer { + /// Infer an integer is a FD by mem records merely + pub fn infer_file_fd(&mut self, program: &FuzzProgram) -> eyre::Result> { + let fd_list = self.observer.feedback.instrs.get_fd_list(); + let mut used_reserved_fd = HashMap::new(); + for (fd, fd_read) in fd_list { + if is_valid_fd(fd) { + continue; + } + used_reserved_fd + .entry(fd) + .and_modify(|read| *read |= fd_read) + .or_insert(fd_read); + } + if used_reserved_fd.is_empty() { + return Ok(None); + } + let max = program.get_target_index().context("has target index")?; + for is in &program.stmts { + let stmt_index = &is.index; + let FuzzStmt::Load(load) = &is.stmt else { + continue; + }; + let Some((_call_i, call_stmt, arg_pos, prefix)) = + program.find_stmt_loc_in_all_calls(stmt_index.get(), max) + else { + continue; + }; + let num_fields = load + .state + .find_fields_with(|s| utils::is_index_or_length_number(s.ty), true); + for f in num_fields { + let loc: Location = Location::new(stmt_index.use_index(), f.clone()); + let val = program.find_number_by_loc(loc.clone())? 
as i32; + if is_valid_fd(val) { + continue; + } + if let Some(read) = used_reserved_fd.get(&val) { + let fd2 = 9123; // a magic number + let op = MutateOperator::new(loc, MutateOperation::IntSet { val: fd2.into() }); + let _status = self.execute_with_op(program, &op, false)?; + let (is_fd, _is_fd_read) = self.observer.contain_fd(fd2); + if is_fd { + let full_f = prefix.with_suffix(f); + return add_function_constraint( + call_stmt.fg.f_name, + arg_pos, + full_f, + Constraint::File { + read: *read, + is_fd: true, + }, + &format!("infer file fd by magic number from seed: {}", program.id), + ); + } + } + } + } + Ok(None) + } +} + +/// Check if it is a valid fd +pub fn is_valid_fd(fd: i32) -> bool { + (-1..config::RESERVED_FD_MIN).contains(&fd) + || (config::RESERVED_FD_MAX..config::RESERVED_FD_HUGE).contains(&fd) + // (fd >= -1 && fd < RESERVED_FD_MIN) || (fd > RESERVED_FD_MAX && fd < RESERVED_FD_HUGE) +} diff --git a/hopper-core/src/fuzz/infer/length.rs b/hopper-core/src/fuzz/infer/length.rs new file mode 100644 index 0000000..bf2fd53 --- /dev/null +++ b/hopper-core/src/fuzz/infer/length.rs @@ -0,0 +1,727 @@ +//! Infer length related constraints (including Set and Range). +//! +//! APIs that use a number in the arguments to designate the boundary or index of an array pointer may suffer from +//! overflow errors if the number is incorrect. +//! +//! If the crash is caused by accessing a canary appended right after an array ($si\_addr$ is in the range of canary), +//! we try to figure out whether there is a length or index of a variable-sized array in the arguments. +//! Firstly, we locate which array has been overflowed. We denote the array's length as $N$. For each numerical value +//! in the call's arguments, we attempt to set it as $N-1$, $N$, and $N+1$, respectively. +//! If both $N$ and $N+1$ lead to a crash by accessing the canary, we add a RANGE constraint to set the value within +//! a range of $[0, N)$. +//! 
If only $N+1$ makes a crash, an EQUAL constraint is added to set the value to be the same as the array's length. + +use std::cmp::Ordering; + +use crate::{fuzz::*, fuzzer::*, log, log_trace, runtime::*, utils, CrashSig}; + +impl Fuzzer { + /// Try to infer each number in pilot phase + pub fn pilot_infer_number_length_and_resource( + &mut self, + program: &FuzzProgram, + stmt_index: &StmtIndex, + load_state: &ObjectState, + call_stmt: &CallStmt, + arg_pos: usize, + prefix: &LocFields, + ) -> eyre::Result<()> { + // numbers include values whose types are: u16/i16/u32/i32/u64/i64 + let num_fields = + load_state.find_fields_with(|s| utils::is_index_or_length_number(s.ty), false); + log!( + trace, + "start infer numbers for stmt {:?} at function `{}`, #num_fields: {}", + stmt_index.get(), + call_stmt.fg.f_name, + num_fields.len() + ); + for f in num_fields { + // Infer pointer related constraints, e.g index of array, or length of array + if self + .infer_number_length(program, call_stmt, stmt_index, arg_pos, prefix, &f)? 
+ .is_some() + { + continue; + } + + // Infer resource/loop-related to avoid huge numbers + let _ = self + .infer_resource_exhaustion(program, call_stmt, stmt_index, arg_pos, prefix, &f)?; + } + Ok(()) + } + + // Make huge number to be smaller to avoid overflowing to other canary + pub fn adjust_numbers_for_inference( + &mut self, + program: &mut FuzzProgram, + fail_at: usize, + crash_sig: &mut CrashSig, + ) -> eyre::Result<()> { + for stmt_i in 0..program.stmts.len() { + if stmt_i == fail_at { + break; + } + let FuzzStmt::Load(load) = &program.stmts[stmt_i].stmt else { + continue; + }; + let num_fields = load + .state + .find_fields_with(|s| utils::is_index_or_length_number(s.ty), false); + for f in num_fields { + let num_loc = Location::new(program.stmts[stmt_i].index.use_index(), f.clone()); + let val = program.find_number_by_loc(num_loc.clone())?; + if val < 4096 { + continue; + } + let op = MutateOperator::new(num_loc, MutateOperation::IntSet { val: 4096.into() }); + let _status = self.execute_with_op(program, &op, false)?; + if crash_sig.is_overflow_at_same_rip_or_canary() { + program.mutate_program_by_op(&op)?; + if let Some(new_crash_sig) = crate::get_crash_sig(Some(program)) { + *crash_sig = new_crash_sig; + } + } + } + } + Ok(()) + } + + /// Infer number length constraints in an argument once overflow crash happens + pub fn crash_infer_number_length( + &mut self, + program: &FuzzProgram, + fail_at: usize, + crash_sig: &CrashSig, + ) -> eyre::Result> { + let mut p: FuzzProgram = program.clone(); + let targets = find_infer_targets(self, &mut p, fail_at, crash_sig)?; + for num_loc in targets { + let stmt_index = num_loc.stmt_index.as_ref().unwrap(); + let stmt_i = stmt_index.get(); + let Some((call_i, call_stmt, arg_pos, prefix)) = + p.find_stmt_loc_in_all_calls(stmt_i, fail_at) + else { + continue; + }; + let f = &num_loc.fields; + log_trace!("crash infer number length at load stmt: {stmt_i} for call {call_i}, location {arg_pos} - {f:?}"); + let found = 
self.infer_number_length(&p, call_stmt, stmt_index, arg_pos, &prefix, f)?; + if found.is_some() { + return Ok(found); + } + } + log_trace!("fail to infer any number length"); + Ok(None) + } + + /// Infer number's length related constraints, e.g index of array, or length of array + pub fn infer_number_length( + &mut self, + program: &FuzzProgram, + call_stmt: &CallStmt, + stmt_index: &StmtIndex, + arg_pos: usize, + prefix: &LocFields, + fields: &LocFields, + ) -> eyre::Result> { + let f_name = call_stmt.fg.f_name; + let num_loc = Location::new(stmt_index.use_index(), fields.clone()); + let mut op: MutateOperator = + MutateOperator::new(num_loc, MutateOperation::IntSet { val: 0.into() }); + + for sample in get_sample_list(program) { + // try to find a number that will overflow something + log!(trace, "try sample: {sample}"); + op.op = MutateOperation::IntSet { val: sample.into() }; + let _status = self.execute_with_op(program, &op, false)?; + let Some(crash_sig) = crate::get_crash_sig(Some(program)) else { + continue; + }; + log_trace!("program crash at sample: {sample}, segv_addr: {crash_sig:?}"); + + // if crash in accessing canary, + // try to find the corresponding pointer that the canary belongs to. + let Some(canary_info) = crash_sig.get_canary_info() else { + break; + }; + + let mut len = canary_info.len; + let mut is_len = true; + let mut p = program.clone(); + // try to find the length entry for the call + let Some((len_entry_arg_pos, mut len_entry_fields)) = + p.find_stmt_loc_for_call(canary_info.stmt_index, &call_stmt.args) + else { + log_trace!("can't find length entry, the canary is not in call's arguments"); + break; + }; + // remove _pointer_ suffix, since length(xx) assume xx is a pointer instead of vector + len_entry_fields.strip_pointer_suffix(); + log_trace!("len_entry: {len_entry_arg_pos} - {len_entry_fields:?}"); + // avoid the array is too small. 
+ if sample < len { + continue; + } + if sample > len + 1 { + // resize array's length to sample + // there are cases the sample is not len or len + 1. + // the program checks the length before use it, e.g if (K > 10) arr[K] + // so K can't be too samll, or K can be large than LEN + 1 + let resize_p = resize_array_length(&p, canary_info.stmt_index, len, sample)?; + op.op = MutateOperation::IntSet { + val: (sample + 1).into(), + }; + let _ = self.execute_with_op(&resize_p, &op, false)?; + if crash_sig.is_overflow_at_same_rip_or_canary() { + log!(trace, "resize array length to: {}", sample); + len = sample; + p = resize_p; + } + } + + // 1. run with boundary that must not overflow + op.op = MutateOperation::IntSet { + val: (len - 1).into(), + }; + let status1 = self.execute_with_op(&p, &op, false)?; + log!(trace, "N-1 status: {status1:?}"); + let mut coef = 1; + if crash_sig.is_overflow_at_same_rip_or_canary() { + // since we have checked the current val is 0 and 1 (it success), + // the number X may determine the path that reach the crash, e.g if X > N { a[Y] }, + // where N >= 1 + // or X is one of the factor of a length. e.g len = X * Y or len = c * X (c is a constant). + log!(trace, "crash at N-1, try to infer COEF"); + if let Some((new_p, infered_coef, new_len)) = + infer_coef(self, &p, &mut op, &crash_sig, canary_info, len)? + { + p = new_p; + coef = infered_coef; + len = new_len; + } else { + log_trace!("skip this round!"); + break; + } + } + + // 2. run with boundary index that may overflow (used as index) + // let bound = num::Integer::div_ceil(&len, &coef); + let bound = len / coef; + op.op = MutateOperation::IntSet { val: bound.into() }; + let status2 = self.execute_with_op(&p, &op, false)?; + log!(trace, "N status: {status2:?}"); + if crash_sig.is_overflow_at_same_rip_or_canary() { + log!(trace, "it is used as index instead of length"); + is_len = false; + } + + // 3. 
run with boundary index that must overflow (used as length) + op.op = MutateOperation::IntSet { + val: (bound + 1).into(), + }; + let status3 = self.execute_with_op(&p, &op, false)?; + log!(trace, "N+1 status: {status3:?}"); + if !crash_sig.is_overflow_at_same_rip_or_canary() { + // may appear in some special case, e.g. adding length checking before use it. + // e.g. 1 page size for decode data_sz in libaom + // otherwise, increase the length. + if !veirfy_overflow_with_offsets( + self, + &p, + &mut op, + &crash_sig, + canary_info, + bound, + len, + coef, + &mut is_len, + )? { + crate::log!(trace, "can't not find any crash"); + // if it is not strictly related to the array, try next sample. + if coef > 1 { + crate::log!(trace, "continue sample"); + continue; + } + break; + } + } + + // add length factor as constraint + if coef > 1 { + let c = Constraint::LengthFactor { coef: coef as u64 }; + let _factor_constraint = add_function_constraint( + f_name, + len_entry_arg_pos, + len_entry_fields.clone(), + c, + &format!("infer number at {}", p.id), + )?; + } + + let mut full_f = prefix.with_suffix(fields.clone()); + // set &.$0 -> & + full_f.strip_index_suffix(); + let len_entry = IrEntry::Length { + arg_pos: Some(len_entry_arg_pos), + fields: len_entry_fields.clone(), + is_factor: coef > 1, + }; + let c = if is_len { + // if the number is both the length of arrary A and B, + // then A and B must have the same length. + if let Some(sig) = add_constraint_to_array( + &p, + f_name, + arg_pos, + &full_f, + len_entry_arg_pos, + &len_entry_fields, + )? 
{ + return Ok(Some(sig)); + } + Constraint::should_be(len_entry) + } else { + Constraint::less_than(len_entry) + }; + let new_constraint = add_function_constraint( + f_name, + arg_pos, + full_f, + c.clone(), + &format!("infer number length at {}", p.id), + )?; + infer_factors(self, &p, &crash_sig, f_name, &c)?; + return Ok(new_constraint); + } + Ok(None) + } +} + +/// Get existing length's location +fn find_existing_length_constraint_locations( + program: &FuzzProgram, + fail_at: usize, + crash_sig: &CrashSig, +) -> eyre::Result> { + let Some(canary_info) = crash_sig.get_canary_info() else { + return Ok(vec![]); + }; + let Some((_call_i, call_stmt, arr_arg_pos, mut arr_fields)) = + program.find_stmt_loc_in_all_calls(canary_info.stmt_index, fail_at) + else { + return Ok(vec![]); + }; + + let f_name = call_stmt.fg.f_name; + log!(trace, "try to find existing length constraint in {f_name}"); + arr_fields.strip_pointer_suffix(); + log!(trace, "array loc: {arr_arg_pos} - {arr_fields:?}"); + // if there are some length or range constraints on the array + crate::inspect_function_constraint_with(f_name, |fc| { + let mut locs = vec![]; + for (arg_i, cs) in fc.arg_constraints.iter().enumerate() { + for c in cs.list.iter() { + let Some((arg_pos, fields)) = c.constraint.get_length_loc() else { + continue; + }; + // If exsting length constraints can not satisfy the usage of the array, + // there are some other missing constraints, we try to set c.key to 1. 
+ if arg_pos == arr_arg_pos && fields == &arr_fields { + let arg_stmt = &call_stmt.args[arg_i]; + if let Some(num_loc) = + c.key + .to_loc_for_refining(program, arg_stmt, &LocFields::default()) + { + log!(trace, "existing length constraint at: {num_loc:?}"); + locs.push(num_loc); + } + } + } + } + Ok(locs) + }) +} + +/// Find targets for infering length or index +fn find_infer_targets( + fuzzer: &mut Fuzzer, + program: &mut FuzzProgram, + fail_at: usize, + crash_sig: &CrashSig, +) -> eyre::Result> { + let existings = find_existing_length_constraint_locations(program, fail_at, crash_sig)?; + // Make exsting length constraints to be 1 to avoid overflow + // ATTN: but it can be normal after our refining + for num_loc in &existings { + let op = MutateOperator::new(num_loc.clone(), MutateOperation::IntSet { val: 1.into() }); + program.mutate_program_by_op(&op)?; + } + let mut targets = vec![]; + for stmt_i in 0..program.stmts.len() { + if stmt_i == fail_at { + break; + } + let FuzzStmt::Load(load) = &program.stmts[stmt_i].stmt else { + continue; + }; + let num_fields: Vec = load + .state + .find_fields_with(|s| utils::is_index_or_length_number(s.ty), false); + for f in num_fields { + // if the number fails at the same place when it is zero, then it is not length or index, + // special case: SIGFPE + let stmt_index = program.stmts[stmt_i].index.use_index(); + let num_loc = Location::new(stmt_index, f.clone()); + // skip those existings, and put them at the end of target list. + if existings.contains(&num_loc) { + continue; + } + let mut op = + MutateOperator::new(num_loc.clone(), MutateOperation::IntSet { val: 0.into() }); + let _status = fuzzer.execute_with_op(program, &op, false)?; + if crash_sig.is_overflow_at_same_rip_or_canary() { + // log!(trace, "add assumption for length inference: {op:?}"); + continue; + } + log!(trace, "add loc into tragets"); + targets.push(num_loc.clone()); + + // magnify the number values. 
+ op.op = MutateOperation::IntSet { val: 1.into() }; + let _status = fuzzer.execute_with_op(program, &op, false)?; + // if the number fails at the same place when it is one, + // indicates there are some other constraints producing the clash. + if crash_sig.is_overflow_at_same_rip_or_canary() { + log!(trace, "set number to be 1 for length inference: {op:?}"); + // we assume the value is 1 in this round of inference + program.mutate_program_by_op(&op)?; + continue; + } + // if it is a combined length, and now it is too small, + // if the number fails at the same place when it is a big value . + // we set it a big value (> length). + let mut len = 512; + if let Some(canary_info) = crash_sig.get_canary_info() { + if canary_info.len > 512 - 64 { + len = canary_info.len + 64; + } + } + let val = program.find_number_by_loc(num_loc.clone())?; + if val < len as u64 { + op.op = MutateOperation::IntSet { val: len.into() }; + let _status = fuzzer.execute_with_op(program, &op, false)?; + if crash_sig.is_overflow_at_same_rip_or_canary() { + log!( + trace, + "set number to be >len for length inference: {}", + op.serialize().unwrap() + ); + program.mutate_program_by_op(&op)?; + } + } + } + } + // append existing + targets.extend(existings); + + Ok(targets) +} + +/// Get a list of numbers for sampling overflow +fn get_sample_list(program: &FuzzProgram) -> Vec { + let mut samples = vec![17, 33, 65, 129, 513, 4096]; + for is in &program.stmts { + if let FuzzStmt::Load(load) = &is.stmt { + let len = load.value.get_length(); + if len <= 1 { + continue; + } + samples.push(len + 1); + samples.push(len + 32); + } + } + samples.sort(); + samples.dedup(); + samples +} + +/// Infer coef of the length/index +fn infer_coef( + fuzzer: &mut Fuzzer, + program: &FuzzProgram, + op: &mut MutateOperator, + crash_sig: &CrashSig, + canary_info: &CanaryInfo, + len: usize, +) -> eyre::Result> { + // let mut last_bound = 0; + for k in 2..=16 { + let assumed_bound = len / k; + log!(trace, "k: {k}, 
assumed_bound: {assumed_bound}"); + if assumed_bound < 2 {// assumed_bound == last_bound + log_trace!("bound is too samll, skip"); + break; + } + // last_bound = assumed_bound; + op.op = MutateOperation::IntSet { + val: (assumed_bound - 1).into(), + }; + let _status = fuzzer.execute_with_op(program, op, false)?; + // N-1 should not overflow! + if crash_sig.is_overflow_at_same_rip_or_canary() { + continue; + } + // `k` does not overflow now + let assumed_coef = k; + log!(trace, "assumed_coef: {assumed_coef}"); + // verify the coef + // first we make the array's length to be multiple of coef, and check it + let rem = len % assumed_coef; + let (p, len) = if rem > 0 { + let resize_p = resize_array_length(program, canary_info.stmt_index, len, len - rem)?; + log_trace!("resize program to be multile of coef: {assumed_coef}, rem: {rem}"); + let _status = fuzzer.execute_with_op(&resize_p, op, false)?; + if crash_sig.is_overflow_at_same_rip_or_canary() { + log_trace!("the COEF for length is wrong."); + continue; + } + (resize_p, len - rem) + } else { + (program.clone(), len) + }; + let mut verified = true; + for i in 2..5 { + // make the array's length to be `len * i`, and check `i * assumed_len -1 ` + let resize_p = resize_array_length(&p, canary_info.stmt_index, len, i * len)?; + log!(trace, "assume resize length to {}", len * i); + op.op = MutateOperation::IntSet { + val: (i * assumed_bound - 1).into(), + }; + let _status = fuzzer.execute_with_op(&resize_p, op, false)?; + if crash_sig.is_overflow_at_same_rip_or_canary() { + log!(trace, "assumed_len fail, the COEF for length is wrong."); + verified = false; + break; + } + } + if verified && assumed_coef > 1 { + return Ok(Some((p, assumed_coef, len))); + } + } + Ok(None) +} + +fn veirfy_overflow_with_offsets( + fuzzer: &mut Fuzzer, + program: &FuzzProgram, + op: &mut MutateOperator, + crash_sig: &CrashSig, + canary_info: &CanaryInfo, + bound: usize, + len: usize, + coef: usize, + is_len: &mut bool, +) -> eyre::Result { + 
for offset in [4, 16, 64, 256, 4096] { + log!(trace, "try to add offset: {offset}"); + let new_offset = bound + offset; + op.op = MutateOperation::IntSet { + val: new_offset.into(), + }; + let _ = fuzzer.execute_with_op(program, op, false)?; + if crash_sig.is_overflow_at_same_rip_or_canary() { + // doule checking + // we try to set buffer to be (len + offset) + // so if the number is used for length, it won't crash, + // if it is index, it will crash again. + log_trace!("overflow at {new_offset} ({offset}), checking.."); + let addend = offset * coef; + let resize_len = len + addend; + log_trace!("resize buffer to {resize_len} by adding {addend} elements"); + let resize_p = resize_array_length(program, canary_info.stmt_index, len, resize_len)?; + let _ = fuzzer.execute_with_op(&resize_p, op, false)?; + // if is still crash + // the number may be used as index + if crash_sig.is_overflow_at_same_rip_or_canary() { + log!(trace, "the program crash, the number has range contraint"); + // check if it is index or not again + op.op = MutateOperation::IntSet { + val: (new_offset - 1).into(), + }; + // it should not crash + let _ = fuzzer.execute_with_op(&resize_p, op, false)?; + if crash_sig.is_overflow_at_same_rip_or_canary() { + log!(trace, "it should not overflow!"); + break; + } + *is_len = false; + } + // check the number again to ensure it is related to the array. + // if number is set as len + next_offset(= 2 * offset), + // but it is not overflow the array, they must be irrelevant. + let next_offset = 2 * offset; + op.op = MutateOperation::IntSet { + val: (bound + next_offset).into(), + }; + let _ = fuzzer.execute_with_op(&resize_p, op, false)?; + // since some specifc checking for length, we assume it is ok if coef is 1. 
+ if coef > 1 && !crash_sig.is_overflow_at_same_rip_or_canary() { + log!(trace, "it should overflow!"); + break; + } + return Ok(true); + } + } + Ok(false) +} + +/// Check and add constraint to array if two arrays should have the same length +fn add_constraint_to_array( + program: &FuzzProgram, + f_name: &str, + arg_pos: usize, + full_f: &LocFields, + len_entry_arg_pos: usize, + len_entry_fields: &LocFields, +) -> eyre::Result> { + let Some((target_arg_pos, target_fields, existing_len_entry)) = + inspect_function_constraint_mut_with(f_name, |fc| { + for tc in fc.arg_constraints[arg_pos].list.iter_mut() { + if &tc.key != full_f { + continue; + } + if let Constraint::SetVal { + val: + IrEntry::Length { + arg_pos: Some(existing_arg_pos), + fields: existing_fields, + is_factor: _, + }, + } = &mut tc.constraint + { + if *existing_arg_pos != len_entry_arg_pos || existing_fields != len_entry_fields + { + let mut target_arg_pos = len_entry_arg_pos; + let mut target_fields = len_entry_fields.clone(); + // make the order always the same + if *existing_arg_pos > target_arg_pos { + std::mem::swap(existing_arg_pos, &mut target_arg_pos); + std::mem::swap(existing_fields, &mut target_fields); + } + return Ok(Some(( + target_arg_pos, + target_fields, + IrEntry::Length { + arg_pos: Some(*existing_arg_pos), + fields: existing_fields.clone(), + is_factor: false, + }, + ))); + } + } + } + Ok(None) + })? + else { + return Ok(None); + }; + add_function_constraint( + f_name, + target_arg_pos, + target_fields, + Constraint::ArrayLength { + len: existing_len_entry, + }, + &format!("infer arrary length at {}", program.id), + ) +} + +/// Resize program's `stmt_index` array fron `len` to `new_len`. 
+fn resize_array_length( + program: &FuzzProgram, + stmt_index: usize, + len: usize, + new_len: usize, +) -> eyre::Result { + log_trace!("try resize array length {stmt_index} from {len} to {new_len}"); + let mut resize_p: FuzzProgram = program.clone(); + let op = match new_len.cmp(&len) { + Ordering::Greater => MutateOperation::VecPad { + len: new_len, + zero: true, + rng_state: super::rng::gen_rng_state(), + }, + Ordering::Less => MutateOperation::VecDel { + offset: new_len - 1, + len: len - new_len, + }, + Ordering::Equal => return Ok(resize_p), + }; + let len_loc = Location::stmt(resize_p.stmts[stmt_index].index.use_index()); + let op = MutateOperator::new(len_loc, op); + resize_p.mutate_program_by_op(&op)?; + log_trace!("resized: {resize_p}"); + Ok(resize_p) +} + +/// Infer factors if find a new constraint +fn infer_factors( + fuzzer: &mut Fuzzer, + program: &FuzzProgram, + crash_sig: &CrashSig, + f_name: &str, + constraint: &Constraint, +) -> eyre::Result<()> { + let mut refined_p = program.clone(); + refined_p.refine_program()?; + let _status = fuzzer.executor.execute_program(&refined_p)?; + if !crash_sig.is_overflow_at_same_rip_or_canary() { + log!(trace, "Do not crash at {crash_sig:?}"); + log!(trace, "program: {}", refined_p.serialize_all().unwrap()); + return Ok(()); + } + // if has multiple length constraints, check if they are factors of a length + let Some((arr_arg_pos, arr_fields)) = constraint.get_length_loc() else { + return Ok(()); + }; + log!(trace, "try to infer factors.."); + let _ = crate::inspect_function_constraint_mut_with(f_name, |fc| { + let mut factors = vec![]; + for (key_arg_pos, cs) in fc.arg_constraints.iter_mut().enumerate() { + for c in &mut cs.list { + let val = match &mut c.constraint { + Constraint::SetVal { val } => val, + Constraint::Range { min: _, max } => max, + _ => continue, + }; + if let IrEntry::Length { + arg_pos, + fields, + is_factor, + } = val + { + if arg_pos == &Some(arr_arg_pos) && fields == arr_fields { + 
factors.push((key_arg_pos, &c.key, is_factor)); + } + } + } + } + log!(trace, "found factors: {factors:?}"); + if factors.len() > 1 { + for (key_arg_pos, key_fields, is_factor) in factors { + if !*is_factor { + *is_factor = true; + let comment = format!( + "update {f_name}'s <{key_arg_pos}> {key_fields} to be a factor of <{arr_arg_pos}> {arr_fields}", + ); + crate::log!(trace, "{comment}"); + crate::log_new_constraint(&comment); + } + } + } + Ok(()) + }); + Ok(()) +} diff --git a/hopper-core/src/fuzz/infer/mod.rs b/hopper-core/src/fuzz/infer/mod.rs new file mode 100644 index 0000000..fc95233 --- /dev/null +++ b/hopper-core/src/fuzz/infer/mod.rs @@ -0,0 +1,569 @@ +//! Constraints inference learns functions and types' constraints +//! e.g. arguments should be non-null, some integers are loop-related ... +//! The constraints will be used in subsequent fuzzing. + +mod array; +mod cast; +mod context; +mod file; +mod length; +mod non_null; +mod opaque; +mod res; +mod sigfpe; + +use eyre::ContextCompat; + +use crate::{config, execute::StatusType, fuzz::*, fuzzer::*, log, runtime::*, utils}; + +impl Fuzzer { + /// Run inferences at pilot phase, it will try to sample some simplest inputs, + /// and executes them to infer some relationship as constraints. 
/// Flow: for each function from `get_ordered_func_list`, generate a
/// `pilot-det` program, infer constraints from it, verify them, and record
/// the outcome in the function's `can_succeed` flag. Functions that fail are
/// retried once after the first pass. Constraints are saved to file at the end.
///
/// Both loops stop early when `is_running()` becomes false. Any error from
/// generation, execution, inference or saving is propagated.
pub fn pilot_infer(&mut self) -> eyre::Result<()> {
    log!(info, "start pilot infer...");
    let to_infer_funcs = get_ordered_func_list();
    let num_f = to_infer_funcs.len();
    let mut suc_num = 0;
    // Functions that did not succeed in the first pass.
    let mut retry_funcs = vec![];
    for (f_i, f_name) in to_infer_funcs.into_iter().enumerate() {
        if !self.is_running() {
            break;
        }
        // Constraint inference disabled (used for evaluation):
        // mark the function as able to succeed and skip it.
        if !config::ENABLE_REFINE {
            set_function_constraint_with(f_name, |fc| fc.can_succeed = true)?;
            continue;
        }
        log!(info, "[{f_i}/{num_f}] start pilot infer `{f_name}` ..");
        crate::set_pilot_infer(true);
        let (program, status) = self.generate_pilot_det_program(f_name)?;
        // If the `pilot-det` program succeeds,
        // we try to sample based on the input and then infer its constraints.
        let can_succeed = if status.is_normal() {
            self.infer_by_review_program(&program)?;
            self.pilot_infer_func_constraints(&program)?;
            self.verify_func_constraints(f_name)?
        } else {
            log!(warn, "fail to generate successful pilot-det program!");
            log!(warn, "pilot-det program: {}", program.serialize_all()?);
            false
        };
        set_function_constraint_with(f_name, |fc| fc.can_succeed = can_succeed)?;
        // Log the outcome; on success the pilot-infer flag is cleared before
        // `pilot_generate_func` is called.
        if can_succeed {
            log!(info, "API `{f_name}` succeed after refining.");
            suc_num += 1;
            crate::set_pilot_infer(false);
            self.pilot_generate_func(f_name)?;
        } else {
            retry_funcs.push(f_name);
            log!(warn, "API `{f_name}` crashed after refining.");
        }
    }
    log!(
        info,
        "finish pilot infer, suc / all: {suc_num} / {num_f}..."
    );
    // Retry the failed functions: the functions they depend on may have been
    // inferred after them in the `to_infer_funcs` list.
    let retry_num = retry_funcs.len();
    let mut retry_suc_num = 0;
    for (f_i, f_name) in retry_funcs.into_iter().enumerate() {
        if !self.is_running() {
            break;
        }
        log!(info, "[{f_i}/{retry_num}] retry to infer failed `{f_name}`");
        crate::set_pilot_infer(true);
        let (program, status) = self.generate_pilot_det_program(f_name)?;
        if status.is_normal() {
            self.infer_by_review_program(&program)?;
            self.pilot_infer_func_constraints(&program)?;
            let can_succeed = self.verify_func_constraints(f_name)?;
            // `can_succeed` is only written on success here; a failed retry
            // keeps the `false` stored by the first pass.
            if can_succeed {
                log!(info, "API `{f_name}` succeed after retrying");
                retry_suc_num += 1;
                set_function_constraint_with(f_name, |fc| fc.can_succeed = can_succeed)?;
                crate::set_pilot_infer(false);
                self.pilot_generate_func(f_name)?;
            }
        }
    }
    if retry_num > 0 {
        log!(
            info,
            "finish pilot infer (retry), suc / all: {retry_suc_num} / {retry_num}..."
        );
    }
    save_constraints_to_file()?;
    // Always leave the pilot-infer flag cleared on normal return.
    crate::set_pilot_infer(false);
    Ok(())
}
/// Returns the generated program together with its execution status.
///
/// Steps:
/// 1. Generate and execute a program for `f_name` with the pilot-det flag set.
/// 2. If it does not run normally, try replacing each null void pointer in
///    turn via `PointerGenChar`, and keep the first variant that runs normally.
/// 3. If it still fails, run `verify_func_constraints` with the flag cleared,
///    then generate and execute a fresh pilot-det program.
///
/// NOTE(review): an early return through `?` between `set_pilot_det(true)`
/// and `set_pilot_det(false)` leaves the flag set. Confirm callers tolerate this.
fn generate_pilot_det_program(
    &mut self,
    f_name: &str,
) -> eyre::Result<(FuzzProgram, StatusType)> {
    set_pilot_det(true);
    let mut program = FuzzProgram::generate_program_for_func(f_name)?;
    log!(debug, "pilot-det program\n{}", program.serialize()?);
    let mut status = self.executor.execute_program(&program)?;
    if !status.is_normal() {
        // Try to turn the crashing program into a successful one during pilot inference.
        for is in &program.stmts {
            if let FuzzStmt::Load(load) = &is.stmt {
                // Try to set the void pointer (there may be a NON_NULL constraint).
                if let Some(ps) = &load.state.pointer {
                    if ps.pointer_location.is_null() && utils::is_void_type(ps.pointer_type) {
                        log!(debug, "try to set void pointer");
                        let op = MutateOperator::new(
                            Location::stmt(is.index.use_index()),
                            MutateOperation::PointerGenChar,
                        );
                        // Mutate a clone so a failed attempt leaves `program` untouched.
                        let mut p = program.clone();
                        p.mutate_program_by_op(&op)?;
                        log!(debug, "crafted program : {}", p.serialize_all()?);
                        let new_status = self.executor.execute_program(&p)?;
                        log!(debug, "crafted status : {:?}", new_status);
                        // Keep the first variant that runs normally and stop searching.
                        if new_status.is_normal() {
                            status = new_status;
                            program = p;
                            break;
                        }
                    }
                }
            }
        }
    }
    set_pilot_det(false);
    if !status.is_normal() {
        // Try to infer some constraints from the crashes; the result of the
        // verification itself is not used here.
        self.verify_func_constraints(f_name)?;
        set_pilot_det(true);
        program = FuzzProgram::generate_program_for_func(f_name)?;
        log!(
            debug,
            "re-generate pilot-det program\n{}",
            program.serialize()?
        );
        status = self.executor.execute_program(&program)?;
        set_pilot_det(false);
    }
    Ok((program, status))
}
Infer pointer : set the pointer to NULL + // ---------------------------------------- + // 1.1 remove init opaque + self.pilot_infer_need_init(program, stmt_index, load_state, call_stmt, arg_pos, &prefix)?; + // 1.2 set null pointer + self.pilot_infer_non_null(program, stmt_index, load_state, call_stmt, arg_pos, &prefix)?; + // ---------------------------------------------------- + // 2. Infer numbers + // ---------------------------------------------------- + self.pilot_infer_number_length_and_resource( + program, stmt_index, load_state, call_stmt, arg_pos, &prefix, + )?; + Ok(()) + } + + /// Verify if the function can be successful invoked with generated arguaments + /// with constraints or not + fn verify_func_constraints(&mut self, f_name: &str) -> eyre::Result { + let mut fail_program = None; + let mut fail_cnt = 0; + log!(info, "start verify function `{}`", f_name); + let mut i = 0; + let mut crash_infered_cnt = 0; + while i < config::ROUND_PILOT_NUM { + i += 1; + let program = FuzzProgram::generate_program_for_func(f_name)?; + crate::log_trace!("pilot check #program-{i}\n{program}"); + if program.stmts.len() > config::MAX_STMTS_LEN || is_incomplete_gen() { + continue; + } + let status = self.executor.execute_program(&program)?; + if !status.is_normal() { + let mut crash_program = program.clone(); + fail_program = Some(program); + if crash_infered_cnt < 8 { + crash_infered_cnt += 1; + let fail_at = self.observer.feedback.last_stmt_index(); + log!(trace, "crash fail at: {fail_at}"); + if let Some(call) = crash_program.get_call_stmt_mut(fail_at) { + call.failure = true; + let mut infered = vec![]; + if status.is_crash() { + infered = self.crash_infer(&crash_program)? + } else if status.is_timeout() { + infered = self.timeout_infer(&crash_program)? 
+ } + if !infered.is_empty() { + log!(info, "crash infer new constraints, verify again"); + fail_program = None; + i = 0; + continue; + } + } else { + log!(warn, "fail at error index: {fail_at}"); + break; + } + } + fail_cnt += 1; + if fail_cnt > 25 { + log!(trace, "fail too many times, break"); + break; + } + } + } + log!(info, "finish verify function `{f_name}`"); + if let Some(p) = fail_program.as_ref() { + log!(warn, "verify fail program: {}", p.serialize_all()?); + } + Ok(fail_program.is_none()) + } + + /// Infer constraints for new seeds + pub fn seed_infer(&mut self, program: &FuzzProgram) -> eyre::Result> { + let mut new_constraints = vec![]; + crate::log!(trace, "infer new seed"); + if let Some(c) = self.infer_file_fd(program)? { + new_constraints.push(c); + } + Ok(new_constraints) + } + + /// Infer constraints once crash happens + pub fn crash_infer(&mut self, program: &FuzzProgram) -> eyre::Result> { + let mut new_constraints = vec![]; + crate::log!(debug, "infer crash: {}", program.serialize_all()?); + let mut p = program.clone(); + + macro_rules! crash_iter { + ( $p:ident, $max:ident, $list:ident, $f:ident, $($arg:expr),* ) => { { + let mut has_new = false; + for is in &$p.stmts { + if is.index.get() == $max { + break; + } + if let FuzzStmt::Load(load) = &is.stmt { + let stmt_index = &is.index; + // the argument leads to crash may be not in crash call + if let Some((call_i, call_stmt, arg_pos, prefix)) = + $p.find_stmt_loc_in_all_calls(stmt_index.get(), $max) + { + if let Some(new_one) = self.$f( + &p, call_i, call_stmt, stmt_index, &load.state, arg_pos, prefix, $($arg),* + )? { + $list.push(new_one); + has_new = true; + } + } + } + } + has_new + } }; + ( $p:ident, $max:ident, $list:ident, $f:ident) => (crash_iter!($p, $max, $list, $f,)) + } + + // check crash for update opeartion + if let Some(c) = self.infer_opaque_if_update_fail(program)? 
{ + new_constraints.push(c); + return Ok(new_constraints); + } + + for _ in 0..3 { + // warp with a loop to continue infer in some cases + let mut has_new = false; + let status = self.infer_by_review_program(&p)?; + let fail_at = p + .get_fail_stmt_index() + .with_context(|| format!("Can't find fail stmt, program: {p}"))? + .get(); + // only track crash invoking + p.set_calls_track_cov(false); + p.get_call_stmt_mut(fail_at).unwrap().track_cov = true; + + // avoid assertion + // we prefer make assert to exit instead of aborting that we need to infer them + if config::ENABLE_INFER_ABORT && status.is_abort() { + // assert null + has_new |= crash_iter!(p, fail_at, new_constraints, crash_infer_null_for_abort); + // TODO: other assertion.. e.g. compare + } + + // check SIGFPE + if status.is_sigfpe() { + let _ = self.executor.execute_program(&p)?; + let cmp_list = self.observer.feedback.instrs.get_cmp_ids(); + let last_cmps = if cmp_list.len() > 10 { + &cmp_list[cmp_list.len() - 10..] + } else { + &cmp_list[..] + }; + has_new |= crash_iter!(p, fail_at, new_constraints, infer_sigfpe, last_cmps); + } + + // memory error: segv + if status.is_overflow() { + let _ = self.executor.execute_program(&p)?; + if let Some(mut crash_sig) = crate::get_crash_sig(Some(&p)) { + crash_sig.hash = self.observer.feedback.path.hash_trace(); + crate::log!(trace, "crash sig : {crash_sig:?}"); + if crash_sig.is_null_access() { + has_new |= crash_iter!( + p, + fail_at, + new_constraints, + crash_infer_null_for_overflow, + &crash_sig + ); + + } else { + // make huge number to be smaller to avoid overflowing to other canary + self.adjust_numbers_for_inference(&mut p, fail_at, &mut crash_sig)?; + + // if crash_sig.is_overflow_canary() + if let Some(c) = self.crash_infer_number_length(&p, fail_at, &crash_sig)? { + new_constraints.push(c); + has_new = true; + } + // infer opaque type // partial opaque + if !has_new { + if let Some(c) = self.infer_opaque_type(&p, fail_at, &crash_sig)? 
{ + new_constraints.push(c); + has_new = true; + } + } + // if cannot find any number constarints, try to padding buffers to find ARRAY-LEN constraints + if !has_new { + if let Some(c) = self.infer_array_length(&p, fail_at, &crash_sig)? { + new_constraints.push(c); + has_new = true; + } + } + } + if !has_new { + // OOM: malloc return NULL or access overflow + if let Some(c) = self.crash_infer_resource_exhaustion(&p, fail_at)? { + new_constraints.push(c); + has_new = true; + } + } + if !has_new { + // check void cast + has_new = self.infer_void_cast(&p, fail_at, &crash_sig)?; + } + } + } + + if has_new { + // refine with new adding constraints + p.refine_program()?; + let status = self.executor.execute_program(&p)?; + crate::log!(trace, "updated program: {p}"); + // if it still crash we run next loop + if status.is_crash() { + crate::log!(trace, "still crash after length inference"); + continue; + } + } + + break; + } + // check contexts + if new_constraints.is_empty() { + if let Some(c) = self.infer_broken_contexts(program)? { + new_constraints.push(c); + } + } + Ok(new_constraints) + } + + /// Infer if program timeout + pub fn timeout_infer(&mut self, program: &FuzzProgram) -> eyre::Result> { + let mut new_constraints = vec![]; + crate::log!(debug, "infer timeout: {}", program.serialize_all()?); + let fail_at = program + .get_fail_stmt_index() + .with_context(|| { + format! {"Can't find fail stmt, program: {}", program.serialize().unwrap()} + })? + .get(); + if let Some(c) = self.crash_infer_resource_exhaustion(program, fail_at)? { + new_constraints.push(c); + } + Ok(new_constraints) + } + + /// Mutate program with operation, and then execute it. 
+    pub fn execute_with_op(
+        &mut self,
+        program: &FuzzProgram,
+        op: &MutateOperator,
+        refine: bool,
+    ) -> eyre::Result<StatusType> {
+        let mut p = program.clone(); // the caller's program is left untouched
+        log!(trace, "mutate with op : {}", op.serialize()?);
+        flag::set_incomplete_gen(false);
+        p.mutate_program_by_op(op)?;
+        if refine {
+            p.refine_program()?;
+        }
+        log!(trace, "mutated program : {}", p.serialize()?);
+        let status = self.executor.execute_program(&p)?;
+        Ok(status)
+    }
+}
+
+/// Find all functions that we need to infer in a proper order
+fn get_ordered_func_list() -> Vec<&'static str> {
+    let mut to_infer_funcs: Vec<&'static str> = vec![];
+    let gadgets = global_gadgets::get_instance();
+    let mut fgs: Vec<(&FnGadget, bool)> = gadgets
+        .functions
+        .values()
+        .filter(|fg| filter_function_constraint_with(fg.f_name, |fc| !fc.internal))
+        .map(|fg| (fg, false))
+        .collect();
+    // crate::log!(trace, "fgs: {fgs:?}");
+    // if a function can be generated with the functions in existing `to_infer` list,
+    // then it can be merged into the list.
+    for round in 0..20 {
+        for (fg, pick) in &mut fgs {
+            if *pick {
+                continue;
+            }
+            let mut can_gen = true;
+            let f_name = fg.f_name;
+            crate::log!(trace, "try pick {f_name}");
+            for (i, arg_type) in fg.arg_types.iter().enumerate() {
+                let mut may_be_opaque = utils::is_opaque_type(arg_type);
+                if let Some(inner) = utils::get_pointer_inner(arg_type) {
+                    if !utils::is_primitive_type(inner) {
+                        may_be_opaque = true;
+                    }
+                }
+                if may_be_opaque {
+                    if round == 0 {
+                        crate::log!(trace, "arg-{i} is opaque pointer in round-0, skip");
+                        can_gen = false;
+                        break;
+                    }
+                    let mut has_provider = false;
+                    let alias_type = fg.alias_arg_types[i];
+                    if let Some(inner) = utils::get_pointer_inner(alias_type) {
+                        let mut_ptr = utils::mut_pointer_type(inner);
+                        if let Some(fs) = gadgets.ret_graph.get(mut_ptr.as_str()) {
+                            if let Some(f) = fs.iter().find(|&f| to_infer_funcs.contains(f)) {
+                                crate::log!(trace, "{f} return arg-{i}");
+                                continue;
+                            }
+                            has_provider = true;
+                        }
+                        let const_ptr = utils::const_pointer_type(inner);
+                        if let Some(fs) = gadgets.ret_graph.get(const_ptr.as_str()) {
+                            if let Some(f) = fs.iter().find(|&f| to_infer_funcs.contains(f)) {
+                                crate::log!(trace, "{f} return arg-{i}");
+                                continue;
+                            }
+                            has_provider = true;
+                        }
+                    } else if let Some(fs) = gadgets.ret_graph.get(alias_type) {
+                        if let Some(f) = fs.iter().find(|&f| to_infer_funcs.contains(f)) {
+                            crate::log!(trace, "{f} return arg-{i}");
+                            continue;
+                        }
+                        has_provider = true;
+                    }
+                    let ptr_type = utils::mut_pointer_type(alias_type);
+                    if let Some(fs) = gadgets.arg_graph.get(ptr_type.as_str()) {
+                        if let Some((f, _)) = fs.iter().find(|&(f, _)| to_infer_funcs.contains(f)) {
+                            crate::log!(trace, "{f} init arg-{i}");
+                            continue;
+                        }
+                        has_provider = true;
+                    }
+                    let ptr_type = utils::const_pointer_type(alias_type);
+                    if let Some(fs) = gadgets.arg_graph.get(ptr_type.as_str()) {
+                        if let Some((f, _)) = fs.iter().find(|&(f, _)| to_infer_funcs.contains(f)) {
+                            crate::log!(trace, "{f} init arg-{i}");
+                            continue;
+                        }
+                        has_provider = true;
+                    }
+                    // no picked producer: wait for a later round only if some producer exists at all
+                    if has_provider {
+                        crate::log!(trace, "arg-{i} can't find any producer in list, skip");
+                        can_gen = false;
+                    }
+                    break; // NOTE(review): also skips the remaining args when no producer exists - confirm intended
+                }
+            }
+            if can_gen {
+                to_infer_funcs.push(f_name);
+                *pick = true;
+            }
+        }
+    }
+    // infer the remaining ones: init-like functions first, then everything else
+    for (fg, pick) in &mut fgs {
+        if !*pick && literal::is_init_function_by_name(fg.f_name) {
+            to_infer_funcs.push(fg.f_name);
+            *pick = true;
+        }
+    }
+    for (fg, pick) in &mut fgs {
+        if !*pick {
+            to_infer_funcs.push(fg.f_name);
+        }
+    }
+
+    to_infer_funcs
+}
diff --git a/hopper-core/src/fuzz/infer/non_null.rs b/hopper-core/src/fuzz/infer/non_null.rs
new file mode 100644
index 0000000..6ca175b
--- /dev/null
+++ b/hopper-core/src/fuzz/infer/non_null.rs
@@ -0,0 +1,291 @@
+//! Infer non-null constraint.
+//!
+//! APIs that do not check for null pointers can crash when invoked with null pointers.
+//! It's often unclear whether this is a real bug, as some developers argue it is the user's responsibility
+//! to perform null checks.
+//!
+//! If the call triggers a segmentation fault due to accessing a null pointer ($si\_addr$ is $0$ or close to $0$,
+//! where $si\_addr$ is the address of the faulting memory reference), we locate each null pointer in the arguments,
+//! set it to the address of a protected memory chunk, and run this mutated program again.
+//! If the program crashes again at the same program location (indicated by the RIP register) and triggers illegal
+//! access inside the protected memory chunk, it means the pointer is accessed without a null check in the API invocation.
+//! In that case, we add a NON-NULL constraint for this pointer.
+
+use eyre::ContextCompat;
+
+use crate::{fuzz::*, fuzzer::*, log, runtime::*, utils, CrashSig};
+
+impl Fuzzer {
+    /// Try to remove init function for an opaque pointer, and check if they are necessary.
+    pub fn pilot_infer_need_init(
+        &mut self,
+        program: &FuzzProgram,
+        stmt_index: &StmtIndex,
+        load_state: &ObjectState,
+        call_stmt: &CallStmt,
+        arg_pos: usize,
+        prefix: &LocFields,
+    ) -> eyre::Result<()> {
+        if utils::is_opaque_pointer(load_state.ty) && program.has_been_inited(stmt_index).is_some()
+        {
+            let op = MutateOperator::new(
+                Location::stmt(stmt_index.use_index()),
+                MutateOperation::RemoveInitOpaque,
+            );
+            let status = self.execute_with_op(program, &op, false)?;
+            if !status.is_normal() {
+                let mut full_f = prefix.clone();
+                full_f.strip_pointer_suffix();
+                add_function_constraint(
+                    call_stmt.fg.f_name,
+                    arg_pos,
+                    full_f,
+                    Constraint::NeedInit,
+                    "add need init for opaque in pilot-infer",
+                )?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Try to set each pointer to NULL; if the call then fails, the pointer should not be NULL.
+    pub fn pilot_infer_non_null(
+        &mut self,
+        program: &FuzzProgram,
+        stmt_index: &StmtIndex,
+        load_state: &ObjectState,
+        call_stmt: &CallStmt,
+        arg_pos: usize,
+        prefix: &LocFields,
+    ) -> eyre::Result<()> {
+        let non_null_fields = load_state.find_fields_with(|s| s.is_non_null(), false);
+        let f_name = call_stmt.fg.f_name;
+        log!(
+            trace,
+            "start infer pointer for stmt {} at function `{f_name}`: {non_null_fields:?}",
+            stmt_index.get()
+        );
+        for f in non_null_fields {
+            log!(trace, "try to set field {:?} as null", f);
+            // we try to set the pointer to be null
+            let op = MutateOperator::new(
+                Location::new(stmt_index.use_index(), f.clone()),
+                MutateOperation::PointerNull,
+            );
+            let status = self.execute_with_op(program, &op, false)?;
+            let field_state = load_state.get_child_by_fields(f.as_slice())?;
+            // Do not impose the non-null constraint if the field is a pointer that points to an object
+            // that has the same type as the field's parent.
+            // This will probably incur an endless loop when refining.
+            if !f.is_empty() {
+                let struct_type_name = field_state.get_parent().context("has parent")?.ty;
+                let is_nested = utils::is_pointer_type(field_state.ty)
+                    && utils::get_pointer_inner(field_state.ty).unwrap() == struct_type_name;
+                if is_nested {
+                    continue;
+                }
+            }
+            if !status.is_normal() {
+                // opaque pointers need an init call instead of a plain non-null value
+                let full_f = prefix.with_suffix(f);
+                let c = if utils::is_opaque_pointer(field_state.ty) {
+                    Constraint::NeedInit
+                } else {
+                    Constraint::NonNull
+                };
+                add_function_constraint(f_name, arg_pos, full_f, c, "set null in pilot-infer")?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Infer null constraints in an argument once overflow crash happens
+    pub fn crash_infer_null_for_overflow(
+        &mut self,
+        program: &FuzzProgram,
+        call_i: usize,
+        call_stmt: &CallStmt,
+        stmt_index: &StmtIndex,
+        load_state: &ObjectState,
+        arg_pos: usize,
+        prefix: LocFields,
+        crash_sig: &CrashSig,
+    ) -> eyre::Result<Option<ConstraintSig>> {
+        crate::log!(
+            trace,
+            "crash infer non-null at load stmt: {} for call {call_i}",
+            stmt_index.get()
+        );
+        // Infer pointer: set NULL pointer to NON-NULL
+        let null_fields = load_state.find_fields_with(|s| s.is_null(), false);
+        if null_fields.is_empty() {
+            return Ok(None);
+        }
+        crate::log!(
+            trace,
+            "start inferring non-null and need-init constraint. load index: {}, null_fields: {:?}",
+            stmt_index.get(),
+            null_fields
+        );
+        for f in null_fields {
+            let field_state = load_state.get_child_by_fields(f.as_slice())?;
+            let is_opaque = utils::is_opaque_pointer(field_state.ty);
+            let loc = Location::new(stmt_index.use_index(), f.clone());
+            // We set the pointer to point to the address of a canary
+            let op = MutateOperator::new(loc, MutateOperation::PointerCanary);
+            let _status = self.execute_with_op(program, &op, false)?;
+            let cur_rip = self.observer.feedback.instrs.rip_addr;
+            let cur_segv = self.observer.feedback.instrs.segv_addr;
+            crate::log!(trace, "field: {f:?}, segv: {cur_segv:X}, rip: {cur_rip:X}");
+            // if the program crashes at the same place by dereferencing the address in the canary!
+            if crate::is_overflow_canary() {
+                let cur_hash = self.observer.feedback.path.hash_trace();
+                crate::log!(trace, "hash: {cur_hash}, rip: {cur_rip}");
+                if crash_sig.is_null_function_pointer() {
+                    // function pointer is directly called instead of de-referencing.
+                    // if rip is 0, it is a null function pointer, we should compare hash.
+                    if cur_hash != crash_sig.hash {
+                        crate::log!(
+                            trace,
+                            "skip since hash ({}, {}) is not same",
+                            cur_hash,
+                            crash_sig.hash
+                        );
+                        continue;
+                    }
+                } else if cur_rip != crash_sig.rip {
+                    crate::log!(
+                        trace,
+                        "skip since ({}, {}) rip is not same",
+                        cur_rip,
+                        crash_sig.rip
+                    );
+                    // Though RIP indicates the program does not crash at the same place,
+                    // some API codes check the null pointer inconsistently,
+                    // they may check null at A and then do not check it at B,
+                    // so the crash rip changes.
+                    // Thus, we try to set it to be non-null again
+                    let loc = Location::new(stmt_index.use_index(), f.clone());
+                    let op = if is_opaque {
+                        MutateOperation::InitOpaque { call_i }
+                    } else {
+                        MutateOperation::PointerGen {
+                            rng_state: rng::gen_rng_state(),
+                        }
+                    };
+                    let op = MutateOperator::new(loc, op);
+                    let status = self.execute_with_op(program, &op, true)?;
+                    if flag::is_incomplete_gen() {
+                        log!(trace, "incomplete mutation");
+                        continue;
+                    }
+                    // if still access null
+                    if status.is_overflow() && crate::is_access_null() {
+                        crate::log!(trace, "still access null after refining");
+                        continue;
+                    }
+                    // exit early
+                    if self.observer.feedback.track_nothing() {
+                        log!(trace, "exit before invoking");
+                        continue;
+                    }
+                }
+                let constraint = if is_opaque {
+                    Constraint::NeedInit
+                } else {
+                    Constraint::NonNull
+                };
+                log!(trace, "prefix: {prefix:?} , f: {f:?}");
+                let full_f = prefix.with_suffix(f);
+                add_function_constraint(
+                    call_stmt.fg.f_name,
+                    arg_pos,
+                    full_f.clone(),
+                    constraint.clone(),
+                    &format!("infer non-null in crash {}", program.id),
+                )?;
+                let constraint_sig = ConstraintSig {
+                    f_name: call_stmt.fg.f_name.to_string(),
+                    arg_pos,
+                    fields: full_f,
+                    constraint,
+                };
+                return Ok(Some(constraint_sig));
+            }
+        }
+        Ok(None)
+    }
+
+    /// Infer constraints in an argument once abort happens
+    pub fn crash_infer_null_for_abort(
+        &mut self,
+        program: &FuzzProgram,
+        call_i: usize,
+        call_stmt: &CallStmt,
+        stmt_index: &StmtIndex,
+        load_state: &ObjectState,
+        arg_pos: usize,
+        prefix: LocFields,
+    ) -> eyre::Result<Option<ConstraintSig>> {
+        crate::log!(trace, "infer null for abort!");
+        let null_fields = load_state.find_fields_with(|s| s.is_null(), false);
+        // the pointer-to-canary trick does not work for these cases,
+        // since they will pass the assert and crash at other place.
+        // so we try to flip the pointers to be NON-NULL.
+        for f in null_fields {
+            let field_state = load_state.get_child_by_fields(f.as_slice())?;
+            let is_opaque = utils::is_opaque_pointer(field_state.ty);
+            let loc = Location::new(stmt_index.use_index(), f.clone());
+            let op = if is_opaque {
+                MutateOperation::InitOpaque { call_i }
+            } else {
+                MutateOperation::PointerGen {
+                    rng_state: rng::gen_rng_state(),
+                }
+            };
+            let op = MutateOperator::new(loc.clone(), op);
+            let status = self.execute_with_op(program, &op, true)?;
+            if flag::is_incomplete_gen() {
+                log!(trace, "incomplete mutation");
+                continue;
+            }
+            if status.is_ignore() {
+                log!(
+                    warn,
+                    "generate wrong program during infer abort for:\n {}",
+                    program.serialize_all()?
+                );
+                continue;
+            }
+            // assert to exit early.
+            if self.observer.feedback.track_nothing() {
+                log!(trace, "exit before invoking");
+                continue;
+            }
+
+            if !status.is_abort() {
+                let full_f = prefix.with_suffix(f);
+                let constraint = if is_opaque {
+                    Constraint::NeedInit
+                } else {
+                    Constraint::NonNull
+                };
+                add_function_constraint(
+                    call_stmt.fg.f_name,
+                    arg_pos,
+                    full_f.clone(),
+                    constraint.clone(),
+                    "infer non-null in abort",
+                )?;
+                let constraint_sig = Some(ConstraintSig {
+                    f_name: call_stmt.fg.f_name.to_string(),
+                    arg_pos,
+                    fields: full_f,
+                    constraint,
+                });
+                return Ok(constraint_sig);
+            }
+        }
+        Ok(None)
+    }
+}
diff --git a/hopper-core/src/fuzz/infer/opaque.rs b/hopper-core/src/fuzz/infer/opaque.rs
new file mode 100644
index 0000000..bdfcf5b
--- /dev/null
+++ b/hopper-core/src/fuzz/infer/opaque.rs
@@ -0,0 +1,212 @@
+//! Infer the type is opaque type or not
+
+use eyre::ContextCompat;
+
+use crate::{fuzz::*, fuzzer::*, log, runtime::*, utils, CrashSig};
+
+impl Fuzzer {
+    /// Infer if a void type can be casted as a char type or not.
+    pub fn pilot_infer_void_type(
+        &mut self,
+        program: &FuzzProgram,
+        stmt_index: &StmtIndex,
+        call_stmt: &CallStmt,
+        arg_pos: usize,
+    ) -> eyre::Result<()> {
+        let arg_type = call_stmt.fg.arg_types[arg_pos];
+        let alias_arg_type = call_stmt.fg.alias_arg_types[arg_pos];
+        if utils::is_void_pointer(arg_type)
+            && (alias_arg_type.contains("void") || alias_arg_type.contains("Void"))
+        {
+            log!(trace, "arg type: {arg_type} {alias_arg_type}");
+            let op = MutateOperator::new(
+                Location::stmt(stmt_index.use_index()),
+                MutateOperation::PointerGenChar,
+            );
+            let mut suc = true;
+            log!(trace, "try to infer void type");
+            // verify with some random data,
+            // and verify it in other execute paths later.
+            for _ in 0..100 {
+                let status = self.execute_with_op(program, &op, false)?;
+                if !status.is_normal() {
+                    log!(trace, "fail to infer void type, crash!");
+                    suc = false;
+                    break;
+                }
+            }
+            if suc {
+                add_function_arg_constraint(
+                    call_stmt.fg.f_name,
+                    arg_pos,
+                    Constraint::CastFrom {
+                        cast_type: utils::mut_pointer_type("i8"),
+                    },
+                    "try to assgin cast",
+                )?;
+            }
+        }
+        Ok(())
+    }
+
+    /// If updating a call's return value fails, we assume the return type is opaque and can not be mutated.
+    pub fn infer_opaque_if_update_fail(
+        &mut self,
+        program: &FuzzProgram,
+    ) -> eyre::Result<Option<ConstraintSig>> {
+        let Some(op) = program.ops.first() else {
+            return Ok(None);
+        };
+        if matches!(op.op, MutateOperation::CallUpdate { fields: _, ops: _ }) {
+            let call_i = op.key.get_index()?.get();
+            crate::log!(trace, "find update operation for {call_i}");
+            if let Some(call) = program.get_call_stmt(call_i) {
+                let f_name = call.fg.f_name;
+                if crate::inspect_function_constraint_with(f_name, |fc| {
+                    Ok(fc.ret.is_partial_opaque)
+                })? {
+                    crate::log!(trace, "function {f_name} has partial opaque return");
+                    if let Some(alias_ret_ty) = call.fg.alias_ret_type {
+                        let ret_ty: &str = call.fg.ret_type.unwrap();
+                        if let Some(c) = self.set_opaque_type(ret_ty, alias_ret_ty)? {
+                            return Ok(Some(c));
+                        }
+                    }
+                }
+            }
+        }
+        Ok(None)
+    }
+
+    pub fn infer_opaque_type(
+        &mut self,
+        program: &FuzzProgram,
+        fail_at: usize,
+        crash_sig: &CrashSig,
+    ) -> eyre::Result<Option<ConstraintSig>> {
+        let Some(canary_info) = crash_sig.get_canary_info() else {
+            return Ok(None);
+        };
+        // skip primitive types/pointers
+        if let FuzzStmt::Load(load) = &program.stmts[canary_info.stmt_index].stmt {
+            if let Some(inner_ty) = utils::get_vec_inner(load.state.ty) {
+                if utils::is_primitive_type(inner_ty) {
+                    return Ok(None);
+                }
+            }
+        }
+
+        if let Some((call_i, call_stmt, arg_pos, fields)) =
+            program.find_stmt_loc_in_all_calls(canary_info.stmt_index, fail_at)
+        {
+            let mut ptr_fields = fields;
+            let last_one = ptr_fields.list.pop();
+            if Some(FieldKey::Pointer) != last_one {
+                return Ok(None);
+            }
+            crate::log!(
+                trace,
+                "try to infer if it is a opaque type, index: {}",
+                canary_info.stmt_index
+            );
+            let loc = ptr_fields
+                .to_loc_for_refining(program, &call_stmt.args[arg_pos], &LocFields::default())
+                .context("can't find loc for infer opaque type")?;
+            let op = MutateOperator::new(loc, MutateOperation::InitOpaqueForInfer { call_i });
+            // verify: retry the mutation up to 16 times
+            for _ in 0..16 {
+                let mut p = program.clone();
+                p.mutate_program_by_op(&op)?;
+                crate::log!(trace, "mutate ops: {:?}", p.ops);
+                p.refine_program()?;
+                let Some(op) = p.ops.first() else {
+                    continue;
+                };
+                let mut is_opaque_return = false;
+                let (ptr_ty, ptr_alias_ty) = match &op.op {
+                    MutateOperation::PointerRet {
+                        f_name,
+                        rng_state: _,
+                    } => {
+                        crate::inspect_function_constraint_with(f_name, |fc| {
+                            if fc.ret.is_partial_opaque {
+                                crate::log!(trace, "{f_name} is partial opaque return");
+                                is_opaque_return = true;
+                            }
+                            Ok(())
+                        })?;
+                        let fg = global_gadgets::get_instance().get_func_gadget(f_name)?;
+
+                        (fg.ret_type.unwrap(), fg.alias_ret_type.unwrap())
+                    }
+                    MutateOperation::CallRelatedInsert {
+                        f_name,
+                        arg_pos,
+                        rng_state: _,
+                    } => {
+                        let fg = global_gadgets::get_instance().get_func_gadget(f_name)?;
+                        (fg.arg_types[*arg_pos], fg.alias_arg_types[*arg_pos])
+                    }
+                    _ => return Ok(None),
+                };
+                // skip primitive pointer
+                if let Some(inner_ty) = utils::get_pointer_inner(ptr_ty) {
+                    if utils::is_primitive_type(inner_ty) {
+                        return Ok(None);
+                    }
+                }
+                crate::log!(trace, "program: {}", p.serialize()?);
+                let status = self.executor.execute_program(&p)?;
+                let last_stmt = self.observer.feedback.last_stmt_index();
+                crate::log!(trace, "last_stmt: {last_stmt}");
+                // if !crash_sig.is_overflow_at_same_rip() {
+                if status.is_normal() && (is_opaque_return || last_stmt >= p.stmts.len()) {
+                    let ret = self.set_opaque_type(ptr_ty, ptr_alias_ty)?;
+                    return Ok(ret);
+                }
+            }
+        }
+        Ok(None)
+    }
+
+    /// Set the pointee of `ptr_ty` (or of `ptr_alias_ty` when the pointee is primitive) as opaque type
+    pub fn set_opaque_type(
+        &mut self,
+        ptr_ty: &str,
+        ptr_alias_ty: &str,
+    ) -> eyre::Result<Option<ConstraintSig>> {
+        let opaque_ty = if let Some(inner_ty) = utils::get_pointer_inner(ptr_ty) {
+            if utils::is_primitive_type(inner_ty) {
+                if let Some(alias_inner) = utils::get_pointer_inner(ptr_alias_ty) {
+                    if utils::is_primitive_type(alias_inner) {
+                        return Ok(None);
+                    }
+                    alias_inner
+                } else {
+                    ptr_alias_ty
+                }
+            } else {
+                inner_ty
+            }
+        } else {
+            return Ok(None);
+        };
+        crate::log!(trace, "opaque_ty is : {opaque_ty}");
+        if utils::is_opaque_type(opaque_ty) {
+            crate::log!(trace, "skip, {opaque_ty} is opaque!");
+            return Ok(None);
+        }
+        log!(info, "set type {opaque_ty} as opaque");
+        crate::log_new_constraint(&format!("set type {opaque_ty} as opaque"));
+        global_gadgets::get_mut_instance().add_opaque_type(opaque_ty);
+        self.executor
+            .set_config(crate::OPAQUE_CONFIG_KEY, opaque_ty)?;
+        let sig = ConstraintSig {
+            f_name: opaque_ty.to_string(),
+            arg_pos: 0,
+            fields: LocFields::default(),
+            constraint: Constraint::OpaqueType,
+        };
+        Ok(Some(sig))
+    }
+}
diff --git a/hopper-core/src/fuzz/infer/res.rs b/hopper-core/src/fuzz/infer/res.rs
new file mode 100644
index 0000000..4994318
--- /dev/null
+++ b/hopper-core/src/fuzz/infer/res.rs
@@ -0,0 +1,276 @@
+//! 
Infer resource related constraints
+//!
+//! APIs that access or allocate limited resources based on argument numbers may encounter resource exhaustion
+//! if the number is out of range.
+//!
+//! If the inputs lead to a timeout or out of memory, we search for large numerical values in the arguments and
+//! mutate them. If the execution becomes significantly faster or exits normally after setting the value to be small,
+//! we add a RANGE constraint for the argument to limit its maximum value.
+
+use crate::{fuzz::*, fuzzer::*, log, runtime::*, utils};
+
+impl Fuzzer {
+    /// Check crash/hang for resource exhaustion
+    pub fn crash_infer_resource_exhaustion(
+        &mut self,
+        program: &FuzzProgram,
+        fail_at: usize,
+    ) -> eyre::Result<Option<ConstraintSig>> {
+        crate::log!(
+            trace,
+            "start infer resource exhastion due to crash/hang, fail_at: {fail_at}"
+        );
+        for is in &program.stmts {
+            let stmt_index = &is.index;
+            if stmt_index.get() == fail_at {
+                break;
+            }
+            let FuzzStmt::Load(load) = &is.stmt else {
+                continue;
+            };
+            let Some((_call_i, call_stmt, arg_pos, prefix)) =
+                program.find_stmt_loc_in_all_calls(stmt_index.get(), fail_at)
+            else {
+                continue;
+            };
+            let num_fields = load
+                .state
+                .find_fields_with(|s| utils::is_index_or_length_number(s.ty), true);
+            for f in num_fields {
+                let loc: Location = Location::new(stmt_index.use_index(), f.clone());
+                let val = program.find_number_by_loc(loc.clone())? as i32;
+                // underflow for zero
+                if val == 0 {
+                    if let Some(c) =
+                        self.infer_underflow(program, call_stmt, stmt_index, arg_pos, &prefix, &f)?
+                    {
+                        return Ok(Some(c));
+                    }
+                }
+                // skip if it is not a huge number and it is not a fd
+                if val > 8192 || !self.observer.contain_fd(val).0 {
+                    let op = MutateOperator::new(loc, MutateOperation::IntSet { val: 16.into() });
+                    let status = self.execute_with_op(program, &op, false)?;
+                    if !status.is_normal() {
+                        crate::log!(trace, "still crash/hang after setting small value");
+                        continue;
+                    }
+                }
+                if let Some(c) = self.infer_resource_exhaustion(
+                    program, call_stmt, stmt_index, arg_pos, &prefix, &f,
+                )? {
+                    return Ok(Some(c));
+                }
+            }
+        }
+        Ok(None)
+    }
+
+    /// Infer numbers that may cause underflow, e.g zero
+    fn infer_underflow(
+        &mut self,
+        program: &FuzzProgram,
+        call_stmt: &CallStmt,
+        stmt_index: &StmtIndex,
+        arg_pos: usize,
+        prefix: &LocFields,
+        fields: &LocFields,
+    ) -> eyre::Result<Option<ConstraintSig>> {
+        log!(
+            trace,
+            "try to infer arg: {arg_pos}, field {fields:?} to be underflow"
+        );
+        let _ = self.executor.execute_program(program)?;
+        let edges = self.observer.feedback.path.get_list();
+        let mut huge_loops = vec![];
+        for (br, cnt) in edges {
+            // The counter value that marks a huge loop depends on the `fat_bucket` feature.
+            // Both variants are cfg-gated so that `is_full` is defined in every build.
+            #[cfg(feature = "fat_bucket")]
+            let is_full = cnt == 32768;
+            #[cfg(not(feature = "fat_bucket"))]
+            let is_full = cnt == 128;
+            if is_full {
+                huge_loops.push(br);
+            }
+        }
+        if huge_loops.is_empty() {
+            crate::log!(trace, "can't find any huge loop");
+            return Ok(None);
+        }
+        // try to avoid underflow by setting them to other value
+        for val in [64, 256, 512] {
+            crate::log!(trace, "try to set to be val : {val}");
+            let op = MutateOperator::new(
+                Location::new(stmt_index.use_index(), fields.clone()),
+                MutateOperation::IntSet { val: val.into() },
+            );
+            let status = self.execute_with_op(program, &op, false)?;
+            if status.is_normal() {
+                for br in &huge_loops {
+                    let cnt = self.observer.feedback.path.buf[*br];
+                    if cnt == 0 {
+                        // can't find the loop
+                        return Ok(None);
+                    }
+                }
+            }
+        }
+        let full_f = prefix.with_suffix(fields.clone());
+        add_function_constraint(
+            call_stmt.fg.f_name,
+            arg_pos,
+            full_f,
+            Constraint::NonZero,
+            &format!("may be an underflow at crash {} #bug", program.id),
+        )
+    }
+
+    /// Infer numbers that may exhaust resource, e.g. timeout, OOM
+    pub fn infer_resource_exhaustion(
+        &mut self,
+        program: &FuzzProgram,
+        call_stmt: &CallStmt,
+        stmt_index: &StmtIndex,
+        arg_pos: usize,
+        prefix: &LocFields,
+        fields: &LocFields,
+    ) -> eyre::Result<Option<ConstraintSig>> {
+        log!(
+            trace,
+            "try to infer arg: {arg_pos}, field {fields:?} to be resource-related"
+        );
+        let f_name = call_stmt.fg.f_name;
+        let mut diff_cnt = 0;
+        let mut last_num_cmp = 0;
+        let mut last_allocated_resources = (0, 0);
+        let mut fail_with_huge_number = false;
+        let mut path = vec![];
+        let start_at = std::time::Instant::now();
+        let mut fd_cnt = 0;
+        let mut fd_read = false;
+        // Try different numbers,
+        // and compare how many compares visited in the function.
+        // If the #compares and #resource are increasing at most time,
+        // It must be loop related.
+        for val in [1, 8, 25, 64, 256] {
+            let op = MutateOperator::new(
+                Location::new(stmt_index.use_index(), fields.clone()),
+                MutateOperation::IntSet { val: val.into() },
+            );
+            let status = self.execute_with_op(program, &op, false)?;
+            if status.is_normal() {
+                let num_cmp = self.observer.feedback.instrs.cmp_len();
+                let allocated_resources = self.observer.feedback.instrs.count_allocated_resources();
+                // check fd resource
+                let (is_fd, is_fd_read) = self.observer.contain_fd(val);
+                if is_fd {
+                    fd_cnt += 1;
+                    fd_read = is_fd_read;
+                }
+                crate::log!(
+                    trace,
+                    "cmp: {num_cmp}, mem: {allocated_resources:?}, fd: {is_fd:?}"
+                );
+                if num_cmp > last_num_cmp || allocated_resources > last_allocated_resources {
+                    diff_cnt += 1;
+                }
+                last_num_cmp = num_cmp;
+                last_allocated_resources = allocated_resources;
+                // keep the path of the last probe; must match the last value in the list above
+                if val == 256 {
+                    path = self.observer.feedback.path.get_list();
+                }
+            } else {
+                // length or array-len?
+                return Ok(None);
+            }
+        }
+        crate::log!(trace, "diff_cnt: {diff_cnt}, is_fd_cnt: {fd_cnt}");
+        let resource_related = diff_cnt > 4;
+        let is_fd = fd_cnt > 4;
+        if is_fd {
+            let full_f = prefix.with_suffix(fields.clone());
+            return add_function_constraint(
+                f_name,
+                arg_pos,
+                full_f,
+                Constraint::File {
+                    read: fd_read,
+                    is_fd: true,
+                },
+                &format!("infer file fd by diff from program {}", program.id),
+            );
+        }
+
+        if !resource_related {
+            // focus on something we can not catch
+            // we measure the time and if it crash with huge number
+            let loop_secs = start_at.elapsed().as_micros();
+            let start_at = std::time::Instant::now();
+            // try to set it as the maximal number
+            // the program will crash or hang if the number is resource-related or loop-related.
+            let op = MutateOperator::new(
+                Location::new(stmt_index.use_index(), fields.clone()),
+                MutateOperation::IntSet {
+                    val: IrEntry::Max(0),
+                },
+            );
+            let status = self.execute_with_op(program, &op, false)?;
+            // we simply use crash and timeout to indicate OOM/timeout here.
+            // Since OOM may cause timeout or segment fault(malloc return NULL), killed .. etc.
+            // however, it is hard to say that the error is due to the number,
+            // e.g if (x > N) { for (int i = 0; i < y; i++) {} }
+            // the root cause is `y`'s value, but it will OOM/timeout if we set x to maximal if y is also huge.
+            // so we check if the crash path is included by the path before mutating the number.
+            if self.observer.feedback.path.is_inclued_by(&path) {
+                if status.is_timeout() || status.is_crash() {
+                    log!(
+                        warn,
+                        "loc <{}>{} is fail with huge number",
+                        stmt_index.get(),
+                        fields.serialize()?
+                    );
+                    fail_with_huge_number = true;
+                }
+                let huge_secs = start_at.elapsed().as_micros();
+                // if it is too slow with huge number
+                if huge_secs > loop_secs * 5 {
+                    fail_with_huge_number = true;
+                }
+            }
+        }
+
+        if resource_related || fail_with_huge_number {
+            let full_f = prefix.with_suffix(fields.clone());
+            let comment = if resource_related {
+                format!("infer resource related number from program {}", program.id)
+            } else {
+                format!("the number should be samll at program {}", program.id)
+            };
+            // if it already has a range constraint but crashes again, we try to make the range smaller
+            let mut c = Constraint::resource_related();
+            let _ = crate::inspect_function_constraint_with(f_name, |fc| {
+                if let Some(tc) = fc.arg_constraints[arg_pos]
+                    .list
+                    .iter()
+                    .find(|tc| tc.key == full_f) {
+                    if matches!(&tc.constraint, Constraint::Range { min: _, max: _ }) {
+                        c = tc.constraint.clone();
+                        c.shrink_range();
+                    }
+                }
+                Ok(())
+            });
+
+            return add_function_constraint(
+                f_name,
+                arg_pos,
+                full_f,
+                c,
+                &comment,
+            );
+        }
+        Ok(None)
+    }
+}
diff --git a/hopper-core/src/fuzz/infer/sigfpe.rs b/hopper-core/src/fuzz/infer/sigfpe.rs
new file mode 100644
index 0000000..1c8d15e
--- /dev/null
+++ b/hopper-core/src/fuzz/infer/sigfpe.rs
@@ -0,0 +1,51 @@
+//! Infer non zero constraint for SIGFPE
+
+use crate::{fuzz::*, fuzzer::*, runtime::*, utils};
+
+impl Fuzzer {
+    /// Infer SIGFPE, find division by zero errors.
+    pub fn infer_sigfpe(
+        &mut self,
+        program: &FuzzProgram,
+        _call_i: usize,
+        call_stmt: &CallStmt,
+        stmt_index: &StmtIndex,
+        load_state: &ObjectState,
+        arg_pos: usize,
+        prefix: LocFields,
+        last_cmps: &[u32],
+    ) -> eyre::Result<Option<ConstraintSig>> {
+        crate::log!(
+            trace,
+            "infer division by zero error for sigfpe, stmt: {}!",
+            stmt_index.get()
+        );
+        // hash?
+        let num_fields =
+            load_state.find_fields_with(|s| utils::is_index_or_length_number(s.ty), true);
+        for f in num_fields {
+            // only zero-valued numbers are candidates for a division by zero
+            let num_loc = Location::new(stmt_index.use_index(), f.clone());
+            let num = program.find_number_by_loc(num_loc.clone())?;
+            if num == 0 {
+                crate::log!(trace, "find zero number: {num_loc:?}");
+                let op = MutateOperator::new(num_loc, MutateOperation::IntSet { val: 1.into() });
+                let status = self.execute_with_op(program, &op, false)?;
+                if !status.is_sigfpe()
+                    && self.observer.feedback.instrs.contain_cmp_chunks(last_cmps)
+                {
+                    let full_f = prefix.with_suffix(f.clone());
+                    let sig = add_function_constraint(
+                        call_stmt.fg.f_name,
+                        arg_pos,
+                        full_f,
+                        Constraint::NonZero,
+                        &format!("infer non_zero for sigfpe in crash {} #bug", program.id),
+                    );
+                    return sig;
+                }
+            }
+        }
+        Ok(None)
+    }
+}
diff --git a/hopper-core/src/fuzz/minimize.rs b/hopper-core/src/fuzz/minimize.rs
new file mode 100644
index 0000000..89bdd88
--- /dev/null
+++ b/hopper-core/src/fuzz/minimize.rs
@@ -0,0 +1,187 @@
+use eyre::{Context, ContextCompat};
+
+use crate::{fuzzer::Fuzzer, runtime::*, utils, StatusType};
+
+impl Fuzzer {
+    /// Minimize the program before save
+    ///
+    // TODO: for fast executions, we can find all prev programs and view them as one, and try to minimize it.
+ pub fn minimize( + &mut self, + program: &mut FuzzProgram, + ori_status: &StatusType, + ) -> eyre::Result { + // ignore det mutation + if let Some(op) = program.ops.first() { + if op.det { + return Ok(false); + } + } + let start_at = std::time::Instant::now(); + let original_len = program.stmts.len(); + let hash = self.observer.feedback.path.hash_trace(); + let prev_cnt = self.count; + crate::log!(trace, "try minimize input, hash: {hash}"); + // try to minimize operators + let mut changed = self + .minimize_ops(program, hash, ori_status) + .context(format!("program: {}", program.serialize_all().unwrap()))?; + let mut visited = vec![]; + loop { + let mut p = program.clone(); + if !p.try_minimize(&mut visited)? { + break; + } + let status = self.executor.execute_program(&p)?; + self.count += 1; + if &status == ori_status { + let cur_hash = self.observer.feedback.path.hash_trace(); + crate::log!(trace, "cur_hash: {cur_hash}"); + // ok, we adapt this minimization + if cur_hash == hash { + crate::log!(trace, "hash is the same, keep mutation"); + changed = true; + program.ops = program.ops.clone_with_program(&mut p); + program.stmts = p.stmts; + } + } + // rollback and skip it then + } + if changed { + // refine again + program.refine_program()?; + program.ops.retain(|op| !op.key.is_released()); + crate::log!( + trace, + "minimize program, #stmts {original_len} -> {}, hash: {hash}!", + program.stmts.len() + ); + } + let cnt = self.count - prev_cnt; + let secs = start_at.elapsed().as_secs_f32(); + crate::log!(trace, "try {cnt} minimize uses {secs} seconds"); + Ok(changed) + } + + fn minimize_ops( + &mut self, + program: &mut FuzzProgram, + hash: u64, + ori_status: &StatusType, + ) -> eyre::Result { + if program.ops.len() <= 1 { + return Ok(false); + } + let mut changed = false; + crate::log!(trace, "try minimize mutate ops"); + let parent_id = program + .parent + .context("can't find parent for mutated program")?; + let parent = if let Some(p) = 
self.depot.get_program_by_id(parent_id) { + p.clone() + } else { + crate::read_input_in_queue(parent_id)? + }; + let mut ops = program.ops.clone(); + for op_i in (0..ops.len()).rev() { + crate::log!(trace, "try remove op: {op_i}"); + let op = ops.remove(op_i); + let mut p = parent.clone(); + p.mutate_program_by_ops(&ops)?; + p.refine_program()?; + let status = self.executor.execute_program(&p)?; + self.count += 1; + crate::log!(trace, "status: {ori_status:?} vs. {status:?}"); + if &status == ori_status { + let cur_hash = self.observer.feedback.path.hash_trace(); + crate::log!(trace, "hash: {cur_hash} vs. {hash}"); + if cur_hash == hash { + changed = true; + program.stmts = p.stmts; + program.ops = p.ops; + continue; + } + } + // insert back + ops.insert(op_i, op); + } + Ok(changed) + } +} + +impl FuzzProgram { + // Try minimize Program + pub fn try_minimize(&mut self, visited: &mut Vec) -> eyre::Result { + let len = self.stmts.len(); + for i in (0..len).rev() { + let is = &mut self.stmts[i]; + let uniq = is.index.get_uniq(); + if visited.contains(&uniq) { + continue; + } + match &mut is.stmt { + FuzzStmt::Load(load) => { + // set vec to smaller: 1 + let len = load.value.get_length(); + if len > 1 && utils::is_vec_type(load.value.type_name()) { + // craft an uniq number for this + let sub_uniq = uniq * 2 + len as u64; + if visited.contains(&sub_uniq) { + continue; + } + visited.push(sub_uniq); + crate::log!(trace, "try delete length {} to {}", len, len / 2); + load.value.mutate_by_op( + &mut load.state, + &[], + &crate::MutateOperation::VecDel { + offset: 1, + len: len / 2, + }, + )?; + // we should refine length/range constraints + self.refine_program()?; + return Ok(true); + } + // try to set null + let non_null_fields = load.state.find_fields_with(|s| s.is_non_null(), false); + for f in non_null_fields { + // craft a uniq hash + let sub_uniq = uniq + crate::hash_buf(f.serialize()?.as_bytes()); + if visited.contains(&sub_uniq) { + continue; + } + 
visited.push(sub_uniq); + let sub_state = load.state.get_child_mut_by_fields(f.as_slice())?; + if let Some(ps) = sub_state.pointer.as_mut() { + crate::log!(trace, "try set pointer to null: {f:?}"); + ps.pointer_location = Location::null(); + self.check_ref_use()?; + return Ok(true); + } + } + } + FuzzStmt::Call(call) => { + // TODO: if the call is marked as track, remove it must affect the path + // if call.track_cov { + // visited.push(uniq); + // return Ok(true); + // } + if !crate::config::ENABLE_INTER_API_LEARN + || !(call.is_implicit() || call.is_relative()) + { + continue; + } + crate::log!(trace, "try remove context: {i}"); + self.delete_stmt(i); + self.check_ref_use()?; + visited.push(uniq); + return Ok(true); + } + _ => {} + } + visited.push(uniq); + } + Ok(false) + } +} diff --git a/hopper-core/src/fuzz/mod.rs b/hopper-core/src/fuzz/mod.rs new file mode 100644 index 0000000..335100d --- /dev/null +++ b/hopper-core/src/fuzz/mod.rs @@ -0,0 +1,68 @@ +//! Mutating module +//! Code for how to generate or mutating a program + +pub mod constraints; +mod det; +pub mod effective; +mod flag; +mod generate; +mod infer; +mod minimize; +mod mutate; +mod object; +mod operator; +mod pcg; +pub mod refine; +mod rng; +pub mod stmt; +mod weight; +mod check; +mod find; + +pub use constraints::*; +pub use det::*; +pub use flag::*; +pub use generate::*; +pub use mutate::*; +pub use object::*; +pub use operator::*; +pub use rng::*; +pub use weight::*; + +pub trait EnumKind { + fn kind(&self) -> &'static str; +} + +#[test] +fn test_generate_and_mutate() { + use crate::FuzzProgram; + use crate::Serialize; + + fn gen_and_mutate(target: &str) { + for _ in 0..100 { + let (mut p1, mut p2) = { + let mut program = FuzzProgram::generate_program_for_func(target).unwrap(); + // make it like a seed + program.parent = Some(0); + println!("**p0**\n{}", program.serialize().unwrap()); + let p1 = program.clone(); + let p2 = program.clone(); + (p1, p2) + }; + p1.mutate_program().unwrap(); + 
println!("**p1**\n{}", p1.serialize().unwrap()); + println!("**ops**: {}", p1.ops.serialize().unwrap()); + + p2.mutate_program_by_ops(&p1.ops).unwrap(); + p2.refine_program().unwrap(); + println!("**p2**\n{}", p2.serialize().unwrap()); + + assert_eq!(p1.serialize().unwrap(), p2.serialize().unwrap()) + } + } + + gen_and_mutate("func_add"); + gen_and_mutate("func_create"); + gen_and_mutate("func_use"); + gen_and_mutate("func_struct"); +} diff --git a/hopper-core/src/fuzz/mutate.rs b/hopper-core/src/fuzz/mutate.rs new file mode 100644 index 0000000..e011d67 --- /dev/null +++ b/hopper-core/src/fuzz/mutate.rs @@ -0,0 +1,206 @@ +use eyre::Context; + +use crate::{fuzz::*, runtime::*, MutateOperator}; + +impl FuzzProgram { + /// Mutate the program + pub fn mutate_program(&mut self) -> eyre::Result<()> { + self.save_mutate_state(); + // deterministic mutation + let _ = self.deterministic_mutate()? || + // random mutation + self.random_mutate()?; + // refine by constraints + self.refine_program()?; + Ok(()) + } + + /// Mutate the program's input + pub fn mutate_program_inputs(&mut self) -> eyre::Result<()> { + self.save_mutate_state(); + // only mutate inputs + self.mutate_inputs()?; + // refine by constraints + self.refine_program()?; + Ok(()) + } + + /// Mutate the program by specific operator + pub fn mutate_program_by_op( + &mut self, + op: &MutateOperator, + ) -> eyre::Result<()> { + if op.key.is_released() { + return Ok(()); + } + let is = self.get_mut_stmt_by_index_uniq(op.key.get_index()?); + if is.is_none() { + return Ok(()); + } + let is = is.unwrap(); + let mut stmt = is.stmt.lend(); + let ret = stmt.mutate_by_op(self, op.key.fields.as_slice(), &op.op); + if let Err(err) = ret { + crate::log!(trace, "p: {}", self.serialize().unwrap()); + crate::log!(trace, "stmt: {}", stmt.serialize().unwrap()); + crate::log!(trace, "fail to mutate by op, op: {:?}, err: {:?}", op, err); + } + { + let index = self.withdraw_stmt(stmt)?; + let mut op = op.clone(); + 
op.key.set_index(index); + self.ops.push(op); + } + self.check_ref_use()?; + Ok(()) + } + + /// Mutate the program by some specific operators + pub fn mutate_program_by_ops( + &mut self, + ops: &[MutateOperator], + ) -> eyre::Result<()> { + for op in ops { + self.mutate_program_by_op(op)?; + } + Ok(()) + } + + /// Post handle after single mutating operation + fn post_handle(&mut self, op: MutateOperator) -> eyre::Result<()> { + crate::log!(trace, "op: {}", op); + if !op.is_nop() { + self.ops.push(op); + } + self.check_ref_use()?; + self.ops.retain(|op| !op.key.is_released()); + Ok(()) + } + + /// Deterministic mutate + fn deterministic_mutate(&mut self) -> eyre::Result { + let mut nop_cnt = 0; + while let Some(is) = self + .stmts + .iter_mut() + .find(|is| is.stmt.is_deterministic()) + { + let cur_index = is.index.get(); + crate::log!(trace, "det stage on program: {}, index: {}", self.id, cur_index); + let mut stmt = is.stmt.lend(); + let mut op = stmt.det_mutate(self) + .with_context(|| format!("stub: {}", stmt.serialize().unwrap()))?; + let index = self.withdraw_stmt(stmt)?; + op.set_index(index); + op.det = true; + if op.is_nop() { + // avoid stuck in det mutation + nop_cnt += 1; + if nop_cnt > 25 { + crate::log!(warn, "stuck in deterministic mutate, program: {:?}, stmt: {cur_index}", self.id); + break; + } + } else { + self.post_handle(op)?; + return Ok(true); + } + } + Ok(false) + } + + /// Random mutate + fn random_mutate(&mut self) -> eyre::Result { + let weight_sum = super::weight::get_weight_sum(&self.stmts); + let ratio = 5; + let max = 3.max(64.min(weight_sum / ratio)); + let use_stacking = rng::gen_range(1..max); + for _ in 0..use_stacking { + if let Some(i) = weight::choose_weighted(&self.stmts) { + let is = &mut self.stmts[i]; + let cur_index = is.index.get(); + crate::log!(trace, "random stage on program: {}, index: {}", self.id, cur_index); + let mut stmt = is.stmt.lend(); + let mut op = stmt.mutate(self) + .with_context(|| format!("stub: {}", 
stmt.serialize().unwrap()))?; + let is_incompatible = stmt.is_incompatible(&op); + let index = self.withdraw_stmt(stmt)?; + op.set_index(index); + self.post_handle(op)?; + if is_incompatible && !self.ops.is_empty() { + break; + } + } + } + Ok(true) + } + + /// Only mutate inputs + fn mutate_inputs(&mut self) -> eyre::Result { + crate::set_input_only(true); + let weight_sum = super::weight::get_weight_sum(&self.stmts); + let ratio = 5; + let max = 3.max(64.min(weight_sum / ratio)); + let use_stacking = rng::gen_range(1..max); + for _ in 0..use_stacking { + if let Some(i) = weight::choose_weighted(&self.stmts) { + let is = &mut self.stmts[i]; + let cur_index = is.index.get(); + let mut stmt = is.stmt.lend(); + let mut op_holder = None; + if matches!(stmt, FuzzStmt::Load(_)) { + crate::log!( + trace, + "random input stage on program: {}, index: {}", + self.id, + cur_index + ); + op_holder = Some( + stmt.mutate(self) + .with_context(|| format!("stub: {}", stmt.serialize().unwrap()))?, + ); + } + let index = self.withdraw_stmt(stmt)?; + if let Some(mut op) = op_holder { + op.set_index(index); + self.post_handle(op)?; + if !self.ops.is_empty() { + break; + } + } + } + } + crate::set_input_only(false); + Ok(true) + } +} + +#[test] +fn test_var_mutate() { + fn test_type(ty: &str) { + let mut seed = FuzzProgram::default(); + let load = LoadStmt::generate_new( &mut seed, ty, "test", 0).unwrap(); + let _index = seed.append_stmt(load); + // make it like a seed + seed.parent = Some(0); + for _ in 0..200 { + let mut p = seed.clone(); + // to make index won't be drop + let index = p.stmts[0].index.use_index(); + p.mutate_program().unwrap(); + assert!(index.get_ref_used() > 0); + } + } + test_type("u8"); + test_type("i8"); + test_type("u32"); + test_type("i32"); + test_type("u64"); + test_type("i64"); + test_type("char"); + test_type("bool"); + test_type("f32"); + test_type("f64"); + test_type("[u8; 10]"); + test_type("hopper::runtime::FuzzMutPointer"); + 
test_type("hopper::test::TestType"); +} diff --git a/hopper-core/src/fuzz/object/bitfield.rs b/hopper-core/src/fuzz/object/bitfield.rs new file mode 100644 index 0000000..7adeed1 --- /dev/null +++ b/hopper-core/src/fuzz/object/bitfield.rs @@ -0,0 +1,30 @@ +use crate::{HopperBindgenBitfieldUnit, ObjFuzzable}; + +use super::*; + +impl ObjGenerate for HopperBindgenBitfieldUnit { + fn generate_new(state: &mut ObjectState) -> eyre::Result { + let sub_state = state + .add_child( + FieldKey::Field("storage".to_string()), + std::any::type_name::(), + ) + .last_child_mut()?; + Ok(Self{ storage: Storage::generate_new(sub_state)? }) + } +} + +impl ObjMutate for HopperBindgenBitfieldUnit { + fn mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + self.storage.mutate(state.last_child_mut()?) + } + + fn mutate_by_op( + &mut self, + state: &mut ObjectState, + keys: &[FieldKey], + op: &MutateOperation, + ) -> eyre::Result<()> { + self.storage.mutate_by_op(state.last_child_mut()?, keys, op) + } +} diff --git a/hopper-core/src/fuzz/object/buf.rs b/hopper-core/src/fuzz/object/buf.rs new file mode 100644 index 0000000..e00568d --- /dev/null +++ b/hopper-core/src/fuzz/object/buf.rs @@ -0,0 +1,616 @@ +//! Mutate buffer ([u8] or [i8]) without format-aware. +//! 
use AFL-like mutation + +use std::{ + io::{BufRead, Read}, + path::PathBuf, +}; + +use eyre::{Context, ContextCompat}; +use once_cell::sync::OnceCell; + +use super::*; +use crate::{fuzz::effective, runtime::*}; + +static BUF_SEEDS: OnceCell, Vec)>> = OnceCell::new(); +static BUF_DICTS: OnceCell, Vec)>> = OnceCell::new(); + +pub trait BufMutate { + /// Mutate the buffer without format-aware + fn mutate_buf(&mut self, state: &mut ObjectState) -> eyre::Result; + + /// Mutate the buffer by specific opeartion + fn mutate_buf_by_op( + &mut self, + state: &mut ObjectState, + keys: &[FieldKey], + op: &MutateOperation, + ) -> eyre::Result<()>; + + /// Splice the buffer with another buffer randomly + fn splice_buf(&mut self, state: &mut ObjectState) -> eyre::Result; + + /// Randomly havoc the buffer + fn havoc_buf(&mut self, state: &mut ObjectState) -> eyre::Result; + + /// Randomly insert tokens from dictionary + fn insert_dict_token(&mut self, state: &mut ObjectState) -> eyre::Result; +} + +impl BufMutate for Vec { + fn mutate_buf(&mut self, state: &mut ObjectState) -> eyre::Result { + crate::log!(trace, "mutate buf"); + if rng::unlikely() && self.len() > 20 { + let op = self.splice_buf(state).context("fail to splice buf")?; + if !op.is_nop() { + return Ok(state.as_mutate_operator(op)); + } + } + if rng::rarely() { + let op = self.insert_dict_token(state)?; + if !op.is_nop() { + return Ok(state.as_mutate_operator(op)); + } + } + self.havoc_buf(state).context("fail to havoc buf") + } + + fn mutate_buf_by_op( + &mut self, + state: &mut ObjectState, + keys: &[FieldKey], + op: &MutateOperation, + ) -> eyre::Result<()> { + match op { + MutateOperation::BufSplice { + program_id, + stmt_index, + split_at, + range, + } => { + let ele_type_name = std::any::type_name::(); + let ident = state.key.as_str()?; + let key = format!("{ident}_{ele_type_name}"); + let buf1 = self; + if *split_at >= buf1.len() { + return Ok(()); + } + let found: eyre::Result = effective::EFFECT.with(|eff| { + 
if let Some(list) = eff.borrow().buf_list.get(&key) { + if let Some(seed) = list + .iter() + .find(|s| s.program_id == *program_id && s.stmt_index == *stmt_index) + { + if let Some(r) = range { + if *split_at >= buf1.len() || r.upper > seed.buf.len() { + return Ok(true); + } + let chunk = &seed.buf[r.lower..r.upper]; + super::seq::vec_insert_chunk(buf1, state, *split_at, chunk, r.is_insert)?; + } else { + if *split_at >= seed.buf.len() { + return Ok(true); + } + let buf2 = unsafe { + std::slice::from_raw_parts( + seed.buf.as_ptr() as *const T, + seed.buf.len(), + ) + }; + splice_buf_at(buf1, buf2, state, *split_at); + } + return Ok(true); + } + } + Ok(false) + }); + if found? { + return Ok(()); + } + + // If our cached buf list does not contain such buffer, + // try to read it from disk. + let p = crate::depot::read_input_in_queue(*program_id)?; + let buf2_value = p.stmts[*stmt_index] + .stmt + .get_value() + .context("buf has value")?; + let buf2 = buf2_value + .downcast_ref::>() + .context("downcast buf")?; + if *split_at >= buf2.len() { + return Ok(()); + } + splice_buf_at(buf1, buf2, state, *split_at); + } + MutateOperation::BufHavoc { + use_bytes, + swap, + op, + } => { + let buf = self; + let fields = op.key.fields.as_slice(); + eyre::ensure!(!fields.is_empty(), "key should at least 2 fields"); + let offset = fields.last().unwrap().as_usize()?; + if offset >= buf.len() { + return Ok(()); + } + let val = &mut buf[offset]; + let op = &op.op; + let sub_state = state.get_child_mut(offset)?; + macro_rules! 
mutate_num_by_op { + ($ty:ident) => {{ + let ptr = val as *mut T as *mut $ty; + let mut num = unsafe { ptr.read_unaligned() }; + if !swap { + num.mutate_by_op(sub_state, &[], op)?; + } else { + let mut swap_num = num.swap_bytes(); + swap_num.mutate_by_op(sub_state, &[], op)?; + num = swap_num.swap_bytes(); + } + unsafe { ptr.write_unaligned(num) }; + }}; + } + match *use_bytes { + 1 => { + val.mutate_by_op(sub_state, &[], op)?; + } + 2 => { + mutate_num_by_op!(u16); + } + 4 => { + mutate_num_by_op!(u32); + } + 8 => { + mutate_num_by_op!(u64); + } + _ => { + unreachable!(); + } + } + } + MutateOperation::UseDict { + offset, + dict, + is_insert, + } => { + super::seq::vec_insert_chunk(self, state, *offset, dict.as_slice(), *is_insert)?; + } + _ => { + self.as_mut_slice().mutate_by_op(state, keys, op)?; + } + } + Ok(()) + } + + fn splice_buf(&mut self, state: &mut ObjectState) -> eyre::Result { + let ele_type_name = std::any::type_name::(); + let ident = state.key.as_str()?; + let key = format!("{ident}_{ele_type_name}"); + let buf1 = unsafe { + std::slice::from_raw_parts(self.as_ptr() as *const u8, self.len()) + }; + // Get current buffer's hash + let hash = crate::utils::hash_buf(buf1); + // Pick a random entry. Don't splice with yourself. 
+ effective::EFFECT.with(|eff| { + if let Some(list) = eff.borrow().buf_list.get(&key) { + let iter = list.iter().filter(|s| s.hash != hash); + if let Some(seed) = rng::choose_iter(iter) { + crate::log!( + trace, + "splice with program {} index: {}", + seed.program_id, + seed.stmt_index + ); + let buf2 = &seed.buf[..]; + // crossover insert/overwrite + if rng::coin() && buf1.len() > 2 && buf2.len() > 4 { + let lower = rng::gen_range(0..buf2.len() - 4); + let upper = rng::gen_range(lower + 4..buf2.len()); + let chunk = &buf2[lower..upper]; + eyre::ensure!(chunk.len() >= 4, "chunk has at least 4 bytes"); + let split_at = rng::gen_range(1..buf1.len() - 1); + let is_insert = rng::coin(); + super::seq::vec_insert_chunk(self, state, split_at, chunk, is_insert)?; + return Ok(MutateOperation::BufSplice { + program_id: seed.program_id, + stmt_index: seed.stmt_index, + split_at, + range: Some(crate::SpliceRange { + lower, + upper, + is_insert + }) + }); + } + + // splice at specific position + // we cast them to u8 since T havn't Eq trait + if let Some(split_at) = find_splice_pos(buf1, buf2) { + let buf1 = self; + let buf2 = unsafe { + std::slice::from_raw_parts( + seed.buf.as_ptr() as *const T, + seed.buf.len(), + ) + }; + splice_buf_at(buf1, buf2, state, split_at); + return Ok(MutateOperation::BufSplice { + program_id: seed.program_id, + stmt_index: seed.stmt_index, + split_at, + range: None, + }); + } + } + } + Ok(MutateOperation::Nop) + }) + } + + /// Try afl-like havoc mutation for bytes + fn havoc_buf(&mut self, state: &mut ObjectState) -> eyre::Result { + let buf_len = self.len(); + let use_bytes = use_bytes(buf_len); + let swap = rng::unlikely(); + let offset = rng::gen_range(0..buf_len - use_bytes + 1); + let buf = self; + let val = &mut buf[offset]; + let sub_state = state.get_child_mut(offset)?; + macro_rules! 
mutate_num { + ($ty:ident) => {{ + let ptr = val as *mut T as *mut $ty; + let mut num = unsafe { ptr.read_unaligned() }; + let op_ret; + if !swap { + op_ret = num.mutate(sub_state)?; + } else { + let mut swap_num = num.swap_bytes(); + op_ret = swap_num.mutate(sub_state)?; + num = swap_num.swap_bytes(); + } + unsafe { ptr.write_unaligned(num) }; + op_ret + }}; + } + + let op = match use_bytes { + 1 => val.mutate(sub_state)?, + 2 => mutate_num!(u16), + 4 => mutate_num!(u32), + 8 => mutate_num!(u64), + _ => { + unreachable!(); + } + }; + let op = MutateOperation::BufHavoc { + use_bytes, + swap, + op: Box::new(op), + }; + Ok(state.as_mutate_operator(op)) + } + + /// Try to insert or overwrite values in dictionary + fn insert_dict_token(&mut self, state: &mut ObjectState) -> eyre::Result { + let ident = state.key.as_str().unwrap(); + let dicts = get_buf_dict_tokens(ident); + if let Some(value) = rng::choose_slice(dicts.as_slice()) { + let buf_len = self.len(); + let offset = rng::gen_range(0..buf_len); + // insert or replace + let is_insert = rng::coin(); + super::seq::vec_insert_chunk(self, state, offset, value, is_insert)?; + return Ok(MutateOperation::UseDict { + offset, + dict: value.to_vec(), + is_insert, + }); + } + Ok(MutateOperation::Nop) + } +} + + + +/// Find a suitable splicing location, somewhere between the first and +/// the last differing byte. +fn find_splice_pos(buf1: &[u8], buf2: &[u8]) -> Option { + let (f_loc, l_loc) = locate_diffs(buf1, buf2); + if f_loc.is_none() || l_loc.is_none() { + return None; + } + let f_loc = f_loc.unwrap(); + let l_loc = l_loc.unwrap(); + // Bail out if the difference is just a single byte or so. + // f_loc == 0 || + if l_loc < 2 || f_loc == l_loc { + return None; + } + let split_at = rng::gen_range(f_loc..l_loc); + Some(split_at) +} + +/// Returns first and last differing offset. 
/// Returns the first and the last offset at which `buf1` and `buf2` differ, looking
/// only at their common prefix length.
/// We use this to find reasonable locations for splicing two buffers.
///
/// Both entries are `None` when the common length is below 2 or no byte differs.
fn locate_diffs(buf1: &[u8], buf2: &[u8]) -> (Option<usize>, Option<usize>) {
    let common = buf1.len().min(buf2.len());
    if common < 2 {
        return (None, None);
    }
    let differs = |pos: &usize| buf1[*pos] != buf2[*pos];
    // scan from the front for the first mismatch and from the back for the last one
    let first_loc = (0..common).find(differs);
    let last_loc = (0..common).rev().find(differs);
    (first_loc, last_loc)
}
/// Recursively collect seed buffers from `path`.
///
/// Every regular file of at most 10000 bytes becomes one `(ident, content)` entry.
/// Files in a sub-directory whose *name* starts with `@` (e.g. `@png`) are tagged with
/// the rest of that name (`png`); other sub-directories inherit `ident` from their
/// parent. Returns an empty list when `path` is not a directory.
///
/// Panics if a directory entry, its metadata or a file cannot be read.
fn read_buf_seeds_from_dir(
    path: &std::path::Path,
    ident: Option<String>,
) -> Vec<(Option<String>, Vec<u8>)> {
    let mut list = vec![];
    if !path.is_dir() {
        return list;
    }
    for entry in path.read_dir().unwrap() {
        let file = entry.unwrap().path();
        if file.is_dir() {
            // If the directory starts with '@', it is only for a specific ident.
            // The check must look at the directory's own name: the full path
            // (e.g. `/out/seeds/@png`) never starts with '@'.
            let scoped = file
                .file_name()
                .and_then(|name| name.to_str())
                .and_then(|name| name.strip_prefix('@'))
                .map(str::to_string);
            let sub_ident = scoped.or_else(|| ident.clone());
            list.extend(read_buf_seeds_from_dir(&file, sub_ident));
            continue;
        }
        if !file.is_file() {
            continue;
        }
        // skip seeds that are too large
        if file.metadata().unwrap().len() > 10000 {
            continue;
        }
        let buf = std::fs::read(&file).unwrap();
        list.push((ident.clone(), buf));
    }
    list
}
let mut buf = Vec::new(); + f.read_to_end(&mut buf).unwrap(); + parse_dictionary(&buf) + } else { + vec![] + } + }); + dicts + .iter() + .filter_map(|(i, t)| { + if let Some(name) = i { + if name != ident { + return None; + } + } + Some(t.as_slice()) + }) + .collect() +} + +fn parse_dictionary(buf: &[u8]) -> Vec<(Option, Vec)> { + let mut list = vec![]; + for line in buf.lines() { + let line = line.unwrap(); + let mut l = line.trim(); + let mut ident = None; + if l.is_empty() || l.starts_with('#') || l.starts_with("//") { + continue; + } + if !l.starts_with('"') { + if let Some(pos) = l.find('"') { + let prefix = &l[..pos]; + if let Some(eq_pos) = prefix.find('=') { + // if kw starts with '@', only for specific ident + let kw = l[..eq_pos].trim(); + if let Some(rest) = kw.strip_prefix('@') { + ident = Some(rest.to_string()); + } + } + l = &l[pos..]; + } else { + continue; + } + } + if l.is_empty() { + continue; + } + l = &l[1..]; + if let Some(pos) = l.find('"') { + l = &l[..pos]; + } else { + continue; + } + let mut dict_value = vec![]; + let mut citer = l.chars(); + while let Some(c) = citer.next() { + let c = c as u8; + if !(32..128).contains(&c) { + continue; + } + if c == b'\\' && citer.next() == Some('x') { + let first = citer.next().context("has first").unwrap(); + let second = citer.next().context("has second").unwrap(); + if first.is_ascii_hexdigit() && second.is_ascii_hexdigit() { + let v = (first.to_digit(16).unwrap() << 4) | second.to_digit(16).unwrap(); + dict_value.push(v as u8); + } + continue; + } + dict_value.push(c); + } + list.push((ident, dict_value)); + } + list +} + +#[test] +fn test_buf_mutate() { + let mut buf = vec![0_u8; 10]; + let ptr = &mut buf[1] as *mut u8 as *mut u32; + unsafe { ptr.write_unaligned(0x1234) }; + println!("{buf:?}"); + assert_eq!(buf[1], 0x34); + assert_eq!(buf[2], 0x12); + let ptr = &mut buf[5] as *mut u8 as *mut u32; + let val = 0x1234_u32.swap_bytes(); + unsafe { ptr.write_unaligned(val) }; + println!("{buf:?}"); + 
assert_eq!(buf[7], 0x12); + assert_eq!(buf[8], 0x34); +} + +#[test] +fn test_buf() { + use crate::fuzz::effective::EffectiveBuf; + { + let mut seeds = vec![]; + for i in 0..10 { + let buf: Vec = (0..100).map(|_| rng::gen::()).collect(); + seeds.push(EffectiveBuf { + program_id: i, + stmt_index: i, + buf, + hash: i as u64, + }); + } + effective::EFFECT.with(|eff| { + eff.borrow_mut() + .buf_list + .insert("test_i8".to_string(), seeds); + }); + } + let mut buf = vec![0_i8; 64]; + let mut state = ObjectState::root("test", std::any::type_name::()); + for _ in 0..buf.len() { + let idx = state.children.len(); + let _ = state.add_child(idx, std::any::type_name::()); + } + for _ in 0..1000 { + println!("----------------------"); + let mut buf2 = buf.clone(); + let mut state2 = state.clone_without_mutate_info(None); + let op = buf.mutate_buf(&mut state).unwrap(); + println!("op: {}", op.serialize().unwrap()); + println!("buf_len: {} / {}", buf.len(), state.children.len()); + assert!(!op.is_nop()); + buf2.mutate_buf_by_op(&mut state2, op.key.fields.as_slice(), &op.op) + .unwrap(); + println!("buf: {buf:?}"); + println!("buf2: {buf2:?}"); + assert_eq!(buf, buf2); + } + + for _ in 0..100 { + let op = buf.splice_buf(&mut state).unwrap(); + println!("op: {op:?}"); + } +} + +#[test] +fn test_parse_dict() { + let buf = r#"kw1="DKIF\x00\x00" + section_2101="!\x01\x12" + @test="123""#; + let ret = BUF_DICTS.get_or_init(|| parse_dictionary(buf.as_bytes())); + if ret.is_empty() { + return; + } + let list = get_buf_dict_tokens("abc"); + assert_eq!(list.len(), 2); + let list = get_buf_dict_tokens("test"); + assert_eq!(list.len(), 3); +} diff --git a/hopper-core/src/fuzz/object/corpus.rs b/hopper-core/src/fuzz/object/corpus.rs new file mode 100644 index 0000000..1e494b9 --- /dev/null +++ b/hopper-core/src/fuzz/object/corpus.rs @@ -0,0 +1,143 @@ +//! Corpus for types, lists of special values. +//! 
Copy from lain +use super::ObjCorpus; + +static DANGEROUS_NUMBERS_U8: &[u8] = &[ + 1, + 0x10, + 0x40, + std::u8::MIN, // 0x00 + std::u8::MAX, // 0xff + std::i8::MAX as u8, // 0x7f + std::i8::MIN as u8, // 0x80 +]; + +static DANGEROUS_NUMBERS_U16: &[u16] = &[ + 1, + 0x40, + 0x0400, + std::u8::MAX as u16, // 0xff + std::i8::MAX as u16, // 0x7f + // big-endian variants + std::u16::MIN, // 0x0000 + std::u16::MAX, // 0xffff + std::i16::MAX as u16, // 0x7fff + std::i16::MIN as u16, // 0x8000 + // little-endian variants + (std::i16::MAX as u16).swap_bytes(), // 0xff7f + (std::i16::MIN as u16).swap_bytes(), // 0x0080 +]; + +static DANGEROUS_NUMBERS_U32: &[u32] = &[ + 1, + 0x40, + 0x0400, + std::u8::MAX as u32, // 0xff + std::i8::MAX as u32, // 0x7f + std::u16::MAX as u32, // 0xffff + std::i16::MAX as u32, // 0x7fff + std::i16::MIN as u32, // 0x8000 + // big-endian variants + std::u32::MIN, // 0x0000_0000 + std::u32::MAX, // 0xffff_ffff + std::i32::MAX as u32, // 0x7fff_ffff + std::i32::MIN as u32, // 0x8000_0000 + // little-endian variants + (std::i32::MAX as u32).swap_bytes(), // 0xffff_ff7f + (std::i32::MIN as u32).swap_bytes(), // 0x0000_0080 +]; + +static DANGEROUS_NUMBERS_U64: &[u64] = &[ + 1, + 0x40, + 0x0400, + std::u8::MAX as u64, // 0xff + std::i8::MAX as u64, // 0x7f + std::u16::MAX as u64, // 0xffff + std::i16::MAX as u64, // 0x7fff + std::i16::MIN as u64, // 0x8000 + std::u32::MAX as u64, // 0xffff_ffff + std::i32::MAX as u64, // 0x7fff_ffff + std::i32::MIN as u64, // 0x8000_0000 + // big-endian variants + std::u64::MIN, + std::u64::MAX, + std::i64::MAX as u64, + std::i64::MIN as u64, + // little-endian variants + (std::i64::MAX as u64).swap_bytes(), // 0xffff_ffff_ffff_ff7f + (std::i64::MIN as u64).swap_bytes(), // 0x0000_0000_0000_0080 +]; + +static DANGEROUS_NUMBERS_USIZE: &[usize] = &[ + 1, + 0x40, + 0x0400, + std::u8::MAX as usize, // 0xff + std::i8::MAX as usize, // 0x7f + std::u16::MAX as usize, // 0xffff + std::i16::MAX as usize, // 0x7fff + 
std::i16::MIN as usize, // 0x8000 + #[cfg(target_pointer_width = "64")] + 0x7fff_ffff, + #[cfg(target_pointer_width = "64")] + 0x8000_0000, + #[cfg(target_pointer_width = "64")] + 0xffff_ffff, + // big-endian variants + std::usize::MIN, + std::usize::MAX, + std::isize::MAX as usize, + std::isize::MIN as usize, + // little-endian variants + (std::isize::MAX as usize).swap_bytes(), + (std::isize::MIN as usize).swap_bytes(), +]; + +static DANGEROUS_NUMBERS_F32: &[f32] = &[ + std::f32::INFINITY, + std::f32::MAX, + std::f32::MIN, + std::f32::MIN_POSITIVE, + std::f32::NAN, + std::f32::NEG_INFINITY, +]; + +static DANGEROUS_NUMBERS_F64: &[f64] = &[ + std::f64::INFINITY, + std::f64::MAX, + std::f64::MIN, + std::f64::MIN_POSITIVE, + std::f64::NAN, + std::f64::NEG_INFINITY, +]; + +macro_rules! impl_corpus { + ( $ty:ident, $corpus:ident ) => { + impl ObjCorpus for $ty { + fn corpus_size() -> usize { + $corpus.len() + } + + fn get_interesting_value(index: usize) -> Option { + Some($corpus[index] as $ty) + } + } + }; +} + +impl_corpus!(u8, DANGEROUS_NUMBERS_U8); +impl_corpus!(i8, DANGEROUS_NUMBERS_U8); +impl_corpus!(u16, DANGEROUS_NUMBERS_U16); +impl_corpus!(i16, DANGEROUS_NUMBERS_U16); +impl_corpus!(u32, DANGEROUS_NUMBERS_U32); +impl_corpus!(i32, DANGEROUS_NUMBERS_U32); +impl_corpus!(u64, DANGEROUS_NUMBERS_U64); +impl_corpus!(i64, DANGEROUS_NUMBERS_U64); +impl_corpus!(f32, DANGEROUS_NUMBERS_F32); +impl_corpus!(f64, DANGEROUS_NUMBERS_F64); +impl_corpus!(u128, DANGEROUS_NUMBERS_U64); +impl_corpus!(i128, DANGEROUS_NUMBERS_U64); +impl_corpus!(usize, DANGEROUS_NUMBERS_USIZE); +impl_corpus!(isize, DANGEROUS_NUMBERS_USIZE); + diff --git a/hopper-core/src/fuzz/object/fn_pointer.rs b/hopper-core/src/fuzz/object/fn_pointer.rs new file mode 100644 index 0000000..9401dca --- /dev/null +++ b/hopper-core/src/fuzz/object/fn_pointer.rs @@ -0,0 +1,72 @@ +//! 
//! Generate and mutate fn pointer

// NOTE(review): generic parameter lists appear to have been lost from this
// listing (e.g. `impl ObjGenerate for T`, `choose_function_pointer::()`,
// `eyre::Result`); the code is kept as found - confirm against upstream.

use super::*;
use crate::{global_gadgets, runtime::fn_pointer::cast_fn_pointer, FnFuzzable, FnSignature, fn_pointer::cast_canary_fn_pointer};
use std::fmt::Debug;

impl ObjGenerate for T {
    /// Choose any function from gadgets
    ///
    /// Fails with the error produced by `choose_function_pointer` when no
    /// gadget has a matching signature.
    fn generate_new(state: &mut ObjectState) -> eyre::Result {
        let (func_name, f) = choose_function_pointer::()?;
        cast_fn_pointer(func_name, f, state)
    }
}

impl ObjMutate for T {
    /// Re-Choose any function from gadgets
    ///
    /// If no candidate is found, `self` is left untouched and the recorded
    /// operation is `MutateOperation::Nop`.
    fn mutate(&mut self, state: &mut ObjectState) -> eyre::Result {
        let op = if let Ok((func_name, f)) = choose_function_pointer::() {
            *self = cast_fn_pointer(func_name, f, state)?;
            MutateOperation::FnPointer {
                f_name: func_name.into(),
            }
        } else {
            MutateOperation::Nop
        };
        Ok(state.as_mutate_operator(op))
    }
    /// Replay a recorded operation.
    ///
    /// - `FnPointer { f_name }`: look the gadget up by name and use it, after
    ///   checking that its argument and return type names match `T`.
    /// - `PointerCanary`: replace `self` with the canary function pointer.
    /// - anything else is an error.
    fn mutate_by_op(
        &mut self,
        state: &mut ObjectState,
        keys: &[FieldKey],
        op: &MutateOperation,
    ) -> eyre::Result<()> {
        match op {
            MutateOperation::FnPointer { f_name } => {
                let fg = global_gadgets::get_instance().get_func_gadget(f_name)?;
                // The named gadget may not fit this pointer type; refuse it.
                if T::arg_type_names() != fg.arg_types || T::ret_type_name() != fg.ret_type {
                    eyre::bail!("function pointer type mismatch");
                }
                *self = cast_fn_pointer(fg.f_name, fg.f, state)?;
            }
            MutateOperation::PointerCanary => {
                *self = cast_canary_fn_pointer(state);
            }
            _ => {
                eyre::bail!("fail to mutate pointer, keys: {keys:?}, op: {op:?}");
            }
        }
        Ok(())
    }
}

/// Randomly choose a function from the existing gadgets,
/// which should have the same signature as `T`
///
/// Candidates must pass `crate::filter_fn_pointer` and have the same argument
/// and return type names as `T`. Returns `HopperError::NullFuncionPointer`
/// when there is no candidate.
fn choose_function_pointer(
) -> eyre::Result<(&'static String, &'static dyn FnFuzzable)> {
    let cands = global_gadgets::get_instance()
        .functions
        .iter()
        .filter(|(f_name, fg)| {
            crate::filter_fn_pointer(f_name)
                && T::arg_type_names() == fg.arg_types
                && T::ret_type_name() == fg.ret_type
        });
    rng::choose_iter(cands)
        .map(|(name, fg)| (name, fg.f))
        .ok_or_else(|| {
            // specific error for telling the option wrapper it should be None
            crate::log!(trace, "fail to find a function pointer");
            eyre::eyre!(crate::HopperError::NullFuncionPointer)
        })
}
diff --git a/hopper-core/src/fuzz/object/mod.rs b/hopper-core/src/fuzz/object/mod.rs
new file mode 100644
index 0000000..bd1ff12
--- /dev/null
+++ b/hopper-core/src/fuzz/object/mod.rs
@@ -0,0 +1,54 @@
use super::*;
use crate::{FieldKey, ObjType, ObjectState};

/// Trait for creating a new object that is generated from nothing
pub trait ObjGenerate: Clone + ObjType {
    /// Generate a totally new object
    fn generate_new(state: &mut ObjectState) -> eyre::Result;
}

/// Trait for mutating object
pub trait ObjMutate {
    /// Mutate object itself
    fn mutate(
        &mut self,
        state: &mut ObjectState,
    ) -> eyre::Result;
    /// Deterministic mutate
    ///
    /// The default implementation marks the deterministic stage as done on
    /// `state` and returns a no-op operator.
    fn det_mutate(
        &mut self,
        state: &mut ObjectState,
    ) -> eyre::Result {
        state.done_deterministic();
        Ok(MutateOperator::nop())
    }
    /// Mutate object by operator
    fn mutate_by_op(
        &mut self,
        state: &mut ObjectState,
        keys: &[FieldKey],
        op: &MutateOperation,
    ) -> eyre::Result<()>;
}

/// Corpus of interesting values
///
/// The default implementation describes an empty corpus.
pub trait ObjCorpus: Sized {
    /// Size of corpus
    fn corpus_size() -> usize {
        0
    }
    /// Get interesting value in corpus
    fn get_interesting_value(_index: usize) -> Option {
        None
    }
}

mod corpus;
mod fn_pointer;
mod number;
mod option;
pub mod pointer;
pub mod seq;
mod void;
pub mod buf;
mod bitfield;
\ No newline at end of file
diff --git a/hopper-core/src/fuzz/object/number.rs b/hopper-core/src/fuzz/object/number.rs
new file mode 100644
index 0000000..c7473d6
--- /dev/null
+++ b/hopper-core/src/fuzz/object/number.rs
@@ -0,0 +1,658 @@
//! Mutate numbers
//! Including
//! - mutate integers
//!
- mutate floats + +use num::Float; +use num_traits::{Bounded, WrappingAdd, WrappingSub}; +use rand::{distributions::Standard, prelude::*}; +use std::ops::*; + +use crate::{ + add_det_mutation, feedback::CmpState, log, ObjCorpus, ObjGenerate, ObjMutate, ObjectState, +}; + +use super::*; + +macro_rules! impl_fuzz_gen { + ( $($name:ident),* ) => { + $( + impl ObjGenerate for $name { + /// Use rand to generate integer + fn generate_new( state: &mut ObjectState) -> eyre::Result { + if flag::is_pilot_det() || state.mutate.borrow().is_zero_weight() { + return Ok(Self::default()); + } + // we prefer small integers during generation + //if rng::likely() { + let v: u8 = if rng::coin() { + rng::gen_range(0..=16) + } else { + rng::gen_range(0..=255) + }; + return Ok(v as $name); + //} + //Ok(rng::gen()) + } + } + )* + } +} + +impl_fuzz_gen!(u16, u32, u64, u128, usize); + +macro_rules! impl_fuzz_gen_neg { + ( $($name:ident),* ) => { + $( + impl ObjGenerate for $name { + /// Use rand to generate integer + fn generate_new( state: &mut ObjectState) -> eyre::Result { + if flag::is_pilot_det() || state.mutate.borrow().is_zero_weight() { + return Ok(Self::default()); + } + // we prefer small integers during generation + //if rng::likely() { + let v: i8 = if rng::coin() { + rng::gen_range(-4..=12) + } else { + rng::gen_range(-12..=127) + }; + return Ok(v as $name); + //} + //Ok(rng::gen()) + } + } + )* + } +} + +impl_fuzz_gen_neg!(i16, i32, i64, i128, isize); + +impl ObjGenerate for bool { + /// Use rand to generate integer + fn generate_new(_state: &mut ObjectState) -> eyre::Result { + Ok(rng::gen()) + } +} + +macro_rules! 
impl_fuzz_gen_byte { + ( $($name:ident),* ) => { + $( + impl ObjGenerate for $name { + /// Use rand to generate integer + fn generate_new( state: &mut ObjectState) -> eyre::Result { + if flag::is_pilot_det() || state.mutate.borrow().is_zero_weight() { + return Ok(Self::default()); + } + Ok(rng::gen()) + } + } + )* + } +} + +impl_fuzz_gen_byte!(u8, i8, char); + +impl ObjGenerate for f32 { + fn generate_new( state: &mut ObjectState) -> eyre::Result { + if flag::is_pilot_det() || state.mutate.borrow().is_zero_weight() { + return Ok(Self::default()); + } + Ok(rng::gen_range(-8.0..64.0)) + } +} + +impl ObjGenerate for f64 { + fn generate_new( state: &mut ObjectState) -> eyre::Result { + if flag::is_pilot_det() || state.mutate.borrow().is_zero_weight() { + return Ok(Self::default()); + } + Ok(rng::gen_range(-8.0..64.0)) + } +} + +macro_rules! impl_int_mut { + (@cmp $name:ident) => {}; + ( $($name:ident),* ) => { + $( + impl ObjMutate for $name { + fn det_mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + if let Some(op) = call_det(self, state)? 
{ + return Ok(state.as_mutate_operator(op)); + } + Ok(MutateOperator::nop()) + } + fn mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + log!(trace, "mutate number at field: {:?}", state.get_location_fields()); + // Random stage + let op = match rng::gen_range(0..10) { + 0..=1 => bit_flip(self), + 2..=3 => flip(self), + 4..=6 => arithmetic(self), + 7 => set_corpus(self), + 8 => random_value(self), + _ => cmp_var(self, state), + }; + log!(trace, "random operation selected: {:?}", op); + Ok(state.as_mutate_operator(op)) + } + fn mutate_by_op(&mut self, _state: &mut ObjectState, keys: &[FieldKey], op: &MutateOperation) -> eyre::Result<()> { + if keys.len() > 0 && !(keys.len() == 1 && keys[0] == FieldKey::Index(0)) { + crate::log!(error, "keys: {:?}, op: {:?}", keys, op); + unimplemented!() + } + match op { + MutateOperation::IntBitFlip{ index } => { bit_flip_at(self, *index); }, + MutateOperation::IntFlip{ indices } => { flip_at(self, indices.clone()); }, + MutateOperation::IntAdd { change } => { add(self, *change); }, + MutateOperation::IntSub { change } => { sub(self, *change); }, + MutateOperation::IntSet { val } => { + let v: Self = val.as_constant(); + if *self != v { + *self = v; + } else { + flag::set_refine_suc(false); + } + }, + MutateOperation::IntGet => { flag::set_tmp_u64(*self as u64) }, + MutateOperation::IntCmp { val } => { *self = Self::from_u64(*val); }, + MutateOperation::IntRandom { val } => { *self = Self::from_u64(*val); }, + MutateOperation::IntVariance { val } => { *self = Self::from_u64(*val); }, + MutateOperation::Corpus { index } => { set_corpus_at(self, *index); }, + MutateOperation::IntRange { min, max } => { + let min_val: Self = min.as_constant(); + let max_val: Self = max.as_constant(); + if *self >= max_val || *self < min_val { + if max_val <= min_val { + *self = num::cast(0).unwrap(); + } else { + *self = rng::gen_range(min_val..max_val); + } + } else { + flag::set_refine_suc(false); + } + }, + _ => { + 
eyre::bail!("unsuppported operator for {}: {op:?}", stringify!($name)); + } + } + Ok(()) + } + } + impl DetMutate for $name { + fn det_mutateion_steps() -> Vec> { + let mut steps: Vec> = vec![]; + // add and sub 1, 2 + for i in 1..=2 { + add_det_mutation!(steps, "add", |n: $name| (add(n, i), DetAction::Next)); + add_det_mutation!(steps, "sub", |n: $name| (sub(n, i), DetAction::Next)); + } + // bit flips + let num_bits = (std::mem::size_of::() * 8) as u8; + for i in (0..num_bits).rev() { + add_det_mutation!(steps, "bit_flip", |n: $name, s| { + let op = bit_flip_at(n, i); + // if it is in a list, we just flip once + if let FieldKey::Index{..} = &s.key { + return (op, DetAction::Last) + } + (op, DetAction::Next) + }); + } + // corpus + for i in 0..$name::corpus_size() { + add_det_mutation!(steps, "corpus", |n: $name| (set_corpus_at(n, i), DetAction::Next)); + } + // cmp + add_det_mutation!(steps, "cmp", |n: $name, s| { + let cmps = &mut s.mutate.borrow_mut().related_cmps; + if let Some(det_cmp) = cmps.iter_mut().find(|c| c.det) { + log!(trace, "cmp det: {det_cmp:?}"); + det_cmp.det = false; + (set_cmp(n, det_cmp, s), DetAction::Keep) + } else { + (MutateOperation::Nop, DetAction::Finish) + } + }); + steps + } + } + + )* + } +} + +impl_int_mut!(u8, i8, u16, i16, u32, i32, u64, i64, u128, i128, usize, isize); + +macro_rules! 
impl_fuzz_mut_cast { + ($name:ident, $cast:ident) => { + impl ObjMutate for $name { + fn det_mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + let casted = unsafe { &mut *(self as *mut $name as *mut $cast) }; + casted.det_mutate(state) + } + fn mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + let casted = unsafe { &mut *(self as *mut $name as *mut $cast) }; + casted.mutate(state) + } + fn mutate_by_op( + &mut self, + state: &mut ObjectState, + keys: &[FieldKey], + op: &MutateOperation, + ) -> eyre::Result<()> { + let casted = unsafe { &mut *(self as *mut $name as *mut $cast) }; + casted.mutate_by_op(state, keys, op) + } + } + }; +} + +// Cast them for U* and do mutating +impl_fuzz_mut_cast!(char, u8); + + +impl ObjMutate for bool { + fn mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + *self = !*self; + Ok(state.as_mutate_operator(MutateOperation::FlipBool)) + } + fn mutate_by_op( + &mut self, + _state: &mut ObjectState, + keys: &[FieldKey], + op: &MutateOperation, + ) -> eyre::Result<()> { + if !keys.is_empty() { + unimplemented!() + } + match op { + MutateOperation::FlipBool => { + *self = !*self; + } + _ => { + unimplemented!(); + } + } + Ok(()) + } +} + +macro_rules! impl_float_mut { + ( $($name:ident),* ) => { + $( +impl ObjMutate for $name { + fn det_mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + if let Some(op) = call_det(self, state)? { + return Ok(state.as_mutate_operator(op)); + } + Ok(MutateOperator::nop()) + } + fn mutate( + &mut self, + state: &mut ObjectState, + ) -> eyre::Result { + // Random stage + let op = if rng::rarely() { + set_corpus(self) + } else if rng::coin() { + let change = rng::gen_range(-1.0..1.0); + float_add(self, change) + } else { + let val = Self::generate_new( state)? 
as f64; + float_set(self, val) + }; + Ok(state.as_mutate_operator(op)) + } + fn mutate_by_op(&mut self, _state: &mut ObjectState, keys: &[FieldKey], op: &MutateOperation) -> eyre::Result<()> { + if keys.len() > 0 { + unimplemented!() + } + match op { + MutateOperation::FloatAdd { change } => { float_add(self, *change); }, + MutateOperation::FloatNew { val } => { float_set(self, *val); }, + MutateOperation::Corpus { index } => { set_corpus_at(self, *index); }, + _ => { + unimplemented!(); + } + } + Ok(()) + } +} + +impl DetMutate for $name { + fn det_mutateion_steps() -> Vec> { + let mut steps: Vec> = vec![]; + let size = $name::corpus_size(); + if size > 0 { + for i in 0..size - 1 { + add_det_mutation!(steps, "corpus", |n: $name| (set_corpus_at(n, i), DetAction::Next )); + } + add_det_mutation!(steps, "corpus_last", |n: $name| (set_corpus_at(n, size - 1), DetAction::Finish)); + } + steps + } +} + )* + } +} + +impl_float_mut!(f32, f64); + +/// Flip a single bit in the given number. +pub fn bit_flip(num: &mut T) -> MutateOperation +where + T: BitXor + IntCast + Copy, +{ + let num_bits = (std::mem::size_of::() * 8) as u8; + let idx: u8 = rng::gen_range(0..num_bits); + bit_flip_at(num, idx) +} + +fn bit_flip_at(num: &mut T, idx: u8) -> MutateOperation +where + T: BitXor + IntCast + Copy +{ + log!(trace, "xoring bit at {}-th ", idx); + *num = (*num) ^ T::from_u128(1_u128 << idx); + MutateOperation::IntBitFlip { index: idx } +} + +/// Flip more than 1 bit in this number. 
This is a flip potentially up to +/// the max bits in the number +pub fn flip(num: &mut T) -> MutateOperation +where + T: BitXor + IntCast + Copy, +{ + let num_bits = (std::mem::size_of::() * 8) as u8; + let bits_to_flip = rng::gen_range(1..=num_bits) as usize; + // 64 is chosen here as it's the the max primitive size (in bits) that we support + // we choose to do this approach over a vec to avoid an allocation + assert!(num_bits <= 64); + let bit_indices = rng::choose_multiple(0..num_bits, bits_to_flip); + flip_at(num, bit_indices) +} + +fn flip_at(num: &mut T, bit_indices: Vec) -> MutateOperation +where + T: BitXor + IntCast + Copy, +{ + log!(trace, "flip bits, indices: {:?}", bit_indices); + for &idx in &bit_indices { + *num = (*num) ^ T::from_u128(1_u128 << idx); + } + MutateOperation::IntFlip { + indices: bit_indices, + } +} + +/// Perform a simple arithmetic operation on the number (+ or -) +pub fn arithmetic(num: &mut T) -> MutateOperation +where + T: IntCast + Copy + WrappingAdd + WrappingSub, +{ + let change: u64 = rng::gen_range(1..=16); + if rng::coin() { + add(num, change) + } else { + sub(num, change) + } +} + +fn add(num: &mut T, change: u64) -> MutateOperation +where + T: IntCast + Copy + WrappingAdd, +{ + log!(trace, "adding {}", change); + *num = num.wrapping_add(&T::from_u64(change)); + MutateOperation::IntAdd { change } +} + +fn sub(num: &mut T, change: u64) -> MutateOperation +where + T: IntCast + Copy + WrappingSub, +{ + log!(trace, "subtracting {}", change); + *num = num.wrapping_sub(&T::from_u64(change)); + MutateOperation::IntSub { change } +} + +/// Set num as corpus' value +pub fn set_corpus(num: &mut T) -> MutateOperation { + let n = T::corpus_size(); + if n > 0 { + let index = rng::gen_range(0..n); + set_corpus_at(num, index) + } else { + MutateOperation::Nop + } +} + +pub fn set_corpus_at(num: &mut T, index: usize) -> MutateOperation { + log!(trace, "set corpus {}", index); + if let Some(v) = T::get_interesting_value(index) { + *num = 
v; + } + MutateOperation::Corpus { index } +} + +pub fn random_value(num: &mut T) -> MutateOperation +where + Standard: Distribution, + T: IntCast + Copy + std::fmt::Debug, +{ + let val: T = rng::gen(); + *num = val; + MutateOperation::IntRandom { + val: val.as_u64() + } +} + +fn set_cmp(num: &mut T, cmp_state: &CmpState, object_state: &ObjectState) -> MutateOperation +where + T: IntCast + Copy + std::fmt::Debug, +{ + let op = cmp_state.op.borrow(); + let size = op.size as usize; + let operand = if cmp_state.affect_left { + op.operand2 + } else { + op.operand1 + }; + let type_size = std::mem::size_of::(); + // for vec or vec + if type_size == 1 && type_size < size { + if let FieldKey::Index(i) = object_state.key { + crate::log!(trace, "cast num to size {} for det", size); + let len = object_state.get_parent().unwrap().children.len(); + if i + size <= len { + match size { + 2 => { + let ptr = num as *mut T as *mut u16; + unsafe { ptr.write_unaligned( u16::from_u64(operand)) }; + } + 4 => { + let ptr = num as *mut T as *mut u32; + unsafe { ptr.write_unaligned( u32::from_u64(operand)) }; + } + 8 => { + let ptr = num as *mut T as *mut u64; + unsafe { ptr.write_unaligned( u64::from_u64(operand)) }; + } + _ => { + *num = T::from_u64(operand); + } + } + } + } + } else { + log!(trace, "cmp set: {:?}", cmp_state); + *num = T::from_u64(operand); + } + MutateOperation::IntCmp { val: operand } +} + +impl IrEntry { + pub fn as_constant + WrappingSub>(&self) -> T { + match self { + Self::Min(off) => T::min_value().wrapping_add(&T::from_u64(*off as u64)), + Self::Max(off) => T::max_value().wrapping_sub(&T::from_u64(*off as u64)), + Self::Constant(val) => T::from_u64(*val), + _ => T::from_u64(0) + } + } +} + +/// Set value as compare bytes or corpus, and add some mathematic variance +fn cmp_var(num: &mut T, state: &ObjectState) -> MutateOperation +where + T: IntCast + + ObjCorpus + + Copy + + WrappingAdd + + WrappingSub + + std::fmt::Debug, +{ + let mut first_op = set_corpus(num); 
+ if rng::likely() { + if let Some(cmp_state) = rng::choose_iter(state.mutate.borrow().related_cmps.iter()) { + first_op = set_cmp(num, cmp_state, state); + } + } + if rng::unlikely() { + return first_op; + } + let _arith_op = arithmetic(num); + MutateOperation::IntVariance { + val: num.as_u64() + } +} + +fn float_add(num: &mut T, change: f32) -> MutateOperation +where + T: Float, +{ + log!(trace, "float arith: {}", change); + *num = num.mul_add(num::cast(1.0).unwrap(), num::cast(change).unwrap()); + MutateOperation::FloatAdd { change } +} + +fn float_set(num: &mut T, val: f64) -> MutateOperation +where + T: Float, +{ + log!(trace, "float new: {}", val); + *num = num::cast(val).unwrap(); + MutateOperation::FloatNew { val } +} + +pub trait IntCast { + fn from_u64(val: u64) -> Self; + fn from_u128(val: u128) -> Self; + fn as_u64(&self) -> u64; + fn as_u128(&self) -> u128; +} + +macro_rules! impl_int_cast_unsign { + ( $($name:ident),* ) => { + $( + impl IntCast for $name { + fn from_u64(val: u64) -> Self { + val as Self + } + fn from_u128(val: u128) -> Self { + val as Self + } + fn as_u64(&self) -> u64 { + *self as u64 + } + fn as_u128(&self) -> u128 { + *self as u128 + } + } + )* + } +} +impl_int_cast_unsign!(u8, u16, u32, u64, u128, usize); + +macro_rules! 
impl_int_cast_sign { + ($name:ident, $cast:ident) => { + impl IntCast for $name { + fn from_u64(val: u64) -> Self { + val as $cast as Self + } + fn from_u128(val: u128) -> Self { + val as $cast as Self + } + fn as_u64(&self) -> u64 { + *self as $cast as u64 + } + fn as_u128(&self) -> u128 { + *self as $cast as u128 + } + } + } +} + +impl_int_cast_sign!(i8, u8); +impl_int_cast_sign!(i16, u16); +impl_int_cast_sign!(i32, u32); +impl_int_cast_sign!(i64, u64); +impl_int_cast_sign!(i128, u128); +impl_int_cast_sign!(isize, usize); + +#[test] +fn test_write_bytes() { + use crate::feedback::*; + use std::cell::RefCell; + use std::rc::Rc; + + let val = 12300_u16; + println!("{:?}", val.to_le_bytes()); + println!("{:?}", val.to_be_bytes()); + + let val_m = 12349_u16; + println!("val_m: {:?}", val_m.to_le_bytes()); + + /* + let mut val2 = 0_u16; + val2 = u16::from_u64(12300_u64); + println!("val2: {val2}"); + */ + + let val3 = -100000000_i32; + println!("val_3({}): {:?}", val3 as u32, val3.to_le_bytes()); + let mut buf = vec![0_i8; 20]; + let cmp_op = CmpOperation { + operand1: 12300, + operand2: 0x7788, + id: 0, + size: 2, + ty: 0, + stmt_index: 0, + state: 0, + }; + let cmp_state = CmpState { + id: 0, + op: Rc::new(RefCell::new(cmp_op)), + affect_left: false, + det: true, + }; + let mut object_state = ObjectState::root("test", "CmpState"); + for i in 0..buf.len() { + object_state.add_child(FieldKey::Index(i), "usize"); + } + println!("buf ptr: {:?}", buf.as_slice().as_ptr()); + set_cmp(&mut buf[1], &cmp_state, &object_state.children[1]); + println!("buf: {buf:?}"); + + cmp_state.op.borrow_mut().operand1 = 987654325; + cmp_state.op.borrow_mut().size = 4; + set_cmp(&mut buf[4], &cmp_state, &object_state.children[4]); + println!("buf: {buf:?}"); + + cmp_state.op.borrow_mut().operand1 = 4194967296; + cmp_state.op.borrow_mut().size = 4; + set_cmp(&mut buf[10], &cmp_state, &object_state.children[10]); + println!("buf: {buf:?}"); +} diff --git 
a/hopper-core/src/fuzz/object/option.rs b/hopper-core/src/fuzz/object/option.rs
new file mode 100644
index 0000000..0b8cf4c
--- /dev/null
+++ b/hopper-core/src/fuzz/object/option.rs
@@ -0,0 +1,104 @@
//! Generate and mutate option

// NOTE(review): generic parameter lists appear to have been lost from this
// listing (e.g. `eyre::Result>`, `impl ObjGenerate for Option`,
// `downcast_ref::()`); the code is kept as found - confirm against upstream.

use crate::{config, FieldKey, HopperError, ObjFuzzable};

use super::*;

/// Generate data inside option
///
/// Returns `Ok(Some(obj))` on success. A `HopperError::NullFuncionPointer`
/// error from the inner generator is turned into `Ok(None)`; any other error
/// is propagated.
fn generate_inner(state: &mut ObjectState) -> eyre::Result> {
    match T::generate_new(state) {
        Ok(obj) => Ok(Some(obj)),
        Err(err) => {
            // For fn pointer, if we can't find any valid fn, it should be NONE
            if let Some(HopperError::NullFuncionPointer) = err.downcast_ref::() {
                return Ok(None);
            }
            Err(err)
        }
    }
}

impl ObjGenerate for Option {
    /// Add a child state keyed `FieldKey::Option`, then either return `None`
    /// or generate the inner value into that child.
    ///
    /// `None` is returned when `config::ENABLE_SET_FN_POINTER` is off (the
    /// state's weight is also set to 0), and when `rng::mostly()` fires
    /// outside pilot-det mode.
    fn generate_new(state: &mut ObjectState) -> eyre::Result {
        state.done_deterministic();
        let sub_state = state
            .add_child(FieldKey::Option, std::any::type_name::>())
            .last_child_mut()?;
        if !config::ENABLE_SET_FN_POINTER {
            let _ = state.replace_weight(0);
            return Ok(None);
        }
        if rng::mostly() && !flag::is_pilot_det() {
            return Ok(None);
        }
        generate_inner(sub_state)
    }
}

impl ObjMutate for Option {
    /// Mutate the option: a `Some` value is either cleared to `None` or has
    /// its inner value mutated; a `None` value is regenerated, recording the
    /// RNG state saved beforehand so the generation can be replayed.
    /// Does nothing when `config::ENABLE_SET_FN_POINTER` is off.
    fn mutate(&mut self, state: &mut ObjectState) -> eyre::Result {
        if !config::ENABLE_SET_FN_POINTER {
            return Ok(MutateOperator::nop());
        }
        let op = if let Some(val) = self {
            if rng::likely() {
                *self = None;
                // Drop the pointer info recorded on the child state as well.
                state.last_child_mut()?.pointer.take();
                MutateOperation::OptionNone
            } else {
                return val.mutate(state.last_child_mut()?);
            }
        } else {
            // None
            let rng_state = rng::save_rng_state();
            *self = generate_inner(state.last_child_mut()?)?;
            MutateOperation::OptionNew { rng_state }
        };
        Ok(state.as_mutate_operator(op))
    }

    /// Replay a recorded operation on the option.
    ///
    /// - `PointerGen` / `OptionNew`: regenerate the inner value under the
    ///   recorded RNG state.
    /// - `OptionNone` / `PointerNull`: clear the value, or flag the refinement
    ///   as unsuccessful if it is already `None`.
    /// - `FnPointer`: requires `keys[0]` to be `FieldKey::Option`; generates
    ///   an inner value if missing, then forwards the operation with the
    ///   remaining keys.
    /// - `PointerCanary`: generates an inner value if missing, then forwards
    ///   the operation with the same keys.
    /// - anything else is an error.
    fn mutate_by_op(
        &mut self,
        state: &mut ObjectState,
        keys: &[FieldKey],
        op: &MutateOperation,
    ) -> eyre::Result<()> {
        // crate::log!(trace, "state: {:?}", state);
        match op {
            MutateOperation::PointerGen { rng_state }
            | MutateOperation::OptionNew { rng_state } => {
                let _tmp_rng = rng::TempRngGuard::temp_use(rng_state);
                *self = generate_inner(state.last_child_mut()?)?;
            }
            MutateOperation::OptionNone | MutateOperation::PointerNull => {
                if self.is_none() {
                    flag::set_refine_suc(false);
                } else {
                    *self = None;
                    state.last_child_mut()?.pointer.take();
                }
            }
            MutateOperation::FnPointer { f_name: _ } => {
                // NOTE(review): `keys[0]` panics on an empty key path -
                // confirm callers always pass at least one key here.
                eyre::ensure!(keys[0] == FieldKey::Option, "key should be option!");
                if self.is_none() {
                    *self = generate_inner(state.last_child_mut()?)?;
                }
                if let Some(val) = self {
                    val.mutate_by_op(state.last_child_mut()?, &keys[1..], op)?;
                }
            }
            MutateOperation::PointerCanary => {
                if self.is_none() {
                    *self = generate_inner(state.last_child_mut()?)?;
                }
                if let Some(val) = self {
                    val.mutate_by_op(state.last_child_mut()?, keys, op)?;
                }
            }
            _ => {
                eyre::bail!("keys: {:?}, op: {:?}", keys, op);
            }
        }
        Ok(())
    }
}
diff --git a/hopper-core/src/fuzz/object/pointer.rs b/hopper-core/src/fuzz/object/pointer.rs
new file mode 100644
index 0000000..175318b
--- /dev/null
+++ b/hopper-core/src/fuzz/object/pointer.rs
@@ -0,0 +1,656 @@
//! Generate and mutate pointers
//! Including:
//! - assign a location for pointer during generation
//! - mutate existing pointer to a new location

use super::*;
use crate::{config, runtime::*, utils};
use eyre::ContextCompat;

// static mut str_cache: Vec<&'static str> = vec![];
/// Mutate specific pointer's location
///
/// `keys` selects the target state below `root_state`; an empty `keys` means
/// the root itself is the target.
pub fn mutate_pointer_location(
    program: &mut FuzzProgram,
    root_state: &mut ObjectState,
    keys: &[FieldKey],
) -> eyre::Result {
    let state = root_state.get_child_mut_by_fields(keys)?;
    let is_root = keys.is_empty();
    let depth = program.get_stub_stmt_depth()?;
    let op = set_pointer_location(program, state, is_root, depth, false)?;
    Ok(op)
}

/// Generate all pointer inside state
///
/// Walks `state` recursively and sets a location for every pointer whose
/// location is still null. States keyed `FieldKey::Option` are skipped, and
/// children are only visited when there are at most
/// `crate::config::MAX_VEC_LEN` of them.
pub fn generate_pointer_location(
    program: &mut FuzzProgram,
    state: &mut ObjectState,
    depth: usize,
) -> eyre::Result<()> {
    // Ignore state inside option, e.g. function pointer
    if state.key == FieldKey::Option {
        return Ok(());
    }
    if let Some(ps) = state.pointer.as_ref() {
        if ps.pointer_location.is_null() {
            let is_root = state.parent.is_none();
            let _ = set_pointer_location(program, state, is_root, depth, true)?;
        }
    }
    if state.children.len() <= crate::config::MAX_VEC_LEN {
        for sub_state in state.children.iter_mut() {
            generate_pointer_location(program, sub_state, depth)?;
        }
    }
    Ok(())
}

/// Mutate specific pointer's location with specific operation
pub fn mutate_pointer_location_by_op(
    program: &mut FuzzProgram,
    root_state: &mut ObjectState,
    mut keys: &[FieldKey],
    op: &MutateOperation,
) -> eyre::Result<()> {
    if keys.last().map_or(false, |k| k.is_union_root()) {
        keys = &keys[..keys.len() - 1];
    }
    let state = root_state.get_child_mut_by_fields(keys)?;
    match op {
        MutateOperation::PointerNull => {
            state.get_pointer_mut()?.pointer_location = Location::null();
        }
        MutateOperation::PointerUse { loc } => {
            let mut loc = loc.use_loc();
            // stmt in loc may not 'refer' to a stmt in current program, e.g create by `dup`
            // so we update it to the index of program's stmt here
            if let Some(index) = loc.stmt_index.as_mut() {
                let is = program
                    .get_stmt_by_index_uniq(index)
                    .with_context(|| format!("can't find stmt with uniq: {index:?}"))?;
                *index = is.index.use_index();
            }
            state.get_pointer_mut()?.pointer_location = loc;
        }
        MutateOperation::PointerRet { f_name, rng_state } => {
            let ident = state.key.as_str()?;
            let depth = program.get_stub_stmt_depth()?;
            let _tmp_rng = rng::TempRngGuard::temp_use(rng_state);
            let loc = create_call_stmt_for_ptr(program, ident, f_name, depth)?;
            state.get_pointer_mut()?.pointer_location = loc;
        }
        MutateOperation::PointerGen { rng_state } => {
            let ident = state.key.as_str()?;
            let type_name = &state.pointer.as_mut().context("has ptr")?.pointer_type;
            let is_opaque = utils::is_opaque_type(type_name);
            let depth = program.get_stub_stmt_depth()?;
            let _tmp_rng = rng::TempRngGuard::temp_use(rng_state);
            let loc = create_load_stmt_for_ptr(program, type_name, ident, is_opaque, depth)?;
            if loc.is_null() {
                set_incomplete_gen(true);
            }
            state.get_pointer_mut()?.pointer_location = loc;
        }
        MutateOperation::PointerGenChar => {
            let ident = state.key.as_str()?;
            let type_name = "i8";
            state.pointer.as_mut().context("has ptr")?.pointer_type = type_name;
            let depth = program.get_stub_stmt_depth()?;
            let loc = create_load_stmt_for_ptr(program, type_name, ident, false, depth)?;
            state.get_pointer_mut()?.pointer_location = loc;
        }
        MutateOperation::InitOpaque { call_i } => {
            let depth = program.get_stub_stmt_depth()?;
            let op = init_opaque_pointer(program, state, *call_i, depth)?;
            if op.is_nop() {
                set_incomplete_gen(true);
            }
        }
        MutateOperation::InitOpaqueForInfer { call_i } => {
            let depth = program.get_stub_stmt_depth()?;
            let op = init_opaque_pointer(program, state, *call_i, depth)?;
            if op.is_nop() {
                set_incomplete_gen(true);
            } else {
                // the op is derived to be another operation (more detailed)
                program.ops.push(state.as_mutate_operator(op));
            }
        }
        MutateOperation::RemoveInitOpaque => {
            if let Some(cur_index) = program.get_stub_stmt_index() {
                if let Some(call_index) = program.has_been_inited(&cur_index) {
                    // if program.check_call_related_to_stmt(cur_index.get(), call_index) {
                    program.stmts.remove(call_index);
                }
            }
        }
        MutateOperation::PointerCanary => {
            let depth = program.get_stub_stmt_depth()?;
            let load_stmt = LoadStmt::generate_vec(program, "i8", "mock_canary", depth + 1)?;
            let len = load_stmt.value.get_length();
            let stmt_index = program.insert_or_append_stmt(load_stmt)?;
            let loc = Location::new(stmt_index, LocFields::new(vec![FieldKey::Index(len + 1)]));
            state.get_pointer_mut()?.pointer_location = loc;
        }
        MutateOperation::PointerFile { read } => {
            // generate a file, and point to it.
+ let ident = state.key.as_str()?; + let depth = program.get_stub_stmt_depth()?; + let file_stmt = FileStmt::generate_new(program, ident, false, false, *read, depth)?; + let stmt_index = program.insert_or_append_stmt(file_stmt)?; + let loc = Location::new(stmt_index, LocFields::new(vec![FieldKey::Pointer])); + state.get_pointer_mut()?.pointer_location = loc; + } + MutateOperation::VecPad { .. } | MutateOperation::BufRefine { .. } => { + let loc = &state.get_pointer()?.pointer_location; + let is_null = loc.is_null(); + let is_returned = loc.stmt_index.as_ref().map_or(false, |ptee| { + if let FuzzStmt::Call(..) = &program.stmts[ptee.get()].stmt { + return true; + } + false + }); + + fn create_new_vec( + program: &mut FuzzProgram, + state: &mut ObjectState, + ) -> eyre::Result { + let ident = state.key.as_str()?; + let type_name = &state.pointer.as_mut().context("has ptr")?.pointer_type; + let depth = program.get_stub_stmt_depth()?; + let is_opaque = utils::is_opaque_type(type_name); + if is_opaque { + return Ok(false); + } + let loc = create_load_stmt_for_ptr(program, type_name, ident, is_opaque, depth)?; + state.get_pointer_mut()?.pointer_location = loc; + Ok(true) + } + // If the vec/buffer is loaded from a call statement, + // we replace it with a load statement that is adhere to the constriant. + if (is_null || is_returned) && !create_new_vec(program, state)? { + return Ok(()); + } + let loc = &state.get_pointer()?.pointer_location; + if let FuzzStmt::Load(load) = &mut program.stmts[loc.get_index()?.get()].stmt { + // if the pointee is not a vector, we create a vector for it. + if utils::is_vec_type(load.value.type_name()) { + load.value.mutate_by_op(&mut load.state, &[], op)?; + } else if create_new_vec(program, state)? 
{ + let loc = &state.get_pointer()?.pointer_location; + if let FuzzStmt::Load(load) = &mut program.stmts[loc.get_index()?.get()].stmt { + load.value.mutate_by_op(&mut load.state, &[], op)?; + } + } + } + } + MutateOperation::UnionUse { rng_state, member } => { + // If the target member is a pointer, we intercept the mutate operation here to set the pointer location. + // Otherwise, the mutation is completed later by the object itself. + + // If the target member is already set, don't do anything + let sub_state = state.last_child_mut()?; + // If this is a pointer pointing to another union + if let FieldKey::Field(f) = &sub_state.key { + if f == member { + flag::set_refine_suc(false); + return Ok(()); + } + } + let fields_ty = global_gadgets::get_instance() + .get_object_builder(state.ty)? + .get_fields_ty(); + + if let Some(type_name) = fields_ty.get(member) { + if utils::is_pointer_type(type_name) { + let is_null = state + .pointer + .as_ref() + .map_or(false, |ps| ps.pointer_location.is_null()); + state.clear(); + let sub_state = state + .add_child(member.as_str(), utils::get_static_ty(type_name)) + .last_child_mut()?; + let is_mut = utils::is_mut_pointer_type(type_name); + if is_null { + sub_state.pointer = Some(PointerState::new_pointer( + Location::null(), + utils::get_static_ty(utils::get_pointer_inner(type_name).unwrap()), + is_mut, + )) + } else { + let ident = sub_state.key.as_str()?; + let is_opaque = utils::is_opaque_type(type_name); + let depth = program.get_stub_stmt_depth()?; + let inner_type = utils::get_pointer_inner(type_name).unwrap(); + let _tmp_rng = rng::TempRngGuard::temp_use(rng_state); + let loc = + create_load_stmt_for_ptr(program, inner_type, ident, is_opaque, depth)?; + let pt_type_holder = global_gadgets::get_instance() + .types + .keys() + .find(|s| s == &inner_type); + if let Some(pt_type) = pt_type_holder { + sub_state.pointer = + Some(PointerState::new_pointer(loc, pt_type, is_mut)); + return Ok(()); + } + } + } + } + } + _ => {} + } 
+ Ok(()) +} + +macro_rules! impl_fuzz_pointer { + ($pointer:ident, $name:literal, $is_mut:tt) => { + impl ObjGenerate for $pointer { + fn generate_new(state: &mut ObjectState) -> eyre::Result { + // generate later + Ok(Self::null(state)) + } + } + + impl ObjMutate for $pointer { + fn mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + if rng::unlikely() && !crate::is_input_only() { + Ok(state.as_mutate_operator(MutateOperation::PointerTodo)) + } else { + Ok(state.as_mutate_operator(MutateOperation::Nop)) + } + } + + fn mutate_by_op( + &mut self, + _state: &mut ObjectState, + keys: &[FieldKey], + _op: &MutateOperation, + ) -> eyre::Result<()> { + if keys.is_empty() { + flag::set_mutate_ptr(true); + } + // special case for union + if keys.len() == 1 && keys.last().map_or(false, |k| k.is_union_root()) { + flag::set_mutate_ptr(true); + } + Ok(()) + } + } + }; +} + +impl_fuzz_pointer!(FuzzMutPointer, "mut", true); +impl_fuzz_pointer!(FuzzConstPointer, "const", false); + +impl ObjMutate for FuzzFrozenPointer { + fn mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + crate::log!(trace, "it is a frozen pointer, do not mutated!"); + Ok(state.as_mutate_operator(MutateOperation::Nop)) + } + + fn mutate_by_op( + &mut self, + _state: &mut ObjectState, + _keys: &[FieldKey], + _op: &MutateOperation, + ) -> eyre::Result<()> { + Ok(()) + } +} + +impl ObjGenerate for FuzzFrozenPointer { + fn generate_new(state: &mut ObjectState) -> eyre::Result { + let _ = state.replace_weight(0); + Ok(Self::new()) + } +} + +/// Find a new location for pointers +fn set_pointer_location( + program: &mut FuzzProgram, + state: &mut ObjectState, + is_root: bool, + depth: usize, + is_generate: bool, +) -> eyre::Result { + eyre::ensure!(depth < 50, "the program is too complex with huge depth!"); + let ident = state.key.as_str()?; + let parent_ty_holder = state.get_parent().map(|p| p.ty); + let ps = state.pointer.as_mut().context("pointer has ps")?; + let type_name = ps.pointer_type; 
+ let is_opaque = utils::is_opaque_type(type_name); + // once the pointer is mutated, stub is removed + ps.stub = false; + + // 1. the pointer may be null + // avoid reach deep depth + let use_null = if flag::is_pilot_det() { + !is_root + && (depth >= config::PILOT_MAX_DEPTH || program.stmts.len() > config::MAX_STMTS_LEN) + } else if depth > config::MAX_DEPTH || program.stmts.len() > config::MAX_STMTS_LEN { + true + } else if is_generate { + rng::mostly() + } else if is_root || depth < config::PILOT_MAX_DEPTH { + rng::coin() + } else { + rng::likely() + }; + if use_null { + crate::log!( + trace, + "`{type_name}*`: use null pointer, is_root: {is_root}, depth: {depth}" + ); + ps.pointer_location = Location::null(); + return Ok(MutateOperation::PointerNull); + } + // 2. find existing call/load for pointer + // since it may bring cases that a locations is used in multiple pointers, + // and the pointers are in the same object or the functions(arguments), + // which may causes crash. + // thus, we only consider the statement itself (except elements inside statement) for reusing pointer. + if is_root && flag::is_reuse_stmt() && rng::rarely() { + if let Some(loc) = find_location_from_stmts(program, type_name, ident) { + crate::log!(trace, "`{}*` use existing pointer", type_name); + // dup loc to avoid ref-checking + let loc_dup = loc.dup(); + ps.pointer_location = loc; + return Ok(MutateOperation::PointerUse { loc: loc_dup }); + } + } + + // 3. 
the pointer may be return from function + // if it is a pointer in a root statement, it will use calls directly in call mutation + if !is_root && flag::use_call(type_name, is_opaque, depth) { + let ptr_type_name = utils::pointer_type(type_name, ps.is_mut); + let alias_type_name = + get_alias_type_name(program, 0, ident, &ptr_type_name, parent_ty_holder); + if let Some(provider) = find_func_with_return_type(&ptr_type_name, alias_type_name) { + let rng_state = rng::save_rng_state(); + // the arguments of call may lead to refence circle, so we do not reuse stmt + let loc = create_call_stmt_for_ptr(program, ident, provider, depth)?; + ps.pointer_location = loc; + return Ok(MutateOperation::PointerRet { + f_name: provider.to_string(), + rng_state, + }); + } + } + + // 4. create a load for pointer + crate::log!(trace, "load new value for pointer `{}*`", type_name); + let rng_state = rng::save_rng_state(); + let loc = create_load_stmt_for_ptr(program, type_name, ident, is_opaque, depth)?; + if loc.is_null() { + return Ok(MutateOperation::PointerNull); + } + ps.pointer_location = loc; + Ok(MutateOperation::PointerGen { rng_state }) +} + +/// Insert a new load statement, and return its location +fn create_load_stmt_for_ptr( + program: &mut FuzzProgram, + type_name: &str, + ident: &str, + is_opaque: bool, + depth: usize, +) -> eyre::Result { + let load_stmt = if is_opaque { + // do not mutate the opaque struct + // LoadStmt::generate_constant(type_name, ident)? + + // do not load a opaque struct by ourself from scratch + return Ok(Location::null()); + } else { + LoadStmt::generate_vec(program, type_name, ident, depth + 1)? 
+ }; + let stmt_index = program.insert_or_append_stmt(load_stmt)?; + Ok(Location::stmt(stmt_index)) +} + +/// Insert a new call statement, and return its location +fn create_call_stmt_for_ptr( + program: &mut FuzzProgram, + ident: &str, + f_name: &str, + depth: usize, +) -> eyre::Result { + crate::log!(trace, "use call `{}` for pointer ", f_name); + let _tmp = flag::ReuseStmtGuard::temp_disable(); + let call = CallStmt::generate_new(program, ident, f_name, depth + 1)?; + let call_stmt = program.insert_or_append_stmt(call)?; + let _ = program.insert_or_append_stmt(AssertStmt::assert_non_null(call_stmt.use_index())); + let mut loc = Location::stmt(call_stmt); + loc.fields.push(FieldKey::Pointer); + Ok(loc) +} + +/// Find location from exsiting statement +fn find_location_from_stmts( + program: &FuzzProgram, + type_name: &str, + ident: &str, +) -> Option { + // find ty, or alloc::vec::Vec / [ty: N] + let mut loc_list = vec![]; + // let vec_ty = utils::vec_type(type_name); + for indexed_stmt in program.stmts.iter() { + if indexed_stmt.stmt.is_stub() { + break; + } + if indexed_stmt.freed.is_some() { + continue; + } + let index = &indexed_stmt.index; + if let FuzzStmt::Load(load) = &indexed_stmt.stmt { + if load.point_to_freed_resource(program) { + continue; + } + let value_type = load.value.type_name(); + if type_name == value_type { + // || vec_ty == value_type + if load.get_ident() == ident { + loc_list.push(Location::stmt(index.use_index())); + } + } + } + } + rng::choose_iter(loc_list.into_iter()) +} + +/// Initialize opaque pointer +/// only used in mutation +fn init_opaque_pointer( + program: &mut FuzzProgram, + state: &mut ObjectState, + call_i: usize, + depth: usize, +) -> eyre::Result { + if depth > config::MAX_DEPTH { + crate::log!(trace, "reach max depth"); + return Ok(MutateOperation::Nop); + } + + // if it is an vector, get its first element + let mut state = state; + if utils::is_vec_type(state.ty) { + if let Some(ch) = state.children.first() { + if 
utils::is_opaque_pointer(ch.ty) { + state = &mut state.children[0]; + } + } + } + + let ident = state.key.as_str()?; + let parent_type = state.get_parent().map(|p| p.ty); + let ps = state.pointer.as_mut().context("pointer has ps")?; + let type_name = ps.pointer_type; + crate::log!(trace, "Try to prepare opaque pointer `{}*`", type_name); + let ptr_type_name = utils::pointer_type(type_name, ps.is_mut); + let alias_type_name: &str = + get_alias_type_name(program, call_i, ident, &ptr_type_name, parent_type); + + // try to obtain opaque type from function call + if let Some(provider) = find_func_with_return_type(&ptr_type_name, alias_type_name) { + let rng_state = rng::save_rng_state(); // capture before the call is generated: replaying `PointerRet` restores this state and then re-creates the call + let loc = create_call_stmt_for_ptr(program, ident, provider, depth)?; + ps.pointer_location = loc; + crate::log!(trace, "Opaque pointer ready, provider: {provider}"); + return Ok(MutateOperation::PointerRet { + f_name: provider.to_string(), + rng_state, + }); + } + + // try to init a NULL opaque pointer + crate::log!(trace, "Try to init opaque pointer for `{}*`", type_name); + let stub_index = program.get_stub_stmt_index().context("no stub")?; + let rng_state = save_rng_state(); + // Try init function; + // check and get index of corresponding canonical type name || alias type name + let candidates = global_gadgets::get_instance() + .functions + .iter() + .filter(|(_, fg)| crate::filter_init_func(fg.f_name)) + .filter_map(|(_, fg)| { + find_init_arg_pos(fg, &ptr_type_name, alias_type_name).map(|i| (fg, i)) + }); + + if let Some((fg, arg_pos)) = rng::choose_iter(candidates) { + let f_name = fg.f_name; + if parent_type.is_none() { + let mut init_call = + CallStmt::generate_new(program, CallStmt::RELATIVE, f_name, depth + 1)?; + let mut load_2nd_ptr = + LoadStmt::generate_constant(fg.arg_types[arg_pos], fg.arg_idents[arg_pos])?; + let load_2nd_ps = load_2nd_ptr.state.get_pointer_mut()?; + load_2nd_ps.pointer_location = Location::stmt(stub_index.use_index()); + let load_2nd_ptr_index =
program.insert_stmt(stub_index.get() + 1, load_2nd_ptr); + if !init_call.args.contains(&stub_index) { + init_call.set_arg(arg_pos, load_2nd_ptr_index.use_index()); + } + let call_stmt = program.insert_stmt(load_2nd_ptr_index.get() + 1, init_call); + let _ = program.insert_stmt( + call_stmt.get() + 1, + AssertStmt::assert_initialized(stub_index.use_index(), call_stmt), + ); + } else { + let load_null = + LoadStmt::generate_constant(&utils::pointer_type(type_name, true), ident)?; + let load_null_index = program.insert_or_append_stmt(load_null)?; + let mut load_2nd_ptr = + LoadStmt::generate_constant(fg.arg_types[arg_pos], fg.arg_idents[arg_pos])?; + load_2nd_ptr.state.get_pointer_mut()?.pointer_location = + Location::stmt(load_null_index.use_index()); + + let load_2nd_ptr_index = program.insert_or_append_stmt(load_2nd_ptr)?; + let mut init_call = + CallStmt::generate_new(program, CallStmt::RELATIVE, f_name, depth + 1)?; + + // To reference the pointer itself instead of its address, + // `FieldKey::Pointer` must be specified in the Location Field. + ps.pointer_location = Location::new( + load_null_index.use_index(), + LocFields::new(vec![FieldKey::Pointer]), + ); + + if !init_call.args.contains(&stub_index) { + init_call.set_arg(arg_pos, load_2nd_ptr_index.use_index()); + } + let call_stmt = program.insert_or_append_stmt(init_call)?; + let _ = program.insert_stmt( + call_stmt.get() + 1, + AssertStmt::assert_initialized(load_null_index.use_index(), call_stmt), + ); + } + crate::log!( + trace, + "Init opaque pointer `{}*` with: {:?}", + type_name, + f_name + ); + return Ok(MutateOperation::CallRelatedInsert { + f_name: f_name.to_owned(), + arg_pos, + rng_state, + }); + } + // found no possible call can initialize this opaque pointer. 
+ let rng_state = rng::save_rng_state(); + let loc = create_load_stmt_for_ptr(program, type_name, ident, true, depth)?; + if loc.is_null() { + return Ok(MutateOperation::PointerNull); + } + ps.pointer_location = loc; + Ok(MutateOperation::PointerGen { rng_state }) +} + +/// Find argument position in init function +fn find_init_arg_pos(fg: &FnGadget, type_name: &str, alias_type_name: &str) -> Option { + crate::log!( + trace, + "find API for init arg pos for {type_name} / {alias_type_name}" + ); + fg.arg_types + .iter() + .position(|arg_ty| { + *arg_ty == utils::const_pointer_type(type_name) + || *arg_ty == utils::mut_pointer_type(type_name) + }) + .or_else(|| { + fg.alias_arg_types.iter().position(|alias_ty| { + *alias_ty == utils::const_pointer_type(alias_type_name) + || *alias_ty == utils::mut_pointer_type(alias_type_name) + }) + }) + .and_then(|i| { + if let Ok(has_need_init) = crate::inspect_function_constraint_with(fg.f_name, |fc| { + Ok(fc.arg_constraints[i] + .list + .iter() + .any(|citem| matches! 
{citem.constraint, Constraint::NeedInit})) + }) { + crate::log!( + trace, + "choose API `{}` for init, has_need_init: {has_need_init}", + fg.f_name + ); + if !has_need_init { + return Some(i); + } + } + None + }) +} + +/// Get alias type name of ident in arguments +fn get_alias_type_name<'a>( + program: &FuzzProgram, + call_i: usize, + ident: &str, + ptr_type_name: &'a str, + parent_ty: Option<&str>, +) -> &'a str { + let mut alias_type_name: &str = ptr_type_name; + let mut alias_ident = ident.to_string(); + if let Some(p_ty) = parent_ty { + alias_ident.push('@'); + alias_ident.push_str(p_ty); + alias_type_name = + global_gadgets::get_instance().get_field_alias_type(&alias_ident, ptr_type_name) + } else if let FuzzStmt::Call(call) = &program.stmts[call_i].stmt { + for arg_pos in 0..call.fg.arg_idents.len() { + if call.fg.arg_idents[arg_pos] == ident && call.fg.arg_types[arg_pos] == ptr_type_name { + alias_type_name = call.fg.alias_arg_types[arg_pos]; + } + } + } + crate::log!( + trace, + "find alias with ident: {alias_ident}, type: {ptr_type_name} -> {alias_type_name}" + ); + alias_type_name +} diff --git a/hopper-core/src/fuzz/object/seq.rs b/hopper-core/src/fuzz/object/seq.rs new file mode 100644 index 0000000..8d1fa88 --- /dev/null +++ b/hopper-core/src/fuzz/object/seq.rs @@ -0,0 +1,739 @@ +//! 
Mutate sequence, including vector and array + +use crate::{ + add_det_mutation, buf::BufMutate, config, utils, ObjFuzzable, ObjValue, ObjectDeserialize, + ObjectState, +}; + +use std::{fmt::Debug, mem::MaybeUninit}; + +use super::*; + +impl ObjGenerate for [T; N] { + /// Generate value for each element in the arrays + fn generate_new(state: &mut ObjectState) -> eyre::Result { + if N == 0 { + // opaque structure + unsafe { + return Ok(std::mem::zeroed()); + } + } + // eyre::ensure!(N > 0, "fail to generate zero-sized array!"); + let mut output: MaybeUninit<[T; N]> = MaybeUninit::uninit(); + let arr_ptr = output.as_mut_ptr() as *mut T; + for i in 0..N { + let element = create_element_for_slice::(state, i)?; + unsafe { + arr_ptr.add(i).write(element); + } + } + // add terminator + if T::is_primitive() { + let end = rng::gen_range(0..N) as isize; + unsafe { + let zero: T = std::mem::zeroed(); + arr_ptr.offset(end).write(zero); + } + } + Ok(unsafe { output.assume_init() }) + } +} + +impl ObjGenerate for Vec { + /// Generate value for each element in the vector + fn generate_new(state: &mut ObjectState) -> eyre::Result { + let len = if T::is_primitive() || (rng::rarely() && !flag::is_pilot_det()) { + rng::gen_range(config::MIN_VEC_LEN..=config::MAX_VEC_LEN) + } else { + 1 + }; + let mut list = vec![]; + for i in 0..len { + let element = create_element_for_slice::(state, i)?; + list.push(element); + } + add_vec_terminator(&mut list, state); + Ok(list) + } +} + +/// Both vec and array can be viewed by a slice, then do some mutation +impl ObjMutate for [T] { + fn mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + if self.is_empty() { + return Ok(MutateOperator::nop()); + } + // only select one in slice + let idx = rng::gen_range(0..self.len()); + // UNUSED: if it is not primitive, we likely to mutate its first element. + // we do not use this since we have minimized the arguments. 
+ // if !T::is_primitive() && rng::rarely() { + // idx = 0; + // } + self[idx].mutate(state.get_child_mut(idx)?) + } + + fn mutate_by_op( + &mut self, + state: &mut ObjectState, + keys: &[FieldKey], + op: &MutateOperation, + ) -> eyre::Result<()> { + if !keys.is_empty() { + let field = &keys[0]; + let index = field.as_usize()?; + if index < self.len() { + let sub_state = state.get_child_mut(field)?; + self[index].mutate_by_op(sub_state, &keys[1..], op)?; + } + } else { + match op { + MutateOperation::Nop => {} + _ => { + if !self.is_empty() { + let sub_state = state.get_child_mut(&FieldKey::Index(0))?; + self[0].mutate_by_op(sub_state, &[], op)?; + return Ok(()); + } + eyre::bail!("op: {:?} is not support", op); + } + } + } + Ok(()) + } +} + +impl ObjMutate for [T; N] { + // Deterministic steps: + // - should deterministic mutate chilren first, + // - then run the steps that holded by itself + fn det_mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + if N == 0 { + return Ok(MutateOperator::nop()); + } + if let Some(idx) = state.get_deterministic_child_position() { + let op = self[idx].det_mutate(state.get_child_mut(idx)?)?; + add_arr_terminator(self); + Ok(op) + } else if let Some(op) = call_det(self, state)? { + Ok(state.as_mutate_operator(op)) + } else { + Ok(MutateOperator::nop()) + } + } + + /// Random select an element in array and mutate it. 
+ fn mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + if N == 0 { + return Ok(MutateOperator::nop()); + } + let op = self.as_mut().mutate(state)?; + add_arr_terminator(self); + Ok(op) + } + + fn mutate_by_op( + &mut self, + state: &mut ObjectState, + keys: &[FieldKey], + op: &MutateOperation, + ) -> eyre::Result<()> { + if !keys.is_empty() { + self.as_mut_slice().mutate_by_op(state, keys, op)?; + add_arr_terminator(self); + return Ok(()); + } + match op { + MutateOperation::Nop => {} + MutateOperation::BufRefine { buffer } => { + if self.len() >= buffer.len() { + let cur = unsafe { + std::slice::from_raw_parts(self.as_ptr() as *const u8, buffer.len()) + }; + if &cur[..buffer.len()] == buffer { + flag::set_refine_suc(false); + return Ok(()); + } + } + self.assign_buf(0, buffer, state); + if buffer.len() < self.len() { + self[buffer.len()] = unsafe { std::mem::zeroed() }; + } + } + MutateOperation::BufCmp { offset, buffer } => { + self.assign_buf(*offset, buffer, state); + } + _ => { + self.as_mut().mutate_by_op(state, keys, op)?; + add_arr_terminator(self); + } + } + Ok(()) + } +} + +impl ObjMutate for Vec { + /// Deterministic steps: + /// - should deterministic mutate chilren first, + /// - then run the steps that holded by itself + fn det_mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + if T::is_opaque() { + if state.is_deterministic() { + state.done_deterministic(); + } + return Ok(MutateOperator::nop()); + } + crate::log!(trace, "det mut seq"); + remove_vec_teminator(self, state); + let op = if let Some(idx) = state.get_deterministic_child_position() { + self[idx].det_mutate(state.get_child_mut(idx)?)? + } else if let Some(det_op) = call_det(self, state)? 
{ + // if state.children.len() < self.len() { + // state.mutate.borrow_mut().deterministic = true; + // } + // resize state length + while state.children.len() < self.len() { + let _ = state.add_child_at_offset(state.children.len(), std::any::type_name::()); + } + while state.children.len() > self.len() { + state.children.pop(); + } + state.as_mutate_operator(det_op) + } else { + MutateOperator::nop() + }; + add_vec_terminator(self, state); + Ok(op) + } + /// Random select an element in the vector and mutate it. + fn mutate(&mut self, state: &mut ObjectState) -> eyre::Result { + if T::is_opaque() { + return Ok(MutateOperator::nop()); + } + remove_vec_teminator(self, state); + let use_resize = rng::rarely() && !crate::is_input_only(); + if use_resize { + let resize_op = resize_vec(self, state)?; + assert_eq!(state.children.len(), self.len()); + if !resize_op.is_nop() { + add_vec_terminator(self, state); + return Ok(state.as_mutate_operator(resize_op)); + } + } + let op = if self.len() > 8 && utils::is_byte(std::any::type_name::()) { + self.mutate_buf(state)? + } else { + self.as_mut_slice().mutate(state)? 
+ }; + add_vec_terminator(self, state); + Ok(op) + } + /// Mutate by op + fn mutate_by_op( + &mut self, + state: &mut ObjectState, + keys: &[FieldKey], + op: &MutateOperation, + ) -> eyre::Result<()> { + remove_vec_teminator(self, state); + if !keys.is_empty() { + self.as_mut_slice().mutate_by_op(state, keys, op)?; + add_vec_terminator(self, state); + return Ok(()); + } + match op { + MutateOperation::Nop => {} + MutateOperation::BufRefine { buffer } => { + self.assign_buf(0, buffer, state); + let offset = buffer.len() / std::mem::size_of::<T>(); // `buffer` is measured in bytes, the vec is indexed in elements + if self.len() > offset { + let cut_len = self.len() - offset; + vec_del_elements(self, state, offset, cut_len)?; + } + } + MutateOperation::VecPad { + len, + zero, + rng_state, + } => { + let mut start = self.len(); + if *len > start { + if T::is_primitive() { + start += 1; + } + let diff = len - start; + crate::log!(trace, "pad diff: {diff}"); + let _tmp_rng = rng::TempRngGuard::temp_use(rng_state); + if *zero && T::is_primitive() { + let size = std::mem::size_of::(); + let buf = vec![0; diff * size]; + self.assign_buf(start * size, &buf, state); + } else { + vec_add_elements(self, state, start, diff)?; + } + eyre::ensure!(self.len() >= len -1, "resize to specific length"); + } else { + flag::set_refine_suc(false); + } + } + MutateOperation::BufCmp { offset, buffer } => { + self.assign_buf(*offset, buffer, state); + } + MutateOperation::BufSeed { index } => { + let ident = state.key.as_str().unwrap(); + if let Some(buf) = buf::get_buf_seeds(*index, ident) { + self.assign_buf(0, buf, state); + } + } + MutateOperation::VecAdd { + offset, + len, + rng_state, + } => { + let _tmp_rng = rng::TempRngGuard::temp_use(rng_state); + vec_add_elements(self, state, *offset, *len)?; + } + MutateOperation::VecDel { offset, len } => { + vec_del_elements(self, state, *offset, *len)?; + } + _ => { + if utils::is_byte(std::any::type_name::()) { + self.mutate_buf_by_op(state, keys, op)?; + } else { + self.as_mut_slice().mutate_by_op(state, keys, op)?; + } + } + } +
add_vec_terminator(self, state); + Ok(()) + } +} + +impl DetMutate for [T; N] { + fn det_mutateion_steps() -> Vec> { + let mut steps: Vec> = vec![]; + add_det_mutation!(steps, "buf_det", |arr: [T; N], state| { + assign_buf_for_cmp_fn_ptr(arr, state) + }); + steps + } +} + +impl DetMutate for Vec { + fn det_mutateion_steps() -> Vec> { + let mut steps: Vec> = vec![]; + add_det_mutation!(steps, "buf_det", |arr: Vec, state| { + // try seeds + if utils::is_byte(std::any::type_name::()) { + let index = state.mutate.borrow().get_mutation(); + crate::log!(trace, "assgin seed buf: {index}"); + if let Some(buf) = buf::get_buf_seeds(index, state.key.as_str().unwrap()) { + arr.assign_buf(0, buf, state); + return (MutateOperation::BufSeed { index }, DetAction::Keep); + } + } + assign_buf_for_cmp_fn_ptr(arr, state) + }); + steps + } +} + +/// Assign collected buffer in compare functions, and assign to its used pointer +fn assign_buf_for_cmp_fn_ptr( + list: &mut T, + state: &mut ObjectState, +) -> (MutateOperation, DetAction) { + let mut cmp_buf = None; + { + let cmps = &mut state.mutate.borrow_mut().cmp_bufs; + if let Some(det_cmp) = cmps.iter_mut().find(|c| c.det) { + det_cmp.det = false; + cmp_buf = Some(det_cmp.clone()) + } + } + if let Some(cmp) = cmp_buf { + crate::log!( + trace, + "assgin buf ({}) from cmp: {:?} <- {:?}", + cmp.offset, + list, + cmp.buf.as_slice() + ); + list.assign_buf(cmp.offset, cmp.buf.as_slice(), state); + let op = MutateOperation::BufCmp { + offset: cmp.offset, + buffer: cmp.buf, + }; + return (op, DetAction::Keep); + } + (MutateOperation::Nop, DetAction::Finish) +} + +/// How to assgin a buffer to a list : Vec or Array +trait AssignBuf { + fn assign_buf(&mut self, offset: usize, buf: &[u8], state: &mut ObjectState); + fn zeroed(&mut self); +} + +impl AssignBuf for [T; N] { + fn assign_buf(&mut self, offset: usize, buf: &[u8], _state: &mut ObjectState) { + let len = buf.len() / std::mem::size_of::(); + if len == 0 { + return; + } + let buf = unsafe { 
std::slice::from_raw_parts(buf.as_ptr() as *const T, len) }; + let offset = offset / std::mem::size_of::(); + if offset < N { + let min = len.min(N - offset); + self[offset..offset + min].clone_from_slice(&buf[..min]); + // terminator + if offset + min < N && T::is_primitive() && !self[offset + min - 1].is_zero() { + self[offset + min] = unsafe { std::mem::zeroed() }; + } + } + } + + fn zeroed(&mut self) { + // ATTN: zero is unsafe + for v in self { + *v = unsafe { std::mem::zeroed() }; + } + } +} + +impl AssignBuf for Vec { + fn assign_buf(&mut self, offset: usize, buf: &[u8], state: &mut ObjectState) { + let buf_len = buf.len() / std::mem::size_of::(); + if buf_len == 0 { + return; + } + let offset = offset / std::mem::size_of::(); + let buf = unsafe { std::slice::from_raw_parts(buf.as_ptr() as *const T, buf_len) }; + for (i, v) in buf.iter().enumerate() { + if offset + i < self.len() { + self[offset + i] = v.clone(); + } else { + self.push(v.clone()); + state.add_child(state.children.len(), std::any::type_name::()); + } + } + } + + fn zeroed(&mut self) { + for v in self { + *v = unsafe { std::mem::zeroed() }; + } + } +} + +/// Create element for slice +fn create_element_for_slice( + state: &mut ObjectState, + offset: usize, +) -> eyre::Result { + let _ = state.add_child_at_offset(state.children.len(), std::any::type_name::()); + T::generate_new(&mut state.children[offset]) +} + +fn add_arr_terminator(arr: &mut [T]) { + if T::is_primitive() { + let has_zero = arr.iter().any(|v| v.is_zero()); + if !has_zero && !arr.is_empty() { // a zero-length array has no slot to hold a terminator + let zero: T = unsafe { std::mem::zeroed() }; + let index = arr.len() - 1; + arr[index] = zero; + } + } +} + +pub fn remove_vec_teminator( + list: &mut Vec, + state: &mut ObjectState, +) { + if T::is_primitive() { + list.pop(); + state.children.pop(); + } +} + +pub fn add_vec_terminator( + list: &mut Vec, + state: &mut ObjectState, +) { + if T::is_primitive() { + let zero: T = unsafe { std::mem::zeroed() }; + state.add_child(state.children.len(),
std::any::type_name::()); + list.push(zero); + } +} + +pub fn resize_vec( + list: &mut Vec, + state: &mut ObjectState, +) -> eyre::Result { + let len = rng::gen_range(1..=config::MAX_VEC_LEN / 2); + let can_add = list.len() + len <= config::MAX_VEC_LEN || T::is_primitive(); + let can_del = list.len() > len; + if rng::coin() && can_del { + let offset = rng::gen_range(0..list.len() - len); + vec_del_elements(list, state, offset, len)?; + Ok(MutateOperation::VecDel { offset, len }) + } else if can_add { + let offset = rng::gen_range(0..=list.len()); + let rng_state = rng::save_rng_state(); + vec_add_elements(list, state, offset, len)?; + Ok(MutateOperation::VecAdd { + offset, + len, + rng_state, + }) + } else { + Ok(MutateOperation::Nop) + } +} + +pub fn vec_insert_chunk( + list: &mut Vec, + state: &mut ObjectState, + offset: usize, + chunk: &[u8], + is_insert: bool, +) -> eyre::Result<()> { + if is_insert { + vec_add_elements(list, state, offset, chunk.len())?; + } + list.assign_buf(offset, chunk, state); + Ok(()) +} + +fn vec_add_elements( + list: &mut Vec, + state: &mut ObjectState, + offset: usize, + len: usize, +) -> eyre::Result<()> { + let mut offset = offset; + if offset > list.len() { + offset = list.len(); + } + crate::log!(trace, "vec add {} elements at {}", len, offset); + eyre::ensure!( + len < 10000, + "the number of adding elements should not be too huge" + ); + // copy all elements from exist ones + let mut copy_all = false; + if rng::rarely() { + copy_all = true; + } + for i in 0..len { + let off = offset + i; + if T::is_primitive() { + let _ = state.add_child_at_offset(state.children.len(), std::any::type_name::()); + } + let element = if !list.is_empty() && copy_all && rng::coin() { + // use exist one + let chosed = rng::gen_range(0..list.len()); + if !T::is_primitive() { + state.dup_child_state(chosed, off); + } + list[chosed].clone() + } else { + if !T::is_primitive() { + let _ = state.add_child_at_offset(off, std::any::type_name::()); + } + 
T::generate_new(&mut state.children[off])? + }; + list.insert(off, element); + } + crate::log!( + trace, + "new length: {} / {}", + list.len(), + state.children.len() + ); + eyre::ensure!(state.children.len() == list.len(), "length consistent"); + if !T::is_primitive() { + state.resort_children_indices(); + } + Ok(()) +} + +fn vec_del_elements( + list: &mut Vec, + state: &mut ObjectState, + offset: usize, + len: usize, +) -> eyre::Result<()> { + crate::log!(trace, "vec del {} elements at {}", len, offset); + let mut offset = offset; + if list.len() > len && offset >= list.len() { + offset = list.len() - len; + } + for _ in 0..len { + if list.len() <= offset || list.len() == 1 { + break; + } + let _ = list.remove(offset); + if T::is_primitive() { + let _ = state.children.pop(); + } else { + state.children.remove(offset); + } + eyre::ensure!(list.len() == state.children.len(), "consistent length"); + } + if !T::is_primitive() { + state.resort_children_indices(); + } + Ok(()) +} + +#[cfg(test)] +fn assert_seq_state(state: &ObjectState, n: usize) { + assert_eq!(state.children.len(), n); + for (i, s) in state.children.iter().enumerate() { + if let FieldKey::Index(si) = s.key { + assert_eq!(i, si); + } + } +} + +#[test] +fn test_arr_gen_mutate() { + // generate + let mut state = ObjectState::root("test", "[u8; 10]"); + let mut arr = <[u8; 10]>::generate_new(&mut state).unwrap(); + println!("arr: {arr:?}"); + assert_eq!(state.children.len(), arr.len()); + assert!(arr.iter().any(|i| *i == 0)); + // mutate + for _ in 0..200 { + let op = arr.mutate(&mut state).unwrap(); + assert!(!op.is_nop()); + assert!(arr.iter().any(|i| *i == 0)); + assert_seq_state(&state, arr.len()); + } + for _ in 0..200 { + arr.det_mutate(&mut state).unwrap(); + assert!(arr.iter().any(|i| *i == 0)); + assert_seq_state(&state, arr.len()); + } + // det + let cmp_buf = crate::feedback::CmpBuf { + id: 0, + offset: 0, + buf: vec![1, 2, 3, 4], + det: true, + }; + println!("state: {:?}", 
state.mutate.borrow_mut()); + state.mutate.borrow_mut().deterministic = true; + state.mutate.borrow_mut().det_iter = 0; + state.mutate.borrow_mut().affect_cmp_buf(cmp_buf); + let _op = arr.det_mutate(&mut state).unwrap(); + println!("arr: {arr:?}"); + assert!(arr.starts_with(&[1, 2, 3, 4, 0])); +} + +#[test] +fn test_vec_gen_mutate() { + use crate::test; + // primitive type: u8 + { + // generate + let mut state = ObjectState::root("test", "Vec"); + let mut list = >::generate_new(&mut state).unwrap(); + println!("v: {list:?}"); + assert_eq!(state.children.len(), list.len()); + assert_eq!(*list.last().unwrap(), 0); + // mutate + for _ in 0..200 { + let op = list.mutate(&mut state).unwrap(); + println!("op : {op:?}"); + assert!(!op.is_nop()); + assert_seq_state(&state, list.len()); + assert_eq!(*list.last().unwrap(), 0); + // assert!(state.is_deterministic()); + } + for _ in 0..200 { + let _ = list.det_mutate(&mut state).unwrap(); + assert_seq_state(&state, list.len()); + assert_eq!(*list.last().unwrap(), 0); + } + // det + let cmp_buf = crate::feedback::CmpBuf { + id: 0, + offset: 0, + buf: vec![1, 2, 3, 4], + det: true, + }; + println!("mutate state: {:?}", state.mutate.borrow_mut()); + state.mutate.borrow_mut().deterministic = true; + state.done_deterministic(); + state.mutate.borrow_mut().deterministic = true; + state.mutate.borrow_mut().det_iter = 0; + state.mutate.borrow_mut().affect_cmp_buf(cmp_buf); + let op = list.det_mutate(&mut state).unwrap(); + println!("op : {op:?}"); + println!("list: {list:?}"); + assert!(list.starts_with(&[1, 2, 3, 4])); + } + // custom type + { + let mut state = ObjectState::root("test", "Vec"); + let mut list = >::generate_new(&mut state).unwrap(); + println!("v: {list:?}"); + assert_eq!(state.children.len(), list.len()); + // mutate + for _ in 0..200 { + let op = list.mutate(&mut state).unwrap(); + println!("op : {op:?}"); + // assert!(!op.is_nop()); + assert_seq_state(&state, list.len()); + } + } +} + +#[test] +fn test_vec_pad() 
-> eyre::Result<()> { + // generate + let mut state = ObjectState::root("test", "Vec"); + let list = >::generate_new(&mut state).unwrap(); + assert_eq!(state.children.len(), list.len()); + assert_eq!(*list.last().unwrap(), 0); + println!("test vec pad"); + + for _ in 0..500 { + let mut tmp = list.clone(); + let mut tmp_state = state.clone_without_mutate_info(None); + let base = list.len(); + let len = rng::gen_range(base..=4096); + let rng_state = rng::save_rng_state(); + println!("pad len: {len}"); + tmp.mutate_by_op( + &mut tmp_state, + &[], + &MutateOperation::VecPad { + len, + zero: rng::coin(), + rng_state, + }, + )?; + assert_eq!(tmp.len(), len); + assert!(tmp.starts_with(&list.as_slice()[..list.len() - 1])); + } + + Ok(()) +} + + +#[test] +fn test_resize() -> eyre::Result<()> { + let mut state = ObjectState::root("test", "Vec"); + let mut list = >::generate_new(&mut state).unwrap(); + for _ in 0..2000 { + let resize_op = resize_vec(&mut list, &mut state)?; + println!("resize op: {:?}", resize_op); + assert_eq!(state.children.len(), list.len()); + } + Ok(()) +} \ No newline at end of file diff --git a/hopper-core/src/fuzz/object/void.rs b/hopper-core/src/fuzz/object/void.rs new file mode 100644 index 0000000..6cbd5f8 --- /dev/null +++ b/hopper-core/src/fuzz/object/void.rs @@ -0,0 +1,50 @@ +//! Generate and mutate void +//! void is uint, and we do nothing + +use crate::{FuzzVoid, RetVoid}; + +use super::*; + +macro_rules! 
impl_void { + ($void:ident) => { + impl ObjGenerate for $void { + fn generate_new( state: &mut ObjectState) -> eyre::Result { + let _ = state.replace_weight(0); + state.done_deterministic(); + Ok(Self::default()) + } + } + + impl ObjMutate for $void { + fn mutate( + &mut self, + state: &mut ObjectState, + ) -> eyre::Result { + if state.is_deterministic() { + state.done_deterministic(); + } + Ok(state.as_mutate_operator(MutateOperation::Nop)) + } + fn mutate_by_op( + &mut self, + _state: &mut ObjectState, + keys: &[FieldKey], + op: &MutateOperation, + ) -> eyre::Result<()> { + if !keys.is_empty() { + unimplemented!() + } + match op { + MutateOperation::Nop => {} + _ => { + unimplemented!(); + } + } + Ok(()) + } + } + }; +} + +impl_void!(FuzzVoid); +impl_void!(RetVoid); diff --git a/hopper-core/src/fuzz/operator.rs b/hopper-core/src/fuzz/operator.rs new file mode 100644 index 0000000..db33752 --- /dev/null +++ b/hopper-core/src/fuzz/operator.rs @@ -0,0 +1,380 @@ +//! Describe what mutating did, which key it mutates, what operation it uses + +use hopper_derive::{EnumKind, Serde}; + +use crate::{runtime::*, EnumKind, IrEntry, RngState}; + +/// Mutate operation, +/// represent different kind of mutating behaviors. 
+#[derive(Debug, Clone, Serde, EnumKind)] +pub enum MutateOperation { + // ---- Integer ---- + /// Flip one bit + IntBitFlip { + index: u8, + }, + /// Flip some bits + IntFlip { + indices: Vec, + }, + /// Add + IntAdd { + change: u64, + }, + /// Sub + IntSub { + change: u64, + }, + /// Set int in range min..max + IntRange { + min: IrEntry, + max: IrEntry, + }, + /// Set as value + IntSet { + val: IrEntry, + }, + /// Get the value + IntGet, + // Set cmp value + IntCmp { + val: u64, + }, + // Set cmp or corpus value's variance + IntVariance { + val: u64, + }, + // Set Random value + IntRandom { + val: u64, + }, + /// Set value from corpus + Corpus { + index: usize, + }, + // ---- Float ---- + /// Add Float + FloatAdd { + change: f32, + }, + /// Float New + FloatNew { + val: f64, + }, + // ---- Sequence ---- + /// set buffer from compare function + BufCmp { + offset: usize, + buffer: Vec, + }, + /// set buffer from seeds + BufSeed { + index: usize, + }, + /// Refine buffer + BufRefine { + buffer: Vec, + }, + /// Pad to a larger length with all zero + VecPad { + len: usize, + zero: bool, + rng_state: RngState, + }, + /// Add elements + VecAdd { + offset: usize, + len: usize, + rng_state: RngState, + }, + /// Delete elements + VecDel { + offset: usize, + len: usize, + }, + /// Slice two input buffer + BufSplice { + program_id: usize, + stmt_index: usize, + split_at: usize, + range: Option, + }, + /// Havoc buffer + BufHavoc { + use_bytes: usize, + swap: bool, + op: Box, + }, + UseDict { + offset: usize, + dict: Vec, + is_insert: bool, + }, + // ---- Pointer --- + /// PointerTodo + PointerTodo, + /// Set pointer as NULL + PointerNull, + /// Use other location as pointer + PointerUse { + loc: Location, + }, + /// Use return as pointer + PointerRet { + f_name: String, + rng_state: RngState, + }, + /// Generate non-null pointer + PointerGen { + rng_state: RngState, + }, + /// Generate non-null char pointer + PointerGenChar, + /// Make poiter to an address of canary + 
PointerCanary, + /// Make pointer to an file name + PointerFile { + read: bool, + }, + /// Make int to be a file descriptor + FdFile, + /// Generate and init opaque pointer + InitOpaque { + call_i: usize, + }, + /// Generate and init opaque pointer for inference + InitOpaqueForInfer { + call_i: usize, + }, + /// Remove Init opaque pointer + RemoveInitOpaque, + // ---- Function Pointer ---- + /// Find another function pointer + FnPointer { + f_name: String, + }, + /// Try generate a totally new one for option + OptionNew { + rng_state: RngState, + }, + /// Set None for option + OptionNone, + // ---- Function ---- + /// Find another function pointer + /// Change call argument + CallArg { + arg_pos: usize, + rng_state: RngState, + }, + EffCallArg { + arg_pos: usize, + eff_i: usize, + rng_state: RngState, + }, + NewTarget { + f_name: String, + arg_i: Option, + }, + /// Insert implicit call + CallImplicitInsert { + f_name: String, + rng_state: RngState, + }, + /// Insert related call + CallRelatedInsert { + f_name: String, + arg_pos: usize, + rng_state: RngState, + }, + InitTypeWithCall, + /// Update call return + CallUpdate { + fields: LocFields, + ops: Vec, + }, + // ---- Other ---- + /// Flip boolean type + FlipBool, + /// Generate a new union instance + UnionNew { + rng_state: RngState, + }, + /// Generate a new union with a specific member + UnionUse { + rng_state: RngState, + member: String, + }, + /// Do nothing, or indicating you should ending the stage + Nop, +} + +impl MutateOperation { + pub fn is_arithmetical(&self) -> bool { + matches!( + self, + Self::IntBitFlip { index: _ } + | Self::IntFlip { indices: _ } + | Self::IntAdd { change: _ } + | Self::IntSub { change: _ } + | Self::Corpus { index: _ } + | Self::FloatAdd { change: _ } + | Self::FloatNew { val: _ } + ) + } + + pub fn is_pointer_todo(&self) -> bool { + matches!(self, Self::PointerTodo) + } + + pub fn is_nop(&self) -> bool { + matches!(self, MutateOperation::Nop) + } +} + +/// Mutate operator, 
+/// using operation on certain objet field +#[derive(Debug, Clone, Serde)] +pub struct MutateOperator { + /// key of object field + pub key: WeakLocation, + /// is deterministic + pub det: bool, + /// operation + pub op: MutateOperation, +} + +impl MutateOperator { + /// Create an operator + pub fn new(key: Location, op: MutateOperation) -> Self { + let key = key.to_weak_loc(); + Self { + key, + det: false, + op, + } + } + + pub fn stmt_op(op: MutateOperation) -> Self { + Self { + key: WeakLocation::null(), + det: false, + op, + } + } + + /// Create a nop operator + pub fn nop() -> Self { + Self { + key: WeakLocation::null(), + det: false, + op: MutateOperation::Nop, + } + } + + /// Is the operation is nop or not + pub fn is_nop(&self) -> bool { + self.op.is_nop() + } + + /// Set index after mutation + pub fn set_index(&mut self, index: StmtIndex) { + if !self.is_nop() { + self.key.set_index(index); + } + } +} + +impl std::fmt::Display for MutateOperator { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "({}: {})", + self.key.serialize().unwrap(), + self.op.serialize().unwrap(), + ) + } +} + +impl CloneProgram for MutateOperator { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + Self { + key: self.key.clone_with_program(program), + op: self.op.clone_with_program(program), + det: self.det, + } + } +} + +impl CloneProgram for MutateOperation { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + match self { + MutateOperation::PointerUse { loc } => MutateOperation::PointerUse { + loc: loc.clone_with_program(program), + }, + MutateOperation::BufHavoc { + use_bytes, + swap, + op, + } => MutateOperation::BufHavoc { + use_bytes: *use_bytes, + swap: *swap, + op: Box::new(op.clone_with_program(program)), + }, + MutateOperation::CallUpdate { fields, ops } => { + let new_list = ops + .iter() + .map(|op| op.clone_with_program(program)) + .collect(); + MutateOperation::CallUpdate { + fields: 
fields.clone(), + ops: new_list, + } + } + _ => self.clone(), + } + } +} + +impl CloneProgram for Vec { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + let mut ops = vec![]; + for op in self { + if !op.key.is_released() { + let op = op.clone_with_program(program); + // exclude those operators involved with tmp indices + if let Some(index) = &op.key.stmt_index { + let uniq = index.get_uniq(); + if program + .tmp_indices + .iter() + .any(|tmp_i| tmp_i.get_uniq() == uniq) + { + continue; + } + } + ops.push(op); + } + } + ops + } +} + +impl Serialize for Box { + fn serialize(&self) -> eyre::Result { + self.as_ref().serialize() + } +} + +impl Deserialize for Box { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + Ok(Box::new(MutateOperator::deserialize(de)?)) + } +} + +#[derive(Debug, Clone, Serde)] + +pub struct SpliceRange { + pub lower: usize, + pub upper: usize, + pub is_insert: bool, +} diff --git a/hopper-core/src/fuzz/pcg.rs b/hopper-core/src/fuzz/pcg.rs new file mode 100644 index 0000000..427795b --- /dev/null +++ b/hopper-core/src/fuzz/pcg.rs @@ -0,0 +1,173 @@ +// Copyright 2018 Developers of the Rand project. +// Copyright 2017 Paul Dicker. +// Copyright 2014-2017 Melissa O'Neill and PCG Project contributors +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! PCG random number generators +//! +//! https://rust-random.github.io/rand/rand_pcg/index.html +//! https://www.pcg-random.org/ +//! https://users.rust-lang.org/t/saving-and-restoring-the-state-of-a-seedablerng/52642 + +use core::fmt; + +use hopper_derive::Serde; +use rand_core::{impls, le, Error, RngCore, SeedableRng}; + +// This is the default multiplier used by PCG for 64-bit state. +const MULTIPLIER: u64 = 6364136223846793005; + +/// A PCG random number generator (XSH RR 64/32 (LCG) variant). 
+/// +/// Permuted Congruential Generator with 64-bit state, internal Linear +/// Congruential Generator, and 32-bit output via "xorshift high (bits), +/// random rotation" output function. +/// +/// This is a 64-bit LCG with explicitly chosen stream with the PCG-XSH-RR +/// output function. This combination is the standard `pcg32`. +/// +/// Despite the name, this implementation uses 16 bytes (128 bit) space +/// comprising 64 bits of state and 64 bits stream selector. These are both set +/// by `SeedableRng`, using a 128-bit seed. +/// +/// Note that two generators with different stream parameter may be closely +/// correlated. +#[derive(Clone, PartialEq, Eq, Serde)] +pub struct Lcg64Xsh32 { + state: u64, + increment: u64, +} + +/// [`Lcg64Xsh32`] is also officially known as `pcg32`. +pub type Pcg32 = Lcg64Xsh32; + +impl Lcg64Xsh32 { + /// Multi-step advance functions (jump-ahead, jump-back) + /// + /// The method used here is based on Brown, "Random Number Generation + /// with Arbitrary Stride,", Transactions of the American Nuclear + /// Society (Nov. 1994). The algorithm is very similar to fast + /// exponentiation. + /// + /// Even though delta is an unsigned integer, we can pass a + /// signed integer to go backwards, it just goes "the long way round". + /// + /// Using this function is equivalent to calling `next_32()` `delta` + /// number of times. 
+ #[inline] + pub fn advance(&mut self, delta: u64) { + let mut acc_mult: u64 = 1; + let mut acc_plus: u64 = 0; + let mut cur_mult = MULTIPLIER; + let mut cur_plus = self.increment; + let mut mdelta = delta; + + while mdelta > 0 { + if (mdelta & 1) != 0 { + acc_mult = acc_mult.wrapping_mul(cur_mult); + acc_plus = acc_plus.wrapping_mul(cur_mult).wrapping_add(cur_plus); + } + cur_plus = cur_mult.wrapping_add(1).wrapping_mul(cur_plus); + cur_mult = cur_mult.wrapping_mul(cur_mult); + mdelta /= 2; + } + self.state = acc_mult.wrapping_mul(self.state).wrapping_add(acc_plus); + } + + /// Construct an instance compatible with PCG seed and stream. + /// + /// Note that the highest bit of the `stream` parameter is discarded + /// to simplify upholding internal invariants. + /// + /// Note that two generators with different stream parameters may be closely + /// correlated. + /// + /// PCG specifies the following default values for both parameters: + /// + /// - `state = 0xcafef00dd15ea5e5` + /// - `stream = 0xa02bdbf7bb3c0a7` + // Note: stream is 1442695040888963407u64 >> 1 + pub fn new(state: u64, stream: u64) -> Self { + // The increment must be odd, hence we discard one bit: + let increment = (stream << 1) | 1; + Lcg64Xsh32::from_state_incr(state, increment) + } + + #[inline] + fn from_state_incr(state: u64, increment: u64) -> Self { + let mut pcg = Lcg64Xsh32 { state, increment }; + // Move away from initial value: + pcg.state = pcg.state.wrapping_add(pcg.increment); + pcg.step(); + pcg + } + + #[inline] + fn step(&mut self) { + // prepare the LCG for the next round + self.state = self + .state + .wrapping_mul(MULTIPLIER) + .wrapping_add(self.increment); + } +} + +// Custom Debug implementation that does not expose the internal state +impl fmt::Debug for Lcg64Xsh32 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Lcg64Xsh32 {{ state: {}, increment: {} }}", self.state, self.increment) + } +} + +impl SeedableRng for Lcg64Xsh32 { + type Seed = [u8; 
16]; + + /// We use a single 127-bit seed to initialise the state and select a stream. + /// One `seed` bit (lowest bit of `seed[8]`) is ignored. + fn from_seed(seed: Self::Seed) -> Self { + let mut seed_u64 = [0u64; 2]; + le::read_u64_into(&seed, &mut seed_u64); + + // The increment must be odd, hence we discard one bit: + Lcg64Xsh32::from_state_incr(seed_u64[0], seed_u64[1] | 1) + } +} + +impl RngCore for Lcg64Xsh32 { + #[inline] + fn next_u32(&mut self) -> u32 { + let state = self.state; + self.step(); + + // Output function XSH RR: xorshift high (bits), followed by a random rotate + // Constants are for 64-bit state, 32-bit output + const ROTATE: u32 = 59; // 64 - 5 + const XSHIFT: u32 = 18; // (5 + 32) / 2 + const SPARE: u32 = 27; // 64 - 32 - 5 + + let rot = (state >> ROTATE) as u32; + let xsh = (((state >> XSHIFT) ^ state) >> SPARE) as u32; + xsh.rotate_right(rot) + } + + #[inline] + fn next_u64(&mut self) -> u64 { + impls::next_u64_via_u32(self) + } + + #[inline] + fn fill_bytes(&mut self, dest: &mut [u8]) { + impls::fill_bytes_via_next(self, dest) + } + + #[inline] + fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> { + self.fill_bytes(dest); + Ok(()) + } +} diff --git a/hopper-core/src/fuzz/refine.rs b/hopper-core/src/fuzz/refine.rs new file mode 100644 index 0000000..d944008 --- /dev/null +++ b/hopper-core/src/fuzz/refine.rs @@ -0,0 +1,800 @@ +use std::{collections::HashMap, default}; + +use eyre::{Context, ContextCompat}; + +use crate::{fuzz::*, runtime::*, utils}; + +impl FuzzProgram { + /// Refine program by constraints + pub fn refine_program(&mut self) -> eyre::Result> { + let mut ops = vec![]; + if !crate::config::ENABLE_REFINE { + return Ok(ops); + } + crate::log!( + trace, + "program before refine: {}", + self.serialize_all().unwrap() + ); + // add required assertion before refining + self.insert_required_assertions()?; + + // start refining + let mut index = self + .stmts + .last() + .ok_or_else(|| eyre::eyre!("has last"))? 
+ .index + .use_index(); + loop { + let cur_stmt = &self.stmts[index.get()].stmt; + eyre::ensure!(index.get() < 512, "index is too large"); + match cur_stmt { + FuzzStmt::Call(call) => { + let call_name = call.fg.f_name; + let call_args = call.args.clone(); + let existing_ctxs = call.contexts.clone(); + let is_context = call.is_implicit() || call.is_relative(); + crate::log!( + trace, + "refine func constraints: {call_name} at {}", + index.get() + ); + crate::inspect_function_constraint_with(call_name, |fc| { + for (arg_pos, tc) in fc.arg_constraints.iter().enumerate() { + if tc.list.is_empty() { + continue; + } + crate::log!(trace, "arg_pos: {arg_pos}"); + let arg_stmt = &call_args[arg_pos]; + let call_info = Some((index.get(), &call_args[..])); + let mut operators = + self.refine_type_constraints(arg_stmt, tc, None, call_info)?; + if !operators.is_empty() { + ops.append(&mut operators); + } + } + // refine contexts + for ctx_rule in &fc.contexts { + self.refine_contexts(&index, ctx_rule, &call_args, &existing_ctxs)?; + } + if fc.insert_fail && is_context { + let cur = index.get(); + self.delete_stmt(cur); + // switch to next one, since current index is deleted. 
+ index = self.stmts[cur].index.use_index(); + self.check_ref_use()?; + } + let mut operators = self.refine_length_factors(fc, &call_args)?; + if !operators.is_empty() { + ops.append(&mut operators); + } + Ok(()) + })?; + } + FuzzStmt::Load(load) => { + let type_name = load.value.type_name(); + if !(utils::is_primitive_type(type_name) || utils::is_option_type(type_name)) { + let stmt_index = index.use_index(); + let next_index = if index.get() + 1 >= self.stmts.len() { + index.use_index() + } else { + self.stmts[index.get() + 1].index.use_index() + }; + // refine fields in load statement with general type constraints + crate::iterate_type_constraint_with(|ty, tc| { + if let FuzzStmt::Load(load_inner) = &self.stmts[stmt_index.get()].stmt { + // find fields with type in load + let fields_list = + load_inner.state.find_fields_with(|s| s.ty == ty, false); + for prefix in fields_list { + let mut operators = self.refine_type_constraints( + &stmt_index, + tc, + Some(prefix), + None, + )?; + // refine new generating stmts during stmt_index..next_index + if operators.iter().any(|op| { + matches!(op.op, MutateOperation::InitTypeWithCall) + }) { + crate::log!( + trace, + "roll back to next index: {}", + next_index.get() + ); + // crate::log!(trace, "after init, program: {}", self.serialize().unwrap()); + index = next_index.use_index(); + } + if !operators.is_empty() { + ops.append(&mut operators); + } + } + } + Ok(()) + })?; + } + } + _ => {} + } + if index.get() == 0 { + break; + } + index = self.stmts[index.get() - 1].index.use_index(); + } + crate::log!(trace, "refine ops: {:?}", ops); + + drop(index); + // check_ref_use again. 
+ self.check_ref_use()?; + + Ok(ops) + } + + fn refine_type_constraints( + &mut self, + stmt_index: &StmtIndex, + constraint: &TypeConstraint, + prefix: Option, + call_info: Option<(usize, &[StmtIndex])>, + ) -> eyre::Result> { + let mut operators = vec![]; + // used for refine constraint in general types + let prefix = prefix.unwrap_or_default(); + // used for refine constraint in arguments + let (call_i, call_args) = call_info.unwrap_or((0, &[])); + for citem in constraint.list.iter() { + crate::log!(trace, "stmt: {}, constraint: {:?}", stmt_index.get(), citem); + let lcs = citem.find_all_locations_with_any_index(self, stmt_index, &prefix); + // eyre::ensure!(lcs.len() <= 10000, "fail to refine"); + for (loc, constraint) in lcs { + crate::log!(trace, "refining loc: {}", loc.serialize().unwrap()); + let operator = match &constraint { + Constraint::SetNull => { + if self.is_loc_null(&loc) { + continue; + } + MutateOperator::new(loc, MutateOperation::PointerNull) + } + Constraint::NonNull => { + // if the pointer is null, we set it to non-null + if !self.is_loc_null(&loc) { + continue; + } + MutateOperator::new( + loc, + MutateOperation::PointerGen { + rng_state: rng::gen_rng_state(), + }, + ) + } + Constraint::SetVal { val } => { + if val.is_factor() { + crate::log!(trace, "skip factor"); + continue; + } + let op = if let IrEntry::String(buf) = val { + MutateOperation::BufRefine { + buffer: buf.as_bytes().to_vec(), + } + } else { + // make sure can handle length stored in a pointer, e.g. 
(int* array, int* size) + MutateOperation::IntSet { + val: val.convert_length_to_constant( + self, stmt_index, &prefix, call_args, + )?, + } + }; + MutateOperator::new(loc, op) + } + Constraint::Range { min, max } => { + if max.is_factor() { + crate::log!(trace, "skip factor"); + continue; + } + MutateOperator::new( + loc, + MutateOperation::IntRange { + min: min.convert_length_to_constant( + self, stmt_index, &prefix, call_args, + )?, + max: max.convert_length_to_constant( + self, stmt_index, &prefix, call_args, + )?, + }, + ) + } + Constraint::NonZero => MutateOperator::new( + loc, + MutateOperation::IntRange { + min: 1.into(), + max: IrEntry::Max(0), + }, + ), + Constraint::UseUnionMember { member } => MutateOperator::new( + loc, + MutateOperation::UnionUse { + rng_state: rng::gen_rng_state(), + member: member.clone(), + }, + ), + Constraint::ArrayLength { len } => { + // there are two cases that may be ambiguous in pointer's VecPad + // 1. used in custom rule without & (pointer) + // 2. pointer to another pointer, and the pointee is not a vector (reuse case) + // to support both, we make the latter to the former here + let mut loc = loc; + if citem.key.list.last() == Some(&FieldKey::Pointer) { + if let FuzzStmt::Load(load) = &self.stmts[loc.get_index()?.get()].stmt { + if !utils::is_vec_type(load.value.type_name()) { + let mut new_key = citem.key.clone(); + new_key.list.pop(); + loc = new_key + .to_loc_for_refining(self, stmt_index, &prefix) + .context("has loc")?; + } + } + } + if let IrEntry::Constant(len) = + len.convert_length_to_constant(self, stmt_index, &prefix, call_args)? + { + let mut new_len = 1; + if len > 1 { + new_len = len as usize; + } + MutateOperator::new( + loc, + MutateOperation::VecPad { + len: new_len, + zero: false, + rng_state: rng::gen_rng_state(), + }, + ) + } else { + continue; + } + } + Constraint::NeedInit => { + // State that does not require initialization + // 1. non-null opqaque pointer + // 2. 
null pointer with inialization functions + if !self.is_loc_null(&loc) || self.has_been_inited(stmt_index).is_some() { + continue; + } + MutateOperator::new(loc, MutateOperation::InitOpaque { call_i }) + } + Constraint::File { read, is_fd } => { + if *is_fd { + if self.refine_fd(stmt_index, &loc, *read)? { + operators.push(MutateOperator::new(loc, MutateOperation::FdFile)); + } + continue; + } + if self.is_file_loc(&loc) { + crate::log!(trace, "loc is file"); + continue; + } + MutateOperator::new(loc, MutateOperation::PointerFile { read: *read }) + } + Constraint::InitWith { f_name, arg_pos } => { + if let Some(op) = + self.init_with(f_name, *arg_pos, stmt_index, loc, &prefix)? + { + operators.push(op); + } + continue; + } + _ => { + continue; + } + }; + if self.refine_load_statement(&operator)? { + operators.push(operator); + } + } + } + Ok(operators) + } + + fn refine_load_statement(&mut self, op: &MutateOperator) -> eyre::Result { + flag::set_refine_suc(true); + let is = self + .get_mut_stmt_by_index_uniq(op.key.stmt_index.as_ref().unwrap()) + .with_context(|| format!("can't find stmt with op: {op:?}"))?; + let mut stmt = is.stmt.lend(); + if stmt.is_load() { + stmt.mutate_by_op(self, op.key.fields.as_slice(), &op.op) + .with_context(|| { + format!( + "op: {}, stub: {}", + op.serialize().unwrap(), + stmt.serialize().unwrap() + ) + })?; + } else { + // ignore calls + flag::set_refine_suc(false); + } + let _ = self.withdraw_stmt(stmt)?; + crate::log!(trace, "refine statement by : {}", op); + Ok(flag::is_refine_suc()) + } + + fn refine_fd( + &mut self, + stmt_index: &StmtIndex, + loc: &Location, + read: bool, + ) -> eyre::Result { + crate::log!(trace, "crate a file fd for loc :{loc:?}"); + if loc.fields.is_empty() { + if let Some(is) = self.get_mut_stmt_by_index_uniq(stmt_index) { + // check if has refined + if matches!(&is.stmt, FuzzStmt::File(_)) { + return Ok(false); + } + let _ = is.stmt.lend(); + let file_stmt = FileStmt::generate_new(self, "fd", false, true, 
read, 0)?; + let _ = self.withdraw_stmt(file_stmt.into())?; + return Ok(true); + } + } else if let Some(pos) = self.position_stmt_by_index_uniq(stmt_index) { + // check if has refined + if self.stmts[pos..].iter().any(|is| { + if let FuzzStmt::Update(update_stmt) = &is.stmt { + if loc.compare_weak(&update_stmt.dst) { + if let Some(file_is) = self.get_stmt_by_index_uniq(&update_stmt.src) { + if matches!(&file_is.stmt, FuzzStmt::File(_)) { + return true; + } + } + } + } + false + }) { + return Ok(false); + } + // get next one and insert update + if pos + 1 < self.stmts.len() { + let next_stmt = self.stmts[pos + 1].stmt.lend(); + let file_stmt = FileStmt::generate_new(self, "fd", false, true, read, 0)?; + let file_index = self.insert_or_append_stmt(file_stmt)?; + let update_stmt = UpdateStmt::new(file_index, loc.to_weak_loc()); + let _ = self.insert_or_append_stmt(update_stmt)?; + let _ = self.withdraw_stmt(next_stmt)?; + } + } + Ok(false) + } + + fn init_with( + &mut self, + f_name: &str, + arg_pos: usize, + stmt_index: &StmtIndex, + loc: Location, + prefix: &LocFields, + ) -> eyre::Result> { + if !prefix.is_empty() || CallStmt::has_relative_context_for_stmt(self, f_name, stmt_index) { + return Ok(None); + } + crate::log!( + trace, + "init with loc :{loc:?} with {f_name} at arg-{arg_pos}" + ); + if let FuzzStmt::Load(load) = &self.stmts[stmt_index.get()].stmt { + let type_name = load.value.type_name(); + let stub_stmt = self.stmts[stmt_index.get() + 1].stmt.lend(); + let _tmp = flag::ReuseStmtGuard::temp_disable(); + let mut rela_call = CallStmt::generate_new(self, CallStmt::RELATIVE, f_name, 0)?; + let init_arg = rela_call.set_ith_arg_for_relative_call( + self, + arg_pos, + stmt_index.use_index(), + type_name, + )?; + let call_stmt = self.insert_or_append_stmt(rela_call)?; + if init_arg { + let _ = self.insert_or_append_stmt(AssertStmt::assert_initialized( + stmt_index.use_index(), + call_stmt, + )); + } + let _ = self.withdraw_stmt(stub_stmt); + 
self.check_ref_use()?; + return Ok(Some(MutateOperator::new( + loc, + MutateOperation::InitTypeWithCall, + ))); + } + Ok(None) + } + + fn refine_contexts( + &mut self, + call_index: &StmtIndex, + ctx_rule: &CallContext, + call_args: &[StmtIndex], + existing_ctxs: &[StmtIndex], + ) -> eyre::Result<()> { + if ctx_rule.is_forbidden() { + if let Some(arg_pos) = ctx_rule.related_arg_pos { + let arg_stmt = &call_args[arg_pos]; + for is in &self.stmts { + if &is.index == call_index { + break; + } + if let FuzzStmt::Call(call) = &is.stmt { + if call.fg.f_name == ctx_rule.f_name + && call.is_relative() + && call.is_related_call_for_stmt(arg_stmt, self) + { + self.delete_stmt(is.index.get()); + self.check_ref_use()?; + break; + } + } + } + } else { + for current_ctx in existing_ctxs { + if let Some(implicit_call) = self.get_call_stmt(current_ctx.get()) { + if implicit_call.fg.f_name == ctx_rule.f_name { + self.delete_stmt(current_ctx.get()); + self.check_ref_use()?; + break; + } + } + } + } + } + Ok(()) + } + + fn refine_length_factors( + &mut self, + // call_index: usize, + //call: &CallStmt, + fc: &FuncConstraint, + call_args: &[StmtIndex], + ) -> eyre::Result> { + #[derive(Debug)] + struct FactorItem { + pub arg_pos: usize, + pub fields: LocFields, + pub range_start: Option, + } + #[derive(Debug)] + struct FactorList { + pub coef: u64, + pub list: Vec, + } + impl default::Default for FactorList { + fn default() -> Self { + Self { + coef: 1, + list: vec![], + } + } + } + let mut factors_map: HashMap<(usize, LocFields), FactorList> = HashMap::default(); + macro_rules! 
get_factor_list { + ($k_arg: ident, $k_fields: expr) => {{ + factors_map + .entry(($k_arg, $k_fields.clone())) + .or_insert(FactorList::default()) + }}; + } + + for (key_arg_pos, tc) in fc.arg_constraints.iter().enumerate() { + for c in tc.list.iter() { + match &c.constraint { + Constraint::LengthFactor { coef } => { + let f = get_factor_list!(key_arg_pos, c.key); + f.coef = *coef; + } + Constraint::SetVal { + val: + IrEntry::Length { + arg_pos, + fields, + is_factor, + }, + } => { + if !*is_factor { + continue; + } + let arg_pos = arg_pos.unwrap_or_default(); + let f: &mut FactorList = get_factor_list!(arg_pos, fields); + f.list.push(FactorItem { + arg_pos: key_arg_pos, + fields: c.key.clone(), + range_start: None, + }); + } + Constraint::Range { + min, + max: + IrEntry::Length { + arg_pos, + fields, + is_factor, + }, + } => { + if !*is_factor { + continue; + } + let range_start = if let IrEntry::Constant(start) = min { + Some(*start) + } else { + None + }; + let arg_pos = arg_pos.unwrap_or_default(); + let f: &mut FactorList = get_factor_list!(arg_pos, fields); + f.list.push(FactorItem { + arg_pos: key_arg_pos, + fields: c.key.clone(), + range_start, + }); + } + _ => {} + } + } + } + crate::log!(trace, "find factors: {factors_map:?}"); + let mut operators = vec![]; + for ((arr_arg_pos, arr_fields), factors) in factors_map { + let arr_stmt = &call_args[arr_arg_pos]; + let Some(arr_loc) = + arr_fields.to_loc_for_refining(self, arr_stmt, &LocFields::default()) + else { + continue; + }; + let stmt_i = arr_loc.stmt_index.as_ref().unwrap().get(); + if let FuzzStmt::Load(load) = &self.stmts[stmt_i].stmt { + let state = load.state.get_child_by_fields(arr_loc.fields.as_slice())?; + let mut len = get_ptee_vec_len(state, &self.stmts)?; + if len == 0 { + for f in factors.list.iter() { + let cur_stmt = &call_args[f.arg_pos]; + if let Some(num_loc) = + f.fields + .to_loc_for_refining(self, cur_stmt, &LocFields::default()) + { + let set_zero = MutateOperation::IntSet { val: 
0.into() }; + let op = MutateOperator::new(num_loc, set_zero); + if self.refine_load_statement(&op)? { + operators.push(op); + } + } + } + continue; + } + // if len is not multiple of coef + let rem = len % factors.coef; + if rem > 0 { + let pad_len = factors.coef - rem; + crate::log!( + trace, + "adding {pad_len} elements, to make length to be multiple of coef" + ); + // FIXME: arr_loc is wrong + len += pad_len; + let op = MutateOperator::new( + arr_loc, + MutateOperation::VecPad { + len: len as usize, + zero: false, + rng_state: rng::gen_rng_state(), + }, + ); + if self.refine_load_statement(&op)? { + operators.push(op); + } + } + + // To make sure the buffer won't overflow, we do not use div_ceil here. + // FIXME: it may brings noise for minimize (ops) + let mut remain = len / factors.coef; + if remain == 0 { + remain = 1; + } + let mut max = 1; + let mut f_iter = factors.list.iter().peekable(); + while let Some(f) = f_iter.next() { + // is the last + remain /= max; + if f_iter.peek().is_none() { + max = remain + } else { + max = rng::gen_range(1..=remain); + } + crate::log!(trace, "remain: {remain}"); + let cur_stmt = &call_args[f.arg_pos]; + if let Some(num_loc) = + f.fields + .to_loc_for_refining(self, cur_stmt, &LocFields::default()) + { + let cur_num = self.find_number_by_loc(num_loc.clone())?; + crate::log!(trace, "cur_num: {cur_num}"); + let op = if let Some(start) = f.range_start { + if cur_num >= start && cur_num < remain { + crate::log!(trace, "in range, skip"); + max = cur_num + 1; + continue; + } + MutateOperation::IntRange { + min: start.into(), + max: max.into(), + } + } else { + if cur_num > 0 && cur_num <= remain { + crate::log!(trace, "in range, skip"); + max = cur_num; + continue; + } + MutateOperation::IntSet { val: max.into() } + }; + let op = MutateOperator::new(num_loc, op); + if self.refine_load_statement(&op)? 
{ + operators.push(op); + } + } + } + } + } + Ok(operators) + } +} + +const DEFAULT_PTR_LEN: u64 = 1; + +impl IrEntry { + fn convert_length_to_constant( + &self, + program: &FuzzProgram, + stmt_index: &StmtIndex, + prefix: &LocFields, + call_args: &[StmtIndex], + ) -> eyre::Result { + if let Self::Length { + arg_pos, + fields, + is_factor: _, + } = &self + { + let mut stmt_index = stmt_index; + if let Some(arg_pos) = arg_pos { + stmt_index = &call_args[*arg_pos]; + } + let mut val = DEFAULT_PTR_LEN; + if let Some(loc) = fields.to_loc_for_refining(program, stmt_index, prefix) { + let stmt_i = loc.stmt_index.unwrap().get(); + if let FuzzStmt::Load(load_tmp) = &program.stmts[stmt_i].stmt { + let state = load_tmp.state.get_child_by_fields(loc.fields.as_slice())?; + val = get_ptee_vec_len(state, &program.stmts)?; + } + } + Ok(Self::Constant(val)) + } else { + Ok(self.clone()) + } + } +} + +fn get_ptee_vec_len(ptr_state: &ObjectState, stmts: &[IndexedStmt]) -> eyre::Result { + if let Some(ps) = &ptr_state.pointer { + let dst_loc = &ps.pointer_location; + if dst_loc.is_null() { + crate::log!(trace, "ptr point to null: {:?}", dst_loc); + return Ok(0); + } + let dst_stmt = dst_loc.stmt_index.as_ref().context("has index")?; + if let FuzzStmt::Load(load_vec) = &stmts[dst_stmt.get()].stmt { + let len = load_vec.value.get_length(); + return Ok(len as u64); + } + // call: 1 + } + Ok(DEFAULT_PTR_LEN) +} + +impl TypeConstraintItem { + /// Find all possible locations without the limitation of indices. + /// e.g. if `self` is &.0.xx, and we find that &.1 and &.2 exist, we list them. + /// However, we only traverse the indices in one dimension. 
+ pub fn find_all_locations_with_any_index( + &self, + program: &FuzzProgram, + stmt_index: &StmtIndex, + prefix: &LocFields, + ) -> Vec<(Location, Constraint)> { + let mut locs = vec![]; + // skip factors + let c_loc = match &self.constraint { + Constraint::SetVal { val } => Some(val), + Constraint::Range { min: _, max } => Some(max), + _ => None, + }; + if let Some(IrEntry::Length { + arg_pos: _, + fields: _, + is_factor, + }) = c_loc + { + if *is_factor { + return vec![]; + } + } + for (i, f) in self.key.list.iter().enumerate() { + if let FieldKey::Index(_) = f { + let mut next_index = 0; + loop { + let mut k = self.key.clone(); + k.list[i] = FieldKey::Index(next_index); + crate::log!(trace, "index: {next_index}, try loc: {k:?}"); + next_index += 1; + if next_index > 10000 { + panic!("too many indices"); + } + if let Some(loc) = k.to_loc_for_refining(program, stmt_index, prefix) { + // update constraint, too + let mut c = self.constraint.clone(); + let c_loc = match &mut c { + Constraint::SetVal { val } => Some(val), + Constraint::Range { min: _, max } => Some(max), + _ => None, + }; + if let Some(IrEntry::Length { + arg_pos: _, + fields, + is_factor: _, + }) = c_loc + { + if fields.len() > i && &fields.list[i] == f { + fields.list[i] = k.list[i].clone(); + } + } + locs.push((loc, c)); + } else { + crate::log_trace!("fail to find loc"); + break; + } + } + } + } + if locs.is_empty() { + if let Some(loc) = self.key.to_loc_for_refining(program, stmt_index, prefix) { + locs.push((loc, self.constraint.clone())); + } + } + locs + } +} + +#[test] +fn test_refine_ptr_index() { + { + let target = "test_arr"; + CONSTRAINTS + .with(|c| c.borrow_mut().init_func_constraint(target)) + .unwrap(); + for _ in 0..250 { + println!("------------"); + let mut p = FuzzProgram::generate_program_for_func(target).unwrap(); + p.eval().unwrap(); + } + } + + { + let target = "test_index"; + let type_name = "hopper::test::TestType"; + CONSTRAINTS + .with(|c| 
c.borrow_mut().init_type_constraint(type_name)) + .unwrap(); + for _ in 0..250 { + println!("------------"); + let mut p = FuzzProgram::generate_program_for_func(target).unwrap(); + p.eval().unwrap(); + } + } +} diff --git a/hopper-core/src/fuzz/rng.rs b/hopper-core/src/fuzz/rng.rs new file mode 100644 index 0000000..6a160b4 --- /dev/null +++ b/hopper-core/src/fuzz/rng.rs @@ -0,0 +1,133 @@ +use rand::{ + distributions::uniform::{SampleRange, SampleUniform}, + prelude::*, +}; +use std::cell::RefCell; + +use super::pcg::Pcg32; + +pub type RngState = Pcg32; + +thread_local! { + pub static RNG: RefCell = RefCell::new(Pcg32::seed_from_u64(rand::random())); +} + +#[inline] +pub fn save_rng_state() -> RngState { + RNG.with(|cell| cell.borrow().clone()) +} + +#[inline] +pub fn restore_rng_state(rng: RngState) { + RNG.with(|cell| cell.replace(rng)); +} + +#[inline] +pub fn renew_rng_state() { + RNG.with(|cell| cell.replace(Pcg32::seed_from_u64(rand::random()))); +} + +pub fn gen_rng_state() -> RngState { + Pcg32::seed_from_u64(rand::random()) +} + +#[inline] +pub fn gen_range(range: R) -> T +where + T: SampleUniform, + R: SampleRange, +{ + RNG.with(|cell| cell.borrow_mut().gen_range(range)) +} + +#[inline] +pub fn prob(p: f64) -> bool { + RNG.with(|cell| cell.borrow_mut().gen_bool(p)) +} + +#[inline] +pub fn mostly() -> bool { + prob(0.90) +} + +#[inline] +pub fn likely() -> bool { + prob(0.65) +} + +#[inline] +pub fn coin() -> bool { + prob(0.50) +} + +#[inline] +pub fn unlikely() -> bool { + prob(0.35) +} + +#[inline] +pub fn rarely() -> bool { + prob(0.10) +} + +#[inline] +pub fn gen() -> T +where + rand::distributions::Standard: Distribution, +{ + RNG.with(|cell| cell.borrow_mut().gen()) +} + +pub fn choose_slice(list: &[T]) -> Option<&T> { + RNG.with(|cell| list.choose(&mut *cell.borrow_mut())) +} + +pub fn choose_iter(list: T) -> Option { + RNG.with(|cell| list.choose(&mut *cell.borrow_mut())) +} + +pub fn choose_multiple(list: T, amount: usize) -> Vec { + 
RNG.with(|cell| list.choose_multiple(&mut *cell.borrow_mut(), amount)) +} + +pub fn shuffle(list: &mut [T]) { + RNG.with(|cell| list.shuffle(&mut *cell.borrow_mut())) +} + +pub fn cond_likely(cond: bool) -> bool { + if cond { + likely() + } else { + coin() + } +} +pub struct TempRngGuard { + cur: RngState, +} + +impl TempRngGuard { + pub fn temp_use(rng: &RngState) -> Self { + RNG.with(|cell| { + let cur = cell.borrow().clone(); + cell.replace(rng.clone()); + Self { cur } + }) + } +} + +impl Drop for TempRngGuard { + fn drop(&mut self) { + restore_rng_state(self.cur.clone()); + } +} + +#[test] +fn test_tmp_rng_guard() { + let rng_cur = save_rng_state(); + let new_rng = gen_rng_state(); + { + let _tmp_rng = TempRngGuard::temp_use(&new_rng); + assert_eq!(new_rng, save_rng_state()); + } + assert_eq!(rng_cur, save_rng_state()); +} diff --git a/hopper-core/src/fuzz/stmt/assert.rs b/hopper-core/src/fuzz/stmt/assert.rs new file mode 100644 index 0000000..62f4185 --- /dev/null +++ b/hopper-core/src/fuzz/stmt/assert.rs @@ -0,0 +1,169 @@ +use std::cell::RefCell; + +use eyre::ContextCompat; + +use super::*; + +impl WeightedItem for AssertStmt {} + +impl StmtMutate for AssertStmt {} + +// Assertions that will be added under specific conditions, e.g. when a specific function is invoked. +#[derive(Debug)] +pub struct Assertion { + pub f_name: String, + pub arg_pos: Option, + pub expected: ExpectedValue, + pub is_eq: bool, +} + +#[derive(Debug)] +pub enum ExpectedValue { + Load { + value: FuzzObject, + state: ObjectState, + }, + // TODO: + // Call { arg_pos: Option }, +} + +unsafe impl Sync for ExpectedValue {} + +thread_local! 
{ + static ASSERTIONS: RefCell> = RefCell::new(vec![]); +} + +pub fn add_assertion(assertion: Assertion) { + ASSERTIONS.with(|asserts| { + crate::log!(info, "add assertion: {assertion:?}"); + asserts.borrow_mut().push(assertion); + }) +} + +impl Assertion { + pub fn assert_call_ret_and_constant( + f_name: &str, + value: FuzzObject, + state: ObjectState, + is_eq: bool, + ) -> Self { + Self { + f_name: f_name.to_string(), + arg_pos: None, + expected: ExpectedValue::Load { value, state }, + is_eq, + } + } +} + +pub fn parse_assertion(de: &mut crate::Deserializer) -> eyre::Result { + let f_name = de.next_token_until(" ")?; + let rule = de.next_token_until(" ")?; + let is_eq = match rule { + "==" => true, + "!=" => false, + _ => { + eyre::bail!("unknown operation for assertion: {rule}") + } + }; + let g = global_gadgets::get_instance(); + let ty = g + .get_func_gadget(f_name)? + .ret_type + .context("should have ret type")?; + let mut state = crate::ObjectState::root(format!("expected_{f_name}"), ty); + let _ = state.replace_weight(0); + let expected = g.get_object_builder(ty)?.deserialize(de, &mut state)?; + Ok(crate::fuzz::stmt::Assertion::assert_call_ret_and_constant( + f_name, expected, state, is_eq, + )) +} + +impl FuzzProgram { + pub fn insert_required_assertions(&mut self) -> eyre::Result<()> { + if ASSERTIONS.with(|asserts| asserts.borrow().is_empty()) { + return Ok(()); + } + for i in (0..self.stmts.len()).rev() { + let is = &self.stmts[i]; + if let FuzzStmt::Call(call) = &is.stmt { + let found = ASSERTIONS.with(|asserts| { + let asserts = asserts.borrow(); + // only support one assertion for each function + if let Some(found) = asserts + .iter() + .find(|assert| assert.f_name == call.fg.f_name) + { + let check_stmt = if found.arg_pos.is_none() { + &is.index + } else { + // TODO: able to assert arguments + return None; + }; + if find_any_assertion(check_stmt, &self.stmts[check_stmt.get()..]) { + return None; + } + let expected = match &found.expected { + 
ExpectedValue::Load { value, state } => { + LoadStmt::new_const(value.clone(), state.clone_without_mutate_info(None)) + } + }; + return Some((found.is_eq, check_stmt.use_index(), expected)); + } + None + }); + if let Some((is_eq, check_stmt, expected)) = found { + let expected_index = self.insert_stmt(i + 1, expected); + let assert_stmt = if is_eq { + AssertStmt::assert_eq(check_stmt, expected_index) + } else { + AssertStmt::assert_neq(check_stmt, expected_index) + }; + crate::log!(trace, "add assertion: {}", assert_stmt.serialize()?); + let _ = self.insert_stmt(i + 2, assert_stmt); + } + } + } + Ok(()) + } +} + +fn find_any_assertion(target: &StmtIndex, stmts: &[IndexedStmt]) -> bool { + for is in stmts { + if let FuzzStmt::Assert(assert) = &is.stmt { + if let Some(existing) = assert.get_stmt() { + if existing.get_uniq() == target.get_uniq() { + return true; + } + } + } + } + false +} + +#[test] +fn test_parse_and_gen_assertion() { + // parse + let a = parse_assertion(&mut crate::Deserializer::new("test_one == 1", None)).unwrap(); + println!("a: {a:?}"); + assert!(a.is_eq); + add_assertion(a); + let a = parse_assertion(&mut crate::Deserializer::new("test_non_zero != 1", None)).unwrap(); + println!("a: {a:?}"); + assert!(!a.is_eq); + add_assertion(a); + + // gen + let program = FuzzProgram::generate_program_for_func("test_one").unwrap(); + println!("program: {}", program.serialize().unwrap()); + assert!(matches!( + program.stmts.last().unwrap().stmt, + FuzzStmt::Assert(..) + )); + let program = FuzzProgram::generate_program_for_func("test_non_zero").unwrap(); + println!("program: {}", program.serialize().unwrap()); + assert!(matches!( + program.stmts.last().unwrap().stmt, + FuzzStmt::Assert(..) + )); +} diff --git a/hopper-core/src/fuzz/stmt/call.rs b/hopper-core/src/fuzz/stmt/call.rs new file mode 100644 index 0000000..b9c48dc --- /dev/null +++ b/hopper-core/src/fuzz/stmt/call.rs @@ -0,0 +1,1069 @@ +//! Mutate call statements +//! Including: +//! 
- mutate call function's arguments +//! - mutate call function's return value +//! - insert or delete functions that has implicit relationships + +use crate::{config, utils}; + +use super::*; + +impl WeightedItem for CallStmt { + fn get_weight(&self) -> usize { + // since the search space of call is limited compared to load, + // we set it to 1, the minimal weight. + 1 + } +} + +impl StmtMutate for CallStmt { + fn is_incompatible(&self, _op: &MutateOperator) -> bool { + // mutate call statement is incompatible + true + } + + fn mutate_by_op( + &mut self, + program: &mut FuzzProgram, + _keys: &[FieldKey], + op: &MutateOperation, + ) -> eyre::Result<()> { + let depth = program.get_stub_stmt_depth()?; + match op { + MutateOperation::CallArg { arg_pos, rng_state } => { + let _tmp_rng = rng::TempRngGuard::temp_use(rng_state); + self.set_ith_call_arg(program, *arg_pos, depth)?; + } + MutateOperation::EffCallArg { + arg_pos, + eff_i: _, + rng_state, + } => { + let _tmp_rng = rng::TempRngGuard::temp_use(rng_state); + let _ = self.set_effective_ith_call_arg(program, *arg_pos, depth)?; + } + MutateOperation::CallImplicitInsert { f_name, rng_state } => { + let _tmp_rng = rng::TempRngGuard::temp_use(rng_state); + self.insert_implicit_call_with(program, f_name, depth)?; + } + MutateOperation::CallRelatedInsert { + f_name: _, + arg_pos, + rng_state, + } => { + let _tmp_rng = rng::TempRngGuard::temp_use(rng_state); + self.insert_relative_call_for_ith_arg(program, *arg_pos, depth, false)?; + } + MutateOperation::NewTarget { f_name, arg_i } => { + if let Some(arg_i) = arg_i { + self.append_new_target_with_arg(program, depth, f_name, *arg_i)?; + } else { + self.append_new_target_with_context(program, depth, f_name)?; + } + } + MutateOperation::Nop => {} + _ => { + crate::log!(trace, "ignore op: {op:?} in call stmt"); + } + } + Ok(()) + } + + fn mutate(&mut self, program: &mut FuzzProgram) -> eyre::Result { + // mutate call should be incompatible with other mutations + if 
!program.ops.is_empty() || flag::is_single_call() { + return Ok(MutateOperator::nop()); + } + let depth = program.get_stub_stmt_depth()?; + + for _ in 0..4 { + let op = if self.is_leaf() { + match rng::gen_range(0..=5) { + 0..=2 => self.insert_relative_call_before(program, depth)?, + 3 => self.insert_implicit_call_before(program, depth)?, + 4 => self.random_replace_arg(program, depth)?, + 5 => { + if config::ENABLE_APPEND_NEW_TARGET && self.is_target() { + self.choose_new_target_and_append(program, depth)? + } else { + MutateOperator::nop() + } + } + _ => { + unimplemented!() + } + } + } else { + // arguments + match rng::gen_range(0..=3) { + 0 => self.insert_relative_call_before(program, depth)?, + 1 => self.insert_implicit_call_before(program, depth)?, + 2 => update::append_update_stmt(program, self)?, + 3 => self.random_replace_arg(program, depth)?, + _ => { + unimplemented!() + } + } + }; + if op.is_nop() { + crate::log!(trace, "fail to mutate call, continue"); + // remove the useless statements introduced by the current mutation + program.check_ref_use()?; + continue; + } + return Ok(op); + } + + crate::log!(trace, "fail to mutate call, break"); + Ok(MutateOperator::nop()) + } + + fn is_deterministic(&self) -> bool { + if !is_call_det() { + return false; + } + let det_index = *self.det_index.borrow(); + let len = global_gadgets::get_instance().functions.len(); + crate::log!(trace, "call is_det: {det_index}, len: {len}"); + self.is_target() && det_index < len + } + + fn det_mutate(&mut self, program: &mut FuzzProgram) -> eyre::Result { + // avoid trying contexts again and again + // we only use it when mutating a seed program that was generated from nothing + if program.parent != Some(program.id) { + self.det_index.replace(std::usize::MAX); + return Ok(MutateOperator::nop()); + } + // add different context + let det_index = *self.det_index.borrow(); + let depth = program.get_stub_stmt_depth()?; + let gadgets = global_gadgets::get_instance(); + for (i, fg) in 
gadgets.functions.values().enumerate() { + self.det_index.replace(i + 1); + if i < det_index + || fg.f_name == self.name + || !filter_function(fg.f_name) + || self.has_any_context(program, fg.f_name) + { + continue; + } + let f_name = fg.f_name.to_string(); + for arg_pos in 0..self.args.len() { + if let Some(related_arg_pos) = check_relative_argument(&self.fg, fg, arg_pos, false) + { + let rng_state = rng::save_rng_state(); + crate::log!(trace, "det mutate call: {det_index}-th: relative, {f_name}"); + if self.insert_relative_call_for_ith_arg_with( + program, + arg_pos, + depth, + false, + fg, + related_arg_pos, + )? { + return Ok(MutateOperator::stmt_op( + MutateOperation::CallRelatedInsert { + f_name, + arg_pos, + rng_state, + }, + )); + } else { + return Ok(MutateOperator::nop()); + } + } + } + // if there are lots of APIs, we skip det-steps for implicit call insertion. + if gadgets.functions.len() > 80 { + continue; + } + let rng_state = rng::save_rng_state(); + crate::log!( + trace, + "det mutate call: {det_index}-th: implicit, {}", + fg.f_name + ); + self.insert_implicit_call_with(program, fg.f_name, depth)?; + return Ok(MutateOperator::stmt_op( + MutateOperation::CallImplicitInsert { f_name, rng_state }, + )); + } + Ok(MutateOperator::nop()) + } +} + +impl CallStmt { + /// Generate a function call for `f_name` in `program` + pub fn generate_new( + program: &mut FuzzProgram, + ident: &str, + f_name: &str, + depth: usize, + ) -> eyre::Result { + let fg = global_gadgets::get_instance() + .get_func_gadget(f_name)? 
+ .clone(); + let mut call = CallStmt::new(ident.to_string(), f_name.to_string(), fg); + // Find or create args for call + let type_names = call.fg.arg_types; + let is_variadic = utils::is_variadic_function(type_names); + let num_arg = if is_variadic { + type_names.len() - 1 + } else { + type_names.len() + }; + + for arg_pos in 0..num_arg { + call.set_ith_call_arg(program, arg_pos, depth)?; + } + + for arg_pos in 0..num_arg { + // only for primitive type + if rng::coin() { + let arg_type = call.fg.arg_types[arg_pos]; + if utils::is_primitive_type(arg_type) || utils::is_buffer_pointer(arg_type) { + let _ = call.set_effective_ith_call_arg(program, arg_pos, depth)?; + } + } + } + + // We must add these required contexts + call.add_required_contexts(program, depth)?; + + // try to generate context in generate mode + // #[cfg(not(test))] + if !flag::is_single_call() && call.is_target() && program.parent.is_none() && rng::likely() + { + if rng::rarely() { + let _op = call.insert_implicit_call_before(program, depth)?; + } else { + let _op = call.insert_relative_call_before(program, depth)?; + } + } + + Ok(call) + } + + /// Find or create ith argument + pub fn set_ith_call_arg( + &mut self, + program: &mut FuzzProgram, + arg_pos: usize, + depth: usize, + ) -> eyre::Result<()> { + let ident = self.fg.arg_idents[arg_pos]; + let mut type_name = self.fg.arg_types[arg_pos]; + let mut alias_type_name = self.fg.alias_arg_types[arg_pos]; + crate::log!( + trace, + "generate {arg_pos}-th arg, type: {type_name} ({alias_type_name}), ident: {ident}, depth: {depth}, single: {}", + flag::is_single_call() + ); + let mut non_null = false; + let mut need_init = false; + // 0. 
if the argument has any constraint + let should_ret = inspect_function_constraint_with(self.fg.f_name, |fc| { + if !config::ENABLE_REFINE { + return Ok(false); + } + for citem in fc.arg_constraints[arg_pos].list.iter() { + if !citem.key.is_empty() { + continue; + } + match &citem.constraint { + Constraint::SetNull => { + let null_stmt = LoadStmt::generate_constant(type_name, ident)?; + let stmt_index = program.insert_or_append_stmt(null_stmt)?; + self.set_arg(arg_pos, stmt_index); + return Ok(true); + } + Constraint::File { read, is_fd } => { + let (is_c_str, is_mut) = utils::is_c_str_type(type_name); + if is_c_str { + let read = *read; + let is_fd = *is_fd; + let file_stmt = + FileStmt::generate_new(program, ident, is_mut, is_fd, read, depth)?; + crate::log!( + trace, + "generate file for arg `{ident}` in function `{}`", + self.name + ); + let stmt_index = program.insert_or_append_stmt(file_stmt)?; + self.set_arg(arg_pos, stmt_index); + return Ok(true); + } + } + Constraint::RetFrom { ret_f } => { + let ret_f = ret_f.clone(); + self.insert_call_as_arg(program, arg_pos, type_name, &ret_f, ident, depth)?; + return Ok(true); + } + Constraint::CastFrom { cast_type } => { + // make sure it is pointer + eyre::ensure!( + utils::is_pointer_type(type_name), + "cast type should be a pointer" + ); + // modify type_name, and alias_type + type_name = utils::get_static_ty(cast_type); + alias_type_name = type_name; + } + Constraint::SetVal { val: _ } | Constraint::Range { min: _, max: _ } => { + // avoid create calls + let load = LoadStmt::generate_new(program, type_name, ident, depth)?; + let load_index = program.insert_or_append_stmt(load)?; + self.set_arg(arg_pos, load_index); + return Ok(true); + } + Constraint::NonNull => { + non_null = true; + } + Constraint::NeedInit => { + need_init = true; + } + _ => {} + } + } + Ok(false) + })?; + + if should_ret { + return Ok(()); + } + + let is_opaque = utils::is_opaque_pointer(type_name); + + // 1. 
use other statement + // if the program has any existing statement that provides such types. + if ((flag::is_reuse_stmt() + && rng::coin() + // avoid causing circular refs + && self + .has_reused_args(program) + .map_or(true, |pos| pos == arg_pos)) + || is_opaque) + && !self.is_implicit() + { + if let Some(stmt_index) = find_stmts_with_type(program, ident, type_name, &self.args) { + crate::log!(trace, "use stmt `{}` as arg ", stmt_index.get()); + self.set_arg(arg_pos, stmt_index); + return Ok(()); + } + } + + // 2. find new function and insert call to provide the argument. + // we only look for returning functions for pointer types + if let Some(inner) = utils::get_pointer_inner(type_name) { + if flag::use_call(inner, is_opaque, depth) + || (is_opaque && depth <= config::MAX_DEPTH && non_null) + { + if let Some(provider) = find_func_with_return_type(type_name, alias_type_name) { + if provider != self.fg.f_name { + self.insert_call_as_arg( + program, arg_pos, type_name, provider, ident, depth, + )?; + return Ok(()); + } + } + } + } + + // 3. 
if we can't find any function or statement, generate load directly + crate::log!(trace, "load new `{type_name}` as arg, opaque: {is_opaque} "); + let load = LoadStmt::generate_new(program, type_name, ident, depth)?; + let is_null_ptr = load.state.is_null(); + let load_index = program.insert_or_append_stmt(load)?; + self.set_arg(arg_pos, load_index); + + // if the arg is opaque pointer, we should try to init it + if is_null_ptr + && is_opaque + && depth <= config::MAX_DEPTH + && (need_init || rng::coin() || flag::is_pilot_det()) + { + crate::log!(trace, "try to init opaque by relative call"); + let op = self.insert_relative_call_for_ith_arg(program, arg_pos, depth, true)?; + if op.is_nop() { + crate::log!(trace, "fail to init opaque pointer"); + } + } + + Ok(()) + } + + /// Add all required contexts + fn add_required_contexts( + &mut self, + program: &mut FuzzProgram, + depth: usize, + ) -> eyre::Result<()> { + let mut use_contexts = vec![]; + filter_function_constraint_with(&self.name, |fc| { + let iter = fc + .contexts + .iter() + .filter(|ctx| ctx.is_required() && !self.has_any_context(program, &ctx.f_name)); + use_contexts.extend(iter.cloned()); + true + }); + for ctx in use_contexts { + crate::log!(trace, "try to insert required relative context: {ctx:?}"); + // relative contexts + if let Some(arg_pos) = ctx.related_arg_pos { + let fg = global_gadgets::get_instance().get_func_gadget(&ctx.f_name)?; + if let Some(f_i) = check_relative_argument(&self.fg, fg, arg_pos, false) { + self.insert_relative_call_for_ith_arg_with( + program, arg_pos, depth, false, fg, f_i, + )?; + } + } else { + // implicit contexts + self.insert_implicit_call_with(program, &ctx.f_name, depth)?; + } + } + Ok(()) + } + + /// Insert call for `index`-th argument + fn insert_call_as_arg( + &mut self, + program: &mut FuzzProgram, + arg_pos: usize, + arg_type: &str, + f_name: &str, + ident: &str, + depth: usize, + ) -> eyre::Result<()> { + crate::log!(trace, "use call `{}` as arg ", f_name); + 
let _tmp = flag::ReuseStmtGuard::temp_disable(); + let call = CallStmt::generate_new(program, ident, f_name, depth + 1)?; + let call_index = program.insert_or_append_stmt(call)?; + let _ = program.insert_or_append_stmt(AssertStmt::assert_non_null(call_index.use_index())); + // arg's replace will mutate the pointer to new one, so we make it constant here.s + let mut ptr_load = LoadStmt::generate_constant(arg_type, ident)?; + let mut loc = Location::stmt(call_index); + loc.fields.push(FieldKey::Pointer); + ptr_load.state.get_pointer_mut()?.pointer_location = loc; + let ptr_index = program.insert_or_append_stmt(ptr_load)?; + self.set_arg(arg_pos, ptr_index); + Ok(()) + } + + /// Random replace call's argument + fn random_replace_arg( + &mut self, + program: &mut FuzzProgram, + depth: usize, + ) -> eyre::Result { + if self.args.is_empty() || self.is_relative() { + return Ok(MutateOperator::nop()); + } + + /* + // avoid replace arguments that related + let has_related = filter_function_constraint_with(self.fg.f_name, |fc| { + !fc.get_related_args(arg_pos).is_empty() + }); + if has_related || self.args[arg_pos].get_ref_used() > 2 { + return Ok(MutateOperator::nop()); + } + */ + let mut op = MutateOperation::Nop; + if rng::coin() { + let mut arg_indices: Vec = (0..self.args.len()).collect(); + // make the order randomly + rng::shuffle(&mut arg_indices); + let rng_state = rng::save_rng_state(); + for arg_pos in arg_indices { + if let Some(eff_i) = self.set_effective_ith_call_arg(program, arg_pos, depth)? 
{ + op = MutateOperation::EffCallArg { + arg_pos, + eff_i, + rng_state, + }; + break; + } + } + } + if op.is_nop() { + let arg_pos = rng::gen_range(0..self.args.len()); + crate::log!(trace, "try to replace arg-{arg_pos}"); + let rng_state = rng::save_rng_state(); + self.set_ith_call_arg(program, arg_pos, depth)?; + op = MutateOperation::CallArg { arg_pos, rng_state }; + } + Ok(MutateOperator::stmt_op(op)) + } + + fn choose_new_target_and_append( + &mut self, + program: &mut FuzzProgram, + depth: usize, + ) -> eyre::Result { + if depth > config::MAX_DEPTH { + return Ok(MutateOperator::nop()); + } + let gadgets: &ProgramGadgets = global_gadgets::get_instance(); + if let Some(inner_ty) = self + .fg + .ret_type + .filter(|_| { + crate::filter_function_constraint_with(self.fg.f_name, |fc| fc.can_used_as_arg()) + }) + .and_then(utils::get_pointer_inner) + { + // FIXME: check ret_type is not static and writeable? + // `self` is used as arg of new target + if flag::use_call(inner_ty, utils::is_opaque_type(inner_ty), depth) { + let mut_ptr = utils::mut_pointer_type(inner_ty); + let mut_iter: &[(&str, usize)] = gadgets + .arg_graph + .get(mut_ptr.as_str()) + .map_or(&[], |l| l.as_slice()); + let const_ptr = utils::const_pointer_type(inner_ty); + let const_iter: &[(&str, usize)] = gadgets + .arg_graph + .get(const_ptr.as_str()) + .map_or(&[], |l| l.as_slice()); + let iter = mut_iter + .iter() + .chain(const_iter) + .filter(|(f_name, _)| filter_target_function(f_name)); + if let Some((f_name, arg_i)) = rng::choose_iter(iter) { + self.append_new_target_with_arg(program, depth, f_name, *arg_i)?; + return Ok(MutateOperator::stmt_op(MutateOperation::NewTarget { + f_name: f_name.to_string(), + arg_i: Some(*arg_i), + })); + } + } + } + // `self` is relative to new target. 
+ let funcs = global_gadgets::get_instance() + .functions + .iter() + .filter(|(f_name, _)| filter_target_function(f_name)); + if let Some((f_name, _)) = rng::choose_iter(funcs) { + self.append_new_target_with_context(program, depth, f_name)?; + return Ok(MutateOperator::stmt_op(MutateOperation::NewTarget { + f_name: f_name.to_string(), + arg_i: None, + })); + } + + Ok(MutateOperator::nop()) + } + + fn append_new_target_with_arg( + &mut self, + program: &mut FuzzProgram, + depth: usize, + f_name: &str, + arg_i: usize, + ) -> eyre::Result<()> { + crate::log!(trace, "append new target : {f_name} and its arg: {arg_i}"); + let mut new_call = CallStmt::generate_new(program, CallStmt::TARGET, f_name, depth + 1)?; + program.set_calls_track_cov(false); + new_call.track_cov = true; + let arg_type = new_call.fg.arg_types[arg_i]; + let prev_target_index = program.get_stub_stmt_index().unwrap(); + let _ = program.append_stmt(AssertStmt::assert_non_null(prev_target_index.use_index())); + self.track_cov = false; + self.ident = new_call.fg.arg_idents[arg_i].to_string(); + if utils::is_pointer_type(arg_type) { + let mut ptr_load = LoadStmt::generate_constant(arg_type, &self.ident)?; + let mut loc = Location::stmt(prev_target_index); + loc.fields.push(FieldKey::Pointer); + ptr_load.state.get_pointer_mut()?.pointer_location = loc; + let ptr_index = program.append_stmt(ptr_load); + new_call.args[arg_i] = ptr_index.use_index(); + } else { + new_call.args[arg_i] = prev_target_index.use_index(); + } + program.append_stmt(new_call); + Ok(()) + } + + fn append_new_target_with_context( + &mut self, + program: &mut FuzzProgram, + depth: usize, + f_name: &str, + ) -> eyre::Result<()> { + crate::log!(trace, "append new target : {f_name} with context"); + let mut new_call = CallStmt::generate_new(program, CallStmt::TARGET, f_name, depth + 1)?; + program.set_calls_track_cov(false); + new_call.track_cov = true; + self.track_cov = false; + let common_arg = new_call.has_overlop_arg(program, self); 
+ if filter_forbidden_context(f_name, self.fg.f_name, common_arg) { + return Ok(()); + } + if let Some(arg_pos) = common_arg { + // use its related args as possile + let _ = new_call.set_related_args(arg_pos, program, self, false)?; + self.ident = CallStmt::RELATIVE.to_string(); + } else { + let prev_target_index = program.get_stub_stmt_index().unwrap(); + new_call.contexts.push(prev_target_index); + self.ident = CallStmt::IMPLICIT.to_string(); + } + let _call_i = program.append_stmt(new_call); + Ok(()) + } + + /// Insert implicit function call before this call + fn insert_implicit_call_before( + &mut self, + program: &mut FuzzProgram, + depth: usize, + ) -> eyre::Result { + if !crate::config::ENABLE_INTER_API_LEARN && program.parent.is_some() { + return Ok(MutateOperator::nop()); + } + if crate::filter_init_func(self.fg.f_name) { + return Ok(MutateOperator::nop()); + } + crate::log!(trace, "try find implicit context.."); + let mut use_f_name = None; + // add optional implict context constraint + if rng::coin() { + filter_function_constraint_with(&self.name, |fc| { + if let Some(ctx) = rng::choose_iter(fc.contexts.iter().filter(|ctx| { + ctx.related_arg_pos.is_none() + && ctx.is_preferred() + && !self.has_implicit_context(program, &ctx.f_name) + })) { + crate::log!(trace, "add optioanl implicit context: {}", ctx.f_name); + use_f_name = Some(ctx.f_name.clone()); + } + true + }); + } + let f_name = if let Some(f_name) = &use_f_name { + f_name + } else if let Some(f_name) = rng::choose_iter( + global_gadgets::get_instance() + .functions + .keys() + .filter(|f_name| { + filter_function(f_name) + && !self.has_implicit_context(program, f_name) + && !filter_forbidden_context(self.fg.f_name, f_name, None) + }), + ) { + f_name + } else { + return Ok(MutateOperator::nop()); + }; + let rng_state = rng::save_rng_state(); + self.insert_implicit_call_with(program, f_name, depth)?; + Ok(MutateOperator::stmt_op( + MutateOperation::CallImplicitInsert { + f_name: f_name.to_string(), 
+ rng_state, + }, + )) + } + + /// Insert implicit call with specific function + fn insert_implicit_call_with( + &mut self, + program: &mut FuzzProgram, + f_name: &str, + depth: usize, + ) -> eyre::Result<()> { + // Implicit calls don't use other statements as arguments, + // so it can't mutate this call's arguments. + let _tmp = flag::ReuseStmtGuard::temp_disable(); + let new_call = CallStmt::generate_new(program, CallStmt::IMPLICIT, f_name, depth + 1)?; + crate::log!(trace, "insert implicit call: {}", &new_call.name); + let stmt = program.insert_or_append_stmt(new_call)?; + self.contexts.push(stmt); + // only this call can be track + program.set_calls_track_cov(false); + self.track_cov = true; + Ok(()) + } + + /// Insert relative call before this pointer, + /// the inserted call is related to at least one of this call's argument. + /// + /// the call should + /// 1. avoid oveflapping arguments + /// 2. avoid remove/delete existing arguments + fn insert_relative_call_before( + &mut self, + program: &mut FuzzProgram, + depth: usize, + ) -> eyre::Result { + if !crate::config::ENABLE_INTER_API_LEARN && program.parent.is_some() { + return Ok(MutateOperator::nop()); + } + // do not add relative call to init function + if crate::filter_init_func(self.fg.f_name) { + return Ok(MutateOperator::nop()); + } + let mut arg_indices: Vec = (0..self.args.len()).collect(); + // make the order randomly + rng::shuffle(&mut arg_indices); + for arg_pos in arg_indices { + let op = self.insert_relative_call_for_ith_arg(program, arg_pos, depth, false)?; + if !op.is_nop() { + return Ok(op); + } + } + Ok(MutateOperator::nop()) + } + + /// Insert relative function call for i-th argument + fn insert_relative_call_for_ith_arg( + &mut self, + program: &mut FuzzProgram, + arg_pos: usize, + depth: usize, + init_opaque: bool, + ) -> eyre::Result { + crate::log!(trace, "try find relative for arg-{arg_pos}"); + // Do not consider primitive types that has contraints. 
+ if utils::is_primitive_type(self.fg.arg_types[arg_pos]) + && filter_function_constraint_with(&self.name, |fc| { + fc.get_related_args(arg_pos).len() > 1 + || !fc.arg_constraints[arg_pos].list.is_empty() + }) + { + return Ok(MutateOperator::nop()); + } + let rng_state = rng::save_rng_state(); + let mut relative_f = None; + // add optional context + if !init_opaque && rng::coin() { + let mut use_ctx = None; + filter_function_constraint_with(&self.name, |fc| { + let optioanl_ctxs = fc.contexts.iter().filter(|ctx| { + ctx.is_preferred() + && ctx.related_arg_pos == Some(arg_pos) + && !self.has_relative_context(program, &ctx.f_name) + }); + use_ctx = rng::choose_iter(optioanl_ctxs).cloned(); + true + }); + if let Some(ctx) = use_ctx { + crate::log!(trace, "use optional realtive ctx: {ctx:?}"); + let fg = global_gadgets::get_instance().get_func_gadget(&ctx.f_name)?; + if let Some(related_arg_pos) = check_relative_argument(&self.fg, fg, arg_pos, false) + { + relative_f = Some((fg, related_arg_pos)); + } + } + } + if relative_f.is_none() { + // Find function that related to `index` + let funcs = + global_gadgets::get_instance() + .functions + .iter() + .filter_map(|(f_name, fg)| { + if f_name == &self.name + || crate::filter_function_constraint_with(f_name, |fc| { + fc.role.free_arg || !fc.is_success() || (init_opaque && !fc.role.init_arg) + }) + || Self::has_relative_context_for_stmt( + program, + f_name, + &self.args[arg_pos], + ) + { + return None; + } + if let Some(i) = check_relative_argument(&self.fg, fg, arg_pos, init_opaque) + { + // crate::log!(trace, "add {} as candidate", fg.f_name); + return Some((fg, i)); + } + None + }); + relative_f = rng::choose_iter(funcs); + } + + // Generate call and set arguments + if let Some((fg, related_arg_pos)) = relative_f { + if self.insert_relative_call_for_ith_arg_with( + program, + arg_pos, + depth, + init_opaque, + fg, + related_arg_pos, + )? 
{ + return Ok(MutateOperator::stmt_op( + MutateOperation::CallRelatedInsert { + f_name: fg.f_name.to_string(), + arg_pos, + rng_state, + }, + )); + } + } + Ok(MutateOperator::nop()) + } + + fn insert_relative_call_for_ith_arg_with( + &mut self, + program: &mut FuzzProgram, + arg_pos: usize, + depth: usize, + init_opaque: bool, + fg: &FnGadget, + related_arg_pos: usize, + ) -> eyre::Result { + let f_name = fg.f_name; + crate::log!( + trace, + "insert relative call {f_name} with argument: {related_arg_pos}" + ); + // don't reuse arguments except `index` and its related ones + let _tmp = flag::ReuseStmtGuard::temp_disable(); + let mut new_call = CallStmt::generate_new(program, CallStmt::RELATIVE, f_name, depth + 1)?; + // disable this call's cov + new_call.track_cov = false; + + // new call must use arg: load_value + let load_value = &self.args[arg_pos]; + if !new_call.args.contains(load_value) { + new_call.set_ith_arg_for_relative_call( + program, + related_arg_pos, + load_value.use_index(), + self.fg.arg_types[arg_pos], + )?; + } + // and its related args as possile + if !self.set_related_args(arg_pos, program, &mut new_call, init_opaque)? 
{ + return Ok(false); + } + + new_call.add_required_contexts(program, depth)?; + let call_stmt = program.insert_or_append_stmt(new_call)?; + if init_opaque { + let _ = program.insert_or_append_stmt(AssertStmt::assert_initialized( + load_value.use_index(), + call_stmt, + )); + } + crate::log!( + trace, + "insert relative call `{f_name}` for arg-{related_arg_pos}" + ); + Ok(true) + } + + /// Set related args as possiple + fn set_related_args( + &self, + arg_pos: usize, + program: &mut FuzzProgram, + new_call: &mut CallStmt, + init_opaque: bool, + ) -> eyre::Result { + let related = + inspect_function_constraint_with(&self.name, |fc| Ok(fc.get_related_args(arg_pos)))?; + crate::log!(trace, "related args: {:?}", related); + for r_i in related { + if r_i == arg_pos || init_opaque && r_i > arg_pos { + continue; + } + let r_value = self.args[r_i].use_index(); + if !new_call.args.contains(&r_value) { + if let Some(i) = check_relative_argument(&self.fg, &new_call.fg, r_i, init_opaque) { + new_call.set_ith_arg_for_relative_call( + program, + i, + r_value, + self.fg.arg_types[r_i], + )?; + } else { + // fail to find corresponding related arguments, + // so we do not push the call to context, + // it will be clean by check ref-use step. 
+ flag::set_incomplete_gen(true); + crate::log!(trace, "fail to find correspoing related arguments"); + return Ok(false); + } + } + } + Ok(true) + } + + /// Set argument for relative call + pub fn set_ith_arg_for_relative_call( + &mut self, + program: &mut FuzzProgram, + arg_pos: usize, + load_value: StmtIndex, + load_type: &str, + ) -> eyre::Result { + let f_arg_type = self.fg.arg_types[arg_pos]; + if utils::is_same_type(load_type, f_arg_type) { + self.args[arg_pos] = load_value; + return Ok(false); + } else if utils::get_pointer_inner(f_arg_type) + .map_or(false, |inner_ty| inner_ty == load_type) + { + let f_ident = self.fg.arg_idents[arg_pos]; + let mut load_ptr = LoadStmt::generate_constant(f_arg_type, f_ident)?; + let ps = load_ptr.state.get_pointer_mut()?; + ps.pointer_location = Location::stmt(load_value.use_index()); + let ptr_stmt = program.insert_or_append_stmt(load_ptr)?; + self.args[arg_pos] = ptr_stmt; + return Ok(true); + } + eyre::bail!("type error for argument assign: {load_type} and {f_arg_type}"); + } +} + +/// Check if a function has arguments that the same as `arg` +fn check_relative_argument( + fg1: &FnGadget, + fg2: &FnGadget, + arg_pos: usize, + init_opaque: bool, +) -> Option { + let arg_ident = fg1.arg_idents[arg_pos]; + let arg_type = fg1.arg_types[arg_pos]; + let alias_arg_type = fg1.alias_arg_types[arg_pos]; + if !filter_function(fg2.f_name) + || filter_forbidden_context(fg1.f_name, fg2.f_name, Some(arg_pos)) + { + return None; + } + // do not reused arg with file constraints + if filter_function_constraint_with(fg1.f_name, |fc| fc.is_file(arg_pos)) { + return None; + } + let is_void_ptr = utils::is_void_pointer(arg_type); + // stupid case: alias void type to another type named void + if is_void_ptr && (alias_arg_type.contains("void") || alias_arg_type.contains("Void")) { + return None; + } + // find related arg_pos in fg2 + for arg_pos2 in 0..fg2.arg_idents.len() { + let f_arg_type = fg2.arg_types[arg_pos2]; + if 
utils::is_pointer_type(f_arg_type) { + let f_ident = fg2.arg_idents[arg_pos2]; + // avoid primitive type that has different ident + // exclude `arg*` idents since they are default ident names. + if utils::is_primitive_type(arg_type) + && (!f_ident.contains(arg_ident) || f_ident.starts_with("arg")) + { + continue; + } + // utils::is_primitive_pointer(arg_type) + if filter_function_constraint_with(fg2.f_name, |fc| fc.is_file(arg_pos2)) { + continue; + } + + // if any of the argument has the same type with pointer, + // or the function has one argument that its type is the pointer of argument's type. + + // check type_name if it is not void pointer + let f_arg_type = fg2.arg_types[arg_pos2]; + if !is_void_ptr { + if (!init_opaque && arg_type == f_arg_type) + || utils::const_pointer_type(arg_type) == f_arg_type + || utils::mut_pointer_type(arg_type) == f_arg_type + { + // crate::log!(trace, "relative type match: {arg_type} vs {f_arg_type}"); + return Some(arg_pos2); + } + } else { + // We check alias type instead of cast type (`arg_type`) since it is more accurate. + let f_alias_arg_type = fg2.alias_arg_types[arg_pos2]; + // crate::log!(trace, "check relative type : {alias_arg_type} vs. 
{f_alias_arg_type}"); + if (!init_opaque && alias_arg_type == f_alias_arg_type) + || utils::const_pointer_type(alias_arg_type) == f_alias_arg_type + || utils::mut_pointer_type(alias_arg_type) == f_alias_arg_type + { + // crate::log!(trace, "relative type match: {alias_arg_type} vs {f_alias_arg_type}"); + return Some(arg_pos2); + } + } + } + } + None +} + +/// Choose any existing stetements that return objects with specific type +fn find_stmts_with_type( + program: &FuzzProgram, + ident: &str, + type_name: &str, + used_args: &[StmtIndex], +) -> Option { + // only support pointer type + let (mut_ptr, const_ptr) = if let Some(inner) = utils::get_pointer_inner(type_name) { + ( + utils::mut_pointer_type(inner), + utils::const_pointer_type(inner), + ) + } else { + return None; + }; + let mut after_stub = false; + let iter = program.stmts.iter().filter_map(|indexed_stmt| { + if indexed_stmt.stmt.is_stub() { + after_stub = true; + } + if after_stub || indexed_stmt.freed.is_some() || used_args.contains(&indexed_stmt.index) { + return None; + } + if let FuzzStmt::Load(load) = &indexed_stmt.stmt { + // only consider load value is pointer here + if !load.point_to_freed_resource(program) + && ident == load.get_ident() + && (load.value.type_name() == mut_ptr || load.value.type_name() == const_ptr) + { + return Some(indexed_stmt.index.use_index()); + } + } + None + }); + rng::choose_iter(iter) +} + +#[test] +fn test_add_relative_call() { + let f_name = "test_arr"; + CONSTRAINTS.with(|c| { + c.borrow_mut().init_func_constraint(f_name).unwrap(); + println!("constraint: {:?}", c.borrow()); + }); + flag::set_pilot_det(true); + let mut program = FuzzProgram::generate_program_for_func(f_name).unwrap(); + println!("program: {}", program.serialize_all().unwrap()); + flag::set_pilot_det(false); + flag::set_reuse_stmt(true); + flag::set_single_call(false); + let call_index = program.stmts[program.get_target_index().unwrap()] + .index + .use_index(); + let rela_f_name = "test_mutate_arr"; + 
CONSTRAINTS.with(|c| { + c.borrow_mut().init_func_constraint(rela_f_name).unwrap(); + }); + let op = MutateOperator::new( + Location::stmt(call_index), + MutateOperation::CallRelatedInsert { + f_name: rela_f_name.to_string(), + arg_pos: 0, + rng_state: rng::gen_rng_state(), + }, + ); + program.mutate_program_by_op(&op).unwrap(); + println!("program: {}", program.serialize_all().unwrap()); + // test_arr's arguments used more than twice (used by test_mutate_arr) + assert_eq!(program.stmts[1].index.get_ref_used(), 3); + assert_eq!(program.stmts[2].index.get_ref_used(), 3); +} diff --git a/hopper-core/src/fuzz/stmt/file.rs b/hopper-core/src/fuzz/stmt/file.rs new file mode 100644 index 0000000..6043833 --- /dev/null +++ b/hopper-core/src/fuzz/stmt/file.rs @@ -0,0 +1,49 @@ +//! Mutate File statement + +use super::*; + +impl WeightedItem for FileStmt {} + +impl StmtMutate for FileStmt {} + +impl FileStmt { + pub fn generate_new( + program: &mut FuzzProgram, + ident: &str, + is_mut: bool, + is_fd: bool, + may_read: bool, + depth: usize, + ) -> eyre::Result { + let mut file_stmt = FileStmt::new(ident, is_mut, is_fd); + if may_read { + let _tmp = flag::ReuseStmtGuard::temp_disable(); + let load = LoadStmt::generate_vec( program, "u8", "file_buf", depth)?; + let index = program.insert_or_append_stmt(load)?; + file_stmt.set_buf_index(index); + } + Ok(file_stmt) + } +} + +/// Just some code to make FileFd to be Fuzzable +impl ObjGenerate for FileFd { + fn generate_new(_state: &mut ObjectState) -> eyre::Result { + Ok(Default::default()) + } +} + +impl ObjMutate for FileFd { + fn mutate(&mut self, _state: &mut ObjectState) -> eyre::Result { + Ok(MutateOperator::nop()) + } + + fn mutate_by_op( + &mut self, + _state: &mut ObjectState, + _keys: &[FieldKey], + _op: &MutateOperation, + ) -> eyre::Result<()> { + Ok(()) + } +} \ No newline at end of file diff --git a/hopper-core/src/fuzz/stmt/load.rs b/hopper-core/src/fuzz/stmt/load.rs new file mode 100644 index 0000000..cc4ba27 --- 
/dev/null +++ b/hopper-core/src/fuzz/stmt/load.rs @@ -0,0 +1,125 @@ +//! Mutate Load statement + +use super::*; + +impl WeightedItem for LoadStmt { + fn get_weight(&self) -> usize { + self.state.mutate.borrow().get_weight() + } +} + +impl StmtMutate for LoadStmt { + fn is_deterministic(&self) -> bool { + self.state.is_deterministic() + } + + fn is_incompatible(&self, op: &MutateOperator) -> bool { + matches!( + op.op, + MutateOperation::PointerTodo + | MutateOperation::PointerNull + | MutateOperation::PointerGen { rng_state: _ } + | MutateOperation::PointerUse { loc: _ } + | MutateOperation::PointerRet { + f_name: _, + rng_state: _ + } + | MutateOperation::UnionNew { rng_state: _ } + ) + } + + fn mutate(&mut self, program: &mut FuzzProgram) -> eyre::Result { + let mut op = self.value.mutate(&mut self.state)?; + if !program.ops.is_empty() && self.is_incompatible(&op) { + if op.op.is_pointer_todo() { + return Ok(MutateOperator::nop()); + } + return Ok(op); + } + if op.op.is_pointer_todo() { + op.op = pointer::mutate_pointer_location( + program, + &mut self.state, + op.key.fields.as_slice(), + )?; + } + Ok(op) + } + + fn det_mutate(&mut self, _program: &mut FuzzProgram) -> eyre::Result { + // crate::log!(trace, "det mutate: {:?}", self.value); + self.value.det_mutate(&mut self.state) + } + + fn mutate_by_op( + &mut self, + program: &mut FuzzProgram, + keys: &[FieldKey], + op: &MutateOperation, + ) -> eyre::Result<()> { + if op.is_nop() { + return Ok(()); + } + flag::set_mutate_ptr(false); + let state = &mut self.state; + let keys = crate::check_fields(keys, state); + self.value.mutate_by_op(state, keys, op)?; + if flag::is_mutate_ptr() { + pointer::mutate_pointer_location_by_op(program, state, keys, op)?; + } + Ok(()) + } +} + +impl LoadStmt { + /// Generate variable for specific type + pub fn generate_new( + program: &mut FuzzProgram, + type_name: &str, + ident: &str, + depth: usize, + ) -> eyre::Result { + let mut state = LoadStmt::new_state(ident, type_name); + let 
value = global_gadgets::get_instance() + .get_object_builder(type_name)? + .generate_new(&mut state)?; + pointer::generate_pointer_location(program, &mut state, depth)?; + let load = LoadStmt::new(value, state); + Ok(load) + } + + /// Generate vector statement for specific type + pub fn generate_vec( + program: &mut FuzzProgram, + type_name: &str, + ident: &str, + depth: usize, + ) -> eyre::Result { + let mut state = + LoadStmt::new_state(ident, format!("alloc::vec::Vec<{type_name}>").as_str()); + let value = global_gadgets::get_instance() + .get_object_builder(type_name)? + .generate_vec(&mut state)?; + pointer::generate_pointer_location(program, &mut state, depth)?; + let load = LoadStmt::new(value, state); + Ok(load) + } + + /// Generate load statement without filling pointer + fn generate_new_without_filling_pointer(type_name: &str, ident: &str) -> eyre::Result { + let mut state = LoadStmt::new_state(ident, type_name); + let value = global_gadgets::get_instance() + .get_object_builder(type_name)? + .generate_new(&mut state)?; + let load = LoadStmt::new(value, state); + Ok(load) + } + + /// Generate fixed load statement, and the pointers are null in default. 
+ pub fn generate_constant(type_name: &str, ident: &str) -> eyre::Result { + let mut load_stmt = Self::generate_new_without_filling_pointer(type_name, ident)?; + load_stmt.is_const = true; + load_stmt.state.replace_weight(0); + Ok(load_stmt) + } +} diff --git a/hopper-core/src/fuzz/stmt/mod.rs b/hopper-core/src/fuzz/stmt/mod.rs new file mode 100644 index 0000000..ea4ac06 --- /dev/null +++ b/hopper-core/src/fuzz/stmt/mod.rs @@ -0,0 +1,74 @@ +use crate::{impl_stmt_match, runtime::*}; +pub use call::*; +use super::*; + +pub trait StmtMutate: WeightedItem { + /// Is deterministic or not + fn is_deterministic(&self) -> bool { + false + } + // Is incompatible or not + fn is_incompatible(&self, _op: &MutateOperator) -> bool { + false + } + /// Mutate the statement + fn mutate(&mut self, _program: &mut FuzzProgram) -> eyre::Result { + unimplemented!() + } + /// Det mutate + fn det_mutate(&mut self, _program: &mut FuzzProgram) -> eyre::Result { + unimplemented!(); + } + /// Mutate by op + fn mutate_by_op( + &mut self, + _program: &mut FuzzProgram, + _keys: &[FieldKey], + _op: &MutateOperation, + ) -> eyre::Result<()> { + unimplemented!() + } +} + +impl FuzzStmt { + pub fn is_deterministic(&self) -> bool { + impl_stmt_match!(self, is_deterministic) + } + pub fn is_incompatible(&self, op: &MutateOperator) -> bool { + impl_stmt_match!(self, is_incompatible(op)) + } + pub fn mutate(&mut self, program: &mut FuzzProgram) -> eyre::Result { + impl_stmt_match!(self, mutate(program)) + } + pub fn det_mutate(&mut self, program: &mut FuzzProgram) -> eyre::Result { + impl_stmt_match!(self, det_mutate(program)) + } + pub fn mutate_by_op( + &mut self, + program: &mut FuzzProgram, + keys: &[FieldKey], + op: &MutateOperation, + ) -> eyre::Result<()> { + impl_stmt_match!(self, mutate_by_op(program, keys, op)) + } +} + +impl WeightedItem for FuzzStmt { + fn get_weight(&self) -> usize { + impl_stmt_match!(self, get_weight) + } +} + +impl WeightedItem for IndexedStmt { + fn get_weight(&self) -> 
usize { + self.stmt.get_weight() + } +} + +mod assert; +mod call; +mod file; +mod load; +mod update; + +pub use assert::*; \ No newline at end of file diff --git a/hopper-core/src/fuzz/stmt/update.rs b/hopper-core/src/fuzz/stmt/update.rs new file mode 100644 index 0000000..3c77a3a --- /dev/null +++ b/hopper-core/src/fuzz/stmt/update.rs @@ -0,0 +1,155 @@ +//! Update other statment's field by another +//! e.g update call statement 's return by a load statement + +use eyre::ContextCompat; + +use crate::utils; + +use super::*; + +impl WeightedItem for UpdateStmt {} + +impl StmtMutate for UpdateStmt {} + +/// Append update statement after call statement +/// update should used incompatiblely +pub fn append_update_stmt( + program: &mut FuzzProgram, + call: &mut CallStmt, +) -> eyre::Result { + if call.ret_ir.is_empty() || call.is_leaf() { + return Ok(MutateOperator::nop()); + } + let call_index = program.get_stub_stmt_index().context("no stub")?; + // no one use it as argument + if call_index.get_ref_used() <= 2 { + return Ok(MutateOperator::nop()); + } + // we want to skip the opaque type, especially those that are manually marked opaque type. 
+ // However, ret_ir might not have a full structual layout, therefore + // we might still update a particular field of an partially exported opaque type + let mut unused = vec![]; + let mut opeaque_prefix = vec![]; + for (i, ir) in call.ret_ir.iter().enumerate() { + let ty_name = ir.value.type_name(); + let key = ir.fields.as_slice(); + if utils::is_opaque_type(ty_name) || utils::is_opaque_vec(ty_name) { + opeaque_prefix.push(key); + continue; + } + // skip those pointers inside opaque value + if opeaque_prefix.iter().any(|p| key.starts_with(p)) { + continue; + } + // skip private fields and empty list + if ir.state.is_private_field() || ir.value.get_length() == 0 { + continue; + } + // skip long vector for custom structure + if ir.value.get_length() > 1 && !utils::is_primitive_type(ty_name) { + continue; + } + if ir.used.is_none() { + crate::log!(trace, "key {key:?} is added for updated"); + unused.push(i); + } + } + // choose any unused + if let Some(ir_i) = rng::choose_iter(unused.into_iter()) { + let ir = &call.ret_ir[ir_i]; + // copy as load statement + let mut state = ir.state.clone_with_program(program); + // make call's ident to return + if ir.fields.as_slice() == [FieldKey::Pointer] { + state.set_ident(&call.ident); + } + let mut load = LoadStmt::new( + ir.value.clone(), + // this load stmt shares state with call_ret_ir + state, + ); + // then mutate load statement + let load_mutate_op = load.mutate(program)?; + let insert_i = call.ret_ir[ir_i + 1..] 
+ .iter() + .find_map(|ir| ir.used.as_ref().map(|i| i.get())) + .unwrap_or_else(|| call_index.get() + 1); + let load_i = program.insert_stmt(insert_i + 1, load); + // add update statement + let dst = WeakLocation { + stmt_index: Some(call_index.downgrade()), + fields: call.ret_ir[ir_i].fields.clone(), + }; + let update = UpdateStmt::new(load_i, dst); + let update_i = program.insert_stmt(insert_i + 2, update); + call.ret_ir[ir_i].used = Some(update_i.downgrade()); + let ir = &call.ret_ir[ir_i]; + crate::log!( + trace, + "insert update : set call {}'s {} fields to {}", + call_index.get(), + ir.fields.serialize()?, + load_mutate_op.serialize()? + ); + return Ok(MutateOperator::stmt_op(MutateOperation::CallUpdate { + fields: ir.fields.clone(), + ops: vec![load_mutate_op], + })); + } + + Ok(MutateOperator::stmt_op(MutateOperation::Nop)) +} + +#[test] +fn test_update_mutate() { + use crate::{feedback, test}; + let mut program = FuzzProgram::default(); + let call_1 = program.append_stmt(FuzzStmt::Stub); + let mut call_last = test::generate_call_stmt("test_do_nothing1"); + call_last.ident = CallStmt::TARGET.to_string(); + call_last.contexts.push(call_1); + let _c = program.append_stmt(call_last); + // stub + let mut call = test::generate_call_stmt("create_test_ptr"); + let resource_states = feedback::ResourceStates::default(); + let val = Box::new(test::create_test_ptr()) as FuzzObject; + let ret = feedback::convert_ret_to_ir(&val, &resource_states); + println!("ret: {ret:?}"); + call.ret_ir = ret.ir; + if let Some(first) = call.ret_ir.first_mut() { + if first.fields.is_empty() { + first.used = Some(program.stmts[0].index.downgrade()); + } + } + // println!("ir: {:?}", call.ret_ir); + append_update_stmt(&mut program, &mut call).unwrap(); + append_update_stmt(&mut program, &mut call).unwrap(); + append_update_stmt(&mut program, &mut call).unwrap(); + let op = append_update_stmt(&mut program, &mut call).unwrap(); + assert!(op.is_nop()); + let _ = 
program.withdraw_stmt(call.into()); + println!("program: {}", program.serialize().unwrap()); + // the update fields should be from nested to shallow + let mut last_update_f_len = 1000; + for is in &program.stmts { + if let FuzzStmt::Update(update) = &is.stmt { + let f_len = update.dst.fields.len(); + assert!(f_len < last_update_f_len); + last_update_f_len = f_len; + } + } + + // test remove ret + println!("check remove ret"); + if let FuzzStmt::Call(call) = &mut program.stmts[0].stmt { + let last = call.ret_ir.pop().unwrap(); + println!("remove {}", last.fields.serialize().unwrap()); + program.check_update().unwrap(); + println!("program: {}", program.serialize().unwrap()); + for is in &program.stmts { + if let FuzzStmt::Update(update) = &is.stmt { + assert_ne!(update.dst.fields, last.fields); + } + } + } +} diff --git a/hopper-core/src/fuzz/weight.rs b/hopper-core/src/fuzz/weight.rs new file mode 100644 index 0000000..8077d84 --- /dev/null +++ b/hopper-core/src/fuzz/weight.rs @@ -0,0 +1,161 @@ +//! Choose item in slice with weight. +//! The slice's item should implement `WeightedItem` trait. +//! 
+ +use crate::{ + fuzz::rng, FuzzProgram, FuzzStmt, LocFields, ObjectState, RcIndex, StmtIndex, TypeConstraint, +}; + +/// Define weight of a object +pub trait WeightedItem { + fn get_weight(&self) -> usize { + 0 + } +} + +pub fn get_weight_sum(items: &[T]) -> usize { + let mut sum = 0; + for item in items { + let w = item.get_weight(); + sum += w; + } + sum +} + +/// Choose element in slice by weight +pub fn choose_weighted(items: &[T]) -> Option { + if items.is_empty() { + return None; + } + let mut weights = vec![]; + let mut sum = 0; + for item in items { + let w = item.get_weight(); + sum += w; + weights.push(sum); + } + if sum == 0 { + return None; + } + let choose = rng::gen_range(0..sum); + crate::log!(trace, "slice weights: {:?}, choose: {}", weights, choose); + weights.iter().position(|w| choose < *w) +} + +/// Choose position based on state +pub fn choose_weighted_by_state(state: &ObjectState) -> Option { + let mut weights = vec![]; + let mut sum = 0; + for item in &state.children { + let w = item.mutate.borrow().get_weight(); + sum += w; + weights.push(sum); + } + if sum == 0 { + return None; + } + let choose = rng::gen_range(0..sum); + crate::log!(trace, "state weights: {:?}, choose: {}", weights, choose); + weights.iter().position(|w| choose < *w) +} + +impl FuzzProgram { + /// Update statements' weight from bottom to top + /// If the field or arg has constraints and fixed, we set its weight to 0 + pub fn update_weight(&self) { + for i in 0..self.stmts.len() { + let stmt = &self.stmts[i].stmt; + match stmt { + FuzzStmt::Call(call) => { + crate::inspect_function_constraint_with(&call.name, |fc| { + for (i, fc) in fc.arg_constraints.iter().enumerate() { + let arg_index = &call.args[i]; + self.update_type_weight(arg_index, fc, None); + } + Ok(()) + }) + .unwrap(); + } + FuzzStmt::Load(load) => { + if load.is_const { + let _ = load.state.replace_weight(0); + } + crate::iterate_type_constraint_with(|ty, tc| { + if tc + .list + .iter() + .any(|item| 
item.constraint.should_not_mutate()) + { + let stmt_index = &self.stmts[i].index; + let locs_containing_ty = + load.state.find_fields_with(|s| s.ty == ty, false); + for prefix in locs_containing_ty { + self.update_type_weight(stmt_index, tc, Some(prefix)); + } + } + Ok(()) + }) + .unwrap(); + } + _ => {} + } + } + } + + fn update_type_weight( + &self, + stmt_index: &StmtIndex, + tc: &TypeConstraint, + prefix: Option, + ) { + let prefix = prefix.unwrap_or_default(); + for item in tc.list.iter() { + if item.constraint.should_not_mutate() { + let loc = item.key.to_loc_for_refining(self, stmt_index, &prefix); + if loc.is_none() { + continue; + } + let loc = loc.unwrap(); + if let FuzzStmt::Load(load) = &self.stmts[loc.stmt_index.unwrap().get()].stmt { + let sub_state = load + .state + .get_child_by_fields(loc.fields.as_slice()) + .unwrap(); + let _ = sub_state.replace_weight(0); + load.state.update_weight_from_children(); + } + } + } + } +} + +impl ObjectState { + /// Update weight based on its children's weights + pub fn update_weight_from_children(&self) -> usize { + if self.children.is_empty() { + return self.mutate.borrow().get_weight(); + } + let sum = self + .children + .iter() + .map(|s| s.update_weight_from_children()) + .sum(); + + // anneal children's weight + self.mutate.borrow_mut().set_weight(1 + sum / 2); + sum + } + + /// Replace weight with the new value + pub fn replace_weight(&self, new_weight: usize) -> usize { + if new_weight == 0 { + self.done_deterministic(); + } + let mut mutate = self.mutate.borrow_mut(); + let weight = mutate.get_weight(); + if weight != new_weight { + mutate.set_weight(new_weight); + } + weight + } +} diff --git a/hopper-core/src/fuzzer.rs b/hopper-core/src/fuzzer.rs new file mode 100644 index 0000000..8ac2662 --- /dev/null +++ b/hopper-core/src/fuzzer.rs @@ -0,0 +1,617 @@ +//! Hopper fuzzer's main process, +//! it will setup the key modules, and generate and mutate inputs. 
+ +use std::{ + io::Write, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + time, +}; + +use eyre::Context; + +use crate::{config, depot::*, execute::*, feedback::*, fuzz::*, log, runtime::*, utils}; + +/// A really cool fuzzer +pub struct Fuzzer { + // -- Modules -- + pub executor: ForkCli, + pub observer: Observer, + pub depot: Depot, + // -- Stats -- + pub count: usize, + running: Arc, + rounds: usize, + stuck: usize, + start_at: time::Instant, + found_abort: bool, +} + +impl Fuzzer { + /// Setup a fuzzer! + pub fn new() -> eyre::Result { + let observer = Observer::new()?; + let executor = ForkCli::new(&observer.feedback)?; + let depot = Depot::new()?; + let running = new_running_state(); + let start_at = time::Instant::now(); + save_pid()?; + Ok(Self { + executor, + observer, + depot, + running, + rounds: 0, + count: 0, + stuck: 0, + start_at, + found_abort: false, + }) + } + + /// Main fuzz loop + pub fn fuzz_loop(&mut self) -> eyre::Result<()> { + // --- Pilot phase --- + // generate simple test cases for each functions, drawing skeletons of the inputs. + // pilot does not reuse statements, and only use single call + set_single_call(true); + set_reuse_stmt(false); + let is_restart = init_constraints()?; + self.executor.sync_all_configs()?; + if is_restart { + // sync existing seeds first + let _ = self.sync_depot()?; + } else { + // infer constraints at pilot phase + self.pilot_infer()?; + } + let density = self.observer.branches_state.get_coverage_density(); + if density > 15.0 { + log!( + warn, + "Density is too large (>= 15%)! Please try to increase map size !" + ); + } + + // --- Evolution phase --- + // randomly generate or mutates the statements, building diverse programs on the skeleton inputs. + while self.is_running() { + #[cfg(all(feature = "testsuite", not(test)))] + if self.check_testing()? { + break; + } + self.print_log(true); + let has_new = if config::ENABLE_MUTATE && cond_likely(self.rounds > 2500) { + self.mutate_round()? 
+ } else { + self.generate_round()? + }; + self.check_stuck(has_new); + self.rounds += 1; + } + + self.print_log(false); + Ok(()) + } + + /// Check if the fuzzer is stuck or not. + /// Stuck indicates that the fuzzer has found nothing after N rounds. + /// We will set `single_call` to false , and `resuse_stmt` to true after stuck. + fn check_stuck(&mut self, has_new: bool) { + if has_new { + self.stuck = 0; + } else { + if self.stuck > config::ROUND_STUCK_NUM && is_single_call() { + log!( + info, + "Generate test with single call is stuck now ! Start generate multiple calls!" + ); + set_single_call(false); + set_reuse_stmt(true); + enable_call_det(); + } + self.stuck += 1; + } + } + + /// Pilot for specific function + pub fn pilot_generate_func(&mut self, f_name: &str) -> eyre::Result<()> { + log!(info, "pilot function: {f_name}"); + let mut fail_cnt = 0; + let prev_size = self.depot.inputs.size(); + for i in 0..config::ROUND_WARM_UP_NUM { + if !self.is_running() { + break; + } + log!(trace, "{i}-th generation in pilot {f_name}"); + set_incomplete_gen(false); + let program = FuzzProgram::generate_program_for_func(f_name)?; + // ignore some programs + if program.stmts.len() > config::MAX_STMTS_LEN || is_incomplete_gen() { + continue; + } + let mut save = true; + if let Some(target) = config::get_config().func_target { + if f_name != target { + save = false; + } + } + let (status, _) = self.run_program(&program, false, save)?; + // We skip the seed if it is easy to become failure after mutation, + if !status.is_normal() { + fail_cnt += 1; + if fail_cnt >= config::MAX_ROUND_FAIL_NUM { + log!( + warn, + "Pilot `{}` fail ! 
the func is easy to crash or hangs!", + f_name + ); + set_function_constraint_with(f_name, |fc| fc.can_succeed = false)?; + break; + } + } + } + log!( + info, + "find {} new seeds, failed {fail_cnt} times", + self.depot.inputs.size() - prev_size + ); + Ok(()) + } + + /// Check testing invoking + #[cfg(all(feature = "testsuite", not(test)))] + fn check_testing(&self) -> eyre::Result { + let (need_check, mut pass) = crate::check_contraints_in_testsuite(); + if need_check { + if std::env::var("TESTSUITE_ABORT").is_ok() && !self.found_abort { + pass = false; + } + } else { + // do not need check + pass = self.found_abort; + } + if pass { + log!(warn, "Test success at {}-th round!", self.rounds); + eyre::bail!(crate::HopperError::TestSuccess); + } else if self.rounds >= 500 { + log!(warn, "Test fail at {}-th round!", self.rounds); + return Ok(true); + } + Ok(false) + } + + /// Generate round + fn generate_round(&mut self) -> eyre::Result { + log!(debug, "start generation in round {}", self.rounds); + let mut round_has_new = false; + // functions that will choose to generate + let mut candidates = None; + // enable generate program for function failed to run. 
+ let mut enable_fail = false; + // if we stuck in generate or mutate none new inputs, try to generate + // inputs for rarely or failed targets + if self.stuck > config::ROUND_STUCK_NUM { + if coin() { + // select rarely functions + candidates = self.observer.op_stat.get_rarely_fuzz_targets(); + } + if config::enable_gen_fail() { + enable_fail = true; + } + } + for i in 0..config::ROUND_GENERATE_NUM { + if !self.is_running() { + break; + } + log!(trace, "{i}-th generation in round {}", self.rounds); + set_incomplete_gen(false); + let program = FuzzProgram::generate_program(candidates.as_ref(), enable_fail)?; + // ignore some programs + if program.stmts.len() > config::MAX_STMTS_LEN || is_incomplete_gen() { + continue; + } + let (_, has_new) = self.run_program(&program, false, true)?; + if has_new { + round_has_new = true; + } + } + Ok(round_has_new) + } + + /// Mutate round + fn mutate_round(&mut self) -> eyre::Result { + let mut round_has_new = false; + let seed = self.depot.select_seed(); + if seed.is_none() { + return Ok(false); + } + let seed = seed.unwrap(); + let mut fail_cnt = 0; + let weight_sum = get_weight_sum(&seed.stmts); + let mut max = config::ROUND_MUTATE_NUM; + // if the program is simple, which does not deserve too many mutations + if weight_sum < 20 { + max /= 2; + } + let mut i = 0; + loop { + if !self.is_running() || i >= max { + break; + } + i += 1; + crate::log_trace!( + "{i}-th mutation for seed {} in round {}", + seed.id, + self.rounds + ); + set_incomplete_gen(false); + let mut p = seed.clone(); + p.mutate_program() + .with_context(|| p.serialize_all().unwrap())?; + // skip program without mutation + if p.ops.is_empty() || p.stmts.len() > config::MAX_STMTS_LEN || is_incomplete_gen() { + continue; + } + let (status, has_new) = self.run_program(&p, false, true)?; + if has_new { + round_has_new = true; + // give the seed more power if we have found something new from it. 
+ // if max < 3 * config::ROUND_MUTATE_NUM && !status.is_timeout() { + // max += config::ROUND_MUTATE_NUM; + // } + } + // We skip the seed if it is easy to become failure after mutation, + if !status.is_normal() { + fail_cnt += 1; + if fail_cnt >= config::MAX_ROUND_FAIL_NUM { + log!(debug, "the seed is easy to crash or hangs after mutation!"); + break; + } + } + } + Ok(round_has_new) + } + + /// Sync depot from existing files. + fn sync_depot(&mut self) -> eyre::Result { + let inputs = self.depot.inputs.read_dir()?; + for f in inputs { + if !self.is_running() { + break; + } + log!(info, "sync existing inputs {:?}", f); + let buf = std::fs::read_to_string(&f)?; + let program = read_program(&buf, false)?; + self.run_program(&program, true, true)?; + } + // do not update their coverage + self.depot.inputs.update_size()?; + self.depot.hangs.update_size()?; + self.depot.crashes.update_size()?; + if self.depot.queue.is_empty() { + Ok(false) + } else { + Ok(true) + } + } + + /// Counting time for program' exeution + fn count_time(&mut self, program: &FuzzProgram) -> eyre::Result> { + // try to run multiple times, to check if it stable or not + let t_now = time::Instant::now(); + for _ in 0..config::RE_RUN_TIMES { + let status = self.executor.execute_program_fast(program)?; + // We do not like seeds may crash or hangs + if !status.is_normal() { + crate::log_warn!("fail to count time for program, status: {status:?}"); + crate::log_trace!("program: {program}"); + return Ok(None); + // it is not fit our `fast` mode, which may affect some global states or just not stable. 
+ } + } + + Ok(Some( + t_now.elapsed().as_micros() / config::RE_RUN_TIMES as u128, + )) + } + + /// Run program, and save in depot if it has new feedback + fn run_program( + &mut self, + program: &FuzzProgram, + sync: bool, + save: bool, + ) -> eyre::Result<(StatusType, bool)> { + log!(trace, "run #program-{}", self.count); + log!(trace, "{}", program.serialize_all()?); + self.count += 1; + let status = self.executor.execute_program_fast(program)?; + let mut has_new = false; + if status.is_normal() { + has_new = self.handle_new_seed(program, status, sync, save)?; + // try to find out the relationship between op value and cmp. + self.observer.infer_cmp(program)?; + } else if status.is_ignore() { + log!(warn, "program is ignore: {}", program.serialize_all()?); + } else { + has_new = self.handle_new_crash(program, status, sync, save)?; + } + self.observer.op_stat.count_ops(program, status, has_new); + Ok((status, has_new)) + } + + pub fn handle_new_seed( + &mut self, + program: &FuzzProgram, + status: StatusType, + sync: bool, + save: bool, + ) -> eyre::Result { + let new_edges = self.observer.has_new_path(status)?; + if new_edges.is_empty() { + return Ok(false); + } + + // run again to check path to avoid some call in prev iteration modify environment, e.g. global variables. 
+ let status = self.executor.execute_program(program)?; + self.count += 1; + let new_edges = self.observer.has_new_path(status)?; + if new_edges.is_empty() { + return Ok(false); + } + let mut fb_summary = self.observer.summary_feedback(status); + + let start_at = std::time::Instant::now(); + + // ---- initialize program that will be put on the queue + let mut p = program.clone_without_state()?; + eyre::ensure!( + p.stmts.len() == program.stmts.len() || (p.parent.is_some() && p.ops.is_empty()), + "inconsistent program: {program}" + ); + let id = self.depot.fetch_id(status); + p.id = id; + + // ----- infer new constraints for the seed + if !self.seed_infer(&p)?.is_empty() { + p.refine_program()?; + } + + // ----- minimize input + let _changed = self.minimize(&mut p, &status)?; + let min_secs = start_at.elapsed().as_secs_f32(); + + // ----- update coverage + crate::log!(trace, "id: {id}, new edges: {new_edges:?}"); + self.observer.merge_coverage(&new_edges, status); + + // ----- count time for caculate speed for the seed + let Some(time_used) = self.count_time(&p)? else { + return Ok(true); + }; + fb_summary.time_used = time_used; + let count_secs = start_at.elapsed().as_secs_f32() - min_secs; + + // ----- review the program to get more information + let status = self.executor.review_program(&p)?; + + if !status.is_normal() { + log!(warn, "fail to review program, id: {id}"); + if save { + self.depot.save(StatusType::Normal, &p, sync)?; + } + } else { + if save { + // save to disk + self.depot.save(status, &p, sync)?; + } else { + crate::log!(warn, "skip save queue id {id}"); + } + p.attach_with_review_result()?; + p.update_weight(); + // collect effective arguments + self.collect_effective_args(&p, &new_edges)?; + + // set calls to be trackable, and track 'un-track' functions + // it should do at the end!! 
+ if p.set_calls_track_cov(true) { + // update coverage if has any 'un-track' function + log!(trace, "has un-track function, try to find new blocks."); + let status = self.executor.execute_program(&p)?; + self.observer + .update_summary(&mut fb_summary, status); + let new_edges_all = self.observer.has_new_path(status)?; + self.observer.merge_coverage(&new_edges_all, status); + } + if save { + // save to queue + p.parent = Some(p.id); + p.ops.clear(); + p.rng = None; + p.mutate_flag = 0; + self.depot.push_queue(p, &fb_summary)?; + } + } + + let review_secs = start_at.elapsed().as_secs_f32() - count_secs; + let cur_i = self.count; + crate::log_trace!("find new input at {cur_i} , min: {min_secs}s, count: {count_secs}, review: {review_secs}"); + + Ok(true) + } + + pub fn handle_new_crash( + &mut self, + program: &FuzzProgram, + status: StatusType, + sync: bool, + save: bool, + ) -> eyre::Result { + if status.is_abort() { + self.found_abort = true; + } + let mut fail_at = self.observer.feedback.last_stmt_index(); + let mut assert_failure = false; + let mut p = program.clone(); + p.ops = program.ops.clone(); + + // change failure stmt to the call stmt + if let Some(FuzzStmt::Assert(assert)) = p.stmts.get(fail_at).map(|is| &is.stmt) { + log!(trace, "assert failure"); + assert_failure = true; + if let Some(stmt) = assert.get_stmt() { + fail_at = stmt.get(); + } + } + + log!(trace, "fail at: {}, status: {:?}", fail_at, status); + if let Some(call) = p.get_call_stmt_mut(fail_at) { + call.failure = true; + } else { + // Sometimes `fail_at` == stmts.len(), + // may be double free in rust-side + log!(debug, "fail at runtime!"); + return Ok(false); + } + + // used for deduplication. 
It only track the crash function + let mut p_debup = p.clone(); + p_debup.set_calls_track_cov(false); + p_debup.get_call_stmt_mut(fail_at).unwrap().track_cov = true; + // crate::log!(info, "before set failure: {}", program.serialize()?); + // re-run it, and only track the crash location + let re_status = self.executor.execute_program(&p_debup)?; + if re_status.is_normal() { + // it seems to be not stable + return Ok(false); + } + let new_edges = self.observer.get_new_uniq_path(status); + if new_edges.is_empty() { + return Ok(false); + } + self.observer.merge_coverage(&new_edges, status); + + let id = self.depot.fetch_id(status); + p.id = id; + + // Here we sanitize the program to mark the possible false positive results. + // We infer the constraint for crash or timeout + let new_constraints = if assert_failure { + vec![] + } else if status.is_crash() { + self.crash_infer(&p) + .with_context(|| format!("crash update constraint failed: {p}"))? + } else if status.is_timeout() { + self.timeout_infer(&p) + .with_context(|| format!("timeout update constraint failed: {p}"))? + } else { + vec![] + }; + if save { + self.executor.sanitize_program(&p)?; + let mut sanitize_result = SanitizeResult::conclusion(&p)?; + sanitize_result.add_violated_constraints(&new_constraints)?; + + self.depot.save(status, &p, sync)?; + self.depot + .add_appendix(status, id, &sanitize_result.to_string())?; + } + Ok(true) + } + + /// is the loop should continue or not? 
+ pub fn is_running(&self) -> bool { + self.running.load(Ordering::SeqCst) + } + + /// Print logs in terminal + fn print_log(&self, in_round: bool) { + if in_round && self.rounds % 50 != 0 { + return; + } + let dur = self.start_at.elapsed(); + let speed = utils::calculate_speed(self.count, dur); + let all_secs = dur.as_secs(); + let execute_log = format!( + "#round: {}, #exec: {} ({}), #speed: {:.2} ({})", + utils::format_count(self.rounds), + utils::format_count(self.count), + self.executor.usage.percent(all_secs), + speed, + self.executor.usage.avg_ms(), + ); + log!( + info, + "{} {}, {}, {}", + utils::format_time(all_secs), + self.depot, + self.observer.branches_state, + execute_log + ); + + #[cfg(feature = "verbose")] + { + log::info!(target: "{Status}", "{} {} {} {} {} {} {} {} {} {}", + dur.as_secs(), + self.depot.inputs.size(), + self.depot.crashes.size(), + self.depot.hangs.size(), + self.observer.branches_state.get_num_edge(), + self.observer.branches_state.get_coverage_density(), + self.rounds, + self.count, + speed, + self.executor.usage.avg_ms().trim_end_matches("ms"), + ); + log::info!(target: "{StatusOneShot}", "{} {} {} {} {} {} {} {} {} {}", + dur.as_secs(), + self.depot.inputs.size(), + self.depot.crashes.size(), + self.depot.hangs.size(), + self.observer.branches_state.get_num_edge(), + self.observer.branches_state.get_coverage_density(), + self.rounds, + self.count, + speed, + self.executor.usage.avg_ms().trim_end_matches("ms"), + ); + } + } +} + +/// Get a running state +fn new_running_state() -> Arc { + let running = Arc::new(AtomicBool::new(true)); + let r = running.clone(); + ctrlc::set_handler(move || { + log!(warn, "Ending Fuzzing."); + r.store(false, Ordering::SeqCst); + }) + .expect("Error setting SIGINT handler!"); + running +} + +fn save_pid() -> eyre::Result<()> { + let path = config::output_file_path("misc/pid"); + let mut f = std::fs::File::create(path)?; + let pid = std::process::id(); + f.write_all(pid.to_string().as_bytes())?; + 
crate::log!(info, "current pid: {}", pid); + Ok(()) +} + +impl Drop for Fuzzer { + fn drop(&mut self) { + if self.rounds == 0 { + return; + } + global_gadgets::get_instance() + .save_gadgets_to_file() + .unwrap(); + constraints::save_constraints_to_file().unwrap(); + effective::save_effective_args().unwrap(); + } +} diff --git a/hopper-core/src/lib.rs b/hopper-core/src/lib.rs new file mode 100644 index 0000000..6d90479 --- /dev/null +++ b/hopper-core/src/lib.rs @@ -0,0 +1,229 @@ +#![allow(ambiguous_glob_reexports)] + +mod config; +mod depot; +mod error; +mod execute; +mod feedback; +mod fuzz; +mod fuzzer; +mod runtime; +#[cfg(feature = "slices")] +pub mod slices; +#[cfg(test)] +mod test; +mod utils; + +pub use config::*; +pub use depot::*; +pub use error::*; +pub use execute::*; +pub use feedback::*; +pub use fuzz::*; +pub use fuzzer::Fuzzer; +pub use runtime::*; +pub use utils::*; + +/// Do some init work for harness +fn init_harness() -> eyre::Result<()> { + execute::install_signal_handler(); + reserve_fds(); + read_existing_opaue()?; + Ok(()) +} + +/// Reverse some fds, so they can't be allocated by the code. +/// We can set the variables to them, and then infer constraints. 
+fn reserve_fds() { + for fd in config::RESERVED_FD_MIN..=config::RESERVED_FD_MAX { + // check fd is used or not + if unsafe { libc::fcntl(fd, libc::F_GETFD) != -1 } + || std::io::Error::last_os_error().raw_os_error().unwrap_or(0) != libc::EBADF + { + continue; + } + // if not used, try to fill sth on it + unsafe { libc::dup2(0, fd) }; + // crate::log!(trace, "reserve fd: {fd}"); + } +} + +/// Run fork server to execute testing program +pub fn run_fork_server() -> eyre::Result<()> { + init_harness()?; + let mut fork_server = execute::ForkSrv::new()?; + fork_server.fork_loop() +} + +/// Run program from input file +pub fn run_program(file: &str, cmd: ForkCmd) -> eyre::Result<()> { + init_harness()?; + let start_at = std::time::Instant::now(); + crate::log!(info, "file: {}", file); + let mut feedback = feedback::Feedback::new()?; + // read + let buf = std::fs::read_to_string(file)?; + crate::log!(info, "program:\n{}", &buf); + let mut program = read_program(&buf, config::USE_CANARY)?; + feedback.clear(); + feedback::disable_coverage_feedback(); + // run + let p_start_at = std::time::Instant::now(); + let f = || match cmd { + ForkCmd::Sanitize => { + let ret = program.sanitize(); + if let Err(err) = &ret { + crate::log!(error, "call error: {:?}", err); + } + ret + } + ForkCmd::Review => { + let ret = program.review(); + if let Err(err) = &ret { + crate::log!(error, "call error: {:?}", err); + } + ret + } + ForkCmd::Execute => { + let start_at_inner = std::time::Instant::now(); + let ret = program.eval(); + if let Err(err) = &ret { + crate::log!(error, "call error: {:?}", err); + } + crate::log!( + trace, + "exec time(inner): {} micro seconds", + start_at_inner.elapsed().as_micros() + ); + ret + } + _ => Ok(()), + }; + let fork = !std::env::args().any(|f| f == "--nofork"); + if fork { + let mut executor = execute::Executor::default(); + let timeout_setting = + std::env::var(config::TIMEOUT_LIMIT_VAR).unwrap_or_else(|_| "1".to_string()); + let timeout_limit = 
std::time::Duration::from_secs(timeout_setting.parse()?); + crate::log!(info, "timeout setting: {:?}", timeout_limit); + executor.set_timeout(timeout_limit); + let status = executor.execute(f); + crate::log!(info, "status: {:?}", status); + if !status.is_normal() { + crate::log!(info, "segv addr: {:#02X}", { feedback.instrs.segv_addr }); + crate::log!(info, "rip addr: {:#02X}", { feedback.instrs.rip_addr }); + } + if let ForkCmd::Sanitize = cmd { + let sanitize_result = SanitizeResult::conclusion(&program)?; + crate::log!(info, "sanitize result: {:?}", sanitize_result); + } else if let ForkCmd::Review = cmd { + program.attach_with_review_result()?; + } + } else { + let _ret = execute::Executor::execute_fn(f); + }; + + let secs = p_start_at.elapsed().as_micros(); + crate::log!(info, "exec time: {} micro seconds", secs); + // feedback + crate::log!(info, "last stmt index: {}", feedback.last_stmt_index()); + let path = feedback.path.get_list(); + crate::log!(info, "path: {:?}", path); + let br = std::env::args().any(|f| f == "--br"); + if br { + let branches = GlobalBranches::load_from_file(); + let has_new = branches.has_new(&path, execute::StatusType::default()); + crate::log!(warn, "has_new: {}", !has_new.is_empty()); + } + let show_cmp = std::env::args().any(|f| f == "--cmp"); + if show_cmp { + feedback.instrs.cmp_iter(None).for_each(|c| c.log_cmp()); + } + crate::log!(info, "cmp_len: {}", feedback.instrs.cmp_len()); + crate::log!(info, "mem_len: {}", feedback.instrs.mem_len()); + crate::log!(info, "path_len: {}", path.len()); + let secs = start_at.elapsed().as_micros(); + crate::log!(info, "whole time: {} micro seconds", secs); + // Avoid freed objects drop again + std::mem::forget(program.stmts); + canary::clear_canary_protection(); + Ok(()) +} + +/// Run Hopper fuzzer +pub fn run_fuzzer() -> eyre::Result<()> { + check_gadgets()?; + let mut fuzzer = fuzzer::Fuzzer::new()?; + fuzzer.fuzz_loop() +} + +/// Check and print gadgets +fn check_gadgets() -> 
eyre::Result<()> { + let gadgets = global_gadgets::get_instance(); + crate::log!(info, "gadgets: {:?}", gadgets); + gadgets.check() +} + +/// Crate a fuzzer for debuging +pub fn create_fuzzer() -> eyre::Result { + let config = config::get_config_mut(); + config.timeout_limit = 1_u64; + let fuzzer = fuzzer::Fuzzer::new()?; + crate::init_constraints()?; + check_gadgets()?; + Ok(fuzzer) +} + +/// expose infer crash for debug +pub fn infer_crash(file: &str) -> eyre::Result<()> { + let mut fuzzer = create_fuzzer()?; + let buf = std::fs::read_to_string(file)?; + let program = crate::read_program(&buf, false)?; + let status = fuzzer.executor.execute_program(&program)?; + if status.is_normal() { + let list = fuzzer.seed_infer(&program)?; + crate::log!(info, "found constraints: {list:?}"); + } + if status.is_crash() { + let infer_length = std::env::args().any(|f| f == "--length"); + let infer_padding = std::env::args().any(|f| f == "--padding"); + if infer_length || infer_padding { + let fail_at = program.get_fail_stmt_index().unwrap().get(); + if let Some(mut crash_sig) = crate::get_crash_sig(Some(&program)) { + crash_sig.hash = fuzzer.observer.feedback.path.hash_trace(); + let c = if infer_length { + fuzzer.crash_infer_number_length(&program, fail_at, &crash_sig)? + } else { + fuzzer.infer_array_length(&program, fail_at, &crash_sig)? 
+ }; + crate::log!(info, "found constraints: {c:?}"); + } + return Ok(()); + } + let list = fuzzer.crash_infer(&program)?; + crate::log!(info, "found constraints: {list:?}"); + } + if status.is_timeout() { + let list = fuzzer.timeout_infer(&program)?; + crate::log!(info, "found constraints: {list:?}"); + } + Ok(()) +} + +/// expose minimize for debug +pub fn minimize_input(file: &str) -> eyre::Result<()> { + let mut fuzzer = create_fuzzer()?; + let buf = std::fs::read_to_string(file)?; + let mut program = crate::read_program(&buf, false)?; + crate::parse_program_extra(&buf, &mut program)?; + if let Some(parent) = program.parent { + let p = read_input_in_queue(parent)?; + fuzzer.depot.push_queue(p, &FeedbackSummary::default())?; + } + let status = fuzzer.executor.execute_program(&program)?; + let minimized = fuzzer.minimize(&mut program, &status)?; + if minimized { + crate::log!(info, "input is minimized"); + } + Ok(()) +} diff --git a/hopper-core/src/runtime/func.rs b/hopper-core/src/runtime/func.rs new file mode 100644 index 0000000..32c4f5b --- /dev/null +++ b/hopper-core/src/runtime/func.rs @@ -0,0 +1,164 @@ +//! Traits for function we are fuzzing + +use downcast_rs::Downcast; +use std::fmt; + +use super::*; +use crate::ObjGenerate; + +pub type IgnoredFnPointer = ::std::ffi::c_void; + +/// Function signature +pub trait FnSignature: 'static { + fn arg_type_names() -> Vec<&'static str>; + fn ret_type_name() -> Option<&'static str>; + fn canary_fn_pointer() -> Self; +} + +/// Fucntion that we can fuzz +pub trait FnFuzzable: 'static + Sync + Send + Downcast { + fn get_arg_type_names(&self) -> Vec<&'static str>; + fn get_ret_type_name(&self) -> Option<&'static str>; + fn eval(&self, args: &[&FuzzObject]) -> FuzzObject; + fn add_type_gadgets(&self, gadgets: &mut ProgramGadgets); +} + +downcast_rs::impl_downcast!(FnFuzzable); + +macro_rules! 
init_fuzz_fn { + ($($name: ident),*) => { + init_fuzz_fn!( @internal | => $($name),* | |); + init_fuzz_fn!( @internal unsafe | => $($name),* | |); + init_fuzz_fn!( @internal unsafe | extern "C" => $($name),* | |); + }; + ($($name: ident),*, ...) => { + init_fuzz_fn!( @internal unsafe | extern "C" => $($name),* | "..." | ...); + }; + ( @internal $($unsafe_mark:ident)? | $($abi_mark:ident)? $($abi_name:literal)? => $($name: ident),* | $($dots_literal: literal)? | $($dots: tt)?) => { + +impl FnSignature for $($unsafe_mark)? $($abi_mark)? $($abi_name)? fn ($($name),*$(,$dots)?) -> T { + fn arg_type_names() -> Vec<&'static str> { + vec![ $(std::any::type_name::<$name>(), )* $($dots_literal)?] + } + fn ret_type_name() -> Option<&'static str> { + if !T::is_void() { + return Some(std::any::type_name::()) + } + None + } + fn canary_fn_pointer() -> Self { + let ptr = canary::get_canary_begin(); + unsafe { std::mem::transmute::<*const u8, Self>(ptr) } + } +} + +impl FnFuzzable for $($unsafe_mark)? $($abi_mark)? $($abi_name)? fn ($($name),*$(,$dots)?) -> T { +#[allow(non_snake_case)] +fn eval(&self, args: &[&FuzzObject]) -> FuzzObject { + let self_ = *self; + let a: ($($name,)*) = ArgFrom::downcast(args); + let ( $($name,)* ) = a.clone(); + let r = $($unsafe_mark)? 
{ self_($($name),*) }; + Box::new(r) +} +fn add_type_gadgets(&self, _gadgets: &mut ProgramGadgets) { + $( + _gadgets.add_type::<$name>(); + )* + _gadgets.add_type::(); +} +fn get_arg_type_names(&self) -> Vec<&'static str> { + Self::arg_type_names() +} +fn get_ret_type_name(&self) -> Option<&'static str> { + Self::ret_type_name() +} +}}} + +init_fuzz_fn!(); +init_fuzz_fn!(A); +init_fuzz_fn!(A, B); +init_fuzz_fn!(A, B, C); +init_fuzz_fn!(A, B, C, D); +init_fuzz_fn!(A, B, C, D, E); +init_fuzz_fn!(A, B, C, D, E, F); +init_fuzz_fn!(A, B, C, D, E, F, G); +init_fuzz_fn!(A, B, C, D, E, F, G, H); +init_fuzz_fn!(A, B, C, D, E, F, G, H, I); +init_fuzz_fn!(A, B, C, D, E, F, G, H, I, J); +init_fuzz_fn!(A, B, C, D, E, F, G, H, I, J, K); +init_fuzz_fn!(A, B, C, D, E, F, G, H, I, J, K, L); + +init_fuzz_fn!(A, ...); +init_fuzz_fn!(A, B, ...); +init_fuzz_fn!(A, B, C, ...); +init_fuzz_fn!(A, B, C, D, ...); +init_fuzz_fn!(A, B, C, D, E, ...); +init_fuzz_fn!(A, B, C, D, E, F, ...); +init_fuzz_fn!(A, B, C, D, E, F, G, ...); +init_fuzz_fn!(A, B, C, D, E, F, G, H, ...); +init_fuzz_fn!(A, B, C, D, E, F, G, H, I, ...); +init_fuzz_fn!(A, B, C, D, E, F, G, H, I, J, ...); +init_fuzz_fn!(A, B, C, D, E, F, G, H, I, J, K, ...); +init_fuzz_fn!(A, B, C, D, E, F, G, H, I, J, K, L, ...); + +/// Cast vector of FuzzObject to a tuple with their original types +trait ArgFrom { + fn downcast(args: &[&FuzzObject]) -> Self; +} + +impl ArgFrom for () { + fn downcast(_args: &[&FuzzObject]) -> Self {} +} + +macro_rules! impl_cast_for_single_tuple { + ($(($type_param:ident, $tuple_index:tt),)*) => { + impl<$($type_param),*> ArgFrom for ($($type_param,)*) + where $($type_param: ObjFuzzable + ObjGenerate,)* + { + fn downcast(args: &[&FuzzObject]) -> ($($type_param,)*) { + ( $( + args.get($tuple_index).unwrap().downcast_ref::<$type_param>().map_or_else( + || *$type_param::cast_from(args.get($tuple_index).unwrap()), + |v| v.clone()), + )* ) + } + } + }; +} + +macro_rules! 
impl_cast_for_tuples { + (@internal [$($acc:tt,)*]) => { }; + (@internal [$($acc:tt,)*] ($type_param:ident, $tuple_index:tt), $($rest:tt,)*) => { + impl_cast_for_single_tuple!($($acc,)* ($type_param, $tuple_index),); + impl_cast_for_tuples!(@internal [$($acc,)* ($type_param, $tuple_index),] $($rest,)*); + }; + ($(($type_param:ident, $tuple_index:tt),)*) => { + impl_cast_for_tuples!(@internal [] $(($type_param, $tuple_index),)*); + }; +} + +impl_cast_for_tuples! { + (A, 0), + (B, 1), + (C, 2), + (D, 3), + (E, 4), + (F, 5), + (G, 6), + (H, 7), + (I, 8), + (J, 9), + (K, 10), + (L, 11), + (M, 12), +} + +impl fmt::Debug for dyn FnFuzzable { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("FuzzFn") + .field("arg", &self.get_arg_type_names()) + .field("ret", &self.get_ret_type_name()) + .finish() + } +} diff --git a/hopper-core/src/runtime/gadgets.rs b/hopper-core/src/runtime/gadgets.rs new file mode 100644 index 0000000..99445ac --- /dev/null +++ b/hopper-core/src/runtime/gadgets.rs @@ -0,0 +1,421 @@ +//! Gadgets for fuzzing +use std::{ + collections::{BTreeMap, HashMap, HashSet}, + fmt, +}; + +use crate::{runtime::*, utils, ObjGenerate}; + +/// Gadgets for generate programs. +/// +/// It includes functions, struct types, and so on. +pub struct ProgramGadgets { + /// Function gadgets used for create calls. + pub functions: BTreeMap, + /// Type gadgets used for create objects by `generate_new` or `deserialize` ... 
+ pub types: BTreeMap, + + /// Ret Graph: mapping return type -> function + pub ret_graph: HashMap<&'static str, Vec<&'static str>>, + /// Arg Graph: mapping arg type -> (function, index) + pub arg_graph: HashMap<&'static str, Vec<(&'static str, usize)>>, + + /// Opaque types + pub opaque_types: HashSet, + + /// Type alias mapping of struct fields + pub field_alias: HashMap<(&'static str, &'static str), &'static str>, + + /// Ty Strings + pub ty_strings: HashSet, +} + +/// Function Gadget +#[derive(Debug, Clone)] +pub struct FnGadget { + pub f_name: &'static str, + pub f: &'static dyn FnFuzzable, + pub arg_idents: &'static [&'static str], + pub arg_types: &'static [&'static str], + pub ret_type: Option<&'static str>, + pub alias_arg_types: Vec<&'static str>, + pub alias_ret_type: Option<&'static str>, +} + +impl Default for ProgramGadgets { + fn default() -> Self { + let mut gadgets = Self { + functions: BTreeMap::new(), + types: BTreeMap::new(), + ret_graph: HashMap::new(), + arg_graph: HashMap::new(), + opaque_types: HashSet::new(), + field_alias: HashMap::new(), + ty_strings: HashSet::new(), + }; + gadgets.init_primitive_type(); + gadgets + } +} + +impl ProgramGadgets { + /// Init type gadgets for primitive types + /// + /// These types could only used in pointers or cast cases. + pub fn init_primitive_type(&mut self) { + macro_rules! add_primitive_type { + ( $($name:ident),* ) => { + $( + self.add_type_with_pointer::<$name>(); + )* + } + } + add_primitive_type!(u8, i8, u16, i16, u32, i32, u64, i64, f32, f64, char, bool, RetVoid); + } + + /// Check gadgets are valid or not + pub fn check(&self) -> eyre::Result<()> { + if self.functions.is_empty() { + eyre::bail!("Can't find any function for gadgets!"); + } + if self.types.is_empty() { + eyre::bail!("Can't find any type for gadgets!"); + } + read_existing_opaue()?; + Ok(()) + } + + /// Add function gadget + /// + /// We also will add types they used to type gadgets map. 
+ pub fn add_function( + &mut self, + f_name: &'static str, + f: &'static dyn FnFuzzable, + arg_idents: &'static [&'static str], + alias_arg_types: &'static [&'static str], + alias_ret_type: Option<&'static str>, + ) { + // ignore func starts with "_" + if f_name.starts_with('_') { + return; + } + f.add_type_gadgets(self); + let fg = FnGadget { + f_name, + f, + arg_idents, + arg_types: f.get_arg_type_names().leak(), + ret_type: f.get_ret_type_name(), + alias_arg_types: Vec::from(alias_arg_types), + alias_ret_type, + }; + self.functions.insert(f_name.to_string(), fg); + } + + /// Get function caller by their function name + pub fn get_func_caller(&self, func_name: &str) -> eyre::Result<&dyn FnFuzzable> { + self.functions + .get(func_name) + .map(|fg| fg.f) + .ok_or_else(|| eyre::eyre!("Can't find any function caller for `{}`", func_name)) + } + + /// Get function gadget by their function name + pub fn get_func_gadget(&self, func_name: &str) -> eyre::Result<&FnGadget> { + self.functions + .get(func_name) + .ok_or_else(|| eyre::eyre!("Can't find any function gadget for `{}`", func_name)) + } + + /// Add opaque type + pub fn add_opaque_type(&mut self, ty: &str) { + crate::log_new_opaque(ty); + self.opaque_types.insert(ty.to_string()); + } + + /// Add type gadget + pub fn add_type(&mut self) { + let type_name = std::any::type_name::(); + if !self.types.contains_key(type_name) { + self.types + .insert(type_name.to_string(), FuzzTypeHolder::::builder()); + T::add_fields_to_gadgets(self); + } + } + + pub fn add_type_with_pointer(&mut self) { + self.add_type::(); + let type_name = std::any::type_name::(); + if !utils::is_pointer_type(type_name) { + self.types.insert( + utils::const_pointer_type(type_name), + FuzzTypeHolder::>::builder(), + ); + self.types.insert( + utils::mut_pointer_type(type_name), + FuzzTypeHolder::>::builder(), + ); + T::add_fields_to_gadgets(self); + } + } + + /// Add alias type for type inside struct + /// ident: field_name@sturct_name + pub fn 
add_field_alias_type( + &mut self, + ident: &'static str, + alias_type: &'static str, + ) { + let type_name = std::any::type_name::(); + let key = (ident, type_name); + if let Some(v) = self.field_alias.get_mut(&key) { + // if the alias has conflict + if alias_type != *v { + *v = "-"; + } + return; + } + self.field_alias.insert(key, alias_type); + } + + /// Get field's alias type + /// ident: field_name@struct_name + pub fn get_field_alias_type<'a>(&self, ident: &str, type_name: &'a str) -> &'a str { + if let Some(ty) = self.field_alias.get(&(ident, type_name)) { + // if there exists any conflict, return type without alias + if *ty != "-" { + return ty; + } + } + type_name + } + + /// Get object builder + pub fn get_object_builder<'a>( + &'a self, + type_name: &str, + ) -> eyre::Result<&'a FuzzObjectBuilder> { + self.types + .get(type_name) + .ok_or_else(|| eyre::eyre!("Can't find any type gadget for `{}`", type_name,)) + } + + /// Build some graphs for relationship between types and functions + pub fn build_graph(&mut self) { + // crate::log!(trace, "build graph"); + // should be done both in fuzzer and harness + for (type_name, builder) in &self.types { + if builder.is_opaque() { + self.opaque_types.insert(type_name.to_string()); + } + } + self.opaque_types.insert("hopper::runtime::FuzzVoid".into()); + + // ignore build graph in harness + if let Ok(path) = std::env::current_exe() { + if path.as_os_str().to_str().unwrap().ends_with("harness") { + return; + } + } + self.build_arg_and_ret_graph(); + } + + /// Build graph for function's arg and return + pub fn build_arg_and_ret_graph(&mut self) { + self.ret_graph.clear(); + for fg in self.functions.values() { + if let Some(ret_type) = fg.ret_type { + let list = self.ret_graph.entry(ret_type).or_default(); + list.push(fg.f_name); + } + if let Some(ret_type) = fg.alias_ret_type { + let list = self.ret_graph.entry(ret_type).or_default(); + list.push(fg.f_name); + } + } + self.arg_graph.clear(); + for fg in 
self.functions.values() { + for i in 0..fg.alias_arg_types.len() { + let arg_type = fg.arg_types[i]; + let alias_type = fg.alias_arg_types[i]; + if utils::is_pointer_type(arg_type) { + let list = self.arg_graph.entry(arg_type).or_default(); + list.push((fg.f_name, i)); + let list = self.arg_graph.entry(alias_type).or_default(); + list.push((fg.f_name, i)); + } + } + } + } + + pub fn save_gadgets_to_file(&self) -> eyre::Result<()> { + if cfg!(test) { + return Ok(()); + } + use std::io::Write; + let path = crate::config::output_file_path("misc/gadgets.log"); + let mut f = std::fs::File::create(path)?; + writeln!(f, "functions:")?; + for fg in &self.functions { + writeln!(f, "{fg:?}")?; + } + writeln!(f, "types:")?; + writeln!(f, "{:?}", self.types.keys().collect::>())?; + writeln!(f, "opaques:")?; + writeln!(f, "{:?}", self.opaque_types)?; + + writeln!(f, "field alias:").unwrap(); + writeln!(f, "{:?}", self.field_alias)?; + + writeln!(f, "ret graph:").unwrap(); + for r in &self.ret_graph { + writeln!(f, "{r:?}")?; + } + Ok(()) + } + + pub fn init_ty_strings(&mut self) { + self.types.keys().for_each(|ty| { + self.ty_strings.insert(ty.clone()); + }); + } +} + +impl fmt::Debug for ProgramGadgets { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut s = f.debug_struct("ProgramGadgets"); + s.field("functions", &self.functions.keys()); + s.field("types", &self.types.keys()); + s.finish() + } +} + +/// Hook for adding gadgets at runtime by ctor +#[cfg(feature = "ctor_hook")] +pub mod ctor_hook { + use super::ProgramGadgets; + + /// Global variable for gadgets that will fill data at ctor + pub static mut GADGETS: Option = None; + + // We assume gadgets can't be mutated while running, therefore it is safe to share between threads. 
+ unsafe impl Sync for ProgramGadgets {} +} + +/// Hook for adding gadgets using linkme approach +/// +// Use linkme to find all functions that handle gadgets during compilation and linking +// It's more secure and efficient that won't call ctor multiple times. +#[cfg(feature = "link_hook")] +pub mod link_hook { + use super::ProgramGadgets; + use linkme::distributed_slice; + use once_cell::sync::OnceCell; + + #[distributed_slice] + pub static HOPPER_FN_GADGET_PROVIDERS: [fn(&mut ProgramGadgets)] = [..]; + + #[distributed_slice(HOPPER_FN_GADGET_PROVIDERS)] + fn test_link_hook_works(_g: &mut ProgramGadgets) { + // Just for test and print some logs. + crate::log!(info, "link hook works!"); + } + + pub static GADGETS_INSTANCE: OnceCell = OnceCell::new(); +} + +/// Gadegets used as global variables. It can be accessed at anywhere. +pub mod global_gadgets { + use crate::ProgramGadgets; + + /// Get gadgets instance + /// + /// ProgramGadgets are initialized at this function. + #[cfg(feature = "link_hook")] + pub fn get_instance() -> &'static ProgramGadgets { + use super::link_hook; + link_hook::GADGETS_INSTANCE.get_or_init(|| { + let mut gadgets = ProgramGadgets::default(); + for provider in link_hook::HOPPER_FN_GADGET_PROVIDERS { + provider(&mut gadgets); + } + gadgets + }) + } + + /// Get gadgets instance + /// + /// ProgramGadgets are initialized at ctor. This function just used for fetch the data. + /// It can be called once in ctor feature since we take and return the ownershop directly. 
+ #[cfg(feature = "ctor_hook")] + pub fn get_instance() -> &'static ProgramGadgets { + use super::ctor_hook; + unsafe { + if ctor_hook::GADGETS.is_none() { + ctor_hook::GADGETS = Some(ProgramGadgets::default()); + } + ctor_hook::GADGETS.as_ref().unwrap() + } + } + + #[cfg(feature = "ctor_hook")] + pub fn get_mut_instance() -> &'static mut ProgramGadgets { + use super::ctor_hook; + unsafe { + if ctor_hook::GADGETS.is_none() { + ctor_hook::GADGETS = Some(ProgramGadgets::default()); + } + ctor_hook::GADGETS.as_mut().unwrap() + } + } +} + +#[test] +fn test_gadgets() { + // gadgets_test_setup::test_setup(); + let instance = global_gadgets::get_instance(); + assert!(instance.check().is_ok()); + let func = instance.get_func_caller("func_add").unwrap(); + assert_eq!(func.get_arg_type_names(), &["u8", "u8"]); + assert_eq!(func.get_ret_type_name(), Some("u8")); + assert!(instance.get_object_builder("u64").is_ok()); +} + +// logging for constraint updates +pub fn log_new_opaque(content: &str) { + #[cfg(test)] + { + print!("log opaque types: {content}"); + } + #[cfg(not(test))] + { + use std::io::prelude::*; + let path = crate::config::output_file_path("misc/opaque.log"); + let mut f = std::fs::OpenOptions::new() + .append(true) + .create(true) + .open(path) + .unwrap(); + writeln!(f, "{content}").unwrap(); + } +} + +pub fn read_existing_opaue() -> eyre::Result<()> { + if cfg!(test) { + return Ok(()); + } + let path = crate::config::output_file_path("misc/opaque.log"); + if !path.exists() { + return Ok(()); + } + use std::io::prelude::*; + let buf = std::fs::read(path)?; + for line in buf.lines() { + let ty= line?; + if !ty.is_empty() { + global_gadgets::get_mut_instance().opaque_types.insert(ty); + } + } + Ok(()) +} diff --git a/hopper-core/src/runtime/loc.rs b/hopper-core/src/runtime/loc.rs new file mode 100644 index 0000000..385b399 --- /dev/null +++ b/hopper-core/src/runtime/loc.rs @@ -0,0 +1,664 @@ +use std::fmt; + +use super::*; + +/// Field key +/// use for describe the 
place/location of a value inside a *statement* +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum FieldKey { + Index(usize), + Field(String), + Root(String), + Pointer, + Option, +} + +impl FieldKey { + pub fn as_str(&self) -> eyre::Result<&str> { + match self { + FieldKey::Root(s) => Ok(s), + FieldKey::Field(s) => Ok(s), + FieldKey::Index(_i) => Ok("$index"), + FieldKey::Pointer => Ok("$ptr"), + FieldKey::Option => Ok("$opt"), + } + } + + pub fn as_usize(&self) -> eyre::Result { + match self { + FieldKey::Index(i) => Ok(*i), + _ => eyre::bail!("field is not index!"), + } + } + + pub fn is_index(&self) -> bool { + matches!(self, FieldKey::Index(_)) + } + + pub fn union_root() -> Self { + Self::Root(crate::UNION_ROOT.to_string()) + } + + pub fn is_union_root(&self) -> bool { + if let FieldKey::Root(tag) = self { + return tag == crate::UNION_ROOT; + } + false + } +} + +impl From for FieldKey { + fn from(i: usize) -> Self { + FieldKey::Index(i) + } +} + +impl From for FieldKey { + fn from(f: String) -> Self { + FieldKey::Field(f) + } +} + +impl From<&str> for FieldKey { + fn from(f: &str) -> Self { + FieldKey::Field(f.to_string()) + } +} + +pub trait FieldEqual { + fn eq_field(&self, entry: &FieldKey) -> bool; + fn as_field_key(&self) -> FieldKey; +} + +impl FieldEqual for usize { + fn eq_field(&self, entry: &FieldKey) -> bool { + match entry { + FieldKey::Index(i) => i == self, + _ => false, + } + } + fn as_field_key(&self) -> FieldKey { + FieldKey::Index(*self) + } +} + +impl FieldEqual for &str { + fn eq_field(&self, entry: &FieldKey) -> bool { + match entry { + FieldKey::Field(f) => f == self, + _ => false, + } + } + fn as_field_key(&self) -> FieldKey { + FieldKey::Field(self.to_string()) + } +} + +impl FieldEqual for &FieldKey { + fn eq_field(&self, entry: &FieldKey) -> bool { + *self == entry + } + fn as_field_key(&self) -> FieldKey { + (*self).clone() + } +} + +/// Location +/// use for describe the place/location of a value inside a *program* 
+#[derive(Debug, Clone, PartialEq)] +pub struct Location { + pub stmt_index: Option, + pub fields: LocFields, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct WeakLocation { + pub stmt_index: Option, + pub fields: LocFields, +} + +pub trait RcLocation { + /// Get the index of statement + fn get_index(&self) -> eyre::Result<&dyn RcIndex>; +} + +#[derive(Debug, Clone, PartialEq, Eq, Default, Hash)] +pub struct LocFields { + pub list: Vec, +} + +impl LocFields { + pub fn new(list: Vec) -> Self { + Self { list } + } + + pub fn push(&mut self, item: FieldKey) { + self.list.push(item); + } + + pub fn pop(&mut self) { + self.list.pop(); + } + + pub fn as_slice(&self) -> &[FieldKey] { + self.list.as_slice() + } + + pub fn len(&self) -> usize { + self.list.len() + } + + pub fn is_empty(&self) -> bool { + self.list.is_empty() + } +} + +impl RcLocation for Location { + fn get_index(&self) -> eyre::Result<&dyn RcIndex> { + Ok(self + .stmt_index + .as_ref() + .ok_or_else(|| eyre::eyre!("loc is null"))?) + } +} + +impl RcLocation for WeakLocation { + fn get_index(&self) -> eyre::Result<&dyn RcIndex> { + Ok(self + .stmt_index + .as_ref() + .ok_or_else(|| eyre::eyre!("loc is null"))?) + } +} + +impl Location { + /// Create new location + pub fn new(stmt_index: StmtIndex, fields: LocFields) -> Self { + Self { + stmt_index: Some(stmt_index), + fields, + } + } + + /// Create a null location + pub fn null() -> Self { + Self { + stmt_index: None, + fields: LocFields::default(), + } + } + + /// Create a location directing to a statement + pub fn stmt(stmt_index: StmtIndex) -> Self { + Self { + stmt_index: Some(stmt_index), + fields: LocFields::default(), + } + } + + /// Use this location in other places. 
+ pub fn use_loc(&self) -> Self { + Self { + stmt_index: self.stmt_index.as_ref().map(|i| i.use_index()), + fields: self.fields.clone(), + } + } + + /// Duplicate a new loc with the same index (but with different reference) + pub fn dup(&self) -> Self { + Self { + stmt_index: self.stmt_index.as_ref().map(|i| i.dup()), + fields: self.fields.clone(), + } + } + + /// Set this location with a new statement index + pub fn set_index(&mut self, index: StmtIndex) { + self.stmt_index = Some(index); + } + + /// Is the location is null or not + pub fn is_null(&self) -> bool { + self.stmt_index.is_none() + } + + /// Convert to weak location + pub fn to_weak_loc(&self) -> WeakLocation { + WeakLocation { + stmt_index: self.stmt_index.as_ref().map(|i| i.downgrade()), + fields: self.fields.clone(), + } + } + + /// Compare with weak location + pub fn compare_weak(&self, weak_loc: &WeakLocation) -> bool { + let weak_index = weak_loc.stmt_index.as_ref().map(|i| i.upgrade().unwrap()); + self.stmt_index == weak_index && self.fields == weak_loc.fields + } +} + +impl WeakLocation { + /// Create a null location + pub fn null() -> Self { + Self { + stmt_index: None, + fields: LocFields::default(), + } + } + + /// Is the location is null or not + pub fn is_null(&self) -> bool { + self.stmt_index.is_none() + } + + /// Set this location with a new statement index + pub fn set_index(&mut self, index: StmtIndex) { + self.stmt_index = Some(index.downgrade()); + } + + /// Check if the location is released or not + pub fn is_released(&self) -> bool { + if let Some(index) = &self.stmt_index { + return index.is_released(); + } + false + } +} + +const FIELD_SEP: &str = "."; + +impl Serialize for LocFields { + fn serialize(&self) -> eyre::Result { + let mut buf = String::new(); + buf.push('['); + for entry in self.list.iter() { + if buf.len() > 1 { + buf.push_str(FIELD_SEP) + } + buf.push_str(&entry.serialize()?); + } + buf.push(']'); + Ok(buf) + } +} + +impl Serialize for FieldKey { + fn 
serialize(&self) -> eyre::Result { + let buf = match self { + FieldKey::Index(i) => format!("${i}"), + FieldKey::Field(f) => f.to_string(), + FieldKey::Root(i) => format!("@{i}"), + FieldKey::Pointer => "&".to_string(), + FieldKey::Option => "?".to_string(), + }; + Ok(buf) + } +} + +impl LocFields { + /// Append suffix after it + pub fn with_suffix(&self, suffix: LocFields) -> Self { + let mut full = self.clone(); + full.list.extend(suffix.list); + full + } + + /// Strip pointer field if has + pub fn strip_pointer_suffix(&mut self) -> bool { + if self.list.last() == Some(&FieldKey::Pointer) { + self.pop(); + true + } else { + false + } + } + /// Strip index field after pointer field if has + pub fn strip_index_suffix(&mut self) -> bool { + if self + .list + .as_slice() + .ends_with(&[FieldKey::Pointer, FieldKey::Index(0)]) + { + self.pop(); + true + } else { + false + } + } + + /// Get a location for refining + pub fn to_loc_for_refining( + &self, + program: &FuzzProgram, + index: &StmtIndex, + prefix: &LocFields, + ) -> Option { + // special for pointer argument + let mut sub_list: Vec<&[FieldKey]> = self.list.split(|f| f == &FieldKey::Pointer).collect(); + if sub_list.is_empty() { + return None; + } + let last_sub_list = sub_list.pop().unwrap(); + let mut suffix_fields = Vec::from(last_sub_list); + let mut index = index; + 'loop_sub: for i in 0..sub_list.len() { + let sub_fields = sub_list[i]; + // crate::log_trace!("cur_sub_list: {sub_fields:?}"); + match &program.stmts[index.get()].stmt { + FuzzStmt::Load(load) => match load.state.get_child_by_fields(sub_fields) { + Ok(sub_state) => { + if let Some(new_index) = sub_state.get_pointer_stmt_index() { + // crate::log_trace!("load {new_index} from {index}"); + index = new_index; + continue; + } + } + Err(err) => { + if matches!( + err, + crate::HopperError::UnionErr | crate::HopperError::IndexNotExist + ) { + return None; + } else { + unreachable!("err: {}", err); + } + } + }, + FuzzStmt::Call(_call_stmt) => { + 
crate::log_trace!("call {index}, find its updates"); + for is in &program.stmts[index.get()..] { + let FuzzStmt::Update(update_stmt) = &is.stmt else { + continue; + }; + let Some(dst_index) = update_stmt.dst.stmt_index.as_ref() else { + continue; + }; + if dst_index.get_uniq() != index.get_uniq() { + continue; + } + let fields = update_stmt.dst.fields.as_slice(); + let mut update_sub_list: Vec<&[FieldKey]> = + fields.split(|f| f == &FieldKey::Pointer).collect(); + update_sub_list.remove(0); // remove pointer key at the beginning + let remain_sub_list = &sub_list[i..]; + // adjust to update fields. + // all update fields are fields for a pointer, + // but the sub_list can be to any fields, including primitive types and structure. + // crate::log!(trace, "remain: {:?}", remain_sub_list); + // crate::log!(trace, "update: {:?}", update_sub_list); + if remain_sub_list.is_empty() + || update_sub_list.starts_with(remain_sub_list) + { + for update_fields in &update_sub_list[remain_sub_list.len()..] { + if update_fields.is_empty() { + continue; + } + if suffix_fields.as_slice().starts_with(update_fields) { + suffix_fields = suffix_fields.split_off(update_fields.len()); + } else { + // crate::log_trace!("suffix is not match, suffix is {suffix_fields:?}, buf update fields is: {update_fields:?}"); + return None; + } + } + // crate::log!(trace, "update suffix field: {suffix_fields:?}"); + index = &update_stmt.src; + break 'loop_sub; + } + } + + // crate::log_trace!("can't find any match update"); + return None; + } + _ => {} + } + return None; + } + // prefix is used for type refining + // it is a prefix for strcture instead of location for function. 
+ if !prefix.is_empty() { + let suffix = std::mem::replace(&mut suffix_fields, prefix.list.clone()); + suffix_fields.extend(suffix); + } + // check if fields exists + if let FuzzStmt::Load(load) = &program.stmts[index.get()].stmt { + if let Err(err) = load.state.get_child_by_fields(suffix_fields.as_slice()) { + if matches!( + err, + crate::HopperError::UnionErr | crate::HopperError::IndexNotExist + ) { + return None; + } else { + crate::log!(error, "index: {}", index.get()); + crate::log!(error, "load: {:?}", load.serialize().unwrap()); + crate::log!(error, "list: {:?} fields: {suffix_fields:?}", self.list); + crate::log!(error, "program: {}", program.serialize_all().unwrap()); + unreachable!("err: {}", err); + } + } + } else if !suffix_fields.is_empty() { + return None; + } + let loc = Location::new(index.use_index(), LocFields::new(suffix_fields)); + Some(loc) + } + + pub fn from_rule(fields_str: &str) -> eyre::Result { + let mut fields = vec![]; + if fields_str.is_empty() { + return Ok(Self::default()); + } + let fields_vec = fields_str.split(FIELD_SEP); + for f in fields_vec { + if f.is_empty() { + continue; + } + if let Some(rest) = f.strip_prefix('@') { + fields.push(FieldKey::Root(rest.to_string())); + } else if f == "&" { + fields.push(FieldKey::Pointer); + } else if f == "?" 
{ + fields.push(FieldKey::Option); + } else if let Some(rest) = f.strip_prefix('$') { + let index: usize = rest.parse()?; + fields.push(index.into()); + } else { + fields.push(f.trim_end_matches(&[')', ']']).to_string().into()); + } + } + Ok(Self { list: fields }) + } +} + +impl ObjectSerialize for LocFields { + fn serialize_obj(&self, _state: &ObjectState) -> eyre::Result { + self.serialize() + } +} + +impl Deserialize for FieldKey { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + let c = de.peek_char().ok_or_else(|| eyre::eyre!("has char"))?; + let key = if c == '@' { + let _ = de.next_char(); + let ident = de.parse_string()?; + FieldKey::Root(ident) + } else if c == '&' { + let _ = de.next_char(); + FieldKey::Pointer + } else if c == '?' { + let _ = de.next_char(); + FieldKey::Option + } else if c == '$' { + let _ = de.next_char(); + let index: usize = de.parse_number()?; + index.into() + } else { + de.parse_string()?.into() + }; + Ok(key) + } +} + +impl Deserialize for LocFields { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + de.eat_token("[")?; + let fields_str = de.next_token_until("]")?; + Self::from_rule(fields_str) + } +} + +impl ObjectDeserialize for LocFields { + fn deserialize_obj(de: &mut Deserializer, _state: &mut ObjectState) -> eyre::Result { + Self::deserialize(de) + } +} + +macro_rules! impl_location { + ($ty:ident, $index_ty:ident) => { + impl CloneProgram for $ty { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + Self { + stmt_index: self.stmt_index.clone_with_program(program), + fields: self.fields.clone(), + } + } + } + + impl Serialize for $ty { + fn serialize(&self) -> eyre::Result { + if let Some(stmt_index) = &self.stmt_index { + Ok(format!( + "{}{}", + stmt_index.serialize()?, + self.fields.serialize()? 
+ )) + } else { + self.fields.serialize() + } + } + } + + impl ObjectTranslate for $ty { + fn translate_obj_to_c( + &self, + _state: &ObjectState, + program: &FuzzProgram, + ) -> eyre::Result { + if self.is_null() { + return Ok("NULL".to_string()); + } + let index = self.get_index()?.get(); + let value_name = format!("v{}", index); + let mut fields = String::new(); + let mut prev_ptr = false; + for f in self.fields.list.iter() { + match f { + FieldKey::Index(i) => { + fields.push('['); + fields.push_str(&i.to_string()); + fields.push(']'); + prev_ptr = false; + } + FieldKey::Field(f) => { + if prev_ptr { + fields.push_str("->"); + } else { + fields.push('.'); + } + fields.push_str(f); + prev_ptr = false; + } + FieldKey::Pointer => { + prev_ptr = true; + } + _ => { + prev_ptr = false; + } + }; + } + // if it is point to vec + let mut is_vec = false; + if self.fields.is_empty() { + if let FuzzStmt::Load(load) = &program.stmts[index].stmt { + if crate::utils::is_vec_type(&load.value.type_name()) { + is_vec = true; + } + } + } + // if last field is pointer used in right side + // we should wrap with *({}) for pointer + // if it is used in left side (e.g update's location), + // we should ignore it + if is_vec || prev_ptr { + Ok(format!("{}{}", value_name, fields)) + } else if fields.is_empty() { + Ok(format!("&{}", value_name)) + } else { + Ok(format!("&({}{})", value_name, fields)) + } + } + } + + impl ObjectSerialize for $ty { + fn serialize_obj(&self, _state: &ObjectState) -> eyre::Result { + self.serialize() + } + } + + impl ObjectDeserialize for $ty { + fn deserialize_obj( + de: &mut Deserializer, + _state: &mut ObjectState, + ) -> eyre::Result { + Self::deserialize(de) + } + } + }; +} + +impl_location!(Location, StmtIndex); +impl_location!(WeakLocation, WeakStmtIndex); + +impl Serialize for Option<&Location> { + fn serialize(&self) -> eyre::Result { + if let Some(loc) = self { + loc.serialize() + } else { + Ok("null".to_string()) + } + } +} + +impl Deserialize 
for Location { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + if de.peek_char() == Some('<') { + let stmt_index = StmtIndex::deserialize(de)?; + let fields = LocFields::deserialize(de)?; + Ok(Self::new(stmt_index, fields)) + } else { + let mut loc = Location::null(); + loc.fields = LocFields::deserialize(de)?; + Ok(loc) + } + } +} + +impl Deserialize for WeakLocation { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + let loc = Location::deserialize(de)?; + Ok(loc.to_weak_loc()) + } +} + +impl std::hash::Hash for Location { + fn hash(&self, state: &mut H) { + self.stmt_index.as_ref().map(|i| i.get_uniq()).hash(state); + self.fields.hash(state); + } +} + +impl fmt::Display for LocFields { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.serialize().unwrap()) + } +} diff --git a/hopper-core/src/runtime/mod.rs b/hopper-core/src/runtime/mod.rs new file mode 100644 index 0000000..a54545f --- /dev/null +++ b/hopper-core/src/runtime/mod.rs @@ -0,0 +1,48 @@ +//! Runtime module +//! includes IR of functions, calls, variables .. of programs, +//! how they wil be executed(eval), +//! their trais for serde, fuzz(new, mutating).., +//! 
and stored them as gadgets +mod func; +#[macro_use] +mod object; +mod stmt; + +mod gadgets; +mod loc; +mod program; +mod serde; +mod translate; + +use std::marker::PhantomData; + +pub use func::*; +pub use gadgets::*; +pub use loc::*; +pub use object::*; +pub use program::*; +pub use serde::*; +pub use stmt::*; +pub use translate::*; +/// Custom void type +#[repr(transparent)] +#[derive(Debug, Default)] +pub struct FuzzVoid(u8); // just hold something useless + +/// Mutatable pointer: *mut +#[repr(transparent)] +#[derive(Debug)] +pub struct FuzzMutPointer(*mut T); + +/// Const pointer: *const +/// +/// We use *mut as inner data since it will be mutated by our fuzzer +#[repr(transparent)] +#[derive(Debug)] +pub struct FuzzConstPointer(*mut T); + +/// FrozenPointer: Always be NULL +/// +/// We use this kind of pointer to hold filtered function pointer type +#[repr(transparent)] +pub struct FuzzFrozenPointer(*const u8, PhantomData); \ No newline at end of file diff --git a/hopper-core/src/runtime/object/bitfield.rs b/hopper-core/src/runtime/object/bitfield.rs new file mode 100644 index 0000000..4f11913 --- /dev/null +++ b/hopper-core/src/runtime/object/bitfield.rs @@ -0,0 +1,133 @@ +use super::*; + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct HopperBindgenBitfieldUnit { + pub storage: Storage, +} + +impl ObjFuzzable for HopperBindgenBitfieldUnit {} + +impl ObjValue for HopperBindgenBitfieldUnit {} + +impl ObjType for HopperBindgenBitfieldUnit {} + +impl Serialize for HopperBindgenBitfieldUnit { + fn serialize(&self) -> eyre::Result { + Ok(format!("bitfield {}", self.storage.serialize()?)) + } +} + +impl ObjectSerialize for HopperBindgenBitfieldUnit { + fn serialize_obj(&self, state: &ObjectState) -> eyre::Result { + Ok(format!( + "bitfield {}", + self.storage.serialize_obj(state.last_child()?)? 
+ )) + } +} + +impl ObjectTranslate for HopperBindgenBitfieldUnit { + fn translate_obj_to_c( + &self, + state: &ObjectState, + program: &FuzzProgram, + ) -> eyre::Result { + self.storage.translate_obj_to_c(state.last_child()?, program) + } +} + +impl Deserialize for HopperBindgenBitfieldUnit { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + de.eat_token("bitfield")?; + let storage = Storage::deserialize(de)?; + Ok(Self{ storage }) + } +} + +impl ObjectDeserialize for HopperBindgenBitfieldUnit { + fn deserialize_obj(de: &mut Deserializer, state: &mut ObjectState) -> eyre::Result { + de.eat_token("bitfield")?; + let sub_state = state + .add_child(FieldKey::Field("storage".to_string()), std::any::type_name::()) + .last_child_mut()?; + let storage = Storage::deserialize_obj(de, sub_state)?; + Ok(Self{ storage }) + } +} + +impl HopperBindgenBitfieldUnit { + #[inline] + pub const fn new(storage: Storage) -> Self { + Self { storage } + } +} +impl HopperBindgenBitfieldUnit +where + Storage: AsRef<[u8]> + AsMut<[u8]>, +{ + #[inline] + pub fn get_bit(&self, index: usize) -> bool { + debug_assert!(index / 8 < self.storage.as_ref().len()); + let byte_index = index / 8; + let byte = self.storage.as_ref()[byte_index]; + let bit_index = if cfg!(target_endian = "big") { + 7 - (index % 8) + } else { + index % 8 + }; + let mask = 1 << bit_index; + byte & mask == mask + } + #[inline] + pub fn set_bit(&mut self, index: usize, val: bool) { + debug_assert!(index / 8 < self.storage.as_ref().len()); + let byte_index = index / 8; + let byte = &mut self.storage.as_mut()[byte_index]; + let bit_index = if cfg!(target_endian = "big") { + 7 - (index % 8) + } else { + index % 8 + }; + let mask = 1 << bit_index; + if val { + *byte |= mask; + } else { + *byte &= !mask; + } + } + #[inline] + pub fn get(&self, bit_offset: usize, bit_width: u8) -> u64 { + debug_assert!(bit_width <= 64); + debug_assert!(bit_offset / 8 < self.storage.as_ref().len()); + debug_assert!((bit_offset + (bit_width 
as usize)) / 8 <= self.storage.as_ref().len()); + let mut val = 0; + for i in 0..(bit_width as usize) { + if self.get_bit(i + bit_offset) { + let index = if cfg!(target_endian = "big") { + bit_width as usize - 1 - i + } else { + i + }; + val |= 1 << index; + } + } + val + } + #[inline] + pub fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) { + debug_assert!(bit_width <= 64); + debug_assert!(bit_offset / 8 < self.storage.as_ref().len()); + debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len()); + for i in 0..(bit_width as usize) { + let mask = 1 << i; + let val_bit_is_set = val & mask == mask; + let index = if cfg!(target_endian = "big") { + bit_width as usize - 1 - i + } else { + i + }; + self.set_bit(index + bit_offset, val_bit_is_set); + } + } +} \ No newline at end of file diff --git a/hopper-core/src/runtime/object/builder.rs b/hopper-core/src/runtime/object/builder.rs new file mode 100644 index 0000000..e5d49ca --- /dev/null +++ b/hopper-core/src/runtime/object/builder.rs @@ -0,0 +1,77 @@ +//! FuzzObjectBuilder is used to generate objects at runtime by theit types, +//! The builds will be stored at global gadgets for usage at anywhere. 
+ +use std::{marker::PhantomData, collections::HashMap}; + +use crate::{fuzz::*, runtime::*}; + +/// Hold the type of objects +pub struct FuzzTypeHolder(PhantomData); + +/// Use for build new objects by its type, it is a wrapper of holder +pub type FuzzObjectBuilder = Box; + +unsafe impl Sync for FuzzTypeHolder {} +unsafe impl Send for FuzzTypeHolder {} + +/// Trait for build new objects by its type at runtime +pub trait DynBuildFuzzObject: Sync + Send { + /// Desrialize the value the trait hold + fn deserialize(&self, de: &mut Deserializer, state: &mut ObjectState) -> eyre::Result; + /// Desrialize a vector of the value that the trait hold + fn deserialize_vec(&self, de: &mut Deserializer, state: &mut ObjectState) + -> eyre::Result; + /// Generate the value the trait hold + fn generate_new(&self, state: &mut ObjectState) -> eyre::Result; + /// Generate a vector of the value that the trait hold + fn generate_vec(&self, state: &mut ObjectState) -> eyre::Result; + /// Get size of Object + fn mem_size(&self) -> usize; + /// Is opaque + fn is_opaque(&self) -> bool; + /// fields ty + fn get_fields_ty(&self) -> HashMap; +} + +impl FuzzTypeHolder { + pub fn builder() -> FuzzObjectBuilder { + Box::new(Self(PhantomData)) + } + pub fn as_builder(self) -> FuzzObjectBuilder { + Box::new(self) + } +} + +impl DynBuildFuzzObject for FuzzTypeHolder { + fn deserialize(&self, de: &mut Deserializer, state: &mut ObjectState) -> eyre::Result { + let val = T::deserialize_obj(de, state)?; + Ok(Box::new(val)) + } + fn deserialize_vec( + &self, + de: &mut Deserializer, + state: &mut ObjectState, + ) -> eyre::Result { + if de.canary { + Ok(Box::new(CanarySlice::::deserialize_obj(de, state)?)) + } else { + Ok(Box::new(Vec::::deserialize_obj(de, state)?)) + } + } + + fn generate_new(&self, state: &mut ObjectState) -> eyre::Result { + Ok(Box::new(T::generate_new( state)?)) + } + fn generate_vec(&self, state: &mut ObjectState) -> eyre::Result { + Ok(Box::new(Vec::::generate_new( state)?)) + } + 
fn mem_size(&self) -> usize { + std::mem::size_of::() + } + fn is_opaque(&self) -> bool { + T::is_opaque() + } + fn get_fields_ty(&self) -> HashMap { + T::get_fields_ty() + } +} diff --git a/hopper-core/src/runtime/object/canary.rs b/hopper-core/src/runtime/object/canary.rs new file mode 100644 index 0000000..1c2aef4 --- /dev/null +++ b/hopper-core/src/runtime/object/canary.rs @@ -0,0 +1,325 @@ +//! Adding canary after each vector we created. +//! Canary is protected by page access permission +//! + +use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Once, +}; + +use hopper_derive::Serde; + +use crate::{config, fuzz::*, runtime::*}; + +/// Slice warp with canary. +/// The memory blocks are allocated by mmap, +/// and we manage them by ourselfves. +/// Thus, it can't be freed by `free` function. +/// We hook `free`, and filter out pointers starts within +/// the range [MEM_PTR, MEM_PTR + MEM_AREA_SIZE) +pub struct CanarySlice { + pub ptr: *mut T, + pub len: usize, + pub canary: *const u8, +} + +pub static MEM_OFFSET: AtomicUsize = AtomicUsize::new(0); +static MEM_INIT: Once = Once::new(); + +#[inline] +pub fn is_in_canary(addr: *mut u8) -> bool { + let addr = addr as usize; + addr >= config::CANARY_PTR as usize + && addr < config::CANARY_PTR as usize + config::CANARY_AREA_SIZE +} + +#[inline] +pub fn get_canary_begin() -> *const u8 { + config::CANARY_PTR +} + +#[derive(Debug, Serde, Clone)] +pub struct CanaryInfo { + pub stmt_index: usize, + pub len: usize, +} + +/// Simulate the canary allocation, and find pointer address among the canaries. 
+pub fn find_ptr_in_canary(program: &FuzzProgram, addr: *mut u8) -> Option { + let page_size = region::page::size() as u64; + // add page_size since there it an mock canary at beginning + let mut offset = config::CANARY_PTR as u64 + page_size; + let addr = addr as u64; + for is in &program.stmts { + if let FuzzStmt::Load(load) = &is.stmt { + if crate::utils::is_vec_type(load.value.type_name()) { + let len = load.state.children.len(); + if len == 0 { + continue; + } + let ele_ty = load.state.children[0].ty; + let ele_size = global_gadgets::get_instance() + .get_object_builder(ele_ty) + .unwrap() + .mem_size() as u64; + let mem_size = ele_size * len as u64; + let n = mem_size / page_size; + let next_canary = offset + (n + 1) * page_size; + offset = next_canary + page_size; + // crate::log!(trace, "stmt {} addr: {} - {}", is.index.get(), next_canary, offset); + if addr >= next_canary && addr < offset { + return Some(CanaryInfo { + stmt_index: is.index.get(), + len, + }); + } + } + } + } + None +} + +pub fn clear_canary_protection() { + let offset = MEM_OFFSET.load(Ordering::SeqCst); + if offset > 0 { + unsafe { + region::protect( + get_canary_begin() as *mut std::ffi::c_void, + config::CANARY_AREA_SIZE, + region::Protection::READ_WRITE_EXECUTE, + ) + .unwrap(); + } + protect_first_page(); + } +} + +fn protect_first_page() { + let page_size = region::page::size(); + unsafe { + region::protect( + get_canary_begin() as *mut std::ffi::c_void, + page_size, + region::Protection::NONE, + ) + .unwrap(); + } + MEM_OFFSET.store(page_size, Ordering::SeqCst); +} + +impl CanarySlice { + fn new(len: usize) -> eyre::Result { + let page_size = region::page::size(); + MEM_INIT.call_once(|| { + // we assume the page size is 4KB + if page_size > 4096 { + crate::log!( + warn, + "Page size `{page_size}`is larger than 4096, which may cause error in canary!" 
+ ); + } + let mem = region::alloc_at( + config::CANARY_PTR, + config::CANARY_AREA_SIZE, + region::Protection::READ_WRITE_EXECUTE, + ) + .unwrap(); + crate::log!( + trace, + "init memory area for canary: {:?}", + mem.as_ptr::() + ); + if mem.as_ptr::() != config::CANARY_PTR { + panic!("fail to allocate memory at MEM_PTR"); + } + // remain first page as canary + protect_first_page(); + std::mem::forget(mem); + }); + let mem_size = std::mem::size_of::() * len; + let n = mem_size / page_size; + let r = mem_size % page_size; + let offset = MEM_OFFSET.load(Ordering::SeqCst); + let next_ptr = offset + page_size - r; + let ptr = unsafe { config::CANARY_PTR.add(next_ptr) } as *mut T; + let next_canary = offset + (n + 1) * page_size; + if offset >= config::CANARY_AREA_SIZE || next_canary >= config::CANARY_AREA_SIZE { + eyre::bail!("The pointer exceed the range of cananry! Canary is full!"); + } + let canary = unsafe { config::CANARY_PTR.add(next_canary) }; + MEM_OFFSET.store(next_canary + page_size, Ordering::SeqCst); + + Ok(Self { ptr, len, canary }) + } + + fn protect(&mut self, flag: region::Protection) -> eyre::Result<()> { + let page_size = region::page::size(); + unsafe { + region::protect(self.canary as *mut std::ffi::c_void, page_size, flag)?; + } + Ok(()) + } + + pub fn as_mut_slice(&mut self) -> &mut [T] { + unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) } + } + + pub fn as_slice(&self) -> &[T] { + unsafe { std::slice::from_raw_parts(self.ptr, self.len) } + } +} + +impl ObjValue for CanarySlice { + fn get_ptr_by_keys(&self, keys: &[FieldKey]) -> eyre::Result<*mut u8> { + if keys.is_empty() { + return Ok(self.ptr as *const T as *mut u8); + } + if let FieldKey::Index(i) = &keys[0] { + let list = self.as_slice(); + // an index at or past the end yields an overflow address for inference; + // `i == len` must take this path too, otherwise `list[*i]` below panics + if *i >= list.len() { + return Ok(unsafe { list.as_ptr().add(*i) as *mut u8 }); + } + return list[*i].get_ptr_by_keys(&keys[1..]); + } + eyre::bail!("Key `{:?}` is not fit for CanarySlice", 
keys); + } + fn get_layout(&self, fold_ptr: bool) -> ObjectLayout { + let list = self.as_slice(); + list.get_layout(fold_ptr) + } + + fn get_length(&self) -> usize { + self.len * std::mem::size_of::() + } +} + +impl std::fmt::Debug for CanarySlice { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CanarySlice") + .field("ptr", &self.ptr) + .field("canary", &self.canary) + .field("len", &self.len) + .finish() + } +} + +impl Clone for CanarySlice { + fn clone(&self) -> Self { + Self::new(self.len).unwrap() + } +} + +impl ObjFuzzable for CanarySlice {} + +unsafe impl Sync for CanarySlice {} +unsafe impl Send for CanarySlice {} + +impl ObjMutate for CanarySlice { + fn mutate(&mut self, _state: &mut ObjectState) -> eyre::Result { + unimplemented!(); + } + fn mutate_by_op( + &mut self, + + _state: &mut ObjectState, + _keys: &[FieldKey], + _op: &MutateOperation, + ) -> eyre::Result<()> { + unimplemented!(); + } +} + +impl ObjectSerialize for CanarySlice { + fn serialize_obj(&self, state: &ObjectState) -> eyre::Result { + if self.len > 0 && state.children.is_empty() { + let mut buf = String::new(); + buf.push_str("bvec("); + buf.push_str(&self.len.to_string()); + buf.push_str(")[\""); + super::seq::serialize_bytes(&mut buf, self.as_slice()); + buf.push_str("\"]"); + return Ok(buf); + } + self.as_slice().serialize_obj(state) + } +} + +impl Serialize for CanarySlice { + fn serialize(&self) -> eyre::Result { + self.as_slice().serialize() + } +} + +impl ObjectTranslate for CanarySlice {} + +impl ObjectDeserialize for CanarySlice { + fn deserialize_obj(de: &mut Deserializer, state: &mut ObjectState) -> eyre::Result { + if de.strip_token("bvec(") { + let len: usize = de.parse_number()?; + de.eat_token(")[\"")?; + let mut canary = CanarySlice::::new(len)?; + canary.protect(region::Protection::NONE)?; + let buf = de.next_token_until("\"]")?; + let list = unsafe { std::slice::from_raw_parts_mut(canary.ptr as *mut u8, len) }; + 
base64::decode_config_slice(buf, base64::STANDARD, list)?; + return Ok(canary); + } + de.eat_token("vec(")?; + let len: usize = de.parse_number()?; + de.eat_token(")[")?; + let mut canary = CanarySlice::::new(len)?; + // crate::log!(trace, "protect canary: {:?}", canary); + canary.protect(region::Protection::NONE)?; + // crate::log!(trace, "protect done"); + let list = canary.as_mut_slice(); + for v in list { + let sub_state = state + .add_child(state.children.len(), std::any::type_name::()) + .last_child_mut()?; + let element = T::deserialize_obj(de, sub_state)?; + *v = element; + de.eat_token(",")?; + } + de.eat_token("]")?; + // crate::log!(trace, "canary done"); + Ok(canary) + } +} + +#[test] +fn test_vec_canary() { + let mut canary = CanarySlice::new(5).unwrap(); + println!("canary : {canary:?}"); + let canary2 = CanarySlice::::new(5).unwrap(); + println!("canary2: {canary2:?}"); + { + let list = canary.as_mut_slice(); + for (i, val) in list.iter_mut().enumerate().take(5) { + *val = i; + } + println!("list: {list:?}"); + } + { + let list = canary.as_slice(); + let i = list.len(); + println!("index: {}, val: {}", i, unsafe { list.get_unchecked(i) }); + } + /* + { + canary.protect(region::Protection::NONE).unwrap(); + // crash + let list = canary.as_slice(); + let i = list.len(); + println!("index: {}, val: {}", i, unsafe { list.get_unchecked(i) } ); + } + */ + { + canary + .protect(region::Protection::READ_WRITE_EXECUTE) + .unwrap(); + let list = canary.as_slice(); + let i = list.len(); + println!("index: {}, val: {}", i, unsafe { list.get_unchecked(i) }); + } +} diff --git a/hopper-core/src/runtime/object/fn_pointer.rs b/hopper-core/src/runtime/object/fn_pointer.rs new file mode 100644 index 0000000..ac8c3c8 --- /dev/null +++ b/hopper-core/src/runtime/object/fn_pointer.rs @@ -0,0 +1,102 @@ +//! Describe function pointer 's fuzzing trait +//! function pointer are : Option xx> +//! +//! ATTN: we can only choose function from gadgets, +//! 
and can't crate functions for pointer dynamicly. + +use eyre::ContextCompat; + +use super::*; + +impl ObjFuzzable for T {} + +/// Type Cast FnFuzzAble to its original type +pub fn cast_fn_pointer( + f_name: &'static str, + f: &'static dyn FnFuzzable, + state: &mut ObjectState, +) -> eyre::Result { + // crate::log!(trace, "use {f_name} as function pointer"); + state.pointer = Some(PointerState::new_fn_pointer(f_name, false)); + state.done_deterministic(); + let f = f + .downcast_ref::() + .context("fail to cast function pointer")?; + Ok(f.clone()) +} + +pub fn cast_canary_fn_pointer(state: &mut ObjectState) -> T { + // crate::log!(trace, "use canary as function pointer"); + state.pointer = Some(PointerState::new_fn_pointer("__hopper_fn_canary", false)); + state.done_deterministic(); + T::canary_fn_pointer() +} + +impl ObjValue for T {} +impl ObjType for T {} + +impl ObjectSerialize for T { + fn serialize_obj(&self, state: &ObjectState) -> eyre::Result { + let fn_name = &state + .pointer + .as_ref() + .context("pointer state does not exists in fn pointer!")? + .pointer_type; + Ok(format!("fn* {fn_name}&")) + } +} + +impl ObjectTranslate for T { + fn translate_obj_to_c( + &self, + state: &ObjectState, + _program: &FuzzProgram, + ) -> eyre::Result { + let fn_name = &state + .pointer + .as_ref() + .context("pointer state does not exists in fn pointer!")? 
+ .pointer_type; + Ok(format!("&{fn_name}")) + } +} + +impl Serialize for T { + fn serialize(&self) -> eyre::Result { + // we simply return null here + Ok(format!("fn* {}&", "__null_fp")) + } +} + +impl ObjectDeserialize for T { + fn deserialize_obj(de: &mut Deserializer, state: &mut ObjectState) -> eyre::Result { + de.eat_token("fn* ")?; + let func_name = de.next_token_until("&")?; + if func_name == "__null_fp" { + eyre::bail!(HopperError::NullFuncionPointer); + } + if func_name == "__hopper_fn_canary" { + return Ok(cast_canary_fn_pointer(state)); + } + let fg = global_gadgets::get_instance().get_func_gadget(func_name)?; + cast_fn_pointer(fg.f_name, fg.f, state) + } +} + +impl Deserialize for T { + fn deserialize(_de: &mut Deserializer) -> eyre::Result { + unimplemented!(); + } +} + +#[test] +fn test_fn_pointer_serde() { + use crate::test; + use crate::ObjectSerialize; + let mut de = Deserializer::new("fn* func_add&", None); + let mut state = ObjectState::root("test", "fn(u8, u8) -> u8"); + let f = u8>::deserialize_obj(&mut de, &mut state).unwrap(); + assert_eq!(f, test::func_add as fn(u8, u8) -> u8); + let f_name = f.serialize_obj(&state).unwrap(); + assert_eq!(f_name, "fn* func_add&"); +} diff --git a/hopper-core/src/runtime/object/layout.rs b/hopper-core/src/runtime/object/layout.rs new file mode 100644 index 0000000..c70eeb6 --- /dev/null +++ b/hopper-core/src/runtime/object/layout.rs @@ -0,0 +1,241 @@ +//! Layout of an object for fuzzing +//! It describe the fields of structure and their types, +//! 
we consider pointers but ignore arrays in our implementation + +use std::{cell::RefCell, fmt}; + +use eyre::ContextCompat; + +use crate::{ + feedback::ResourceStates, utils, FieldEqual, FieldKey, HopperError, LocFields, + Serialize, +}; + +type LazyLoaderHolder = ( + RefCell>, + Box Vec>, +); + +/// Object's field-type layout +pub struct ObjectLayout { + pub key: FieldKey, + pub type_name: &'static str, + pub ptr: *mut u8, + pub is_union: bool, + pub fields: Vec, + // used for lazy get fields + pub lazy_loader: Option, + // used for serialize current object + pub serializer: Option eyre::Result>>, +} + +impl fmt::Debug for ObjectLayout { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ObjectLayout") + .field("key", &self.key) + .field("type_name", &self.type_name) + .field("ptr", &self.ptr) + .finish() + } +} + +impl ObjectLayout { + pub fn root(type_name: &'static str, ptr: *mut u8) -> Self { + Self { + key: FieldKey::Root("layout".to_string()), + ptr, + type_name, + is_union: false, + fields: vec![], + lazy_loader: None, + serializer: None, + } + } + + pub fn set_lazy_loader Vec + 'static>(&mut self, f: T) { + self.lazy_loader = Some((RefCell::new(vec![]), Box::new(f))) + } + + /// Get fields with reseource states + /// it will expand pointer's fields + pub fn get_fields_with_rs(&self, resource_states: &ResourceStates) -> &[ObjectLayout] { + if let Some((holder, f)) = &self.lazy_loader { + if holder.borrow().is_empty() { + let size = resource_states.get_ptr_size(self.ptr).unwrap_or(1); + // crate::log!(trace, "type: {:?}, ptr: {:?}, size: {}", self.type_name, self.ptr, size); + let fields = f(size); + holder.replace(fields); + } + return unsafe { holder.as_ptr().as_ref().unwrap() }; + } + &self.fields + } + + /// Get fields directly, without expanding pointer's fields + pub fn get_ir_fields(&self) -> &[ObjectLayout] { + &self.fields + } + + /// Get Child + pub fn get_child(&self, key: F) -> eyre::Result<&Self> { + self.fields + 
.iter() + .find(|c| key.eq_field(&c.key)) + .with_context(|| format!("fail to find child layout `{:?}` in `{:?}`", key, self.key)) + } + + /// Get reference of child's state by keys recursively + pub fn get_child_by_fields(&self, fields: &[FieldKey]) -> Result<&Self, HopperError> { + if fields.is_empty() { + return Ok(self); + } + + let layout = match self.get_child(&fields[0]) { + Ok(l) => l, + Err(r) => { + if self.is_union { + return Err(HopperError::UnionErr); + } else { + return Err(HopperError::FieldNotFound(r.to_string())); + } + } + }; + + layout.get_child_by_fields(&fields[1..]) + } + + /// Add fields + pub fn add_field>(&mut self, field_key: K, layout: ObjectLayout) { + let mut layout = layout; + layout.key = field_key.into(); + self.fields.push(layout); + } + + /// Check if the layout is revisit (Pointer) + pub fn is_revisited(&self, visited_pointers: &mut Vec<*mut u8>) -> bool { + // Only pointer type has such cases + if self.key == FieldKey::Pointer { + if visited_pointers.contains(&self.ptr) { + return true; + } + visited_pointers.push(self.ptr); + } + false + } + + /// Find particular pointer in the object + pub fn find_ptr(&self, ptr: *mut u8, resource_states: &ResourceStates) -> Option { + let mut cur_path = LocFields::default(); + let mut visited_pointers = vec![]; + self.find_ptr_in_layout(&mut visited_pointers, &mut cur_path, ptr, resource_states) + } + + fn find_ptr_in_layout( + &self, + visited_pointers: &mut Vec<*mut u8>, + cur_path: &mut LocFields, + ptr: *mut u8, + resource_states: &ResourceStates, + ) -> Option { + if self.is_revisited(visited_pointers) { + return None; + } + if self.ptr == ptr { + return Some(std::mem::take(cur_path)); + } + for layout in self.get_fields_with_rs(resource_states) { + cur_path.push(layout.key.clone()); + let found = layout.find_ptr_in_layout(visited_pointers, cur_path, ptr, resource_states); + if found.is_some() { + return found; + } + cur_path.pop(); + } + None + } + + /// check pointer by closure + pub fn 
check_ptr(&self, resource_states: &ResourceStates, depth: usize) -> eyre::Result<()> { + // since we will skip using freed canary poitners in our generating, + // and the pointers is `false` freed actually (we hook and enfore to free them), + // so we do not need check them here. + if depth > 5 || crate::is_in_canary(self.ptr) { + return Ok(()); + } + resource_states.check_pointer(self.ptr)?; + if self.key == FieldKey::Pointer && crate::utils::is_primitive_type(self.type_name) { + return Ok(()); + } + for layout in self.get_fields_with_rs(resource_states) { + layout.check_ptr(resource_states, depth + 1)?; + } + Ok(()) + } + + /// Serialzie pointers in return object + pub fn serialize_return_object_pointers( + &self, + resource_states: &ResourceStates, + ) -> eyre::Result> { + let mut found = vec![]; + let mut visited_pointers = vec![]; + let mut cur_path = LocFields::default(); + self.serialize_pointers_in_return_inner( + &mut found, + &mut visited_pointers, + &mut cur_path, + resource_states, + )?; + Ok(found) + } + + fn serialize_pointers_in_return_inner( + &self, + found: &mut Vec, + visited_pointers: &mut Vec<*mut u8>, + cur_path: &mut LocFields, + resource_states: &ResourceStates, + ) -> eyre::Result<()> { + if self.is_revisited(visited_pointers) { + return Ok(()); + } + if let Some(f) = &self.serializer { + let size = resource_states.get_ptr_size(self.ptr); + if size.is_none() && utils::is_primitive_type(self.type_name) { + return Ok(()); + } + let size = size.unwrap_or(1); + if utils::is_opaque_type(self.type_name) { + return Ok(()); + } + if size > 0 { + let content = f(size)?; + found.push(format!( + "({}, {}, {})", + cur_path.serialize()?, + utils::vec_type(self.type_name), + content + )); + } + } + // for custom type that may contain pointers, we only + // serialize first element in list + let keep_first_ptr_element = + self.key == FieldKey::Pointer && utils::is_primitive_type(self.type_name); + for layout in self.get_fields_with_rs(resource_states) { + 
cur_path.push(layout.key.clone()); + layout.serialize_pointers_in_return_inner( + found, + visited_pointers, + cur_path, + resource_states, + )?; + cur_path.pop(); + if keep_first_ptr_element { + break; + } + } + Ok(()) + } + +} diff --git a/hopper-core/src/runtime/object/mod.rs b/hopper-core/src/runtime/object/mod.rs new file mode 100644 index 0000000..16de4de --- /dev/null +++ b/hopper-core/src/runtime/object/mod.rs @@ -0,0 +1,131 @@ +//! Traits for object we are fuzzing + +mod builder; +pub mod canary; +pub mod fn_pointer; +mod layout; +mod number; +mod option; +mod pointer; +pub mod seq; +mod state; +mod void; +mod bitfield; + +pub use builder::*; +pub use canary::*; +pub use layout::*; +pub use pointer::*; +pub use state::*; +pub use void::*; +pub use bitfield::*; + +use downcast_rs::Downcast; +use dyn_clone::DynClone; +use std::{fmt::Debug, collections::HashMap}; + +use crate::{runtime::*, HopperError, ObjMutate}; + +pub type FuzzObject = Box; + +/// Trait for fuzzing objects +pub trait ObjFuzzable: + ObjMutate + + ObjValue + + ObjectSerialize + + Serialize + + ObjectTranslate + + Debug + + Send + + DynClone + + Downcast +{ +} + +/// Trait for get object's value information +pub trait ObjValue: 'static + std::any::Any { + /// Get type name of object + fn type_name(&self) -> &'static str { + std::any::type_name::() + } + /// Get layout of the object + fn get_layout(&self, _fold_ptr: bool) -> ObjectLayout { + ObjectLayout::root( + std::any::type_name::(), + self as *const Self as *mut u8, + ) + } + // get raw pointer by key for mutating or fill pointer + fn get_ptr_by_keys(&self, keys: &[FieldKey]) -> eyre::Result<*mut u8> { + if !keys.is_empty() { + unimplemented!() + } + Ok(self as *const Self as *mut u8) + } + // Is it zero or not + fn is_zero(&self) -> bool { + false + } + // get length + fn get_length(&self) -> usize { + 1 + } +} + +/// Trait for get type information +pub trait ObjType { + /// If this type is void or not + fn is_void() -> bool { + false + 
} + /// If this type is primitive type or not + fn is_primitive() -> bool { + false + } + /// If it is opaque type + /// e.g zero-size array, or type contains fields starts with '_' + fn is_opaque() -> bool { + false + } + + /// add fields's types to gadgets + fn add_fields_to_gadgets(_gadgets: &mut ProgramGadgets) {} + + /// try renew a object from antoher + fn cast_from(_other: &FuzzObject) -> Box { + unreachable!("the type is not support cast"); + } + + /// Get all fields + fn get_fields_ty() -> HashMap { + HashMap::default() + } +} + +#[macro_export] +macro_rules! impl_obj_fuzz { + ( $($name:ident),* ) => { + $( + impl ObjFuzzable for $name {} + )* + } +} + +dyn_clone::clone_trait_object!(ObjFuzzable); +downcast_rs::impl_downcast!(ObjFuzzable); + +#[test] +fn test_get_ptr() { + let ptr = crate::test::create_test_ptr(); + let keys = vec![FieldKey::Pointer, FieldKey::Field("p".to_string())]; + let ptr = ptr.get_ptr_by_keys(&keys[..]).unwrap(); + println!("ptr: {ptr:?}"); +} + +#[test] +fn test_get_layout() { + let val: i32 = 1; + let layout = val.get_layout(false); + println!("layout: {layout:?}"); + assert_eq!(layout.get_ir_fields().len(), 0); +} diff --git a/hopper-core/src/runtime/object/number.rs b/hopper-core/src/runtime/object/number.rs new file mode 100644 index 0000000..11c8be0 --- /dev/null +++ b/hopper-core/src/runtime/object/number.rs @@ -0,0 +1,223 @@ +//! Describe numbers' fuzzing trait +//! numbers are: integers and floats + +use eyre::ContextCompat; + +use crate::impl_obj_fuzz; + +use super::*; + +impl_obj_fuzz!(u8, i8, u16, i16, u32, i32, u64, i64, u128, i128, f32, f64, char, bool, usize, isize); + +macro_rules! impl_fuzz_value { + ( $($name:ident),* ) => { + $( + impl ObjValue for $name { + fn is_zero(&self) -> bool { + let zero: Self = unsafe { std::mem::zeroed() }; + self == &zero + } + } + )* + } +} + +impl_fuzz_value!(u8, i8, u16, i16, u32, i32, u64, i64, u128, i128, f32, f64, char, bool, usize, isize); + +macro_rules! 
impl_fuzz_type { + ( $($name:ident),* ) => { + $( + impl ObjType for $name { + fn is_primitive() -> bool { + true + } + } + )* + } +} +impl_fuzz_type!(u8, i8, u16, i16, u128, i128, f32, f64, char, bool, usize, isize); + +macro_rules! impl_fuzz_type_fd { + ( $($name:ident),* ) => { + $( + impl ObjType for $name { + fn is_primitive() -> bool { + true + } + fn cast_from(other: &FuzzObject) -> Box { + if let Some(fd) = other.downcast_ref::() { + return Box::new(fd.inner() as $name); + } + Box::new(0) + } + } + )* + } +} +impl_fuzz_type_fd!(u32, i32, u64, i64); + + +macro_rules! impl_number_serde { + ( $($name:ident),* ) => { + $( + impl ObjectSerialize for $name { + fn serialize_obj(&self, _state: &ObjectState) -> eyre::Result { + Ok(self.to_string()) + } + } + impl Serialize for $name { + fn serialize(&self) -> eyre::Result { + Ok(self.to_string()) + } + } + impl ObjectDeserialize for $name { + fn deserialize_obj(de: &mut Deserializer, _state: &mut ObjectState) -> eyre::Result { + de.parse_number() + } + } + impl Deserialize for $name { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + de.parse_number() + } + } + )* + } +} + +impl_number_serde!(u8, i8, u16, i16, u32, i32, u64, i64, u128, i128, usize, isize); + +macro_rules! impl_number_translate { + ( $($name:ident),* ) => { + $( + impl ObjectTranslate for $name {} + )* + } +} + +impl_number_translate!(u8, i8, u16, i16, u32, i32, u128, i128, bool, char, usize, isize); + +impl ObjectTranslate for u64 { + fn translate_obj_to_c( + &self, + _state: &ObjectState, + _program: &FuzzProgram, + ) -> eyre::Result { + Ok(self.to_string() + "ULL") + } +} + +impl ObjectTranslate for i64 { + fn translate_obj_to_c( + &self, + _state: &ObjectState, + _program: &FuzzProgram, + ) -> eyre::Result { + Ok(self.to_string() + "LL") + } +} + +macro_rules! 
impl_float_serde { + ( $($name:ident),* ) => { + $( + impl ObjectSerialize for $name { + fn serialize_obj(&self, _state: &ObjectState) -> eyre::Result { + Ok(self.to_string()) + } + } + impl Serialize for $name { + fn serialize(&self) -> eyre::Result { + Ok(self.to_string()) + } + } + impl ObjectDeserialize for $name { + fn deserialize_obj(de: &mut Deserializer, _state: &mut ObjectState) -> eyre::Result { + de.parse_float() + } + } + impl Deserialize for $name { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + de.parse_float() + } + } + impl ObjectTranslate for $name { + fn translate_obj_to_c(&self, _state: &ObjectState, _program: &FuzzProgram) -> eyre::Result { + let val = self.to_string(); + match val.as_str() { + "NaN" => Ok("NAN".to_string()), + "inf" => Ok("INFINITY".to_string()), + "-inf" => Ok("-INFINITY".to_string()), + _ => { + if !val.contains('.') { + return Ok(val + ".0"); + } + Ok(val) + } + } + + } + } + )* + } +} + +impl_float_serde!(f32, f64); + +impl ObjectSerialize for char { + fn serialize_obj(&self, _state: &ObjectState) -> eyre::Result { + self.serialize() + } +} + +impl Serialize for char { + fn serialize(&self) -> eyre::Result { + Ok(format!("\'{self}\'")) + } +} + +impl ObjectDeserialize for char { + fn deserialize_obj(de: &mut Deserializer, _state: &mut ObjectState) -> eyre::Result { + Self::deserialize(de) + } +} + +impl Deserialize for char { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + eyre::ensure!(de.next_char() == Some('\''), "next char is quote"); + let c = de.next_char().context("Char is not exisited")?; + eyre::ensure!(de.next_char() == Some('\''), "next char is quote"); + Ok(c) + } +} + +impl ObjectSerialize for bool { + fn serialize_obj(&self, _state: &ObjectState) -> eyre::Result { + self.serialize() + } +} + +impl Serialize for bool { + fn serialize(&self) -> eyre::Result { + if *self { + Ok("T".to_string()) + } else { + Ok("F".to_string()) + } + } +} + +impl ObjectDeserialize for bool { + fn 
deserialize_obj(de: &mut Deserializer, _state: &mut ObjectState) -> eyre::Result { + Self::deserialize(de) + } +} + +impl Deserialize for bool { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + let c = de.next_char().context("buffer is empty")?; + match c { + 'T' => Ok(true), + 'F' => Ok(false), + _ => eyre::bail!("Unknown token for boolean"), + } + } +} diff --git a/hopper-core/src/runtime/object/option.rs b/hopper-core/src/runtime/object/option.rs new file mode 100644 index 0000000..8a31eb8 --- /dev/null +++ b/hopper-core/src/runtime/object/option.rs @@ -0,0 +1,100 @@ +//! Describe option type's fuzzing trait +//! +//! If T is an FFI-safe non-nullable pointer type, Option is guaranteed to have the same layout +//! and ABI as T and is therefore also FFI-safe. As of this writing, this covers &, &mut, and +//! function pointers, all of which can never be null. +//! +//! In FFI, Option is warpped with function pointer to bring `NULL` pointer in rust. +//! + +use eyre::Context; + +use crate::{config, ObjGenerate}; + +use super::*; + +impl ObjFuzzable for Option {} + +impl ObjValue for Option {} + +impl ObjType for Option {} + +impl ObjectSerialize for Option { + fn serialize_obj(&self, state: &ObjectState) -> eyre::Result { + if let Some(v) = self { + let sub_state = state + .last_child() + .with_context(|| format!("failed state: {state:?}"))?; + if sub_state.pointer.is_some() { + let s = v.serialize_obj(sub_state)?; + return Ok(format!("option {s}")); + } + } + // avoid data is "None" + Ok("option_ None".to_string()) + } +} + +impl ObjectTranslate for Option { + fn translate_obj_to_c( + &self, + state: &ObjectState, + program: &FuzzProgram, + ) -> eyre::Result { + if let Some(v) = self { + let s = v.translate_obj_to_c(state.last_child()?, program)?; + Ok(s) + } else { + Ok("NULL".to_string()) + } + } +} + +impl Serialize for Option { + fn serialize(&self) -> eyre::Result { + if let Some(v) = self { + let s = v.serialize()?; + Ok(format!("option {s}")) + } 
else { + // avoid data is "None" + Ok("option_ None".to_string()) + } + } +} + +impl ObjectDeserialize for Option { + fn deserialize_obj(de: &mut Deserializer, state: &mut ObjectState) -> eyre::Result { + if !config::ENABLE_SET_FN_POINTER { + let _ = state.replace_weight(0); + } + state.done_deterministic(); + let sub_state = state + .add_child(FieldKey::Option, std::any::type_name::>()) + .last_child_mut()?; + if de.strip_token("option_ None") { + return Ok(None); + } + de.eat_token("option")?; + let ret = T::deserialize_obj(de, sub_state); + match ret { + Ok(obj) => Ok(Some(obj)), + Err(err) => { + if let Some(HopperError::NullFuncionPointer) = err.downcast_ref::() { + return Ok(None); + } + Err(err) + } + } + } +} + +impl Deserialize for Option { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + if de.strip_token("option_ None") { + return Ok(None); + } + de.eat_token("option")?; + let ret = T::deserialize(de)?; + Ok(Some(ret)) + } +} diff --git a/hopper-core/src/runtime/object/pointer.rs b/hopper-core/src/runtime/object/pointer.rs new file mode 100644 index 0000000..41ddf35 --- /dev/null +++ b/hopper-core/src/runtime/object/pointer.rs @@ -0,0 +1,312 @@ +//! Describe pointer's fuzzing triat +//! pointers include: *mut, *const +use super::*; +use crate::config; +use crate::ObjGenerate; + +macro_rules! 
impl_fuzz_pointer { + ($pointer:ident, $name:literal, $is_mut:tt) => { + impl $pointer { + pub fn new(ptr: *mut T) -> Self { + Self(ptr) + } + + pub fn get_inner(&self) -> *mut T { + self.0 + } + + /// Return a pointer which points to `loc` + pub fn loc_pointer(state: &mut ObjectState, loc: Location) -> Self { + state.pointer = Some(PointerState::new_pointer( + loc, + std::any::type_name::(), + $is_mut, + )); + state.done_deterministic(); + Self(::std::ptr::null_mut()) + } + + /// Return a null pointer + pub fn null(state: &mut ObjectState) -> Self { + state.done_deterministic(); + Self::loc_pointer(state, Location::null()) + } + + /// Return a stub pointer + pub fn stub(state: &mut ObjectState) -> Self { + state.done_deterministic(); + let p = Self::loc_pointer(state, Location::null()); + if let Some(ps) = &mut state.pointer { + ps.stub = true; + } + p + } + + /// Get length by memory size + pub fn get_length_by_mem_size(size: usize) -> usize { + let mut size = size; + // if it malloc a huge space (may used as arena) + if size > config::MAX_INPUT_SZIE { + size = config::MAX_INPUT_SZIE; + } + let obj_size = std::mem::size_of::(); + if obj_size == 0 { + size = 1; + } else if size > 1 { + let rem = size % obj_size; + size /= obj_size; + // the size could be not multiple of obj_size, + // e.g. arena, partial opaque + if rem > 0 { + size = 1; + } + } + size + } + + /// Check if the pointed memory remains untouched since allocated or is obtained through a null pointer plus an offset. + pub fn check_validity(&self) -> bool { + // TODO: > 0x8000000 can only filter out the situation when a null pointer is used as the base. however, chances are that + // a pointer with a different type is mistakenly feed in as a base pointer, and the addition of offset to the pointer makes it a + // illegal pointer. 
This usually happens when an argument has the `void *` type + (self.0 as usize) != config::UNINITIALIZED_MEMORY_MAGIC + && (self.0 as usize) > 0x300000 + } + } + + unsafe impl Sync for $pointer {} + unsafe impl Send for $pointer {} + + // Since we do not require T being Copy or Clone, + // and we do not copy *mut T actually, + // we implement Copy/Clone directly instead of derive.. + impl Copy for $pointer {} + impl Clone for $pointer { + fn clone(&self) -> Self { + // The pointer will be updated before `eval` load statements based on their location, + // and they will be cloned at clone arguments or fetch from returns at call statements, + // thus we should fill old pointer here. + // Also, we should avoid dangling pointer, so the pointers of programs in queue + // for mutating is null due to the specifical clone `clone_without_state`. + // Self(self.0) + *self + } + } + + impl ObjFuzzable for $pointer {} + + impl ObjValue for $pointer { + /// Get layout of the object + fn get_layout(&self, fold_ptr: bool) -> ObjectLayout { + let mut layout = + ObjectLayout::root(self.type_name(), self as *const Self as *mut u8); + if !fold_ptr && self.check_validity() && !self.0.is_null() { + // If the pointer has point-to sth, we add its layout with `pointer` key. + // To avoid recursion, we expand layout lazily. + // Also, we should ensure it is not a dangling pointer. + // the pointer key will only exist in return value in call stmt, + // since it is null in load stmt. 
+ let ptr = self.0; + let mut ptr_layout = + ObjectLayout::root(std::any::type_name::(), ptr as *mut u8); + // lazy load + ptr_layout.set_lazy_loader(move |size: usize| { + if size == 0 { + return vec![]; + } + let len = Self::get_length_by_mem_size(size); + let v = unsafe { std::slice::from_raw_parts(ptr as *const T, len) }; + v.get_layout(fold_ptr).fields + }); + // serializer + ptr_layout.serializer = Some(Box::new(move |size: usize| { + if size == 0 { + return Ok("null".to_string()); + } + let len = Self::get_length_by_mem_size(size); + let v = unsafe { std::slice::from_raw_parts(ptr as *const T, len) }; + v.serialize() + })); + layout.add_field(FieldKey::Pointer, ptr_layout); + } + layout + } + fn get_ptr_by_keys(&self, keys: &[FieldKey]) -> eyre::Result<*mut u8> { + if keys.len() > 0 { + eyre::ensure!(keys[0] == FieldKey::Pointer, "the key should be pointer"); + if keys.len() == 1 { + return Ok(self.0 as *mut u8); + } + + if keys.len() > 1 { + if let FieldKey::Index(i) = keys[1] { + if let Some(p) = unsafe { self.0.add(i).as_ref() } { + return p.get_ptr_by_keys(&keys[2..]); + } + } + } + // we should ensure it is not a dangling pointer + if let Some(p) = unsafe { self.0.as_ref() } { + return p.get_ptr_by_keys(&keys[1..]); + } else { + eyre::bail!("wrong location"); + } + } + Ok(self as *const Self as *mut u8) + } + } + + impl ObjType for $pointer { + fn is_opaque() -> bool { + false + } + + fn add_fields_to_gadgets(gadgets: &mut ProgramGadgets) { + gadgets.add_type::(); + } + + fn cast_from(other: &FuzzObject) -> Box { + let ptr = other.get_ptr_by_keys(&[FieldKey::Pointer]).unwrap(); + Box::new(Self::new(ptr as *mut T)) + } + } + + impl ObjectSerialize for $pointer { + /// Serialize pointer + fn serialize_obj(&self, state: &ObjectState) -> eyre::Result { + let loc = &state.get_pointer()?.pointer_location; + if loc.is_null() { + Ok(format!("{}* null", $name)) + } else { + Ok(format!("{}* {}", $name, loc.serialize()?)) + } + } + } + + impl ObjectTranslate for 
$pointer { + fn translate_obj_to_c( + &self, + state: &ObjectState, + program: &FuzzProgram, + ) -> eyre::Result { + let loc = &state.get_pointer()?.pointer_location; + crate::log!(trace, "pointer loc: {:?}", loc); + let trans = loc.translate_obj_to_c(state, program)?; + crate::log!(trace, "translate to: {}", trans); + Ok(trans) + } + } + + impl Serialize for $pointer { + /// Serialize pointer + fn serialize(&self) -> eyre::Result { + if self.0.is_null() { + Ok(format!("{}* null", $name)) + } else { + Ok(format!("{}* stub", $name)) + } + } + } + + impl ObjectDeserialize for $pointer { + fn deserialize_obj( + de: &mut Deserializer, + state: &mut ObjectState, + ) -> eyre::Result { + de.eat_token($name)?; + de.eat_token("*")?; + if de.strip_token("null") { + return Ok(Self::null(state)); + } + if de.strip_token("stub") { + return Ok(Self::stub(state)); + } + de.trim_start(); + let loc = Location::deserialize(de)?; + return Ok(Self::loc_pointer(state, loc)); + } + } + + impl Deserialize for $pointer { + fn deserialize(_de: &mut Deserializer) -> eyre::Result { + unimplemented!(); + } + } + }; +} + +impl_fuzz_pointer!(FuzzMutPointer, "mut", true); +impl_fuzz_pointer!(FuzzConstPointer, "const", false); + +impl FuzzFrozenPointer { + pub fn new() -> Self { + Self(std::ptr::null(), PhantomData) + } +} + +impl Default for FuzzFrozenPointer { + fn default() -> Self { + Self::new() + } +} + +unsafe impl Sync for FuzzFrozenPointer {} +unsafe impl Send for FuzzFrozenPointer {} + +impl ObjectSerialize for FuzzFrozenPointer { + fn serialize_obj(&self, state: &ObjectState) -> eyre::Result { + let _ = state.replace_weight(0); + Ok(String::from("frozen* null")) + } +} + +impl ObjectDeserialize for FuzzFrozenPointer { + fn deserialize_obj(de: &mut Deserializer, _state: &mut ObjectState) -> eyre::Result { + de.eat_token("frozen")?; + de.eat_token("* null")?; + Ok(Self::new()) + } +} + +impl Serialize for FuzzFrozenPointer { + fn serialize(&self) -> eyre::Result { + 
Ok(String::from("frozen* null")) + } +} + +impl Deserialize for FuzzFrozenPointer { + fn deserialize(_de: &mut Deserializer) -> eyre::Result { + unimplemented!(); + } +} + +impl ObjectTranslate for FuzzFrozenPointer { + fn translate_obj_to_c( + &self, + _state: &ObjectState, + _program: &FuzzProgram, + ) -> eyre::Result { + Ok("NULL".to_string()) + } +} + +impl Clone for FuzzFrozenPointer { + fn clone(&self) -> Self { + // Self(self.0, self.1) + *self + } +} + +impl Copy for FuzzFrozenPointer {} + +impl ObjType for FuzzFrozenPointer {} + +impl Debug for FuzzFrozenPointer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", std::any::type_name::()) + } +} + +impl ObjValue for FuzzFrozenPointer {} + +impl ObjFuzzable for FuzzFrozenPointer {} diff --git a/hopper-core/src/runtime/object/seq.rs b/hopper-core/src/runtime/object/seq.rs new file mode 100644 index 0000000..d072f2d --- /dev/null +++ b/hopper-core/src/runtime/object/seq.rs @@ -0,0 +1,356 @@ +//! Describe array's fuzzing trait +//! 
arrays are: [T; N], where T is fuzzable + +use std::mem::MaybeUninit; + +use super::*; +use crate::{utils, ObjGenerate}; + +impl ObjFuzzable + for [T; N] +{ +} + +impl ObjFuzzable for Vec {} + +impl ObjValue for [T] { + fn get_layout(&self, fold_ptr: bool) -> ObjectLayout { + let mut layout = ObjectLayout::root(self.type_name(), self.as_ptr() as *mut u8); + for (i, v) in self.iter().enumerate() { + layout.add_field(FieldKey::Index(i), v.get_layout(fold_ptr)); + } + layout + } + + fn get_ptr_by_keys(&self, keys: &[FieldKey]) -> eyre::Result<*mut u8> { + if keys.is_empty() { + return Ok(self as *const Self as *mut u8); + } + if let FieldKey::Index(i) = &keys[0] { + return self[*i].get_ptr_by_keys(&keys[1..]); + } + eyre::bail!("Key `{:?}` is not fit for sequence", keys); + } + fn get_length(&self) -> usize { + self.len() + } +} + +impl ObjValue for [T; N] { + fn get_layout(&self, fold_ptr: bool) -> ObjectLayout { + let mut layout = self.as_slice().get_layout(fold_ptr); + layout.type_name = self.type_name(); + layout + } + fn get_ptr_by_keys(&self, keys: &[FieldKey]) -> eyre::Result<*mut u8> { + self.as_slice().get_ptr_by_keys(keys) + } + fn get_length(&self) -> usize { + N + } +} + +impl ObjType for [T; N] { + fn is_opaque() -> bool { + // zero length array + // https://doc.rust-lang.org/nomicon/exotic-sizes.html#zero-sized-types-zsts + N == 0 + } + + fn add_fields_to_gadgets(gadgets: &mut ProgramGadgets) { + gadgets.add_type::(); + } +} + +impl ObjValue for Vec { + fn get_layout(&self, fold_ptr: bool) -> ObjectLayout { + let mut layout = self.as_slice().get_layout(fold_ptr); + layout.type_name = self.type_name(); + layout + } + fn get_ptr_by_keys(&self, keys: &[FieldKey]) -> eyre::Result<*mut u8> { + self.as_slice().get_ptr_by_keys(keys) + } + fn get_length(&self) -> usize { + self.len() + } +} + +impl ObjType for Vec {} + +impl ObjectSerialize for [T; N] { + fn serialize_obj(&self, state: &ObjectState) -> eyre::Result { + let mut buf = String::new(); + 
buf.push('['); + for (i, v) in self.iter().enumerate() { + buf.push_str(&v.serialize_obj(state.get_child(i)?)?); + buf.push_str(", "); + } + buf.push(']'); + Ok(buf) + } +} + +impl Serialize for [T; N] { + fn serialize(&self) -> eyre::Result { + let mut buf = String::new(); + buf.push('['); + for v in self.iter() { + buf.push_str(&v.serialize()?); + buf.push_str(", "); + } + buf.push(']'); + Ok(buf) + } +} + +impl ObjectTranslate for [T; N] { + fn translate_obj_to_c( + &self, + state: &ObjectState, + program: &FuzzProgram, + ) -> eyre::Result { + let mut buf = String::new(); + buf.push('{'); + for (i, v) in self.iter().enumerate() { + buf.push_str(&v.translate_obj_to_c(state.get_child(i)?, program)?); + buf.push_str(", "); + } + buf.push('}'); + Ok(buf) + } +} + +impl ObjectSerialize for Vec { + fn serialize_obj(&self, state: &ObjectState) -> eyre::Result { + if self.len() > 16 && utils::is_byte(std::any::type_name::()) { + let mut buf = String::new(); + buf.push_str("bvec("); + buf.push_str(&self.len().to_string()); + buf.push_str(")[\""); + serialize_bytes(&mut buf, self); + buf.push_str("\"]"); + return Ok(buf); + } + self.as_slice().serialize_obj(state) + } +} + +impl Serialize for Vec { + fn serialize(&self) -> eyre::Result { + self.as_slice().serialize() + } +} + +impl ObjectTranslate for Vec { + fn translate_obj_to_c( + &self, + state: &ObjectState, + program: &FuzzProgram, + ) -> eyre::Result { + let mut buf = String::new(); + buf.push('{'); + for (i, v) in self.iter().enumerate() { + buf.push_str(&v.translate_obj_to_c(state.get_child(i)?, program)?); + buf.push_str(", "); + } + buf.push('}'); + Ok(buf) + } +} + +impl Serialize for [T] { + fn serialize(&self) -> eyre::Result { + // serialize like vec + let mut buf = String::new(); + buf.push_str("vec("); + buf.push_str(&self.len().to_string()); + buf.push_str(")["); + for v in self.iter() { + buf.push_str(&v.serialize()?); + buf.push_str(", "); + } + buf.push(']'); + Ok(buf) + } +} + +impl ObjectSerialize 
for [T] { + fn serialize_obj(&self, state: &ObjectState) -> eyre::Result { + let mut buf = String::new(); + buf.push_str("vec("); + buf.push_str(&self.len().to_string()); + buf.push_str(")["); + for (i, v) in self.iter().enumerate() { + buf.push_str(&v.serialize_obj(state.get_child(i)?)?); + buf.push_str(", "); + } + buf.push(']'); + Ok(buf) + } +} + +fn deserialize_array( + de: &mut Deserializer, + mut f: impl FnMut(&mut Deserializer) -> eyre::Result, +) -> eyre::Result<[T; N]> { + if N == 0 { + unsafe { + return Ok(std::mem::zeroed()); + } + } + de.eat_token("[")?; + let mut output: MaybeUninit<[T; N]> = MaybeUninit::uninit(); + let arr_ptr = output.as_mut_ptr() as *mut T; + for i in 0..N { + let element = f(de)?; + unsafe { + arr_ptr.add(i).write(element); + } + de.eat_token(",")?; + } + de.eat_token("]")?; + let val = unsafe { output.assume_init() }; + Ok(val) +} + +impl ObjectDeserialize for [T; N] { + fn deserialize_obj(de: &mut Deserializer, state: &mut ObjectState) -> eyre::Result { + deserialize_array(de, |de| deserialize_element_for_slice::(de, state)) + } +} + +impl Deserialize for [T; N] { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + deserialize_array(de, |de| T::deserialize(de)) + } +} + +fn deserialize_vec( + de: &mut Deserializer, + mut f: impl FnMut(&mut Deserializer) -> eyre::Result, +) -> eyre::Result> { + de.eat_token("vec(")?; + let len: usize = de.parse_number()?; + de.eat_token(")[")?; + let mut list = Vec::::with_capacity(len); + for _ in 0..len { + let element = f(de)?; + list.push(element); + de.eat_token(",")?; + } + de.eat_token("]")?; + Ok(list) +} + +impl ObjectDeserialize for Vec { + fn deserialize_obj(de: &mut Deserializer, state: &mut ObjectState) -> eyre::Result { + if de.strip_token("bvec(") { + let len: usize = de.parse_number()?; + de.eat_token(")[\"")?; + let mut list = Vec::::with_capacity(len); + let buf = de.next_token_until("\"]")?; + let remaining = list.spare_capacity_mut(); // `&mut [MaybeUninit]` + 
deserialize_bytes(remaining, buf)?; + // add state + for i in 0..len { + let _ = state + .add_child(i, std::any::type_name::()) + .last_child_mut()?; + } + unsafe { list.set_len(len) }; + return Ok(list); + } + deserialize_vec(de, |de| deserialize_element_for_slice::(de, state)) + } +} + +impl Deserialize for Vec { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + deserialize_vec(de, |de| T::deserialize(de)) + } +} + +/// Deserialize element for slice +pub fn deserialize_element_for_slice( + de: &mut Deserializer, + state: &mut ObjectState, +) -> eyre::Result { + let sub_state = state + .add_child(state.children.len(), std::any::type_name::()) + .last_child_mut()?; + let element = T::deserialize_obj(de, sub_state)?; + // check_seq_element_state(element.type_name(), sub_state); + Ok(element) +} + +/* +#[inline] +pub fn check_seq_element_state(type_name: &str, state: &ObjectState) { + // avoid float det + if type_name == "f32" || type_name == "f64" { + state.done_deterministic_itself(); + } +} +*/ + +impl ObjectSerialize for String { + fn serialize_obj(&self, _state: &ObjectState) -> eyre::Result { + self.serialize() + } +} + +impl Serialize for String { + fn serialize(&self) -> eyre::Result { + Ok(format!("\"{}\"", &self)) + } +} + +impl ObjectDeserialize for String { + fn deserialize_obj(de: &mut Deserializer, _state: &mut ObjectState) -> eyre::Result { + Self::deserialize(de) + } +} + +impl Deserialize for String { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + de.eat_token("\"")?; + let val = de.next_token_until("\"")?; + Ok(val.into()) + } +} + +#[test] +fn test_vec_serde() { + let s = "vec(3)[1, 2, 3, ]"; + let mut state = ObjectState::root("test", "Vec"); + let mut de = Deserializer::new(s, None); + let v2 = Vec::::deserialize_obj(&mut de, &mut state).unwrap(); + let s2 = ObjectSerialize::serialize_obj(&v2, &state).unwrap(); + assert_eq!(s, s2); +} + +pub fn serialize_bytes(buf: &mut String, seq: &[T]) { + let size = 
std::mem::size_of_val(seq); + let bytes = unsafe { std::slice::from_raw_parts(seq.as_ptr() as *const u8, size) }; + base64::encode_config_buf(bytes, base64::STANDARD, buf); +} + +pub fn deserialize_bytes(buf: &mut [T], raw: &str) -> eyre::Result<()> { + let size = std::mem::size_of_val(buf); + let buf = unsafe { std::slice::from_raw_parts_mut(buf.as_mut_ptr() as *mut u8, size) }; + base64::decode_config_slice(raw, base64::STANDARD, buf)?; + Ok(()) +} + +#[test] +fn test_base64_seq() { + let seq = vec![1, -1, 771230]; + let mut buf = String::new(); + serialize_bytes(&mut buf, &seq); + println!("buf: {buf}"); + let mut seq2 = vec![0; 3]; + deserialize_bytes(&mut seq2, &buf).unwrap(); + println!("seq2: {seq2:?}"); + assert_eq!(seq, seq2); +} diff --git a/hopper-core/src/runtime/object/state.rs b/hopper-core/src/runtime/object/state.rs new file mode 100644 index 0000000..947b716 --- /dev/null +++ b/hopper-core/src/runtime/object/state.rs @@ -0,0 +1,692 @@ +//! State of object in load statement, +//! it stores state information for mutating, serde.. 
+ +use std::{cell::RefCell, ptr::NonNull, rc::Rc}; + +use eyre::ContextCompat; + +use crate::{ + feedback::{CmpBuf, CmpState}, + fuzz::{MutateOperation, MutateOperator}, + CloneProgram, FieldEqual, FieldKey, FuzzProgram, HopperError, LocFields, Location, StmtIndex, +}; + +#[derive(Debug)] +pub struct ObjectState { + // -- tree struct of the state -- + /// key + pub key: FieldKey, + /// parent + pub parent: Option>, + /// children + pub children: Vec>, + // -- data the state holds -- + /// info used for mutation + pub mutate: Rc>, + /// info used for pointer + pub pointer: Option, + // -- info about the object underneath -- + /// if the object is an union + pub is_union: bool, + /// ty + pub ty: &'static str, +} + +// Check fields to support both &a[0] and &a +#[inline] +pub fn check_fields<'a>(fields: &'a [FieldKey], state: &ObjectState) -> &'a [FieldKey] { + if !fields.is_empty() + && fields.first() == Some(&FieldKey::Index(0)) + && state + .children + .first() + .map_or(true, |c| c.key != FieldKey::Index(0)) + { + return &fields[1..]; + } + fields +} + +macro_rules! 
error_handle_get_field { + ($state: ident, $key: ident) => {{ + // crate::log!(warn, "fail to find: {fields:?} in {:?}", self); + let field = $key.as_field_key(); + if let FieldKey::Index(i) = field { + if let Some(c) = $state.children.first() { + if matches!(c.key, FieldKey::Index(_)) && i >= $state.children.len() { + return Err(HopperError::IndexNotExist); + } + } else { + return Err(HopperError::IndexNotExist); + } + } + if $state.is_union { + return Err(HopperError::UnionErr); + } + return Err(HopperError::FieldNotFound(format!( + "fail to find child `{:?}` in `{:?}`, child: {:?}", + $key, + $state.key, + $state + .children + .iter() + .map(|c| &c.key) + .collect::>() + ))); + }}; +} + +impl ObjectState { + pub fn root(ident: T, ty: &'static str) -> Self { + let s = Self { + key: FieldKey::Root(ident.to_string()), + parent: None, + children: vec![], + mutate: Rc::new(RefCell::new(MutateState::default())), + pointer: None, + is_union: false, + ty, + }; + if s.is_private_field() { + s.mutate.borrow_mut().set_weight(0); + } + s + } + + /// Create child's state, which located by `key` + pub fn add_child>(&mut self, key: K, ty: &'static str) -> &mut Self { + let child = Box::new(Self { + key: key.into(), + parent: NonNull::new(self), + children: vec![], + mutate: Rc::new(RefCell::new(MutateState::default())), + pointer: None, + is_union: false, + ty, + }); + if child.is_private_field() || self.is_private_field() { + child.mutate.borrow_mut().set_weight(0); + } + self.children.push(child); + self + } + + pub fn add_child_at_offset(&mut self, offset: usize, ty: &'static str) -> &mut Self { + let child = Box::new(Self { + key: offset.into(), + parent: NonNull::new(self), + children: vec![], + mutate: Rc::new(RefCell::new(MutateState::default())), + pointer: None, + is_union: false, + ty, + }); + if self.is_private_field() { + child.mutate.borrow_mut().set_weight(0); + } + self.children.insert(offset, child); + self + } + + pub fn resort_children_indices(&mut self) { + 
for index in 0..self.children.len() { + if !self.children[index].key.is_index() { + break; + } + self.children[index].key = index.into(); + } + } + + pub fn clear(&mut self) { + self.children.clear(); + } + + pub fn set_ident(&mut self, ident: T) { + self.key = FieldKey::Root(ident.to_string()); + } + + /// Get its parent + pub fn get_parent(&self) -> Option<&ObjectState> { + self.parent.map(|p| unsafe { p.as_ref() }) + } + + /// Check if current field is private or not + /// private field's ident is starts with '_' + pub fn is_private_field(&self) -> bool { + match &self.key { + FieldKey::Root(ident) => { + if ident.starts_with('_') { + return true; + } + } + FieldKey::Field(ident) => { + if ident.starts_with('_') { + return true; + } + } + _ => { + if let Some(p) = self.get_parent() { + if p.is_private_field() { + return true; + } + } + } + } + false + } + + /// Get last reference of child's state + /// used in union's serialzie + pub fn last_child(&self) -> eyre::Result<&ObjectState> { + self.children + .last() + .context("fail to get last child") + .map(|v| v.as_ref()) + } + + /// Get last mut reference of child's state + /// used in generation and de-ser + pub fn last_child_mut(&mut self) -> eyre::Result<&mut ObjectState> { + self.children + .last_mut() + .context("fail to get last child") + .map(|v| v.as_mut()) + } + + /// Get reference of child's state by key + /// use in struct's serialize + pub fn get_child( + &self, + key: F, + ) -> Result<&ObjectState, HopperError> { + let ret = self.children.iter().find(|c| key.eq_field(&c.key)); + if ret.is_none() { + error_handle_get_field!(self, key); + }; + Ok(ret.unwrap()) + } + + /// Get mut reference of child's state by key + /// use in mutation + pub fn get_child_mut( + &mut self, + key: F, + ) -> Result<&mut ObjectState, HopperError> { + let ret = self.children.iter().position(|c| key.eq_field(&c.key)); + if let Some(index) = ret { + Ok(&mut self.children[index]) + } else { + error_handle_get_field!(self, key); 
+ } + } + + /// Get reference of child's state by keys recursively (an empty key list returns the state itself) + pub fn get_child_by_fields(&self, fields: &[FieldKey]) -> Result<&ObjectState, HopperError> { + let fields = check_fields(fields, self); + if fields.is_empty() { + return Ok(self); + } + let state = self.get_child(&fields[0])?; + state.get_child_by_fields(&fields[1..]) + } + + /// Get mut reference of child's state by keys recursively (an empty key list returns the state itself) + pub fn get_child_mut_by_fields( + &mut self, + fields: &[FieldKey], + ) -> Result<&mut ObjectState, HopperError> { + let fields = check_fields(fields, self); + if fields.is_empty() { + return Ok(self); + } + let state = self.get_child_mut(&fields[0])?; + state.get_child_mut_by_fields(&fields[1..]) + } + + /// Get full path of the state: the keys from below the root down to this state (the root key is excluded) + pub fn get_location_fields(&self) -> LocFields { + let mut fields = vec![]; + let mut st = self; + fields.push(st.key.clone()); + while let Some(p) = st.get_parent() { + fields.push(p.key.clone()); + st = p; + } + // remove root + fields.pop(); + fields.reverse(); + LocFields::new(fields) + } + + /// Get the pointer state, or an error if this state holds none + pub fn get_pointer(&self) -> eyre::Result<&PointerState> { + self.pointer + .as_ref() + .ok_or_else(|| eyre::eyre!("pointer has no pointer state")) + } + + /// Get the pointer state mutably, or an error if this state holds none + pub fn get_pointer_mut(&mut self) -> eyre::Result<&mut PointerState> { + self.pointer + .as_mut() + .ok_or_else(|| eyre::eyre!("pointer has no pointer state")) + } + + /// Get the statement index stored in the pointer location, if this state holds a pointer state + pub fn get_pointer_stmt_index(&self) -> Option<&StmtIndex> { + if let Some(ps) = &self.pointer { + return ps.pointer_location.stmt_index.as_ref(); + } + None + } + + /// Return mutate operator for current state + /// which is used after mutation, and also increase the count of mutation + pub fn as_mutate_operator(&self, op: MutateOperation) -> MutateOperator { + let fields = self.inc_mutation().get_location_fields(); + let mut loc = Location::null(); + loc.fields = fields; + MutateOperator::new(loc, op) + } + + /// Inc(+1) mutations + pub fn inc_mutation(&self) -> 
&Self { + self.mutate.borrow_mut().inc_mutation(); + /* + let mut st = self; + while let Some(p) = st.get_parent() { + st = p; + (&st.mutate).borrow_mut().inc_mutation(); + } + */ + self + } + + /// Get child's position whose state with deterministic flag + pub fn get_deterministic_child_position(&self) -> Option { + self.children.iter().position(|s| s.is_deterministic()) + } + + /// if it is deterministic or not + pub fn is_deterministic(&self) -> bool { + self.mutate.borrow().deterministic + } + + /// Mark current state no-deterministic , and check its parent's state, + /// If all the children is no-deterministic, the parent become no-deter too. + pub fn done_deterministic(&self) { + self.done_deterministic_itself(); + self.update_deterministic(); + } + + /// Check parent's state of deterministic + pub fn update_deterministic(&self) { + let mut st = self; + while let Some(p) = st.get_parent() { + // parent may be vec/array/struct, + // we only propagate if parent is struct, + // since array/vec has det steps. + if let FieldKey::Field(_) = st.key { + st = p; + if st.get_deterministic_child_position().is_none() { + st.done_deterministic_itself(); + } else { + break; + } + } else { + break; + } + } + } + + /// Mark current state no-deterministic, do not check and modify its parent. 
+ pub fn done_deterministic_itself(&self) { + let mut mutate_state = self.mutate.borrow_mut(); + // crate::log!(trace, "done state {:?}: {}", self.key, (*mutate_state).deterministic ); + if !mutate_state.deterministic { + return; + } + mutate_state.done_deterministic(); + for c in &self.children { + c.done_deterministic_itself(); + } + } + + /// If has any stmt in state's pointee satisfy `filter` + pub fn find_any_stmt_in_state_with bool>(&self, mut filter: F) -> bool { + let mut st = vec![self]; + while let Some(s) = st.pop() { + if let Some(i) = &s.get_pointer_stmt_index() { + if filter(i) { + return true; + } + } + for sub_state in s.children.iter() { + // only consider first element for sequence + if sub_state.children.is_empty() { + if let FieldKey::Index(_) = sub_state.key { + break; + } + } + st.push(sub_state); + } + } + false + } + + /// find all locations whose type is `type_name` in the object + pub fn find_fields_with bool>( + &self, + filter: F, + partial: bool, + ) -> Vec { + let mut found = vec![]; + let mut st = vec![self]; + while let Some(s) = st.pop() { + if filter(s) { + found.push(s.get_location_fields()); + } + for sub_state in s.children.iter() { + st.push(sub_state); + // only consider first element for sequence + if partial || sub_state.children.is_empty() { + if let FieldKey::Index(_) = sub_state.key { + break; + } + } + } + } + found + } + + /// Is null or not + pub fn is_null(&self) -> bool { + if let FieldKey::Option = self.key { + return self.pointer.is_none(); + } + if let Some(ps) = self.pointer.as_ref() { + return ps.pointer_location.is_null(); + } + false + } + + /// Is non-null or not + pub fn is_non_null(&self) -> bool { + if let FieldKey::Option = self.key { + return self.pointer.is_some(); + } + if let Some(ps) = self.pointer.as_ref() { + return !ps.pointer_location.is_null(); + } + false + } + + /// Show the state as a tree for debugging + pub fn show_tree(&self, depth: usize) { + println!( + "key: {:?}, depth: {}, addr: 
{:#x}, parent: {:?}", + self.key, depth, self as *const Self as usize, self.parent + ); + for c in &self.children { + c.show_tree(depth + 1); + let parent = c.get_parent().unwrap(); + assert!(parent.key == self.key, "parent is not match: {parent:?}"); + } + } + + /// Clone the state tree recursively with fresh (default) mutate info; pointer state is shallow-cloned + pub fn clone_without_mutate_info(&self, parent: Option>) -> Box { + let mut s = Box::new(Self { + key: self.key.clone(), + parent, + children: vec![], + mutate: Rc::new(RefCell::new(MutateState::default())), + pointer: self.pointer.as_ref().map(|s| s.shallow_clone()), + is_union: self.is_union, + ty: self.ty, + }); + s.children = self + .children + .iter() + .map(|c| c.clone_without_mutate_info(NonNull::new(s.as_mut()))) + .collect(); + s + } + + /// Duplicate the state of child `from` (with fresh mutate info) and insert the copy at index `to`; requires `from < len` and `to <= len` + pub fn dup_child_state(&mut self, from: usize, to: usize) { + debug_assert!(from < self.children.len() && to <= self.children.len()); + crate::log!( + trace, + "copy {from} to {to}, #child: {}", + self.children[from].children.len() + ); + let parent = NonNull::new(self); + let mut s = self.children[from].clone_without_mutate_info(parent); + s.key = to.into(); + if s.is_private_field() { + s.mutate.borrow_mut().set_weight(0); + } + self.children.insert(to, s); + } +} + +#[derive(Debug, Clone)] +pub struct MutateState { + pub num_mutations: usize, + pub weight: usize, + pub deterministic: bool, + pub det_iter: usize, + pub related_cmps: Vec, + pub cmp_bufs: Vec, +} + +impl Default for MutateState { + fn default() -> Self { + Self { + num_mutations: 0, + weight: 1, + deterministic: true, + det_iter: 0, + related_cmps: vec![], + cmp_bufs: vec![], + } + } +} + +impl MutateState { + /// Done deterministic step + pub fn done_deterministic(&mut self) { + self.deterministic = false; + } + + /// Count mutation + pub fn inc_mutation(&mut self) { + self.num_mutations += 1; + } + + /// Get mutation + pub fn get_mutation(&self) -> usize { + self.num_mutations + } + + /// Set weight + pub fn 
set_weight(&mut self, weight: usize) { + self.weight = weight; + } + + /// Get weight + pub fn get_weight(&self) -> usize { + self.weight + } + + /// Is zero weight or not ? + pub fn is_zero_weight(&self) -> bool { + self.weight == 0 + } + + /// Mutation on the `loc` affect cmp's value + pub fn affect_cmp(&mut self, cmp_state: CmpState) { + // if self.related_cmps.iter_mut().any(|c| c.id == cmp_state.id) { + // return; + // } + self.related_cmps.push(cmp_state); + // increase weights + self.weight = 1 + self + .related_cmps + .iter() + .filter(|c| c.op.borrow().is_solved()) + .count(); + // avoid huge weight + if self.weight > 5 { + self.weight = 5; + } + } + + /// Mutation on the `loc` affect cmp function + pub fn affect_cmp_buf(&mut self, cmp_buf: CmpBuf) { + // if self.cmp_bufs.iter_mut().any(|c| c.id == cmp_buf.id) { + // return; + // } + self.cmp_bufs.push(cmp_buf); + } + + /// Move to next deterministic iteration + pub fn next_det_iter(&mut self) { + self.det_iter += 1; + } + + pub fn set_det_iter(&mut self, iter: usize) { + self.det_iter = iter; + } +} + +#[derive(Debug)] +pub struct PointerState { + /// Inner type of pointer + pub pointer_type: &'static str, + /// Is mut + pub is_mut: bool, + /// Location of pointer + pub pointer_location: Location, + /// stub for mock sth + pub stub: bool, +} + +impl PointerState { + /// New state for pointer + pub fn new_pointer(loc: Location, pointer_type: &'static str, is_mut: bool) -> Self { + Self { + pointer_type, + is_mut, + pointer_location: loc, + stub: false, + } + } + + /// New state for fn pointer + pub fn new_fn_pointer(pointer_type: &'static str, is_mut: bool) -> Self { + Self { + pointer_type, + is_mut, + // fn pointer fill pointer directly, thus it does not need location + pointer_location: Location::null(), + stub: false, + } + } + + /// Shallow clone, that clone the location directly + pub fn shallow_clone(&self) -> Self { + Self { + pointer_type: self.pointer_type, + is_mut: self.is_mut, + 
pointer_location: self.pointer_location.use_loc(), + stub: self.stub, + } + } +} + +impl CloneProgram for Box { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + let mut new_state = Box::new(ObjectState { + key: self.key.clone(), + mutate: self.mutate.clone(), + pointer: self.pointer.clone_with_program(program), + parent: self.parent, + children: vec![], + is_union: self.is_union, + ty: self.ty, + }); + let parent_ptr = NonNull::new(new_state.as_mut()); + for c in self.children.iter() { + let mut new_c = c.clone_with_program(program); + new_c.parent = parent_ptr; + new_state.children.push(new_c); + } + new_state + } +} + +impl CloneProgram for Option { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + self.as_ref().map(|ps| PointerState { + pointer_type: ps.pointer_type, + is_mut: ps.is_mut, + pointer_location: ps.pointer_location.clone_with_program(program), + stub: ps.stub, + }) + } +} + +#[test] +fn test_state_inc() { + let mut program = FuzzProgram::default(); + let state = Box::new(ObjectState::root("test", "")); + assert_eq!(state.mutate.borrow().num_mutations, 0); + let state2 = state.clone_with_program(&mut program); + state2.inc_mutation(); + assert_eq!(state.mutate.borrow().num_mutations, 1); +} + +#[test] +fn test_get_child_and_clone_state() { + use crate::Serialize; + let mut program = FuzzProgram::default(); + let state2 = { + let mut state = Box::new(ObjectState::root("test", "")); + let sub = state + .add_child("xx".to_string(), "") + .last_child_mut() + .unwrap() + .add_child("bb".to_string(), "") + .add_child("cc".to_string(), "") + .last_child_mut() + .unwrap(); + + let fields = sub.get_location_fields(); + assert_eq!(fields.serialize().unwrap(), "[xx.cc]"); + let mut new_state = state.clone_with_program(&mut program); + new_state.add_child("dd".to_string(), ""); + state.show_tree(0); + new_state + }; + + state2.show_tree(0); + let sub1 = state2.get_child("xx").unwrap(); + assert_eq!(sub1.key, 
"xx".to_string().into()); + let sub2 = sub1.get_child("bb").unwrap(); + assert_eq!(sub2.key, "bb".to_string().into()); + let sub3 = state2.get_child_by_fields(&[]).unwrap(); + assert_eq!(sub3.key, FieldKey::Root("test".to_string())); + let sub4 = state2 + .get_child_by_fields(&["xx".to_string().into()]) + .unwrap(); + assert_eq!(sub4.key, "xx".to_string().into()); + let sub5 = state2 + .get_child_by_fields(&["xx".to_string().into(), "bb".to_string().into()]) + .unwrap(); + assert_eq!(sub5.key, "bb".to_string().into()); + sub5.inc_mutation(); + let fields = sub5.get_location_fields(); + assert_eq!(fields.serialize().unwrap(), "[xx.bb]"); +} diff --git a/hopper-core/src/runtime/object/void.rs b/hopper-core/src/runtime/object/void.rs new file mode 100644 index 0000000..a945c2c --- /dev/null +++ b/hopper-core/src/runtime/object/void.rs @@ -0,0 +1,71 @@ +//! Describe void's fuzzing trait, +//! There are two kinds of void: +//! 1) RetVoid: return void, which is similar to RUST's `()` +//! 2) ArgVoid: void in arguments, or pointer, struct ..., we use cvoid directly. + +use super::*; + +/// Void for return +pub type RetVoid = (); + +macro_rules! 
impl_void { + ($void:ident) => { + impl ObjFuzzable for $void {} + + impl ObjValue for $void {} + + impl ObjType for $void { + fn is_void() -> bool { + true + } + fn is_opaque() -> bool { + true + } + } + + impl ObjectSerialize for $void { + fn serialize_obj(&self, _state: &ObjectState) -> eyre::Result { + Ok("void".to_string()) + } + } + + impl Serialize for $void { + fn serialize(&self) -> eyre::Result { + Ok("void".to_string()) + } + } + + impl ObjectTranslate for $void {} + + impl ObjectDeserialize for $void { + fn deserialize_obj( + de: &mut Deserializer, + state: &mut ObjectState, + ) -> eyre::Result { + let _ = state.replace_weight(0); + state.done_deterministic(); + de.eat_token("void")?; + Ok(Self::default()) + } + } + + impl Deserialize for $void { + fn deserialize(_de: &mut Deserializer) -> eyre::Result { + unimplemented!(); + } + } + }; +} + +impl_void!(FuzzVoid); +impl_void!(RetVoid); + +impl Clone for FuzzVoid { + fn clone(&self) -> Self { + // unreachable!("void can't be clone!") + Self(0_u8) + } +} + +unsafe impl Sync for FuzzVoid {} +unsafe impl Send for FuzzVoid {} diff --git a/hopper-core/src/runtime/program.rs b/hopper-core/src/runtime/program.rs new file mode 100644 index 0000000..9272aaa --- /dev/null +++ b/hopper-core/src/runtime/program.rs @@ -0,0 +1,499 @@ +//! 
Program for fuzzing + +use std::fmt::Write as _; +use std::{ + cell::RefCell, + fmt, + hash::{Hash, Hasher}, + rc::Rc, +}; + +use super::*; +use crate::{ + feedback::{CmpOperation, ResourceStates, ReviewCollector, SanitizeChecker}, + log, MutateOperator, RngState, +}; + +/// Program for fuzzing +#[derive(Debug, Default)] +pub struct FuzzProgram { + // Id of the current program, + pub id: usize, + // This program's parent + pub parent: Option, + /// Statements + pub stmts: Vec, + /// Compare operations, we can mutate them after clone + pub cmps: Rc>>>, + /// Operators + pub ops: Vec, + /// Temporary indices + pub tmp_indices: Vec, + /// rng + pub rng: Option, + /// mutate flag + pub mutate_flag: u8, +} + +impl FuzzProgram { + /// Evaluate the statements of this program in order, stopping at the first error + pub fn eval(&mut self) -> eyre::Result<()> { + reset_rt_stmt_index(); + let mut resource_states = ResourceStates::default(); + for i in 0..self.stmts.len() { + let (used_stmts, unused) = self.stmts.split_at_mut(i); + log!(trace, "{}", unused[0].serialize()?.trim_end()); + unused[0].stmt.eval(used_stmts, &mut resource_states)?; + inc_rt_stmt_index(); + } + set_rt_last_stmt_index(); + Ok(()) + } + + /// Review the program + pub fn review(&mut self) -> eyre::Result<()> { + static mut REVIEWING: bool = false; + static mut STMTS: *const IndexedStmt = std::ptr::null(); + static mut COLLECTOR: *mut ReviewCollector = std::ptr::null_mut(); + static mut STATE: *const ResourceStates = std::ptr::null(); + extern "C" fn on_exit() { + // it can't hook SIGKILL + if unsafe { !REVIEWING } { + return; + } + #[cfg(not(test))] + unsafe { + crate::utils::LOG_COND = false + }; + let instrs = crate::feedback::get_instr_list(); + let index = instrs.last_stmt_index(); + println!("exit at {index}.."); + let used_stmts = unsafe { std::slice::from_raw_parts(STMTS, index) }; + let cur = unsafe { &*STMTS.add(index) }; + if let FuzzStmt::Call(call) = &cur.stmt { + let rc: &mut ReviewCollector = unsafe { &mut *COLLECTOR }; + let state: &ResourceStates 
= unsafe { &*STATE }; + println!("call {}..", call.name); + // logger is thread local and can not be called at exit, + // so we should make sure this function won't log sth by RUST_LOG=info + let ret = rc.collect_call_review(call, index, used_stmts, state); + if let Err(err) = ret { + println!("err: {err:?}"); + } + } + unsafe { + REVIEWING = false; + } + } + let mut review_collector = Box::new(ReviewCollector::new(self.id)?); + let mut resource_states = Box::::default(); + unsafe { + libc::atexit(on_exit); + REVIEWING = true; + STMTS = self.stmts.as_ptr(); + COLLECTOR = review_collector.as_mut() as *mut ReviewCollector; + STATE = resource_states.as_ref() as *const ResourceStates; + } + reset_rt_stmt_index(); + resource_states.set_review(); + for i in 0..self.stmts.len() { + let (used_stmts, unused) = self.stmts.split_at_mut(i); + log!(trace, "{}", unused[0].serialize()?.trim_end()); + unused[0].stmt.eval(used_stmts, &mut resource_states)?; + if let FuzzStmt::Call(call) = &unused[0].stmt { + review_collector.collect_call_review(call, i, used_stmts, &resource_states)?; + } + inc_rt_stmt_index(); + } + set_rt_last_stmt_index(); + unsafe { + REVIEWING = false; + } + Ok(()) + } + + /// Sanitize the program, try to find false positives in crashes + pub fn sanitize(&mut self) -> eyre::Result<()> { + let mut checker = SanitizeChecker::new()?; + reset_rt_stmt_index(); + let mut resource_states = ResourceStates::default(); + resource_states.set_review(); + for i in 0..self.stmts.len() { + let (used_stmts, unused) = self.stmts.split_at_mut(i); + log!(trace, "{}", unused[0].serialize()?.trim_end()); + checker.check_before_eval_stmt(&unused[0], used_stmts, &resource_states)?; + unused[0].stmt.eval(used_stmts, &mut resource_states)?; + inc_rt_stmt_index(); + } + set_rt_last_stmt_index(); + Ok(()) + } + + /// Append new statement + pub fn append_stmt>(&mut self, stmt: T) -> StmtIndex { + let indexed_stmt = IndexedStmt::new(stmt.into(), self.stmts.len()); + let use_index = 
indexed_stmt.index.use_index(); + self.stmts.push(indexed_stmt); + use_index + } + + /// Insert new statement at `index`, then renumber all statement indices + pub fn insert_stmt>(&mut self, index: usize, stmt: T) -> StmtIndex { + let indexed_stmt = IndexedStmt::new(stmt.into(), index); + let use_index = indexed_stmt.index.use_index(); + self.stmts.insert(index, indexed_stmt); + self.resort_indices(); + use_index + } + + /// Insert the new statement at the position of the stub statement if one exists, + /// otherwise append it + pub fn insert_or_append_stmt>(&mut self, stmt: T) -> eyre::Result { + let stmt = if let Some(stub_index) = self.get_stub_stmt_index() { + // mutate mode + self.insert_stmt(stub_index.get(), stmt) + } else { + self.append_stmt(stmt) + }; + Ok(stmt) + } + + /// Delete statement at `index` + pub fn delete_stmt(&mut self, index: usize) { + // log!(trace, "delete stmt index: {:?}", index); + let _is = self.stmts.remove(index); + // self.tmp_indices.push(stmt.index); + self.resort_indices(); + } + + /// Renumber the index of every statement to match its position, + /// it is called after inserting or deleting statements + pub fn resort_indices(&mut self) { + for (i, indexed_stmt) in self.stmts.iter_mut().enumerate() { + indexed_stmt.index.set(i); + } + } + + /// Withdraw the statement we have lent: + /// put `stmt` back in place of the first stub statement and return its index + pub fn withdraw_stmt(&mut self, stmt: FuzzStmt) -> eyre::Result { + let is = self + .stmts + .iter_mut() + .find(|is| is.stmt.is_stub()) + .ok_or_else(|| eyre::eyre!("can't find any stub stmt"))?; + let index = is.index.use_index(); + let _ = std::mem::replace(&mut is.stmt, stmt); + Ok(index) + } + + /// Save mutate state for replay + pub fn save_mutate_state(&mut self) { + let rng_cur = crate::save_rng_state(); + self.rng = Some(rng_cur); + self.mutate_flag = crate::get_mutate_flag(); + self.ops = vec![]; + } + + /// Get index of the first statement that is a stub + pub fn get_stub_stmt_index(&self) -> Option { + self.stmts + .iter() + .find(|is| is.stmt.is_stub()) + .map(|is| is.index.use_index()) + } + + /// Get failure stmt + pub fn get_fail_stmt_index(&self) -> Option { + self.stmts + .iter() + .find(|is| { + if let 
FuzzStmt::Call(call) = &is.stmt { + if call.failure { + return true; + } + } + false + }) + .map(|is| is.index.use_index()) + } + + /// Get statement by loc's index + pub fn get_stmt_by_loc(&self, loc: &impl RcLocation) -> eyre::Result<&IndexedStmt> { + self.stmts + .get(loc.get_index()?.get()) + .ok_or_else(|| eyre::eyre!("index in loc out of bound stmt")) + } + + /// Get i-th stmt + pub fn get_stmt(&self, index: usize) -> eyre::Result<&IndexedStmt> { + if let Some(is) = self.stmts.get(index) { + return Ok(is); + } + eyre::bail!(format!( + "fail to get {index} in program, length: {}", + self.stmts.len() + )) + } + + /// Get target stmt + pub fn get_target_stmt(&self) -> Option<&CallStmt> { + if let Some(is) = self.stmts.last() { + match &is.stmt { + FuzzStmt::Call(call) => return Some(call), + FuzzStmt::Assert(assert) => { + if let Some(stmt) = assert.get_stmt() { + if let FuzzStmt::Call(call) = &self.stmts[stmt.get()].stmt { + return Some(call); + } + } + } + _ => {} + } + } + None + } + + /// Get index of target stmt + pub fn get_target_index(&self) -> Option { + if let Some(is) = self.stmts.last() { + match &is.stmt { + FuzzStmt::Call(_) => return Some(is.index.get()), + FuzzStmt::Assert(assert) => { + if let Some(stmt) = assert.get_stmt() { + if let FuzzStmt::Call(_) = &self.stmts[stmt.get()].stmt { + return Some(stmt.get()); + } + } + } + _ => { + crate::log!(error, "last stmt is: {:?}", is.serialize().unwrap()); + } + } + } + None + } + + /// Get i-th call stmt + pub fn get_call_stmt(&self, index: usize) -> Option<&CallStmt> { + if let Some(is) = self.stmts.get(index) { + if let FuzzStmt::Call(call) = &is.stmt { + return Some(call); + } + } + None + } + + /// Get i-th call stmt + pub fn get_call_stmt_mut(&mut self, index: usize) -> Option<&mut CallStmt> { + if let Some(is) = self.stmts.get_mut(index) { + if let FuzzStmt::Call(call) = &mut is.stmt { + return Some(call); + } + } + None + } + + /// Get the failed call stmt + pub fn get_fail_call_stmt(&self) -> 
Option<&CallStmt> { + if let Some(fail_at) = self.get_fail_stmt_index() { + if let Some(call) = self.get_call_stmt(fail_at.get()) { + return Some(call); + } + } + None + } + + /// Get stmt by index uniq + pub fn get_stmt_by_index_uniq(&self, index: &T) -> Option<&IndexedStmt> { + let index_uniq = index.get_uniq(); + let index_val = index.get(); + // return quickly if they are matched + if index_val < self.stmts.len() { + let is = &self.stmts[index.get()]; + if is.index.get_uniq() == index_uniq { + return Some(is); + } + } + self.stmts + .iter() + .find(|is| is.index.get_uniq() == index_uniq) + } + + /// Get mut stmt by index uniq + pub fn get_mut_stmt_by_index_uniq( + &mut self, + index: &T, + ) -> Option<&mut IndexedStmt> { + let index_uniq = index.get_uniq(); + self.stmts + .iter_mut() + .find(|is| is.index.get_uniq() == index_uniq) + } + + /// Get position of stmt by uniq + pub fn position_stmt_by_index_uniq(&mut self, index: &T) -> Option { + let index_uniq = index.get_uniq(); + self.stmts + .iter() + .position(|is| is.index.get_uniq() == index_uniq) + } + + /// Clone without state information + pub fn clone_without_state(&self) -> eyre::Result { + let output = self.serialize()?; + let mut p = read_program(&output, false)?; + // clone ops + let op_output = self.ops.serialize()?; + let mut de = Deserializer::new(&op_output, Some(&mut p)); + let ops = Vec::::deserialize(&mut de)?; + p.ops = ops; + p.rng = self.rng.clone(); + p.mutate_flag = self.mutate_flag; + Ok(p) + } + + /// Set all calls's track_cov + pub fn set_calls_track_cov(&mut self, track_cov: bool) -> bool { + let mut changed = false; + for is in self.stmts.iter_mut() { + if let FuzzStmt::Call(call) = &mut is.stmt { + if call.track_cov != track_cov { + call.track_cov = track_cov; + changed = true; + } + } + } + changed + } + + /// find all track calls + pub fn get_track_calls(&self) -> Vec { + let mut track_calls = vec![]; + for is in &self.stmts { + if let FuzzStmt::Call(call) = &is.stmt { + if 
call.track_cov { + track_calls.push(is.index.get_uniq()); + } + } + } + track_calls + } + + /// Check if any tmp index is unused, and then clear all of them + pub fn clear_tmp_indices(&mut self) -> eyre::Result<()> { + for i in &self.tmp_indices { + if i.get_ref_used() == 1 { + log!(warn, "program: {}", self.serialize()?); + eyre::bail!("index {:?} is unused", i); + } + } + self.tmp_indices.clear(); + Ok(()) + } + + /// Serialize the program followed by its rng state (if any), mutate flag, and ops (if not empty) + pub fn serialize_all(&self) -> eyre::Result { + let mut buf = self.serialize()?; + if let Some(rng) = &self.rng { + let _ = writeln!(buf, " {}", rng.serialize()?); + } + let _ = writeln!(buf, " {}", self.mutate_flag); + if !self.ops.is_empty() { + let _ = writeln!(buf, " {}", self.ops.serialize()?); + } + Ok(buf) + } +} + +pub trait CloneProgram { + /// Clone with program + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self; +} + +impl Clone for FuzzProgram { + fn clone(&self) -> Self { + let mut p = Self { + id: self.id, + parent: self.parent, + stmts: vec![], + cmps: self.cmps.clone(), + ops: vec![], + tmp_indices: vec![], + rng: self.rng.clone(), + mutate_flag: self.mutate_flag, + }; + for stmt in self.stmts.iter() { + let stmt = stmt.clone_with_program(&mut p); + p.stmts.push(stmt); + } + p.clear_tmp_indices().unwrap(); + p + } +} + +impl PartialEq for FuzzProgram { + fn eq(&self, other: &FuzzProgram) -> bool { + self.id == other.id + } +} + +impl Eq for FuzzProgram {} + +impl Hash for FuzzProgram { + fn hash(&self, state: &mut H) { + self.id.hash(state); + } +} + +/// Display the program using the output of `serialize_all` +impl fmt::Display for FuzzProgram { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.serialize_all().unwrap()) + } +} + +// Used in updating stmt_index for cmp feedback +#[cfg(feature = "e9_mode")] +extern "C" { + // defined in asm.S + fn __hopper_inc_stmt_index(); + fn __hopper_reset_stmt_index(); + fn __hopper_last_stmt_index(); +} + +#[inline] +fn inc_rt_stmt_index() { + 
#[cfg(all(feature = "e9_mode", not(test)))] + unsafe { + __hopper_inc_stmt_index() + } +} + +#[inline] +fn reset_rt_stmt_index() { + #[cfg(all(feature = "e9_mode", not(test)))] + unsafe { + __hopper_reset_stmt_index() + } +} + +#[inline] +fn set_rt_last_stmt_index() { + #[cfg(all(feature = "e9_mode", not(test)))] + unsafe { + __hopper_last_stmt_index() + } +} + +#[test] +fn test_stmt_insert() { + let mut program = FuzzProgram::default(); + let load = LoadStmt::generate_new(&mut program, "u64", "test", 0).unwrap(); + let _index = program.append_stmt(load); + let load2 = LoadStmt::generate_new(&mut program, "i32", "test", 0).unwrap(); + let _index2 = program.insert_stmt(0, load2); + let load3 = LoadStmt::generate_new(&mut program, "char", "test", 0).unwrap(); + let _index3 = program.insert_stmt(0, load3); + for (i, stmt) in program.stmts.iter().enumerate() { + assert!(stmt.index.get() == i) + } +} diff --git a/hopper-core/src/runtime/serde.rs b/hopper-core/src/runtime/serde.rs new file mode 100644 index 0000000..fb4fb3b --- /dev/null +++ b/hopper-core/src/runtime/serde.rs @@ -0,0 +1,375 @@ +//! Trait for serialize and deserialize program + +use std::str::FromStr; + +use eyre::Context; +use num::Float; + +use super::*; + +/// Trait for serialize runtime irs without states, e.g. program, call, value +pub trait Serialize { + fn serialize(&self) -> eyre::Result; +} + +/// Serialize for object values with states +pub trait ObjectSerialize { + fn serialize_obj(&self, state: &ObjectState) -> eyre::Result; +} + +/// Structure that maintains string buffer, and implements useful functions for deserialize. +pub struct Deserializer<'a> { + pub buf: &'a str, + /// program is used to serialize/deserialize index + pub program: Option<&'a mut FuzzProgram>, + pub canary: bool, +} + +/// Trait for deserialize runtime irs without states, e.g. 
program, call +pub trait Deserialize: Sized { + fn deserialize(de: &mut Deserializer) -> eyre::Result; +} + +/// Deserialize for object values with states +pub trait ObjectDeserialize: Sized { + fn deserialize_obj(de: &mut Deserializer, state: &mut ObjectState) -> eyre::Result; +} + +impl<'a> Deserializer<'a> { + pub fn new(buf: &'a str, program: Option<&'a mut FuzzProgram>) -> Self { + let buf = buf.trim(); + Self { + buf, + program, + canary: false, + } + } + + pub fn peek_char(&self) -> Option { + self.buf.chars().next() + } + + pub fn next_char(&mut self) -> Option { + let mut chars = self.buf.chars(); + let c = chars.next(); + self.buf = chars.as_str(); + c + } + + pub fn strip_token(&mut self, token: &str) -> bool { + if let Some(buf) = self.buf.strip_prefix(token) { + self.buf = buf; + return true; + } + false + } + + pub fn eat_token(&mut self, token: &str) -> eyre::Result<()> { + self.buf = self + .buf + .strip_prefix(token) + .ok_or_else(|| eyre::eyre!("`{}` should start with `{}`", self.buf, token))?; + self.trim_start(); + Ok(()) + } + + pub fn next_token_until(&mut self, del: &str) -> eyre::Result<&'a str> { + let (token, buf) = self + .buf + .split_once(del) + .ok_or_else(|| eyre::eyre!("`{}` can not split by `{}`", self.buf, del))?; + self.buf = buf; + self.trim_start(); + Ok(token) + } + + pub fn eat_ty(&mut self) -> eyre::Result<&'a str> { + // crate::log!(warn, "self.buf before: {:?}", self.buf); + let mut opened = 0; + let mut index = 0; + let mut it = self.buf.chars(); + while let Some(c) = it.next() { + // crate::log!(warn, "c: {}, opened: {}", c, opened); + match c { + '<' | '(' | '[' => { + opened += 1; + index += 1; + } + '>' | ')' | ']' => { + opened -= 1; + index += 1; + } + ',' => { + if opened == 0 { + break; + } + index += 1; + } + '-' => { + it.next(); + index += 2; + } + _ => { + index += 1; + } + } + } + let (ty, buf) = self.buf.split_at(index); + self.buf = buf; + // crate::log!(warn, "self.buf: {:?}", self.buf); + 
self.eat_token(",") + .with_context(|| format!("failed to eat ty: ty: {ty} buf: {buf}"))?; + self.trim_start(); + Ok(ty) + } + + pub fn parse_next_until(&mut self, del: &str) -> eyre::Result { + let token = self.next_token_until(del)?; + match token.parse() { + Ok(v) => Ok(v), + Err(_) => Err(eyre::eyre!("fail to parse `{}`", token)), + } + } + + pub fn parse_float(&mut self) -> eyre::Result { + if self.strip_token("NaN") { + return Ok(Float::nan()); + } + if self.strip_token("inf") { + return Ok(Float::infinity()); + } + if self.strip_token("-inf") { + return Ok(Float::neg_infinity()); + } + let mut len = 0; + for c in self.buf.chars() { + match c { + '0'..='9' | '-' | '.' => {} + _ => { + break; + } + } + len += 1; + } + eyre::ensure!(len > 0, "`{}` is not float`", self.buf); + let token = &self.buf[..len]; + self.buf = &self.buf[len..]; + self.trim_start(); + match token.parse() { + Ok(v) => Ok(v), + Err(_) => Err(eyre::eyre!("fail to parse `{}` as float", token)), + } + } + + pub fn parse_number(&mut self) -> eyre::Result { + let mut len = 0; + for c in self.buf.chars() { + match c { + '0'..='9' | '-' => {} + _ => { + break; + } + } + len += 1; + } + eyre::ensure!(len > 0, "`{}` is not number`", self.buf); + let token = &self.buf[..len]; + self.buf = &self.buf[len..]; + self.trim_start(); + match token.parse() { + Ok(v) => Ok(v), + Err(_) => Err(eyre::eyre!("fail to parse `{}` as number", token)), + } + } + + pub fn parse_string(&mut self) -> eyre::Result { + let mut len = 0; + for c in self.buf.chars() { + match c { + '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => {} + _ => { + break; + } + } + len += 1; + } + eyre::ensure!(len > 0, "`{}` is not string`", self.buf); + let token = &self.buf[..len]; + self.buf = &self.buf[len..]; + self.trim_start(); + Ok(token.to_string()) + } + + pub fn trim_start(&mut self) { + self.buf = self.buf.trim_start(); + } + + pub fn as_str(&self) -> &str { + self.buf + } +} + +#[inline] +fn serialize_option_str(out: &mut String, opt: 
&Option) { + if let Some(inner) = opt { + out.push_str(&inner.to_string()); + } else { + out.push_str("None"); + } +} + +/// Serialize FuzzProgram +/// +/// The output looks like: +/// ID: 1, Parent: None +/// <0> load [type] = [data] +/// <1> call [func_name] (<1>, <2>) +/// +/// ... +impl Serialize for FuzzProgram { + fn serialize(&self) -> eyre::Result { + let mut out = String::with_capacity(1024); + out.push_str("
ID: "); + out.push_str(&self.id.to_string()); + out.push_str(", Parent: "); + serialize_option_str(&mut out, &self.parent); + out.push_str(",\n"); + for indexed_stmt in self.stmts.iter() { + let stmt_str = &indexed_stmt + .serialize() + .with_context(|| format!("fail to serialize stmt: {indexed_stmt:?}"))?; + out.push_str(stmt_str); + } + out.push_str("\n"); + Ok(out) + } +} + +/// Deserialize FuzzProgram +/// +/// We do not implement the deserialize function here; use `read_program` instead. +impl Deserialize for FuzzProgram { + fn deserialize(_de: &mut Deserializer) -> eyre::Result { + // use `read_program` for deserialize + unimplemented!(); + } +} + +/// Read program from a string buffer. +/// +/// The first line is parsed as the header (id and parent), +/// and each following line is deserialized as a statement. +pub fn read_program(buf: &str, canary: bool) -> eyre::Result { + let mut program = FuzzProgram::default(); + let mut lines = buf.lines(); + let first = match lines.next() { + Some(l) => l, + _ => { + eyre::bail!("fail to read first line from buffer"); + } + }; + // parse header + let mut de = Deserializer::new(first, None); + de.eat_token("
")?; + de.eat_token("ID:")?; + let id: usize = de.parse_number().context("fail to parse id")?; + de.eat_token(",")?; + de.eat_token("Parent:")?; + let mut parent = None; + if !de.strip_token("None") { + parent = Some(de.parse_number().context("fail to parse parent")?); + } + de.eat_token(",")?; + program.id = id; + program.parent = parent; + // parse stmts + for line in lines { + let mut de = Deserializer::new(line, Some(&mut program)); + de.canary = canary; + if de.strip_token("") { + break; + } + let is = IndexedStmt::deserialize(&mut de) + .with_context(|| format!("fail to deserialize: {line}"))?; + program.stmts.push(is); + } + program.clear_tmp_indices()?; + Ok(program) +} + +/// Parse ops, flags, and rng state +pub fn parse_program_extra(buf: &str, program: &mut FuzzProgram) -> eyre::Result<()> { + for line in buf.lines() { + let mut de = Deserializer::new(line, Some(program)); + if de.strip_token("") { + de.trim_start(); + let rng_state = crate::RngState::deserialize(&mut de)?; + program.rng = Some(rng_state); + } else if de.strip_token("") { + de.trim_start(); + program.mutate_flag = de.parse_number()?; + } else if de.strip_token("") { + de.trim_start(); + program.ops = Vec::::deserialize(&mut de)?; + } + } + Ok(()) +} + +/// Read value based on its type +pub fn read_value( + de: &mut Deserializer, + ty: &str, + state: &mut ObjectState, +) -> eyre::Result { + // case for vec + if let Some(v_ty) = ty.strip_prefix("alloc::vec::Vec<") { + let ty = v_ty + .strip_suffix('>') + .ok_or_else(|| eyre::eyre!("should end with `>`"))?; + global_gadgets::get_instance() + .get_object_builder(ty)? + .deserialize_vec(de, state) + } else { + global_gadgets::get_instance() + .get_object_builder(ty)? 
+ .deserialize(de, state) + } +} + +#[test] +fn test_program_serde() { + fn serialize_and_deserialize(target: &'static str) { + crate::config::get_config_mut().func_target = Some(target); + let program = FuzzProgram::generate_program(None, false).unwrap(); + let output = program.serialize().unwrap(); + println!("program {target}"); + println!("{}", &output); + + let read_back = read_program(&output, false).unwrap(); + let output2 = read_back.serialize().unwrap(); + println!("read back: {target}"); + println!("{output2}"); + assert_eq!(output, output2); + } + + for _ in 0..16 { + serialize_and_deserialize("func_add"); + serialize_and_deserialize("func_create"); + serialize_and_deserialize("func_use"); + serialize_and_deserialize("func_struct"); + } +} + +#[test] +fn test_freed_serde() { + let mut p = FuzzProgram::default(); + p.append_stmt(FuzzStmt::Assert(AssertStmt::default().into())); + p.append_stmt(FuzzStmt::Assert(AssertStmt::default().into())); + let last = p.append_stmt(FuzzStmt::Assert(AssertStmt::default().into())); + p.stmts[0].freed = Some(last.downgrade()); + let output = p.serialize().unwrap(); + println!("{output}"); + let read_back = read_program(&output, false).unwrap(); + let output2 = read_back.serialize().unwrap(); + assert_eq!(output, output2); +} diff --git a/hopper-core/src/runtime/stmt/assert.rs b/hopper-core/src/runtime/stmt/assert.rs new file mode 100644 index 0000000..9c1aa70 --- /dev/null +++ b/hopper-core/src/runtime/stmt/assert.rs @@ -0,0 +1,245 @@ +//! Assert statement +//! Used for assert some attributes +//! 
format: assert target + +use eyre::ContextCompat; +use hopper_derive::Serde; + +use crate::{feedback::ResourceStates, runtime::*, utils}; + +#[derive(Debug, Clone, Serde)] +pub enum AssertRule { + /// Assert nothing + None, + /// Check pointer non-null + NonNull { stmt: WeakStmtIndex }, + /// Initialized + Initialized { + stmt: WeakStmtIndex, + call: WeakStmtIndex, + }, + Eq { + stmt: WeakStmtIndex, + expected: StmtIndex, + }, + Neq { + stmt: WeakStmtIndex, + expected: StmtIndex, + } +} + +#[derive(Debug, Default)] +pub struct AssertStmt { + pub rule: AssertRule, +} + +impl Default for AssertRule { + fn default() -> Self { + Self::None + } +} + +impl AssertStmt { + pub fn assert_non_null(stmt: StmtIndex) -> Self { + let stmt = stmt.downgrade(); + Self { + rule: AssertRule::NonNull { stmt }, + } + } + pub fn assert_initialized(stmt: StmtIndex, call: StmtIndex) -> Self { + let stmt = stmt.downgrade(); + let call = call.downgrade(); + Self { + rule: AssertRule::Initialized { stmt, call }, + } + } + pub fn assert_eq(stmt: StmtIndex, expected: StmtIndex) -> Self { + Self { + rule: AssertRule::Eq { stmt: stmt.downgrade(), expected }, + } + } + pub fn assert_neq(stmt: StmtIndex, expected: StmtIndex) -> Self { + Self { + rule: AssertRule::Neq { stmt: stmt.downgrade(), expected }, + } + } + pub fn get_stmt(&self) -> Option<&WeakStmtIndex> { + match &self.rule { + AssertRule::NonNull { stmt } => Some(stmt), + AssertRule::Eq { stmt, expected: _ } => Some(stmt), + AssertRule::Neq { stmt, expected: _ } => Some(stmt), + _ => None + } + } +} + +impl StmtView for AssertStmt { + const KEYWORD: &'static str = "assert"; + + fn eval( + &mut self, + used_stmts: &mut [IndexedStmt], + _resource_states: &mut ResourceStates, + ) -> eyre::Result<()> { + match &self.rule { + AssertRule::NonNull { stmt } => { + let index = stmt.get(); + if let FuzzStmt::Call(call) = &used_stmts[index].stmt { + if let Some(val) = &call.ret { + let type_name = val.type_name(); + if 
utils::is_pointer_type(type_name) { + let ptr = val.get_ptr_by_keys(&[FieldKey::Pointer])?; + if ptr.is_null() || (ptr as usize) < 0x30_0000usize { + eyre::bail!(crate::HopperError::AssertError{ + msg: format!( + "assert non-null failure for {} at {index}, return address: {ptr:?}", + call.fg.f_name + ), + silent: true}); + } + } + } + } + } + AssertRule::Initialized { stmt, call: _ } => { + let index = stmt.get(); + if let FuzzStmt::Load(load) = &used_stmts[index].stmt { + let ty = load.state.ty; + let ptr = if utils::is_vec_type(ty) { + load.value.get_ptr_by_keys(&[FieldKey::Index(0), FieldKey::Pointer])? + } else { + load.value.get_ptr_by_keys(&[FieldKey::Pointer])? + }; + if ptr.is_null() { + eyre::bail!(crate::HopperError::AssertError { + msg: format!( + "assert initialized failure for {} at {index}", + load.value.type_name() + ), + silent: true + }); + } + } + } + AssertRule::Eq { stmt, expected } => { + let index = stmt.get(); + let expected = expected.get(); + if let FuzzStmt::Call(call) = &used_stmts[index].stmt { + if let Some(val) = &call.ret { + let expected_val = match &used_stmts[expected].stmt { + FuzzStmt::Call(call) => { + call.ret.as_ref().context("call should return value")? 
+ } + FuzzStmt::Load(load) => &load.value, + _ => { + eyre::bail!("expected statement should be call or load.") + } + }; + eyre::ensure!( + val.type_id() == expected_val.type_id(), + "the compare values should have the same types" + ); + let val_str = val.serialize()?; + let expected_str = expected_val.serialize()?; + if val_str != expected_str { + eyre::bail!(crate::HopperError::AssertError { + msg: format!( + "assert equal but {val_str} != {expected_str}", + ), + silent: false + }); + } + } + } + } + AssertRule::Neq { stmt, expected } => { + let index = stmt.get(); + let expected = expected.get(); + if let FuzzStmt::Call(call) = &used_stmts[index].stmt { + if let Some(val) = &call.ret { + let expected_val = match &used_stmts[expected].stmt { + FuzzStmt::Call(call) => { + call.ret.as_ref().context("call should return value")? + } + FuzzStmt::Load(load) => &load.value, + _ => { + eyre::bail!("expected statement should be call or load.") + } + }; + eyre::ensure!( + val.type_id() == expected_val.type_id(), + "the compare values should have the same types" + ); + let val_str = val.serialize()?; + let expected_str = expected_val.serialize()?; + if val_str == expected_str { + eyre::bail!(crate::HopperError::AssertError { + msg: format!( + "assert not equal but {val_str} == {expected_str}", + ), + silent: false + }); + } + } + } + } + AssertRule::None => {} + } + Ok(()) + } + + fn get_value(&self) -> Option<&FuzzObject> { + None + } +} + +impl CloneProgram for AssertStmt { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + Self { + rule: self.rule.clone_with_program(program), + } + } +} + +impl CloneProgram for AssertRule { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + match self { + AssertRule::NonNull { stmt } => AssertRule::NonNull { + stmt: stmt.clone_with_program(program), + }, + AssertRule::Initialized { stmt, call } => AssertRule::Initialized { + stmt: stmt.clone_with_program(program), + call: 
call.clone_with_program(program), + }, + AssertRule::Eq { stmt, expected } => AssertRule::Eq { + stmt: stmt.clone_with_program(program), + expected: expected.clone_with_program(program), + }, + AssertRule::Neq { stmt, expected } => AssertRule::Neq { + stmt: stmt.clone_with_program(program), + expected: expected.clone_with_program(program), + }, + _ => self.clone(), + } + } +} + +impl From for FuzzStmt { + fn from(stmt: AssertStmt) -> Self { + FuzzStmt::Assert(Box::new(stmt)) + } +} + +impl Serialize for AssertStmt { + fn serialize(&self) -> eyre::Result { + Ok(format!("{} {}", Self::KEYWORD, self.rule.serialize()?,)) + } +} + +impl Deserialize for AssertStmt { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + // de.strip_token(Self::KEYWORD); + let rule = AssertRule::deserialize(de)?; + Ok(Self { rule }) + } +} diff --git a/hopper-core/src/runtime/stmt/call.rs b/hopper-core/src/runtime/stmt/call.rs new file mode 100644 index 0000000..3936938 --- /dev/null +++ b/hopper-core/src/runtime/stmt/call.rs @@ -0,0 +1,435 @@ +//! Call statement +//! format: call fn_name(args) + +use std::{cell::RefCell, rc::Rc}; + +use super::*; +use crate::{feedback, global_gadgets, runtime::*, utils}; + +// det_index: better to be independent of program? 
+#[derive(Debug)] +pub struct CallStmt { + /// ident name + pub ident: String, + /// function name + pub name: String, + /// function gadget + pub fg: FnGadget, + /// arguments + pub args: Vec, + /// return object + pub ret: Option, + /// ir of return object + pub ret_ir: Vec, + /// affect by relative or implicit calls + pub contexts: Vec, + /// track its coverage or not + pub track_cov: bool, + /// failure: crash or hang at this call + pub failure: bool, + /// Index for deterministic mutation + pub det_index: Rc>, +} + +#[derive(Debug)] +pub struct CallRetIR { + pub fields: LocFields, + pub value: FuzzObject, + pub state: Box, + pub used: Option, +} + +impl StmtView for CallStmt { + const KEYWORD: &'static str = "call"; + + fn get_value(&self) -> Option<&FuzzObject> { + self.ret.as_ref() + } + + fn eval( + &mut self, + used_stmts: &mut [IndexedStmt], + resource_states: &mut ResourceStates, + ) -> eyre::Result<()> { + crate::log!( + trace, + "call {} with args: {}", + self.name, + self.args.serialize()? 
+ ); + let p_start_at = std::time::Instant::now(); + resource_states.check_arguments(&self.args, used_stmts)?; + let check_secs = p_start_at.elapsed().as_micros(); + crate::log!(trace, "check arg time: {} micro seconds", check_secs); + let arguments: Vec<&FuzzObject> = self + .args + .iter() + .map(|i| i.get_stmt_value(used_stmts).unwrap()) + .collect(); + crate::log!(trace, "arguments: {:?}", arguments); + // enable track coverage for this function call or not + #[cfg(all(feature = "e9_mode", not(test)))] + if self.track_cov { + feedback::enable_coverage_feedback(); + if crate::config::get_api_sensitive_cov() { + let context = hash_context(self.fg.f_name); + // crate::log!(trace, "context: {context}"); + feedback::set_coverage_context(context); + } else { + feedback::set_coverage_context(0); + } + } else { + feedback::disable_coverage_feedback(); + } + let ret = self.fg.f.eval(&arguments); + let eval_secs = p_start_at.elapsed().as_micros() - check_secs; + crate::log!(trace, "eval time: {} micro seconds", eval_secs); + feedback::disable_coverage_feedback(); + crate::log!(trace, "return ({}): {:?}", ret.type_name(), ret); + self.ret = Some(ret); + #[cfg(all(feature = "e9_mode", not(test)))] + resource_states.update_pointers_after_call()?; + let update_secs = p_start_at.elapsed().as_micros() - eval_secs; + crate::log!(trace, "update time: {} micro seconds", update_secs); + Ok(()) + } +} + +impl CallStmt { + pub fn new(ident: String, name: String, fg: FnGadget) -> Self { + Self { + ident, + name, + fg, + args: vec![], + ret: None, + ret_ir: vec![], + contexts: vec![], + track_cov: false, + failure: false, + det_index: Rc::new(RefCell::new(0)), + } + } + + /// Set argument + pub fn set_arg(&mut self, arg_pos: usize, stmt: StmtIndex) { + if self.args.len() <= arg_pos { + self.args.push(stmt) + } else { + self.args[arg_pos] = stmt; + } + } + + /// If previous call contains any context or not + pub fn has_any_context(&self, program: &FuzzProgram, f_name: &str) -> bool { + 
self.has_implicit_context(program, f_name) || self.has_relative_context(program, f_name) + } + + /// If previous call contains relative context or not + pub fn has_relative_context(&self, program: &FuzzProgram, f_name: &str) -> bool { + self.args + .iter() + .any(|arg_stmt| Self::has_relative_context_for_stmt(program, f_name, arg_stmt)) + } + + /// If previous call contains relative context for specific statement or not + pub fn has_relative_context_for_stmt( + program: &FuzzProgram, + f_name: &str, + arg_stmt: &StmtIndex, + ) -> bool { + for is in program.stmts.iter() { + if is.stmt.is_stub() { + break; + } + if let FuzzStmt::Call(call) = &is.stmt { + if call.fg.f_name == f_name + && call.is_relative() + && call.is_related_call_for_stmt(arg_stmt, program) + { + return true; + } + } + } + false + } + + /// Check if `self` is `call`'s relative call, find the overlap arg + pub fn has_overlop_arg(&self, program: &FuzzProgram, call: &CallStmt) -> Option { + self.args.iter().position(|arg_stmt| call.is_related_call_for_stmt(arg_stmt, program)) + } + + /// If the call contains implicit context or not + pub fn has_implicit_context(&self, program: &FuzzProgram, f_name: &str) -> bool { + for ctx in &self.contexts { + if let Some(is) = program.get_stmt_by_index_uniq(ctx) { + if let FuzzStmt::Call(call) = &is.stmt { + if call.fg.f_name == f_name { + crate::log!(trace, "call has context"); + return true; + } + } else { + // eyre::bail!(format!("stmt is not a call for implicit context: {ctx:?}")); + } + } + } + false + } + + /// Is this call is relative for a specific statement, + /// We search the call's arguments recursively. 
+ pub fn is_related_call_for_stmt(&self, stmt: &StmtIndex, program: &FuzzProgram) -> bool { + if self.args.contains(stmt) { + return true; + } + let mut relative_indices: Vec = self.args.iter().map(|i| i.use_index()).collect(); + while let Some(i) = relative_indices.pop() { + if let Some(is) = program.get_stmt_by_index_uniq(&i) { + if let FuzzStmt::Load(load) = &is.stmt { + let cb = |index: &StmtIndex| { + relative_indices.push(index.use_index()); + stmt == index + }; + if load.state.find_any_stmt_in_state_with(cb) { + return true; + } + } + } + } + false + } + + /// Is this call is relative for a specific statement, which uses a pointer to the statement, + /// Only check the pointee itself, we do not check recursively in this function. + pub fn is_related_call_for_ptee(&self, ptee_stmt: usize, program: &FuzzProgram) -> bool { + for cur_arg in self.args.iter() { + if let Some(is) = program.get_stmt_by_index_uniq(cur_arg) { + if let FuzzStmt::Load(load) = &is.stmt { + if let Some(dst_index) = load.state.get_pointer_stmt_index() { + if dst_index.get() == ptee_stmt { + return true; + } + } + } else { + // is not a load + } + } + } + false + } + + /// If current arguments has one that been used multiple times + pub fn has_reused_args(&self, program: &FuzzProgram) -> Option { + for (i, arg_index) in self.args.iter().enumerate() { + if arg_index.get_ref_used() > 2 { + // crate::log!(trace, "arg_i {i} has >2 ref"); + return Some(i); + } + if let Some(is) = program.get_stmt_by_index_uniq(arg_index) { + if let FuzzStmt::Load(load) = &is.stmt { + //load.state.find_any_stmt_in_state_with(|ptee| ptee.get_ref_used() > 2) + if let Some(dst_index) = load.state.get_pointer_stmt_index() { + if dst_index.get_ref_used() > 2 { + // crate::log!(trace, "{} in arg_i {i} has >2 ref", dst_index.get()); + return Some(i); + } + } + } + } + } + None + } + + /// If it is a function for init opaque pointer + pub fn is_init_opaque_ptr_func(&self) -> bool { + if !self.is_relative() { + return 
false; + } + for i in 0..self.args.len() { + if let Some(inner) = utils::get_pointer_inner(self.fg.arg_types[i]) { + if utils::is_opaque_pointer(inner) { + return true; + } + } + } + false + } + + /// target call + pub const TARGET: &'static str = "$target"; + /// relative call + pub const RELATIVE: &'static str = "$relative"; + /// implicit call + pub const IMPLICIT: &'static str = "$implicit"; + + /// Is target call + pub fn is_target(&self) -> bool { + self.ident == Self::TARGET + } + + /// Is relative call + pub fn is_relative(&self) -> bool { + self.ident == Self::RELATIVE + } + + /// Is implicit call + pub fn is_implicit(&self) -> bool { + self.ident == Self::IMPLICIT + } + + /// Call that can be leaf node + pub fn is_leaf(&self) -> bool { + self.is_target() || self.is_relative() || self.is_implicit() + } +} + +impl CloneProgram for CallStmt { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + Self { + ident: self.ident.clone(), + name: self.name.clone(), + fg: self.fg.clone(), + args: self.args.clone_with_program(program), + ret: self.ret.clone(), + ret_ir: self + .ret_ir + .iter() + .map(|c| c.clone_with_program(program)) + .collect(), + contexts: self.contexts.clone_with_program(program), + track_cov: self.track_cov, + failure: self.failure, + det_index: self.det_index.clone(), + } + } +} + +impl CloneProgram for CallRetIR { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + Self { + fields: self.fields.clone(), + value: self.value.clone(), + state: self.state.clone_with_program(program), + used: self.used.clone_with_program(program), + } + } +} + +impl CloneProgram for Vec { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + self.iter().map(|i| i.clone_with_program(program)).collect() + } +} + +impl From for FuzzStmt { + fn from(stmt: CallStmt) -> Self { + FuzzStmt::Call(Box::new(stmt)) + } +} + +impl Serialize for CallStmt { + fn serialize(&self) -> eyre::Result { + let args = 
serialize_args(&self.args)?; + let track_sym = option_sym(self.track_cov, "? "); + let failure_sym = option_sym(self.failure, "! "); + let mut implicit_calls = String::new(); + if !self.contexts.is_empty() { + implicit_calls.push_str("<- "); + implicit_calls.push_str(&serialize_args(&self.contexts)?); + } + Ok(format!( + "{} {}: {} {}{}{} {}", + Self::KEYWORD, + self.ident, + self.name, + track_sym, + failure_sym, + args, + implicit_calls + )) + } +} + +impl Deserialize for CallStmt { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + // de.strip_token(Self::KEYWORD); + de.trim_start(); + let ident = de.next_token_until(":")?; + let fn_name = de.next_token_until(" ")?; + let track_sym = parse_sym(de, "?"); + let failure_sym = parse_sym(de, "!"); + let args = deserialize_args(de)?; + let mut implicit_calls = vec![]; + if de.strip_token("<-") { + de.trim_start(); + implicit_calls = deserialize_args(de)?; + } + let fg = global_gadgets::get_instance() + .get_func_gadget(fn_name)? + .clone(); + Ok(CallStmt { + ident: ident.to_string(), + name: fn_name.to_string(), + fg, + args, + ret: None, + ret_ir: vec![], + contexts: implicit_calls, + track_cov: track_sym, + failure: failure_sym, + det_index: Rc::new(RefCell::new(0)), + }) + } +} + +fn option_sym(cond: bool, sym: &str) -> &str { + if cond { + sym + } else { + "" + } +} + +fn parse_sym(de: &mut Deserializer, sym: &str) -> bool { + let mut has_sym = false; + if de.strip_token(sym) { + has_sym = true; + } + de.trim_start(); + has_sym +} + +fn serialize_args(args: &[StmtIndex]) -> eyre::Result { + let mut content = String::new(); + content.push('('); + for i in args { + content.push_str(&i.serialize()?); + content.push_str(", "); + } + content.push(')'); + Ok(content) +} + +fn deserialize_args(de: &mut Deserializer) -> eyre::Result> { + let mut args = vec![]; + de.eat_token("(")?; + loop { + if de.strip_token(")") { + break; + } + let stmt_index = StmtIndex::deserialize(de)?; + de.eat_token(",")?; + 
args.push(stmt_index); + } + de.trim_start(); + Ok(args) +} + +#[cfg(all(feature = "e9_mode", not(test)))] +fn hash_context(f_name: &str) -> u32 { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + let mut hasher = DefaultHasher::new(); + f_name.hash(&mut hasher); + (hasher.finish() & (crate::config::BRANCHES_SIZE as u64 - 1)) as u32 +} diff --git a/hopper-core/src/runtime/stmt/file.rs b/hopper-core/src/runtime/stmt/file.rs new file mode 100644 index 0000000..d2ecb9d --- /dev/null +++ b/hopper-core/src/runtime/stmt/file.rs @@ -0,0 +1,209 @@ +//! Helper statement +//! provide some built-in usages, e.g. create a file with buffer + +use std::{ffi::CString, io::Write}; + +use eyre::ContextCompat; + +use super::*; +use crate::runtime::*; + +#[derive(Debug)] +pub struct FileStmt { + pub ident: String, + pub file: Option, + pub buf_stmt: Option, + pub is_mut: bool, + pub is_fd: bool, + // pub name: Option, +} + +impl FileStmt { + pub fn new(ident: &str, is_mut: bool, is_fd: bool) -> Self { + Self { + ident: ident.to_string(), + file: None, + buf_stmt: None, + is_mut, + is_fd, + // name: None, + } + } + + pub fn set_buf_index(&mut self, index: StmtIndex) { + self.buf_stmt = Some(index); + } + + pub fn get_file_name(&self) -> String { + // if let Some(name) = &self.name { + // return name.to_string(); + // } + let f = format!("file_{}", &self.ident); + if let Some(i) = &self.buf_stmt { + return format!("{}_{}", f, i.get()); + } + f + } +} + +#[repr(transparent)] +#[derive(Debug, Default, Clone)] +pub struct FileFd(libc::c_int); + +impl FileFd { + /// Open the NUL-terminated path `file_name` with `O_RDWR | O_APPEND` and wrap the descriptor. + /// The value returned by `libc::open` is stored unchecked, so a failed open yields a negative fd. + /// The path uses `libc::c_char` because that is what `libc::open` expects; it is not `i8` on every target. + pub fn new(file_name: *const libc::c_char, _index: usize) -> Self { + #![allow(clippy::not_unsafe_ptr_arg_deref)] + #[cfg(target_family = "unix")] + unsafe { + let fd = libc::open(file_name, libc::O_RDWR | libc::O_APPEND); + Self(fd) + } + #[cfg(target_os = "windows")] + unimplemented!("should use HANDLE in windows") + } + /// Raw descriptor value held by this wrapper. + pub fn inner(&self) -> i32 { + self.0 + } +} + +impl Drop for FileFd { + fn drop(&mut
self) { + if self.0 > 0 { + #[cfg(target_family = "unix")] + unsafe { + libc::close(self.0); + } + } + } +} + +impl StmtView for FileStmt { + const KEYWORD: &'static str = "file"; + + /// Write the bytes of the buffer statement (if one is set) to a temp file, then store + /// either an opened descriptor (`is_fd`) or a pointer to the file's C path string in `self.file`. + fn eval( + &mut self, + used_stmts: &mut [IndexedStmt], + _resource_states: &mut ResourceStates, + ) -> eyre::Result<()> { + let temp_file = crate::config::tmp_file_path(&self.get_file_name()); + crate::log!(trace, "crate file: {:?}", temp_file); + if let Some(index) = &self.buf_stmt { + let stmt = &used_stmts[index.get()].stmt; + let mut f = std::fs::File::create(&temp_file)?; + if let Some(v) = stmt.get_value() { + if crate::config::USE_CANARY { + let buf = v + .downcast_ref::>() + .context("downcast buf")?; + f.write_all(buf.as_slice())?; + } else { + let buf = v.downcast_ref::>().context("downcast buf")?; + f.write_all(buf.as_slice())?; + }; + } + } + let file_name = CString::new(temp_file.to_string_lossy().to_string())?; + let ptr = file_name.into_raw(); + crate::log!(trace, "file name ptr: {:?}", ptr); + if self.is_fd { + self.file = Some(Box::new(FileFd::new(ptr, used_stmts.len()))); + // The path is only needed while opening the file: take the CString back + // from the raw pointer so that it is freed instead of leaked on every eval. + drop(unsafe { CString::from_raw(ptr) }); + } else if self.is_mut { + // The path string is handed out as a pointer value, so it stays allocated. + self.file = Some(Box::new(FuzzMutPointer::new(ptr)) as Box); + } else { + self.file = Some(Box::new(FuzzConstPointer::new(ptr)) as Box); + } + Ok(()) + } + + fn get_value(&self) -> Option<&FuzzObject> { + self.file.as_ref() + } +} + +impl CloneProgram for FileStmt { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + let buf_stmt = self.buf_stmt.clone_with_program(program); + Self { + ident: self.ident.clone(), + file: None, + buf_stmt, + is_mut: self.is_mut, + is_fd: self.is_fd, + // name: self.name.clone(), + } + } +} + +impl From for FuzzStmt { + fn from(stmt: FileStmt) -> Self { + FuzzStmt::File(Box::new(stmt)) + } +} + +impl Serialize for FileStmt { + fn serialize(&self) -> eyre::Result { + let mut extra_sym = ""; + if self.is_fd { + extra_sym = "fd "; + } else if self.is_mut { + extra_sym = "mut "; + } + Ok(format!( + "{} {}: {}{}", +
Self::KEYWORD, + self.ident, + extra_sym, + self.buf_stmt.serialize()? + )) + } +} + +impl Deserialize for FileStmt { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + // de.strip_token(Self::KEYWORD); + let ident = de.next_token_until(":")?; + let mut is_mut = false; + let mut is_fd = false; + if de.strip_token("mut ") { + is_mut = true; + } + if de.strip_token("fd ") { + is_fd = true; + } + let buf_stmt = Option::::deserialize(de)?; + let mut file_stmt = Self::new(ident, is_mut, is_fd); + file_stmt.buf_stmt = buf_stmt; + /* + if de.strip_token(",") { + de.trim_start(); + let name = de.buf.to_string(); + file_stmt.name = Some(name); + } + */ + Ok(file_stmt) + } +} + +/// Just some code to make FileFd to be Fuzzable +impl ObjFuzzable for FileFd {} +impl ObjValue for FileFd {} +impl ObjType for FileFd {} +impl ObjectTranslate for FileFd { + fn translate_obj_to_c( + &self, + _state: &ObjectState, + _program: &FuzzProgram, + ) -> eyre::Result { + unreachable!("can not translate fd"); + } +} +impl Serialize for FileFd { + fn serialize(&self) -> eyre::Result { + unreachable!("can not serialize fd"); + } +} +impl ObjectSerialize for FileFd { + fn serialize_obj(&self, _state: &ObjectState) -> eyre::Result { + unreachable!("can not serialize fd"); + } +} diff --git a/hopper-core/src/runtime/stmt/index.rs b/hopper-core/src/runtime/stmt/index.rs new file mode 100644 index 0000000..2f6e7cf --- /dev/null +++ b/hopper-core/src/runtime/stmt/index.rs @@ -0,0 +1,267 @@ +//! 
Index of statements + +use std::{ + cell::Cell, + fmt, + rc::{Rc, Weak}, +}; + +use eyre::ContextCompat; + +use crate::runtime::*; + +/// Index with reference counting +pub trait RcIndex { + /// Get the index + fn get(&self) -> usize; + /// Get its unique ID + fn get_uniq(&self) -> u64; + /// Get the number of strong references to this index + fn get_ref_used(&self) -> usize; +} + +/// Index of statements +#[derive(Debug, Clone)] +pub struct StmtIndex(Rc>); + +/// Non-owning (weak) reference to a StmtIndex +#[derive(Debug, Clone)] +pub struct WeakStmtIndex(Weak>); + +impl RcIndex for StmtIndex { + fn get(&self) -> usize { + self.0.get().0 + } + fn get_uniq(&self) -> u64 { + self.0.get().1 + } + fn get_ref_used(&self) -> usize { + Rc::strong_count(&self.0) + } +} + +impl RcIndex for WeakStmtIndex { + fn get(&self) -> usize { + let holder = self.0.upgrade().unwrap(); + holder.get().0 + } + fn get_uniq(&self) -> u64 { + let holder = self.0.upgrade().unwrap(); + holder.get().1 + } + fn get_ref_used(&self) -> usize { + self.0.strong_count() + } +} + +impl StmtIndex { + pub fn new(index: usize) -> Self { + let uniq: u64 = crate::fuzz::gen(); + Self(Rc::new(Cell::new((index, uniq)))) + } + + pub fn dup(&self) -> Self { + Self(Rc::new(Cell::new((self.get(), self.get_uniq())))) + } + + /// Update the index with new value + pub fn set(&self, index: usize) { + let mut val = self.0.get(); + val.0 = index; + self.0.replace(val); + } + + /// Update the uniq with new value + pub fn set_uniq(&mut self, uniq: u64) { + let mut val = self.0.get(); + val.1 = uniq; + self.0.replace(val); + } + + /// Reset uniq + pub fn reset_uniq(&mut self) { + let uniq: u64 = crate::fuzz::gen(); + self.set_uniq(uniq); + } + + /// Pass the index to another statement, + /// so that we can count how many statements are using it, + /// or mutate the index from its original place.
+ pub fn use_index(&self) -> StmtIndex { + // clone rc + Self(self.0.clone()) + } + + /// Downgrade to WeakStmtIndex + pub fn downgrade(&self) -> WeakStmtIndex { + WeakStmtIndex(Rc::downgrade(&self.0)) + } + + /// Get value at the index of statement list + pub fn get_stmt_value<'b>(&self, stmts: &'b [IndexedStmt]) -> Option<&'b FuzzObject> { + let index = self.get(); + if let Some(indexed_stmt) = stmts.get(index) { + return indexed_stmt.stmt.get_value(); + } + None + } + + /// Get type of statment at the index of statement list + pub fn get_stmt_type(&self, stmts: &[IndexedStmt]) -> Option<&'static str> { + let index = self.get(); + if let Some(indexed_stmt) = stmts.get(index) { + let ty = indexed_stmt.stmt.get_type(); + return Some(ty); + } + None + } +} + +impl WeakStmtIndex { + pub fn upgrade(&self) -> eyre::Result { + let inner = self.0.upgrade().context("can upgrade")?; + Ok(StmtIndex(inner)) + } + + pub fn is_released(&self) -> bool { + self.0.strong_count() == 0 + } +} + +macro_rules! 
impl_stmt_index { + ($ty:ident) => { + impl fmt::Display for $ty { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "<{}>", self.get()) + } + } + impl PartialEq for $ty { + fn eq(&self, other: &Self) -> bool { + self.get() == other.get() && self.get_uniq() == other.get_uniq() + } + } + + impl ObjectSerialize for $ty { + fn serialize_obj(&self, _state: &ObjectState) -> eyre::Result { + self.serialize() + } + } + + impl ObjectDeserialize for $ty { + fn deserialize_obj( + de: &mut Deserializer, + _state: &mut ObjectState, + ) -> eyre::Result { + Self::deserialize(de) + } + } + }; +} + +impl Serialize for StmtIndex { + fn serialize(&self) -> eyre::Result { + Ok(format!("<{}>", self.get())) + } +} + +impl Serialize for WeakStmtIndex { + fn serialize(&self) -> eyre::Result { + eyre::ensure!(!self.is_released(), "fail to serialize released index"); + Ok(format!("<{}>", self.get())) + } +} + +impl_stmt_index!(StmtIndex); +impl_stmt_index!(WeakStmtIndex); + +impl CloneProgram for Option { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + self.as_ref().map(|v| v.clone_with_program(program)) + } +} + +impl CloneProgram for Option { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + if let Some(index) = self.as_ref() { + if index.is_released() { + return None; + } + return Some(index.clone_with_program(program)); + } + None + } +} + +impl CloneProgram for StmtIndex { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + // if index is in program + if let Some(is) = program.get_stmt_by_index_uniq(self) { + return is.index.use_index(); + } + // if index is in tmp_indices + let index_uniq = self.get_uniq(); + let index = program + .tmp_indices + .iter() + .find(|i| i.get_uniq() == index_uniq); + if let Some(stmt_index) = index { + return stmt_index.use_index(); + } + // create new one + let stmt_index = self.dup(); + program.tmp_indices.push(stmt_index.use_index()); + stmt_index + } +} + +impl 
CloneProgram for WeakStmtIndex { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + self.upgrade() + .unwrap() + .clone_with_program(program) + .downgrade() + } +} + +impl Deserialize for StmtIndex { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + de.eat_token("<")?; + let index: usize = de.parse_next_until(">")?; + let p = de.program.as_mut().context("deserializer need program")?; + if index < p.stmts.len() { + let stmt = p.stmts.get(index).ok_or_else(|| { + eyre::eyre!("stmt index `{}` is out of bound `{}`", index, p.stmts.len()) + })?; + let use_index = stmt.index.use_index(); + eyre::ensure!( + use_index.get() == index, + "want index {}, but get {:?}", + index, + stmt + ); + Ok(use_index) + } else if let Some(stmt_index) = p.tmp_indices.iter().find(|i| i.get() == index) { + Ok(stmt_index.use_index()) + } else { + let stmt_index = StmtIndex::new(index); + p.tmp_indices.push(stmt_index.use_index()); + Ok(stmt_index) + } + } +} + +impl Deserialize for WeakStmtIndex { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + let index = StmtIndex::deserialize(de)?; + Ok(index.downgrade()) + } +} + +#[test] +fn test_check_used() { + let index = StmtIndex::new(1); + assert!(index.get_ref_used() == 1); + let index2 = index.use_index(); + assert!(index.get_ref_used() == 2); + assert!(index2.get_ref_used() == 2); +} diff --git a/hopper-core/src/runtime/stmt/load.rs b/hopper-core/src/runtime/stmt/load.rs new file mode 100644 index 0000000..c15bcfe --- /dev/null +++ b/hopper-core/src/runtime/stmt/load.rs @@ -0,0 +1,189 @@ +//! Load variable or constant +//! Load an object from memory which stored in the value field +//! 
format: load type: ident = value, + +use eyre::{Context, ContextCompat}; + +use super::*; +use crate::{runtime::*, utils}; + +#[derive(Debug)] +pub struct LoadStmt { + /// Value stored in memory + pub value: FuzzObject, + /// State of the value + pub state: Box, + // is const or not + pub is_const: bool, +} + +impl StmtView for LoadStmt { + const KEYWORD: &'static str = "load"; + + fn get_value(&self) -> Option<&FuzzObject> { + Some(&self.value) + } + + fn eval( + &mut self, + used_stmts: &mut [IndexedStmt], + resource_states: &mut ResourceStates, + ) -> eyre::Result<()> { + crate::log!( + trace, + "load `{}` at addr {:?}", + self.value.type_name(), + self.value.get_ptr_by_keys(&[]) + ); + let size = self.value.get_length(); + let ptr = self.value.get_ptr_by_keys(&[])?; + resource_states.insert_ptr_size(ptr, size); + Self::fill_pointer(&mut self.value, &self.state, used_stmts, resource_states) + } +} + +impl LoadStmt { + pub fn new(value: FuzzObject, state: Box) -> Self { + Self { + value, + state, + is_const: false, + } + } + + pub fn new_const(value: FuzzObject, state: Box) -> Self { + Self { + value, + state, + is_const: true, + } + } + + pub fn new_state(ident: T, ty: &str) -> Box { + Box::new(ObjectState::root(ident, utils::get_static_ty(ty))) + } + + pub fn get_ident(&self) -> &str { + self.state.key.as_str().unwrap() + } + + /// Fill pointer address + /// + /// Address of pointer will be changed during move\ clone objects, + /// so we assign them before executing. + fn fill_pointer( + load_obj: &mut FuzzObject, + state: &ObjectState, + used_stmts: &[IndexedStmt], + resource_states: &mut ResourceStates, + ) -> eyre::Result<()> { + if let Some(ps) = &state.pointer { + let loc = &ps.pointer_location; + if loc.is_null() { + return Ok(()); + } + // address of this statement + let fields = state.get_location_fields(); + let dst_ptr = load_obj + .get_ptr_by_keys(fields.as_slice()) + .with_context(|| format!("dst ptr fields: {}", fields.serialize().unwrap()))? 
+ as *mut *mut u8; + // it is non-null + if let Some(stmt_index) = &loc.stmt_index { + // the address where the stmt pointer directing + let src_obj = stmt_index + .get_stmt_value(used_stmts) + .context("fail to find statement by index")?; + let src_ptr = src_obj + .get_ptr_by_keys(loc.fields.as_slice()) + .with_context(|| { + format!("src ptr fields: {}", loc.fields.serialize().unwrap()) + })?; + resource_states.check_pointer(src_ptr)?; + crate::log!( + trace, + "fill `{:?}` ({}) for field `{:?}`", + src_ptr, + loc.serialize()?, + fields + ); + // assign it + unsafe { *dst_ptr = src_ptr }; + } else { + // NULL + unsafe { *dst_ptr = std::ptr::null_mut::() }; + } + } + // optimize for primitive arrays + if let Some(st) = state.children.first() { + if let FieldKey::Index(_) = st.key { + if utils::is_primitive_type(st.ty) { + return Ok(()); + } + } + } + for st in &state.children { + Self::fill_pointer(load_obj, st, used_stmts, resource_states)?; + } + Ok(()) + } + + /// Check if it is a pointer that point-to freed resources. + pub fn point_to_freed_resource(&self, program: &FuzzProgram) -> bool { + if let Some(stmt_index) = self.state.get_pointer_stmt_index() { + return program.stmts[stmt_index.get()].freed.is_some(); + } + false + } +} + +impl CloneProgram for LoadStmt { + /// Clone the value and its state into `program`, keeping the `is_const` flag. + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + // Build the clone field by field: `Self::new` would reset `is_const` to false, + // turning a `load const` statement into a plain `load` after cloning. + Self { + value: self.value.clone(), + state: self.state.clone_with_program(program), + is_const: self.is_const, + } + } +} + +impl From for FuzzStmt { + fn from(stmt: LoadStmt) -> Self { + FuzzStmt::Load(Box::new(stmt)) + } +} + +impl Serialize for LoadStmt { + fn serialize(&self) -> eyre::Result { + if self.is_const { + Ok(format!( + "{} const {}: {} = {}", + Self::KEYWORD, + self.get_ident(), + self.value.type_name(), + self.value.serialize_obj(&self.state)?
+ )) + } + } +} + +impl Deserialize for LoadStmt { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + // de.strip_token(Self::KEYWORD); + let is_const = de.strip_token("const"); + de.trim_start(); + let ident = de.next_token_until(":")?; + let ty = de.next_token_until(" =")?; + let mut state = Self::new_state(ident, ty); + let value = read_value(de, ty, &mut state)?; + Ok(Self { + value, + state, + is_const, + }) + } +} \ No newline at end of file diff --git a/hopper-core/src/runtime/stmt/mod.rs b/hopper-core/src/runtime/stmt/mod.rs new file mode 100644 index 0000000..b163b8f --- /dev/null +++ b/hopper-core/src/runtime/stmt/mod.rs @@ -0,0 +1,235 @@ +mod assert; +mod call; +mod file; +mod index; +mod load; +mod update; + +pub use assert::*; +pub use call::*; +pub use file::*; +pub use index::*; +pub use load::*; +pub use update::*; + +use crate::{feedback::ResourceStates, runtime::*, CloneProgram}; + +/// Statements for fuzzing program +/// +#[derive(Debug)] +pub enum FuzzStmt { + /// Variable + Load(Box), + /// Function call + Call(Box), + /// Assert + Assert(Box), + /// Create a file + File(Box), + /// Update return value + Update(Box), + /// Stub, do nothing + Stub, +} + +/// Define a struct that can be viewed as a statement +pub trait StmtView: Into + CloneProgram { + /// Keyword for serde + const KEYWORD: &'static str; + /// Get its value + fn get_value(&self) -> Option<&FuzzObject> { + None + } + /// Evaluate this statement + fn eval( + &mut self, + _used_stmts: &mut [IndexedStmt], + _resource_states: &mut ResourceStates, + ) -> eyre::Result<()> { + // do nothing + Ok(()) + } + /// Get its type + fn get_type(&self) -> &'static str { + Self::KEYWORD + } +} + +/// Statement with index +#[derive(Debug)] +pub struct IndexedStmt { + pub index: StmtIndex, + pub stmt: FuzzStmt, + pub freed: Option, +} + +impl IndexedStmt { + pub fn new(stmt: FuzzStmt, index: usize) -> Self { + Self { + index: StmtIndex::new(index), + stmt, + freed: None, + } + } +} + +impl
CloneProgram for IndexedStmt { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + Self { + index: self.index.clone_with_program(program), + stmt: self.stmt.clone_with_program(program), + freed: self.freed.clone_with_program(program), + } + } +} + +impl Serialize for IndexedStmt { + fn serialize(&self) -> eyre::Result { + let mut out = String::new(); + out.push_str(&self.index.serialize()?); + out.push(' '); + if let Some(i) = &self.freed { + out.push('~'); + out.push_str(&i.serialize()?); + out.push(' '); + } + out.push_str(&self.stmt.serialize()?); + out.push('\n'); + Ok(out) + } +} + +impl Deserialize for IndexedStmt { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + let stmt_index = StmtIndex::deserialize(de)?; + let mut freed = None; + if de.strip_token("~") { + let index = WeakStmtIndex::deserialize(de)?; + freed = Some(index); + } + let stmt = FuzzStmt::deserialize(de)?; + let is = IndexedStmt { + index: stmt_index, + stmt, + freed, + }; + Ok(is) + } +} + +#[macro_export] +macro_rules! 
impl_stmt_match { + (@inner, $name:ident, $f:ident, $($ty:ident),* ) => { + match $name { + $( FuzzStmt::$ty(inner) => inner.$f(), )* + _ => { unreachable!() } + } + }; + (@inner, $name:ident, $f:ident($arg1:ident), $($ty:ident),* ) => { + match $name { + $( FuzzStmt::$ty(inner) => inner.$f($arg1), )* + _ => { unreachable!() } + } + }; + (@inner, $name:ident, $f:ident($arg1:ident).into(), $($ty:ident),* ) => { + match $name { + $( FuzzStmt::$ty(inner) => inner.$f($arg1).into(), )* + _ => { unreachable!() } + } + }; + (@inner, $name:ident, $f:ident($arg1:ident, $arg2:ident), $($ty:ident),* ) => { + match $name { + $( FuzzStmt::$ty(inner) => inner.$f($arg1, $arg2), )* + _ => { unreachable!() } + } + }; + (@inner, $name:ident, $f:ident($arg1:ident, $arg2:ident, $arg3:ident), $($ty:ident),* ) => { + match $name { + $( FuzzStmt::$ty(inner) => inner.$f($arg1, $arg2, $arg3), )* + _ => { unreachable!() } + } + }; + (@inner, $name:ident, $f:ident($arg1:ident, $arg2:ident, $arg3:ident, $arg4:ident), $($ty:ident),* ) => { + match $name { + $( FuzzStmt::$ty(inner) => inner.$f($arg1, $arg2, $arg3, $arg4), )* + _ => { unreachable!() } + } + }; + ($($t:tt)*) => { + impl_stmt_match!(@inner, $($t)*, Load, Update, Call, Assert, File) + }; +} + +impl FuzzStmt { + pub fn eval( + &mut self, + used_stmts: &mut [IndexedStmt], + resource_states: &mut ResourceStates, + ) -> eyre::Result<()> { + impl_stmt_match!(self, eval(used_stmts, resource_states)) + } + + pub fn get_value(&self) -> Option<&FuzzObject> { + impl_stmt_match!(self, get_value) + } + + pub fn get_type(&self) -> &'static str { + impl_stmt_match!(self, get_type) + } + + /// Replace itself with stub stmt + pub fn lend(&mut self) -> FuzzStmt { + let stub = FuzzStmt::Stub; + std::mem::replace(self, stub) + } + + /// Withdraw statement + pub fn withdraw(&mut self, stmt: FuzzStmt) { + let _ = std::mem::replace(self, stmt); + } + + pub fn is_stub(&self) -> bool { + matches!(self, FuzzStmt::Stub) + } + + pub fn is_load(&self) -> bool 
{ + matches!(self, Self::Load(_)) + } + + pub fn is_call(&self) -> bool { + matches!(self, Self::Call(_)) + } +} + +impl CloneProgram for FuzzStmt { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + if matches!(self, FuzzStmt::Stub) { + return FuzzStmt::Stub; + } + impl_stmt_match!(self, clone_with_program(program).into()) + } +} + +impl Serialize for FuzzStmt { + fn serialize(&self) -> eyre::Result { + if self.is_stub() { + return Ok("stub".to_string()); + } + impl_stmt_match!(self, serialize) + } +} + +impl Deserialize for FuzzStmt { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + let stmt_type = de.next_token_until(" ")?; + let stmt = match stmt_type { + LoadStmt::KEYWORD => LoadStmt::deserialize(de)?.into(), + UpdateStmt::KEYWORD => UpdateStmt::deserialize(de)?.into(), + CallStmt::KEYWORD => CallStmt::deserialize(de)?.into(), + AssertStmt::KEYWORD => AssertStmt::deserialize(de)?.into(), + FileStmt::KEYWORD => FileStmt::deserialize(de)?.into(), + _ => eyre::bail!("Unknow statement type: `{}`", stmt_type), + }; + Ok(stmt) + } +} diff --git a/hopper-core/src/runtime/stmt/update.rs b/hopper-core/src/runtime/stmt/update.rs new file mode 100644 index 0000000..1ddd63c --- /dev/null +++ b/hopper-core/src/runtime/stmt/update.rs @@ -0,0 +1,195 @@ +//! Update statement +//! 
Update object that retuan from function calls + +use super::*; +use crate::runtime::*; + +/// Update statement: overwrite the location `dst` with the value produced by statement `src` (dst <= src) +#[derive(Debug)] +pub struct UpdateStmt { + /// Index of the statement whose value is used as the update source + pub src: StmtIndex, + /// Weak location (statement index plus field path) that will be updated + pub dst: WeakLocation, +} + +impl UpdateStmt { + pub fn new(src: StmtIndex, dst: WeakLocation) -> Self { + Self { src, dst } + } +} + +impl StmtView for UpdateStmt { + const KEYWORD: &'static str = "update"; + + fn eval( + &mut self, + used_stmts: &mut [IndexedStmt], + _resource_states: &mut ResourceStates, + ) -> eyre::Result<()> { + let (used_stmts, rest) = used_stmts.split_at_mut(self.src.get()); + let dst_i = self.dst.get_index()?.get(); + match &mut used_stmts[dst_i].stmt { + FuzzStmt::Call(dst_call) => { + if let Some(call_ret) = &mut dst_call.ret { + if let FuzzStmt::Load(src_load) = &mut rest[0].stmt { + self.fill_update_pointer(src_load, call_ret)?; + } + } + } + FuzzStmt::Load(load_stmt) => { + if let Some(src_value) = rest[0].stmt.get_value() { + let val = *u64::cast_from(src_value); + let op = crate::MutateOperation::IntSet { val: val.into() }; + load_stmt.value.mutate_by_op( + &mut load_stmt.state, + self.dst.fields.as_slice(), + &op, + )?; + } + } + _ => { + crate::log!(warn, "can not update : {:?}", self); + } + } + Ok(()) + } +} + +impl UpdateStmt { + /// Point the destination location inside the call's return value at the memory of the source load statement; nothing is written when the destination pointer cannot be resolved + fn fill_update_pointer( + &mut self, + src_load: &mut LoadStmt, + call_ret: &FuzzObject, + ) -> eyre::Result<()> { + let src_ptr = src_load.value.get_ptr_by_keys(&[])?; + fill_stub_pointer( + &mut src_load.value, + &src_load.state, + call_ret, + self.dst.fields.as_slice(), + )?; + let (_, dst_fields) = self + .dst + .fields + .as_slice() + .split_last() + .ok_or_else(|| eyre::eyre!("remove last (Fieldkey::Pointer)"))?; + if let Ok(dst_ptr) = call_ret.get_ptr_by_keys(dst_fields) { + let dst_ptr = dst_ptr as *mut *mut u8; + crate::log!( + trace, + "update ({}) value in loc {:?} as : {:?}", 
self.serialize()?, + dst_ptr, + src_ptr + ); + unsafe { *dst_ptr = src_ptr }; + return Ok(()); + } + Ok(()) + } +} + +/// Recursively walk `state` and, for every pointer flagged as a stub, overwrite that pointer in the load object with the pointer read from the call's return value at `prefix` + the state's location fields + `FieldKey::Pointer` +fn fill_stub_pointer( + load_obj: &mut FuzzObject, + state: &ObjectState, + call_ret: &FuzzObject, + prefix: &[FieldKey], +) -> eyre::Result<()> { + if let Some(ps) = &state.pointer { + if ps.stub { + let mut sub_fields = state.get_location_fields().list; + crate::log!(trace, "fill stub: {:?}, prefix: {:?}", sub_fields, prefix); + let dst_ptr = load_obj.get_ptr_by_keys(sub_fields.as_slice())? as *mut *mut u8; + // crate::log!(trace, "dst_ptr: {:?}", dst_ptr); + let mut fields = prefix.to_vec(); + fields.append(&mut sub_fields); + fields.push(FieldKey::Pointer); + let src_ptr = call_ret.get_ptr_by_keys(fields.as_slice())?; + // crate::log!(trace, "src_ptr: {:?}", src_ptr); + unsafe { *dst_ptr = src_ptr }; + } + } + for st in &state.children { + fill_stub_pointer(load_obj, st, call_ret, prefix)?; + } + Ok(()) +} + +impl CloneProgram for UpdateStmt { + fn clone_with_program(&self, program: &mut FuzzProgram) -> Self { + let src = self.src.clone_with_program(program); + let dst = self.dst.clone_with_program(program); + Self::new(src, dst) + } +} + +impl From for FuzzStmt { + fn from(stmt: UpdateStmt) -> Self { + FuzzStmt::Update(Box::new(stmt)) + } +} + +impl Serialize for UpdateStmt { + fn serialize(&self) -> eyre::Result { + Ok(format!( + "{} {} <= {}", + Self::KEYWORD, + self.dst.serialize()?, + self.src.serialize()? 
+ )) + } +} + +impl Deserialize for UpdateStmt { + fn deserialize(de: &mut Deserializer) -> eyre::Result { + // de.strip_token(Self::KEYWORD); + let dst = WeakLocation::deserialize(de)?; + de.eat_token("<=")?; + let src = StmtIndex::deserialize(de)?; + Ok(Self::new(src, dst)) + } +} + +#[test] +fn test_update_eval() { + use crate::test; + let mut program = FuzzProgram::default(); + let call = test::generate_call_stmt("create_test_ptr"); + let call_i = program.append_stmt(call); + let load_arr = test::generate_load_stmt::>("test", ""); + let load_ptr = load_arr.value.get_ptr_by_keys(&[]).unwrap(); + let load_arr_i = program.append_stmt(load_arr); + let arr_dst_fields = LocFields::new(vec![ + FieldKey::Pointer, + FieldKey::Field("p".to_string()), + FieldKey::Pointer, + ]); + let arr_dst_loc = Location::new(call_i.use_index(), arr_dst_fields); + let update_arr = UpdateStmt::new(load_arr_i.use_index(), arr_dst_loc.to_weak_loc()); + let _update_arr_i = program.append_stmt(update_arr); + let mut load_test = test::generate_load_stmt::>("test", ""); + let sub_keys = vec![FieldKey::Index(0), FieldKey::Field("p".to_string())]; + let sub_state = load_test.state.get_child_mut_by_fields(&sub_keys).unwrap(); + if let Some(ps) = sub_state.pointer.as_mut() { + ps.stub = true; + } + let load_test_i = program.append_stmt(load_test); + let test_dst_fields = LocFields::new(vec![FieldKey::Pointer]); + let test_dst_loc = Location::new(call_i.use_index(), test_dst_fields); + let update_test = UpdateStmt::new(load_test_i.use_index(), test_dst_loc.to_weak_loc()); + let _update_test_i = program.append_stmt(update_test); + println!("program: {}", program.serialize().unwrap()); + program.eval().unwrap(); + assert_eq!( + call_i + .get_stmt_value(&program.stmts) + .unwrap() + .get_ptr_by_keys(arr_dst_loc.fields.as_slice()) + .unwrap(), + load_ptr + ); +} diff --git a/hopper-core/src/runtime/translate.rs b/hopper-core/src/runtime/translate.rs new file mode 100644 index 0000000..3a5a99d --- 
/dev/null +++ b/hopper-core/src/runtime/translate.rs @@ -0,0 +1,293 @@ +use std::collections::HashSet; +use std::fmt::Write; + +use super::*; +use crate::utils; +use eyre::{Context, ContextCompat}; +use regex::{self, Regex}; + +pub trait Translate { + fn translate_to_c(&self) -> eyre::Result; +} + +pub trait ObjectTranslate: ObjectSerialize { + fn translate_obj_to_c( + &self, + state: &ObjectState, + _program: &FuzzProgram, + ) -> eyre::Result { + self.serialize_obj(state) + } +} + +static HEADER: &str = r#"#include +#include +#include +#include +#include +#include +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; +typedef unsigned int usize; +typedef int8_t i8; +typedef int16_t i16; +typedef int32_t i32; +typedef int64_t i64; +typedef int isize; +typedef float f32; +typedef double f64; +"#; + +impl Translate for FuzzProgram { + fn translate_to_c(&self) -> eyre::Result { + // Compile the regular expression + let callback_re = + Regex::new(format!(r"{}([^0-9]*)(\d+)", crate::FN_POINTER_PREFIX).as_str()) + .context("failed to compile regular expression")?; + let mut out = String::new(); + out.push_str(HEADER); + out.push_str("int main() {\n"); + let mut callbacks = HashSet::new(); + for is in self.stmts.iter() { + let value_name = format!("v{}", is.index.get()); + match &is.stmt { + FuzzStmt::Load(load) => { + let type_name = load.value.type_name(); + let left = get_c_left_operand(&value_name, type_name); + let right = load.value.translate_obj_to_c(&load.state, self)?; + // Use regular expression to capture all the appearance of a generated callback. + for caps in callback_re.captures_iter(&right) { + if let Some(captured) = caps.get(0) { + callbacks.insert(captured.as_str().to_owned()); + } + } + let ident = load.get_ident(); + let line = format!(" {left} = {right}; // {ident}\n"); + // The intialization of vec before is stored in stack, + // we should restore it in heap. 
+ if let Some(start) = find_next(type_name, "alloc::vec::Vec<") { + let tmp_value_name = value_name.clone() + "_tmp"; + let line = line.replace(&value_name, &tmp_value_name); + out.push_str(&line); + let pointer_type = + "hopper::runtime::FuzzMutPointer<".to_string() + &type_name[start..]; + let left = get_c_left_operand(&value_name, &pointer_type); + let line = + format!(" {} = malloc(sizeof {});\n", &left, &tmp_value_name,); + out.push_str(&line); + let line = format!( + " memcpy({}, {}, sizeof {});\n", + &value_name, &tmp_value_name, &tmp_value_name, + ); + out.push_str(&line); + } else { + out.push_str(&line); + } + } + FuzzStmt::Call(call) => { + let left = if let Some(ty) = call.fg.ret_type { + format!("{} = ", get_c_left_operand(&value_name, ty)) + } else { + "".to_string() + }; + let args: Vec = + call.args.iter().map(|i| format!("v{}", i.get())).collect(); + let line = format!( + " {}{}({}); // {}\n", + &left, + call.name, + args.join(", "), + call.ident + ); + out.push_str(&line); + } + FuzzStmt::Update(update) => { + let state = ObjectState::root("update", ""); + let left = update.dst.translate_obj_to_c(&state, self)?; + let line = format!(" {} = v{};\n", left, update.src.get()); + out.push_str(&line); + } + FuzzStmt::Assert(assert) => { + if let AssertRule::NonNull { stmt } = &assert.rule { + let line = format!(" if (v{} == NULL) return 0;\n", stmt.get()); + out.push_str(&line); + } + } + FuzzStmt::File(file) => { + let line = format!( + " char* path_{} = \"{}\";\n", + value_name, + file.get_file_name() + ); + out.push_str(&line); + if let Some(i) = &file.buf_stmt { + let line = + format!(" FILE *f_{value_name} = fopen(path_{value_name}, \"wb\");\n"); + out.push_str(&line); + let line = format!( + " fwrite(v{}, sizeof v{}_tmp, 1, f_{});\n", + i.get(), + i.get(), + value_name + ); + out.push_str(&line); + if !file.is_fd { + let line = format!(" fclose(f_{value_name});\n"); + out.push_str(&line); + } + } + if file.is_fd { + let line = format!(" int 
{value_name} = fileno(f_{value_name}); // {}\n", file.ident); + out.push_str(&line); + } else { + let line = format!(" char* {value_name} = path_{value_name}; // {}\n", file.ident); + out.push_str(&line); + } + } + _ => {} + } + } + out.push_str("}\n"); + let beginning = out.find("int main() {").unwrap(); + let callbacks = translate_callbacks_to_c(&callbacks)?; + out.insert_str(beginning, &callbacks); + Ok(out) + } +} + +fn translate_callbacks_to_c(callbacks: &HashSet) -> eyre::Result { + let mut ret = String::new(); + + let mut alias_cnt = 0; + for callback in callbacks { + crate::log!(trace, "translating {callback}"); + let fn_g = global_gadgets::get_instance() + .functions + .get(callback) + .context("no such callback")?; + + let mut arg_list = String::new(); + for (arg_type, arg_ident) in fn_g.arg_types.iter().zip(fn_g.arg_idents.iter()) { + let arg = get_c_left_operand(arg_ident, arg_type); + arg_list.push_str(format!("{arg}, ").as_str()); + } + // leave the ending comma and whitespace + arg_list.pop(); + arg_list.pop(); + + let (ret_ty_and_fn_name, body) = { + let mut ret_ty_and_fn_name = String::new(); + let mut body = String::new(); + let ret_ty = fn_g.ret_type.unwrap_or("void"); + + if ret_ty.contains("core::option::Option String { + let mut next = ty.trim(); + let mut value_name = value_name.to_string(); + let c_ty; + loop { + if let Some(start) = find_next(next, "alloc::vec::Vec<") { + // should stored in heap + let end = next.len() - 1; + next = &next[start..end]; + value_name += "[]"; + } else if let Some(start) = find_next(next, "[") { + let end = next.rfind(';').unwrap(); + // let array_len = + next = &next[start..end]; + value_name += "[]"; + } else if let Some(start) = find_next(next, "hopper::runtime::FuzzMutPointer<") { + let end = next.len() - 1; + next = &next[start..end]; + value_name = "*".to_string() + &value_name; + } else if let Some(start) = find_next(next, "hopper::runtime::FuzzConstPointer<") { + let end = next.len() - 1; + next = 
&next[start..end]; + value_name = "*".to_string() + &value_name; + } else if let Some(start) = find_next(next, "hopper::runtime::FuzzFrozenPointer<") { + let end = next.len() - 1; + next = &next[start..end]; + } else if let Some(start) = find_next(next, "core::option::Option>() + .join(","); + let ret = ret.replace("->", ""); + let ret = ret.trim(); + if ret.is_empty() { + c_ty = "void".to_string(); + } else { + c_ty = get_c_left_operand("", ret); + } + value_name = format!("(*{value_name})({arg_tys})"); + break; + } + } else if utils::is_void_type(next) { + c_ty = "void".to_string(); + break; + } else { + if let Some(n) = next.strip_prefix("hopper_harness::") { + // c_ty = format!("struct {}", n); + c_ty = n.to_string(); + } else { + c_ty = next.to_string(); + } + break; + } + } + crate::log!( + trace, + "rust: {}, ty: {}, value_name: {}", + ty, + c_ty, + value_name + ); + format!("{c_ty} {value_name}") +} + +fn find_next(cur: &str, pat: &str) -> Option { + if cur.starts_with(pat) { + return Some(pat.len()); + } + None +} diff --git a/hopper-core/src/slices.rs b/hopper-core/src/slices.rs new file mode 100644 index 0000000..ba39272 --- /dev/null +++ b/hopper-core/src/slices.rs @@ -0,0 +1,1090 @@ +//! Optional feature +//! used for slice API calls from real-world code, and use them as Hopper's input. + +use eyre::{ContextCompat, Report, Result, WrapErr}; +use once_cell::sync::OnceCell; +use std::{ + cell::RefCell, + collections::HashMap, + hash::{Hash, Hasher}, + rc::{Rc, Weak}, +}; + +pub type NodeTy = Rc>; +pub type ApiTreeTy = HashMap; +// API_TREE: A parent-childs tree which records the call relationship between APIs. 
+// Key: c-style type, Value: TreeNode (value-sensitive) +// Example: two slices of type "cJSON *": "A(v_0) B(v_1) C(v_3) D(v_4)" and "A(0) B(v_2) B(v_2) C(v_3)" +// API_TREE["cJSON *"] = Node(A(v_0)) -> Node(B(v_1)) -> Node(C(v_3)) -> Node(D(v_4)) +// -> Node(B(v_2)) -> Node(B(v_2)) -> Node(C(v_3)) +pub fn get_slices_path() -> Option { + pub static SLICES_ENV: OnceCell> = OnceCell::new(); + SLICES_ENV + .get_or_init(|| std::env::var(crate::SLICES_PATH).ok()) + .clone() +} + +pub fn is_using_slice() -> bool { + get_slices_path().is_some() +} + +// If ONLY_USE_SLICES_VAR is set, programs are generated only from slices, i.e. functions without slices are never generated. +// Otherwise, functions without slices are generated randomly. +// pub fn is_only_using_slics() -> bool { +// pub static ONLY_USE_SLICE_ENV: OnceCell = OnceCell::new(); +// *ONLY_USE_SLICE_ENV.get_or_init(|| std::env::var(crate::ONLY_USE_SLICES_VAR).is_ok()) +// } + +pub fn get_api_tree() -> Option<&'static ApiTreeTy> { + pub static mut API_TREE: OnceCell> = OnceCell::new(); + if let Some(path) = get_slices_path() { + let api_tree = unsafe { + API_TREE.get_or_init(|| { + Some(slice_utils::read_api_slices(path).expect("expect to get a non-none api tree")) + }) + }; + return api_tree.as_ref(); + } + None +} + +fn get_func_node_map() -> &'static HashMap> { + pub static mut FUNC_NODE_MAP: OnceCell>> = OnceCell::new(); + let func_node_map = unsafe { FUNC_NODE_MAP.get_or_init(slice_utils::init_func_node_map) }; + func_node_map +} + +pub fn is_func_in_slice(f_name: &str) -> bool { + let func_node_map = get_func_node_map(); + if func_node_map.contains_key(f_name) { + return true; + } + false +} + +// Structure to represent a function node in a tree. 
+#[derive(Default, Debug, Clone)] +pub struct TreeNode { + pub is_root: bool, + pub childs: Vec, + pub parent: Weak>, + pub index: Option, // index of the dataflow-related argument, if any + pub func: Function, +} + +impl TreeNode { + pub fn set_func(&mut self, func: Function) { + self.func = func; + } + + pub fn set_parent(&mut self, parent: Weak>) { + self.parent = parent; + } + + pub fn set_root(&mut self, is_root: bool) { + self.is_root = is_root; + } + + pub fn _set_childs(&mut self, childs: Vec) { + self.childs = childs; + } + + pub fn push_child(&mut self, child: NodeTy) { + self.childs.push(child); + } + + pub fn set_index(&mut self, index: Option) { + self.index = index; + } +} + +#[derive(Debug, Clone)] +pub struct Param { + pub cvrmask: u64, + pub param_name: String, + pub param_type: String, + pub raw_param_type: String, +} + +#[derive(Debug, Clone)] +pub struct Arg { + pub cvrmask: u64, + pub arg_name: String, + pub arg_type: String, + pub raw_arg_type: String, + pub value: ArgValue, +} + +#[derive(Debug, Clone)] +pub struct FloatValue(f64); +impl FloatValue { + fn key(&self) -> u64 { + self.0.to_bits() + } + fn value(&self) -> f64 { + self.0 + } +} +impl Hash for FloatValue { + fn hash(&self, state: &mut H) { + self.key().hash(state); + } +} + +impl PartialEq for FloatValue { + fn eq(&self, other: &Self) -> bool { + self.key() == other.key() + } +} + +impl Eq for FloatValue {} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ArgValue { + Integer(i64), + Float(FloatValue), + Character(u8), + StringValue(String), + NULLPointer, + None, +} + +#[derive(Debug, Clone, Default)] +pub struct Function { + pub cvrmask: u64, + pub call: Vec, + pub decl: Vec, + pub func_name: String, + pub ret_type: String, + pub raw_ret_type: String, + pub t_index: i64, +} + +#[derive(Debug, Clone, thiserror::Error)] +pub enum SliceError { + #[error("The function `{0}` is not found in slices.")] + SliceNotFound(String), + #[error("The function `{0}` is not found in gadgets.")] + 
GadgetNotFound(String), + #[error("Attempt to access a invalid `{1}`-th arg of `{0}`.")] + ArgIndexError(String, usize), + #[error( + "Error to insert the possible free func {1} which is the child of {0} as dyn call context." + )] + InsertFreeError(String, String), + #[error("Error to parse the input: {0}")] + ParseError(String), +} + +pub fn is_slice_not_found_err(res: &Result) -> bool { + if let Err(e) = res { + if let Some(SliceError::SliceNotFound(_)) = e.downcast_ref::() { + return true; + } + } + false +} + +pub fn is_arg_index_err(res: Option) -> Result { + if let Some(e) = res { + if let Some(SliceError::ArgIndexError(_, _)) = e.downcast_ref::() { + return Ok(true); + } + // any other error is propagated to the caller + return Err(e); + } + // no error at all + Ok(false) +} + +pub mod slice_utils { + use super::*; + use regex::Regex; + use serde_json::Value; + use std::collections::VecDeque; + + // Split the raw text into api slices: every blank line closes the current slice, and a trailing slice is kept only when it holds more than one line. + pub fn parse_to_api_slices(str: String) -> Vec> { + let mut api_slices: Vec> = Vec::new(); + let mut api_slice: Vec = Vec::new(); + for line in str.lines() { + let line = line.trim(); + if line.is_empty() { + api_slices.push(api_slice); + api_slice = Vec::new(); + } else { + api_slice.push(line.to_string()); + } + } + if api_slice.len() > 1 { + api_slices.push(api_slice); + } + crate::log!(trace, "slices: {api_slices:?}"); + api_slices + } + + // Unify an array type to a pointer type, e.g. char[100][33] becomes char **. 
+ pub fn unify_arg_type(arg_type: String) -> String { + let re = Regex::new(r"(\[\d+\])").unwrap(); + let arg_type = re.replace(&arg_type, " *").to_string(); + let ary_type = re.replace_all(&arg_type, "*").to_string(); + ary_type + } + + // Parse one argument description into an `Arg`: `arg_json` is used when given, otherwise `arg_string` is parsed as JSON; array types are unified to pointer types. + pub fn parse_api_arg(arg_string: String, arg_json: Option) -> Result { + let err: SliceError; + let arg = if let Some(arg) = &arg_json { + err = SliceError::ParseError(format!("{arg_json:?}")); + arg.clone() + } else { + err = SliceError::ParseError(arg_string.clone()); + serde_json::from_str(&arg_string)? + }; + + Ok(Arg { + cvrmask: arg["CVRMask"] + .as_u64() + .ok_or_else(|| Report::new(err.clone()))?, + arg_name: arg["arg_name"] + .as_str() + .ok_or_else(|| Report::new(err.clone()))? + .to_string(), + arg_type: unify_arg_type( + arg["arg_type"] + .as_str() + .ok_or_else(|| Report::new(err.clone()))? + .to_string(), + ), + raw_arg_type: unify_arg_type( + arg["raw_arg_type"] + .as_str() + .ok_or_else(|| Report::new(err.clone()))? + .to_string(), + ), + value: match &arg["value"] { + Value::Number(v) => { + if v.is_i64() { + let arg_name = arg["arg_name"] + .as_str() + .ok_or_else(|| Report::new(err.clone()))?; + if arg_name == "" { + ArgValue::Character(v.as_u64().ok_or_else(|| Report::new(err))? as u8) + } else if arg_name == "" { + ArgValue::NULLPointer + } else if arg_name == "" { + ArgValue::Integer(v.as_i64().ok_or_else(|| Report::new(err))?) + } else { + ArgValue::None + } + } else if v.is_f64() { + ArgValue::Float(FloatValue(v.as_f64().ok_or_else(|| Report::new(err))?)) + } else if v.is_u64() { + ArgValue::Integer(v.as_i64().ok_or_else(|| Report::new(err))?) 
+ } else { + return Err(Report::new(SliceError::ParseError(format!( + "Invalid arg value `{v:?}`" + )))); + } + } + Value::String(s) => ArgValue::StringValue(s.clone()), + _ => { + return Err(Report::new(SliceError::ParseError(format!( + "Invalid arg value `{:?}`", + arg["value"] + )))) + } + }, + }) + } + + pub fn parse_api_param(param_json: Value) -> Result { + let err = SliceError::ParseError(format!("{param_json:?}")); + let param: Value = param_json; + + Ok(Param { + cvrmask: param["CVRMask"] + .as_u64() + .ok_or_else(|| Report::new(err.clone()))?, + param_name: param["param_name"] + .as_str() + .ok_or_else(|| Report::new(err.clone()))? + .to_string(), + param_type: param["param_type"] + .as_str() + .ok_or_else(|| Report::new(err.clone()))? + .to_string(), + raw_param_type: param["raw_param_type"] + .as_str() + .ok_or_else(|| Report::new(err))? + .to_string(), + }) + } + + pub fn parse_api_fn(func_str: String) -> Result { + let func: Value = serde_json::from_str(func_str.trim())?; + let err = SliceError::ParseError(func_str); + let mut call: Vec = Vec::new(); + let mut decl: Vec = Vec::new(); + let call_json = func["Call"] + .as_array() + .ok_or_else(|| Report::new(err.clone()))?; + for value in call_json { + let arg = parse_api_arg(String::new(), Some(value.clone()))?; + call.push(arg); + } + + let decl_json = func["Decl"] + .as_array() + .ok_or_else(|| Report::new(err.clone()))?; + for value in decl_json { + let param = parse_api_param(value.clone())?; + decl.push(param); + } + + Ok(Function { + cvrmask: func["CVRMask"] + .as_u64() + .ok_or_else(|| Report::new(err.clone()))?, + call, + decl, + func_name: func["func_name"] + .as_str() + .ok_or_else(|| Report::new(err.clone()))? + .to_string(), + ret_type: func["ret_type"] + .as_str() + .ok_or_else(|| Report::new(err.clone()))? + .to_string(), + raw_ret_type: func["raw_ret_type"] + .as_str() + .ok_or_else(|| Report::new(err.clone()))? 
+ .to_string(), + t_index: func["t_index"].as_i64().ok_or_else(|| Report::new(err))?, + }) + } + + fn has_same_value(func1: &Function, func2: &Function) -> bool { + if func1.call.len() != func2.call.len() { + return false; + } + for index in 0..func1.call.len() { + let arg1 = &func1.call[index]; + let arg2 = &func2.call[index]; + if arg1.value != arg2.value { + return false; + } + } + true + } + + // Get the dataflow-related arg index of func; a `t_index` of -1 yields None. + pub fn get_api_arg_index(func: &Function) -> Option { + let index = func.t_index; + if index == -1 { + None + } else { + Some(index as usize) + } + } + + // Insert the function under `parent` in the API_TREE, and return the node of this function. + pub fn insert_fn_to_tree(_layer: i32, func: Function, parent: NodeTy) -> Result { + // if a node with the same func_name and the same arg values already exists in parent.childs, return it + for child in parent.borrow().childs.iter() { + if child.borrow().func.func_name == func.func_name + && has_same_value(&child.borrow().func, &func) + { + return Ok(child.clone()); + } + } + + // otherwise, create a new node and insert it into parent.childs + let new_node = Rc::new(RefCell::new(TreeNode::default())); + let index = get_api_arg_index(&func); + + new_node.borrow_mut().set_index(index); + (*new_node.borrow_mut()).set_func(func); + (*new_node.borrow_mut()).set_parent(Rc::downgrade(&parent)); + (*new_node.borrow_mut()).set_root(false); + (*parent.borrow_mut()).push_child(new_node.clone()); + + Ok(new_node) + } + + // Insert a slice of functions into the API_TREE as a parent-to-child chain below `root`; entries that start with '#' or are blank are skipped. 
+ pub fn insert_fns_to_tree(api_slice: Vec, root: Rc>) -> Result<()> { + let mut parent = root; + let mut curr_node: Rc>; + let mut layer = 0; + for api in api_slice { + if api.starts_with('#') || api.trim().is_empty() { + continue; + } + layer += 1; + let func = parse_api_fn(api)?; + curr_node = insert_fn_to_tree(layer, func, parent)?; + parent = curr_node; + } + Ok(()) + } + + // Insert one api slice into the API_TREE; its first entry describes the argument whose type selects the root node. 
+ pub fn insert_one_api_slice(api_slice: Vec, api_tree: &mut ApiTreeTy) -> Result<()> { + let first_arg = api_slice[0].clone(); + let taint_arg = parse_api_arg(first_arg, None)?; + let arg_type = taint_arg.arg_type; + if let Some(root_node) = api_tree.get(&arg_type) { + insert_fns_to_tree(api_slice[1..].to_vec(), root_node.clone()) + } else { + let root_node = Rc::new(RefCell::new(TreeNode::default())); + (*root_node.borrow_mut()).set_root(true); + insert_fns_to_tree(api_slice[1..].to_vec(), root_node.clone())?; + api_tree.insert(arg_type, root_node); + Ok(()) + } + } + + // Read the slice file at `slice_path`, parse it into api slices and build the API_TREE from them. + pub fn read_api_slices(slice_path: String) -> Result { + crate::log!( + info, + "read api slices: this step will cost little minutes if your slice number is large." + ); + let contents = std::fs::read_to_string(slice_path) + .context("Invalid file path of input slices.".to_string())?; + let api_slices = parse_to_api_slices(contents); + let mut api_tree: ApiTreeTy = HashMap::new(); + for api_slice in api_slices { + insert_one_api_slice(api_slice, &mut api_tree)?; + } + Ok(api_tree) + } + + // Traverse the given tree breadth-first and return the nodes whose func_name equals `func_name`. + pub fn bfs_get_func_nodes(func_name: &str, root: Rc>) -> Vec { + let mut queue: VecDeque = VecDeque::new(); + let mut func_nodes: Vec = Vec::new(); + queue.push_back(root); + while !queue.is_empty() { + let curr_node = queue.pop_front().unwrap(); + if curr_node.borrow().func.func_name == func_name { + func_nodes.push(curr_node.clone()); + } + for child in curr_node.borrow().childs.iter() { + queue.push_back(child.clone()); + } + } + func_nodes + } + + // Traverse the given tree breadth-first and insert every visited node into the map, keyed by its func_name. 
+ pub fn bfs_init_root_tree( + func_node_map: &mut HashMap>, + root: Rc>, + ) { + let mut queue: VecDeque = VecDeque::new(); + queue.push_back(root); + while !queue.is_empty() { + let curr_node = queue.pop_front().unwrap(); + let func_name = &curr_node.borrow().func.func_name; + if let Some(node_vec) = func_node_map.get_mut(func_name) { + node_vec.push(curr_node.clone()); + } else { + let node_vec: Vec = vec![curr_node.clone()]; + func_node_map.insert(func_name.clone(), node_vec); + } + for child in curr_node.borrow().childs.iter() { + queue.push_back(child.clone()); + } + } + } + + // Collect the nodes named `func_name` from every tree of the API_TREE; returns None when no node matches. + pub fn get_func_nodes(func_name: &str) -> Option> { + let api_tree = get_api_tree().expect("expect to get a non-none api tree"); + let mut func_nodes: Vec = Vec::new(); + for (_, root) in api_tree.iter() { + let mut nodes = bfs_get_func_nodes(func_name, root.clone()); + func_nodes.append(&mut nodes); + } + if !func_nodes.is_empty() { + return Some(func_nodes); + } + None + } + + pub fn init_func_node_map() -> HashMap> { + let mut func_node_map: HashMap> = HashMap::new(); + let api_tree = get_api_tree().expect("expect to get a non-none api tree"); + for (_, root) in api_tree.iter() { + bfs_init_root_tree(&mut func_node_map, root.clone()); + } + func_node_map + } + + // Walk up the parent links of `node` and collect its ancestors, nearest first, stopping before the root node. 
+ pub fn get_parent_nodes_recursively(node: Option) -> Vec { + let mut parents: Vec = Vec::new(); + if node.is_none() { + return parents; + } + let node = node.unwrap(); + let mut parent = node.borrow().parent.upgrade(); + while parent.is_some() { + let cur_node = parent.unwrap(); + if cur_node.borrow().is_root { + break; + } + parents.push(cur_node.clone()); + parent = cur_node.borrow().parent.upgrade(); + } + parents + } + + // Randomly choose a node in the API_TREE that has the given {func_name} and {index}. 
+ pub fn rand_choose_func_index_node(func_name: &str, index: usize) -> Option { + if let Some(func_nodes) = get_func_node_map().get(&func_name.to_string()) { + crate::log!(trace, "Get the func node map."); + let func_nodes: Vec = func_nodes + .iter() + .filter(|node| { + node.borrow().index.is_some() && node.borrow().index.unwrap() == index + }) + .cloned() + .collect(); + crate::log!(trace, "Get the func nodes."); + if func_nodes.is_empty() { + return None; + } + let rand_index = crate::gen_range(0..func_nodes.len()); + let node = func_nodes[rand_index].clone(); + return Some(node); + } + None + } + + pub fn _verbose_node(node: Rc>) { + print!("{:#?} ", node.borrow()); + } + + pub fn _verbose_api_tree() { + let api_tree = get_api_tree().expect("expect to get a non-none api tree"); + for (arg_type, root_node) in api_tree { + println!("arg_type: {arg_type}"); + _verbose_node(root_node.clone()); + } + } + /* The human-readable test case are formated bellow, where the third call and some arg values are different. 
+ "$int *,arr + void *,CRYPTO_malloc,int *,arr,char *,,int,538 + int,BN_GF2m_poly2arr,const BIGNUM *,p,int *,arr,int,max + int,BN_GF2m_mod_mul_arr,BIGNUM *,r,const BIGNUM *,a,const BIGNUM *,b,int *,arr,BN_CTX *,ctx + void,CRYPTO_free,int *,arr,char *,,int, + + $int *,arr + void *,CRYPTO_malloc,int *,arr,char *,,int,476 + int,BN_GF2m_poly2arr,const BIGNUM *,NULL,int *,arr,int,max + int,BN_GF2m_mod_sqrt_arr,BIGNUM *,r,const BIGNUM *,a,int *,arr,BN_CTX *,ctx + void,CRYPTO_free,int *,arr,char *,,int,"; + */ + #[cfg(test)] + static TEST_SLICES: &str = r#"{"CVRMask":0,"arg_name":"arr","arg_type":"int *","raw_arg_type":"int *","value":0} + {"CVRMask":0,"Call":[{"CVRMask":0,"arg_name":"arr","arg_type":"int *","raw_arg_type":"int *","value":0},{"CVRMask":0,"arg_name":"","arg_type":"char *","raw_arg_type":"char *","value":"/home/loydlv/vbd/hopper_bench/openssl/src/crypto/bn/bn_gf2m.c"},{"CVRMask":0,"arg_name":"","arg_type":"int","raw_arg_type":"int","value":538}],"Decl":[{"CVRMask":0,"param_name":"num","param_type":"size_t","raw_param_type":"unsigned long"},{"CVRMask":0,"param_name":"file","param_type":"const char *","raw_param_type":"const char *"},{"CVRMask":0,"param_name":"line","param_type":"int","raw_param_type":"int"}],"func_name":"CRYPTO_malloc","raw_ret_type":"void *","ret_type":"void *","t_index":-1} + {"CVRMask":0,"Call":[{"CVRMask":0,"arg_name":"p","arg_type":"const BIGNUM *","raw_arg_type":"const struct bignum_st *","value":0},{"CVRMask":0,"arg_name":"arr","arg_type":"int *","raw_arg_type":"int *","value":0},{"CVRMask":1,"arg_name":"max","arg_type":"int","raw_arg_type":"int","value":0}],"Decl":[{"CVRMask":0,"param_name":"a","param_type":"const BIGNUM *","raw_param_type":"const struct bignum_st *"},{"CVRMask":0,"param_name":"p","param_type":"int *","raw_param_type":"int *"},{"CVRMask":0,"param_name":"max","param_type":"int","raw_param_type":"int"}],"func_name":"BN_GF2m_poly2arr","raw_ret_type":"int","ret_type":"int","t_index":1} + 
{"CVRMask":0,"Call":[{"CVRMask":0,"arg_name":"r","arg_type":"BIGNUM *","raw_arg_type":"struct bignum_st *","value":0},{"CVRMask":0,"arg_name":"a","arg_type":"const BIGNUM *","raw_arg_type":"const struct bignum_st *","value":0},{"CVRMask":0,"arg_name":"arr","arg_type":"int *","raw_arg_type":"int *","value":0},{"CVRMask":0,"arg_name":"ctx","arg_type":"BN_CTX *","raw_arg_type":"struct bignum_ctx *","value":0}],"Decl":[{"CVRMask":0,"param_name":"r","param_type":"BIGNUM *","raw_param_type":"struct bignum_st *"},{"CVRMask":0,"param_name":"a","param_type":"const BIGNUM *","raw_param_type":"const struct bignum_st *"},{"CVRMask":0,"param_name":"p","param_type":"const int *","raw_param_type":"const int *"},{"CVRMask":0,"param_name":"ctx","param_type":"BN_CTX *","raw_param_type":"struct bignum_ctx *"}],"func_name":"BN_GF2m_mod_sqr_arr","raw_ret_type":"int","ret_type":"int","t_index":2} + {"CVRMask":0,"Call":[{"CVRMask":0,"arg_name":"arr","arg_type":"int *","raw_arg_type":"int *","value":0},{"CVRMask":0,"arg_name":"","arg_type":"char *","raw_arg_type":"char *","value":"/home/loydlv/vbd/hopper_bench/openssl/src/crypto/bn/bn_gf2m.c"},{"CVRMask":0,"arg_name":"","arg_type":"int","raw_arg_type":"int","value":551}],"Decl":[{"CVRMask":0,"param_name":"ptr","param_type":"void *","raw_param_type":"void *"},{"CVRMask":0,"param_name":"file","param_type":"const char *","raw_param_type":"const char *"},{"CVRMask":0,"param_name":"line","param_type":"int","raw_param_type":"int"}],"func_name":"CRYPTO_free","raw_ret_type":"void","ret_type":"void","t_index":0} + + {"CVRMask":0,"arg_name":"arr","arg_type":"int *","raw_arg_type":"int *","value":0} + {"CVRMask":0,"Call":[{"CVRMask":0,"arg_name":"arr","arg_type":"int *","raw_arg_type":"int *","value":0},{"CVRMask":0,"arg_name":"","arg_type":"char *","raw_arg_type":"char 
*","value":"/home/loydlv/vbd/hopper_bench/openssl/src/crypto/bn/bn_gf2m.c"},{"CVRMask":0,"arg_name":"","arg_type":"int","raw_arg_type":"int","value":476}],"Decl":[{"CVRMask":0,"param_name":"num","param_type":"size_t","raw_param_type":"unsigned long"},{"CVRMask":0,"param_name":"file","param_type":"const char *","raw_param_type":"const char *"},{"CVRMask":0,"param_name":"line","param_type":"int","raw_param_type":"int"}],"func_name":"CRYPTO_malloc","raw_ret_type":"void *","ret_type":"void *","t_index":-1} + {"CVRMask":0,"Call":[{"CVRMask":0,"arg_name":"","arg_type":"const BIGNUM *","raw_arg_type":"const struct bignum_st *","value":0},{"CVRMask":0,"arg_name":"arr","arg_type":"int *","raw_arg_type":"int *","value":0},{"CVRMask":1,"arg_name":"max","arg_type":"int","raw_arg_type":"int","value":0}],"Decl":[{"CVRMask":0,"param_name":"a","param_type":"const BIGNUM *","raw_param_type":"const struct bignum_st *"},{"CVRMask":0,"param_name":"p","param_type":"int *","raw_param_type":"int *"},{"CVRMask":0,"param_name":"max","param_type":"int","raw_param_type":"int"}],"func_name":"BN_GF2m_poly2arr","raw_ret_type":"int","ret_type":"int","t_index":1} + {"CVRMask":0,"Call":[{"CVRMask":0,"arg_name":"r","arg_type":"BIGNUM *","raw_arg_type":"struct bignum_st *","value":0},{"CVRMask":0,"arg_name":"a","arg_type":"const BIGNUM *","raw_arg_type":"const struct bignum_st *","value":0},{"CVRMask":0,"arg_name":"b","arg_type":"const BIGNUM *","raw_arg_type":"const struct bignum_st *","value":0},{"CVRMask":0,"arg_name":"arr","arg_type":"int *","raw_arg_type":"int *","value":0},{"CVRMask":0,"arg_name":"ctx","arg_type":"BN_CTX *","raw_arg_type":"struct bignum_ctx *","value":0}],"Decl":[{"CVRMask":0,"param_name":"r","param_type":"BIGNUM *","raw_param_type":"struct bignum_st *"},{"CVRMask":0,"param_name":"a","param_type":"const BIGNUM *","raw_param_type":"const struct bignum_st *"},{"CVRMask":0,"param_name":"b","param_type":"const BIGNUM *","raw_param_type":"const struct bignum_st 
*"},{"CVRMask":0,"param_name":"p","param_type":"const int *","raw_param_type":"const int *"},{"CVRMask":0,"param_name":"ctx","param_type":"BN_CTX *","raw_param_type":"struct bignum_ctx *"}],"func_name":"BN_GF2m_mod_mul_arr","raw_ret_type":"int","ret_type":"int","t_index":3} + {"CVRMask":0,"Call":[{"CVRMask":0,"arg_name":"arr","arg_type":"int *","raw_arg_type":"int *","value":0},{"CVRMask":0,"arg_name":"","arg_type":"char *","raw_arg_type":"char *","value":"/home/loydlv/vbd/hopper_bench/openssl/src/crypto/bn/bn_gf2m.c"},{"CVRMask":0,"arg_name":"","arg_type":"int","raw_arg_type":"int","value":489}],"Decl":[{"CVRMask":0,"param_name":"ptr","param_type":"void *","raw_param_type":"void *"},{"CVRMask":0,"param_name":"file","param_type":"const char *","raw_param_type":"const char *"},{"CVRMask":0,"param_name":"line","param_type":"int","raw_param_type":"int"}],"func_name":"CRYPTO_free","raw_ret_type":"void","ret_type":"void","t_index":0} + "#; + + #[test] + fn _test_parse_api_arg() -> Result<()> { + let arg_str = r#"{"CVRMask":0,"arg_name":"monitor","arg_type":"cJSON *","raw_arg_type":"struct cJSON *","value":0}"#; + let arg = parse_api_arg(arg_str.to_string(), None); + println!("{arg:#?}"); + Ok(()) + } + + #[test] + fn _test_parse_api_fn() -> Result<()> { + let poly2arr_str = TEST_SLICES.lines().nth(8).unwrap().to_string(); + let func = parse_api_fn(poly2arr_str)?; + assert_eq!(func.func_name, "BN_GF2m_poly2arr"); + assert_eq!(func.call.len(), 3); + assert_eq!(func.call[0].value, ArgValue::NULLPointer); + assert_eq!(func.call[1].value, ArgValue::None); + Ok(()) + } + + #[test] + fn _test_has_same_value() -> Result<()> { + let malloc_1 = TEST_SLICES.lines().nth(1).unwrap().to_string(); + let malloc_2 = TEST_SLICES.lines().nth(7).unwrap().to_string(); + let mut malloc_1 = parse_api_fn(malloc_1)?; + let malloc_2 = parse_api_fn(malloc_2)?; + assert_eq!(malloc_1.call[2].value, ArgValue::Integer(538)); + assert_eq!(malloc_2.call[2].value, ArgValue::Integer(476)); + 
assert!(!has_same_value(&malloc_1, &malloc_2)); + malloc_1.call[2].value = ArgValue::Integer(476); + assert!(has_same_value(&malloc_1, &malloc_2)); + Ok(()) + } + + #[test] + fn _test_read_api_tree() -> Result<()> { + let api_slices = parse_to_api_slices(TEST_SLICES.to_string()); + let mut api_tree: ApiTreeTy = HashMap::new(); + for api_slice in api_slices { + insert_one_api_slice(api_slice, &mut api_tree)?; + } + let inner_type = "int *".to_string(); + assert!(api_tree.contains_key(&inner_type)); + let root_node = api_tree.get(&inner_type).unwrap(); + assert!(root_node.borrow().is_root); + assert_eq!(root_node.borrow().childs.len(), 2); + + let malloc_node_1 = root_node.borrow().childs[0].clone(); + assert_eq!(malloc_node_1.borrow().childs.len(), 1); + assert_eq!(malloc_node_1.borrow().index, None); + + let malloc_node_2 = root_node.borrow().childs[1].clone(); + assert_eq!(malloc_node_2.borrow().childs.len(), 1); + assert_eq!(malloc_node_2.borrow().index, None); + + let poly2arr_node_1 = malloc_node_1.borrow().childs[0].clone(); + assert_eq!(poly2arr_node_1.borrow().index, Some(1)); + assert_eq!(poly2arr_node_1.borrow().childs.len(), 1); + //_verbose_node(poly2arr_node.clone()); + + let poly2arr_node_2 = malloc_node_2.borrow().childs[0].clone(); + assert_eq!(poly2arr_node_2.borrow().index, Some(1)); + assert_eq!(poly2arr_node_2.borrow().childs.len(), 1); + + let mod_mul_arr_node = poly2arr_node_1.borrow().childs[0].clone(); + let mod_sqrt_arr_node = poly2arr_node_2.borrow().childs[0].clone(); + assert_eq!(mod_mul_arr_node.borrow().childs.len(), 1); + assert_eq!(mod_sqrt_arr_node.borrow().childs.len(), 1); + + let free_node_1 = mod_mul_arr_node.borrow().childs[0].clone(); + let free_node_2 = mod_sqrt_arr_node.borrow().childs[0].clone(); + assert_eq!(free_node_1.borrow().func.func_name, "CRYPTO_free"); + assert_eq!(free_node_2.borrow().func.func_name, "CRYPTO_free"); + Ok(()) + } +} + +pub mod slice_fuzz { + use super::*; + use crate::{ + global_gadgets, log, 
runtime::CallStmt, utils, AssertStmt, FieldKey, FuzzProgram, LoadStmt, + Location, ObjectState, StmtIndex, + }; + use slice_utils::*; + + trait CastToParamType { + fn cast_to_param_type(&self, param_type: &str) -> Result; + } + + macro_rules! impl_cast_integer_to_param_type { + ($($name:ident),*) => { + $( + impl CastToParamType for $name{ + fn cast_to_param_type(&self, param_type: &str) -> Result{ + match param_type{ + "u8" => Ok(Box::new(*self as u8)), + "u16" => Ok(Box::new(*self as u16)), + "u32" => Ok(Box::new(*self as u32)), + "u64" => Ok(Box::new(*self as u64)), + "u128" => Ok(Box::new(*self as u128)), + "i8" => Ok(Box::new(*self as i8)), + "i16" => Ok(Box::new(*self as i16)), + "i32" => Ok(Box::new(*self as i32)), + "i64" => Ok(Box::new(*self as i64)), + "i128" => Ok(Box::new(*self as i128)), + "char" => Ok(Box::new(*self as u8 as char)), + "bool" => Ok(Box::new(*self != 0)), + "f32" => Ok(Box::new(*self as f32)), + "f64" => Ok(Box::new(*self as f64)), + _ => return Err(eyre::eyre!("error cast from {self} to {param_type}")), + } + } + } + )* + }; + } + + macro_rules! 
impl_cast_floating_to_param_type { + ($($name:ident),*) => { + $( + impl CastToParamType for $name{ + fn cast_to_param_type(&self, param_type: &str) -> Result{ + match param_type{ + "f32" => Ok(Box::new(*self as f32)), + "f64" => Ok(Box::new(*self as f64)), + _ => return Err(eyre::eyre!("error cast from {self} to {param_type}")), + } + } + } + )* + }; + } + + impl_cast_integer_to_param_type!(i64, u64, u8); + impl_cast_floating_to_param_type!(f64); + + impl FuzzProgram { + pub fn generate_program_for_func_by_slices( + f_name: &str, + ) -> eyre::Result> { + if crate::is_pilot_infer() || !is_using_slice() { + return Ok(None); + } + // create an empty program + let mut program: FuzzProgram = Default::default(); + program.save_mutate_state(); + let res = CallStmt::generate_new_by_slices(&mut program, CallStmt::TARGET, f_name, 0) + .with_context(|| format!("fail to generate call `{f_name}`")); + if is_slice_not_found_err(&res) { + return Ok(None); + } + let mut call = res?; + // only track target function + call.track_cov = true; + let _stmt = program.append_stmt(call); + program.check_ref_use()?; + program + .refine_program() + .with_context(|| program.serialize_all().unwrap())?; + log!(trace, "Program after refine: {}", program.serialize_all()?); + Ok(Some(program)) + } + } + + fn is_gadget_exist(f_name: &str) -> bool { + global_gadgets::get_instance() + .get_func_gadget(f_name) + .is_ok() + } + + fn _verbose_parents(parents: &Vec) { + let mut parent_names = Vec::new(); + for i in (0..parents.len()).rev() { + let parent = &parents[i]; + parent_names.push(parent.borrow().func.func_name.clone()); + } + log!(trace, "parents: {:?}", parent_names); + } + + fn generate_vec_with_given_value(state: &mut ObjectState, value: &String) -> Result> { + let mut list = vec![]; + for unit in value.as_bytes() { + let idx = state.children.len(); + let _ = state + .add_child(idx, std::any::type_name::()) + .last_child_mut()?; + list.push(*unit); + } + 
crate::fuzz::seq::add_vec_terminator(&mut list, state); + Ok(list) + } + + impl LoadStmt { + fn generate_vec_with_str( + str_value: &String, + arg_type: &str, + arg_ident: &str, + ) -> Result { + let mut state = + LoadStmt::new_state(arg_ident, format!("alloc::vec::Vec<{arg_type}>").as_str()); + let value = generate_vec_with_given_value(&mut state, str_value)?; + let load = LoadStmt::new(Box::new(value), state); + Ok(load) + } + } + + impl CallStmt { + pub fn generate_new_by_slices( + program: &mut FuzzProgram, + ident: &str, + f_name: &str, + depth: usize, + ) -> eyre::Result { + if !is_gadget_exist(f_name) { + return Err(Report::new(SliceError::GadgetNotFound(f_name.to_string()))); + } + if !is_func_in_slice(f_name) { + // && is_only_using_slics() + return Err(Report::new(SliceError::SliceNotFound(f_name.to_string()))); + } + + let fg = global_gadgets::get_instance() + .get_func_gadget(f_name)? + .clone(); + + log!( + trace, + "Generate new call for {f_name} with slices. depth :{depth}, ident: {ident}." 
+ ); + let mut call = CallStmt::new(ident.to_string(), f_name.to_string(), fg); + // Find or create args for call + let type_names = call.fg.arg_types; + let is_variadic = utils::is_variadic_function(type_names); + let arg_num = if is_variadic { + type_names.len() - 1 + } else { + type_names.len() + }; + + for i in 0..arg_num { + call.generate_ith_arg_by_slice(program, f_name, i, depth)?; + } + log!(trace, "Generate new call for {f_name} with slices done."); + Ok(call) + } + + fn set_ith_arg_as_null_pointer( + &mut self, + program: &mut FuzzProgram, + index: usize, + ) -> Result<()> { + log!( + trace, + "set `{index}-th` arg of `{}` to a null pointer", + self.fg.f_name + ); + let arg_type = self.fg.arg_types[index]; + let arg_ident = self.fg.arg_idents[index]; + if !utils::is_pointer_type(arg_type) { + return Err(eyre::eyre!("Unable to set NULL for non-pointer type arg")); + } + let null_stmt = LoadStmt::generate_null(arg_type, arg_ident)?; + let stmt_index = program.insert_or_append_stmt(null_stmt)?; + self.set_arg(index, stmt_index); + Ok(()) + } + + // set ith arg with integer, floating or char type value. + fn set_ith_arg_with_value( + &mut self, + program: &mut FuzzProgram, + index: usize, + val: crate::FuzzObject, + arg_type: &str, + arg_ident: &str, + ) -> Result<()> { + log!( + trace, + "`{}` set the `{index}-th` arg to {:?}", + self.fg.f_name, + val + ); + let state = LoadStmt::new_state(arg_ident, arg_type); + let load = LoadStmt::new(val, state); + let stmt_index = program.insert_or_append_stmt(load)?; + self.set_arg(index, stmt_index); + Ok(()) + } + + // set ith arg with string type value. 
+ fn set_ith_arg_with_string( + &mut self, + program: &mut FuzzProgram, + index: usize, + val: &String, + ) -> Result<()> { + log!( + trace, + "`{}` set the `{index}-th` arg to {:?}", + self.fg.f_name, + val + ); + let arg_type = self.fg.arg_types[index]; + let arg_ident = self.fg.arg_idents[index]; + if !utils::is_pointer_type(arg_type) { + return Err(eyre::eyre!( + "Unable to set string value with non-pointer type!" + )); + } + let mut state = LoadStmt::new_state(arg_ident, arg_type); + let pointer_value = global_gadgets::get_instance() + .get_object_builder(arg_type)? + .generate_new(&mut state)?; + // generate the vec stmt + let load_stmt = LoadStmt::generate_vec_with_str(val, arg_type, arg_ident)?; + let stmt_index = program.insert_or_append_stmt(load_stmt)?; + //set pointer location + let pointer = state.pointer.as_mut().context("pointer has ps")?; + pointer.pointer_location = Location::stmt(stmt_index); + // generate the pointer stmt + //crate::pointer::generate_pointer_location(program, &mut state, 0)?; + let pointer_load = LoadStmt::new(pointer_value, state); + let load_index = program.insert_or_append_stmt(pointer_load)?; + self.set_arg(index, load_index); + Ok(()) + } + + fn set_ith_arg_ex_value( + &mut self, + program: &mut FuzzProgram, + index: usize, + arg: &Arg, + ) -> Result<()> { + if index >= self.fg.arg_idents.len() { + return Err(Report::new(SliceError::ArgIndexError( + self.fg.f_name.to_string(), + index, + ))); + } + log!( + trace, + "set {index}-th arg of {} to {:?}", + self.fg.f_name, + arg + ); + let arg_type = self.fg.arg_types[index]; + let arg_ident = self.fg.arg_idents[index]; + match &arg.value { + ArgValue::None => return Ok(()), + ArgValue::NULLPointer => self.set_ith_arg_as_null_pointer(program, index)?, + ArgValue::Integer(int_val) => self.set_ith_arg_with_value( + program, + index, + int_val.cast_to_param_type(arg_type)?, + arg_type, + arg_ident, + )?, + ArgValue::Float(float_val) => self.set_ith_arg_with_value( + program, + 
index, + float_val.value().cast_to_param_type(arg_type)?, + arg_type, + arg_ident, + )?, + ArgValue::Character(char_val) => self.set_ith_arg_with_value( + program, + index, + char_val.cast_to_param_type(arg_type)?, + arg_type, + arg_ident, + )?, + ArgValue::StringValue(string_val) => { + self.set_ith_arg_with_string(program, index, string_val)?; + } + } + Ok(()) + } + + // set current call args with the exact values in a slice + fn set_arg_values_by_slice( + &mut self, + program: &mut FuzzProgram, + node: NodeTy, + ) -> Result<()> { + for (index, arg) in node.borrow().func.call.iter().enumerate() { + let res = self.set_ith_arg_ex_value(program, index, arg); + if is_arg_index_err(res.err())? { + continue; + } + } + Ok(()) + } + + fn generate_ith_arg_by_slice( + &mut self, + program: &mut FuzzProgram, + f_name: &str, + index: usize, + depth: usize, + ) -> Result<()> { + log!( + trace, + "select node and parents for {index}-th arg of {f_name}" + ); + // select a node and then upwalk the API_TREE to collect all parents. + let func_node = rand_choose_func_index_node(f_name, index); + let parents = get_parent_nodes_recursively(func_node); + if parents.is_empty() { + log!(trace, "create this arg by rules."); + self.set_ith_call_arg(program, index, depth)?; + return Ok(()); + } + _verbose_parents(&parents); + + // generate callstmt for each parent and save in a vector. + let mut call_vector: Vec<(Option, Option)> = Vec::new(); + for node in parents { + let new_name = &node.borrow().func.func_name; + if is_gadget_exist(new_name) { + let mut new_call = + CallStmt::generate_new(program, CallStmt::RELATIVE, new_name, depth + 1)?; + // set arg exact values + new_call.set_arg_values_by_slice(program, node.clone())?; + call_vector.push((Some(new_call), node.borrow().index)); + } else { + call_vector.push((None, None)); + } + } + // the first call produces the argment. 
+ let (producer_call, _) = call_vector.pop().unwrap(); + let type_name = self.fg.arg_types[index]; + let arg_ident = self.fg.arg_idents[index]; + let producer_index: StmtIndex; + if let Some(producer_call) = producer_call { + crate::log!( + trace, + "use call `{}` to produce the arg ", + producer_call.fg.f_name + ); + let mut producer_call = producer_call; + producer_call.ident = arg_ident.to_string(); + let _tmp = crate::ReuseStmtGuard::temp_disable(); + let call_index = program.insert_or_append_stmt(producer_call)?; + let _ = program + .insert_or_append_stmt(AssertStmt::assert_non_null(call_index.use_index())); + let mut ptr_load = LoadStmt::generate_null(type_name, arg_ident)?; + let mut loc = Location::stmt(call_index); + loc.fields.push(FieldKey::Pointer); + ptr_load.state.get_pointer_mut()?.pointer_location = loc; + producer_index = program.insert_or_append_stmt(ptr_load)?; + } else { + self.set_ith_call_arg(program, index, depth)?; + producer_index = self.args[index].use_index(); + } + + // insert the rest relative call into program. + while !call_vector.is_empty() { + let (relative_call, relative_index) = call_vector.pop().unwrap(); + if relative_call.is_none() { + continue; + } + let mut relative_call = relative_call.unwrap(); + log!( + trace, + "insert relative call `{}` in slice", + relative_call.fg.f_name + ); + if relative_index.is_none() { + return Err(eyre::eyre!( + "Invalid relateive_index `{}`-ith of `{}`", + "None", + relative_call.fg.f_name + )); + } + relative_call.set_ith_arg_for_relative_call( + program, + relative_index.unwrap(), + producer_index.clone(), + type_name, + )?; + let _ = program.insert_or_append_stmt(relative_call)?; + } + self.set_arg(index, producer_index); + Ok(()) + } + } +} diff --git a/hopper-core/src/test.rs b/hopper-core/src/test.rs new file mode 100644 index 0000000..15fa67d --- /dev/null +++ b/hopper-core/src/test.rs @@ -0,0 +1,266 @@ +//! Types and functions for testing hopper, which can be used as gadgets. +//! 
- Types : add `Fuzz` derive +//! - Functions: add `fuzz` attribute + +use crate::{runtime::*, ObjGenerate}; +use hopper_derive::{fuzz, Fuzz}; + +pub fn generate_load_stmt(ident: &str, ty: &str) -> LoadStmt { + let mut state = LoadStmt::new_state(ident, ty); + let value = Box::new(T::generate_new( &mut state).unwrap()); + LoadStmt::new(value, state) +} + +pub fn generate_call_stmt(f_name: &str) -> CallStmt { + let fg = global_gadgets::get_instance() + .get_func_gadget(f_name) + .unwrap() + .clone(); + CallStmt::new("test_call".to_string(), f_name.to_string(), fg) +} + +pub type TestBool = u8; + +#[derive(Debug, Clone, Fuzz)] +pub struct TestType { + name: [char; 10], + flag: bool, + price: f32, + p: FuzzMutPointer, + index: i32, +} + +#[derive(Debug, Clone, Fuzz)] +pub struct ArrayWrap { + p: FuzzMutPointer, + len: u32, +} + +#[derive(Debug, Clone, Fuzz)] +pub struct ListNode { + val: i32, + next: FuzzMutPointer, +} + +#[fuzz] +pub fn create_list_node() -> FuzzMutPointer { + let ret = Box::new(ListNode { + val: 1, + next: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + }); + FuzzMutPointer::::new(Box::into_raw(ret)) +} + +#[derive(Debug, Clone, Fuzz)] +pub struct ListNode2 { + val: i32, + f1: FuzzMutPointer, + f2: FuzzMutPointer, + f3: FuzzMutPointer, +} + +#[fuzz] +pub fn list_node() -> ListNode { + ListNode { + val: 1, + next: FuzzMutPointer::null(&mut ObjectState::root("", "FuzzMutPointer")), + } +} + +#[fuzz] +pub fn list_node2() -> ListNode2 { + ListNode2 { + val: 1, + f1: FuzzMutPointer::null(&mut ObjectState::root("", "FuzzMutPointer")), + f2: FuzzMutPointer::null(&mut ObjectState::root("", "FuzzMutPointer")), + f3: FuzzMutPointer::null(&mut ObjectState::root("", "FuzzMutPointer")), + } +} + +#[fuzz] +pub fn visi_list_node(_first: ListNode) {} + +#[fuzz] +pub fn visit_reference_circle(_first: FuzzMutPointer) {} + +#[fuzz] +pub fn create_u8_ptr() -> FuzzMutPointer { + let mut arr = vec![0_u8; 8]; + let ptr = arr.as_mut_ptr(); + 
println!("u8 ptr: {ptr:?}"); + std::mem::forget(arr); + FuzzMutPointer::::new(ptr) +} + +#[fuzz] +pub fn create_test_ptr() -> FuzzMutPointer { + let test = Box::new(TestType { + name: ['a'; 10], + flag: false, + price: 0.2, + p: create_u8_ptr(), + index: 0, + }); + // std::mem::forget(test); + FuzzMutPointer::::new(Box::into_raw(test)) +} + +#[fuzz] +pub fn func_add(a: u8, b: u8) -> u8 { + a.saturating_add(b) +} + +#[fuzz] +fn func_create(_p: FuzzConstPointer) -> [u8; 10] { + [0; 10] +} + +#[fuzz] +pub fn func_use(_m: [u8; 10]) {} + +#[fuzz] +pub fn func_struct(test: TestType) -> bool { + test.flag && test.index == 123 +} + +#[fuzz] +fn test_arr(ptr: FuzzMutPointer, len: u32) -> bool { + if ptr.get_inner().is_null() { + return false; + } + for i in 0..len { + println!("ptr-{}: {:?}", i, unsafe { + ptr.get_inner().add(i as usize) + }) + } + true +} + +#[fuzz] +fn test_mutate_arr(ptr: FuzzMutPointer>, len: FuzzMutPointer) -> bool { + println!("ptr: {ptr:?}, len: {len:?}"); + true +} + +#[fuzz] +fn test_use_array_wrap(_warp: ArrayWrap) {} + +#[fuzz] +fn test_index(test: TestType) -> bool { + if test.p.get_inner().is_null() { + return false; + } + println!("ptr-{}: {:?}", test.index, unsafe { + test.p.get_inner().add(test.index as usize) + }); + true +} + +#[fuzz] +pub fn test_do_nothing1() {} + +#[fuzz] +pub fn test_do_nothing2() {} + +#[fuzz] +fn test_fn_pointer(fn_ptr: Option) { + println!("fn_ptr: {fn_ptr:?}"); +} + +#[fuzz] +fn test_bool(_b: TestBool) {} + +#[fuzz] +pub fn reference_circle_1() -> FuzzMutPointer { + let first = Box::new(ListNode { + val: 1, + next: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + }); + let first_raw = Box::into_raw(first); + let first_ptr = FuzzMutPointer::::new(first_raw); + let second = Box::new(ListNode { + val: 1, + next: first_ptr, + }); + unsafe { + (*first_ptr.get_inner()).next = FuzzMutPointer::::new(Box::into_raw(second)); + } + first_ptr +} + +#[fuzz] +pub fn reference_circle_2() -> FuzzMutPointer 
{ + let first = Box::new(ListNode2 { + val: 1, + f1: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + f2: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + f3: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + }); + let first_raw = Box::into_raw(first); + println!("first_raw: {:p}", &first_raw); + let first_ptr = FuzzMutPointer::::new(first_raw); + let second = Box::new(ListNode2 { + val: 2, + f1: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + f2: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + f3: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + }); + let second_raw = Box::into_raw(second); + println!("second_raw: {:p}", &second_raw); + unsafe { + (*first_ptr.get_inner()).f1 = FuzzMutPointer::::new(second_raw); + (*first_ptr.get_inner()).f2 = FuzzMutPointer::::new(second_raw); + } + first_ptr +} + +#[fuzz] +pub fn reference_circle_3() -> FuzzMutPointer { + let first = Box::new(ListNode2 { + val: 1, + f1: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + f2: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + f3: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + }); + let first_raw = Box::into_raw(first); + println!("first_raw: {:p}", &first_raw); + let first_ptr = FuzzMutPointer::::new(first_raw); + let second = Box::new(ListNode2 { + val: 2, + f1: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + f2: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + f3: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + }); + let second_raw = Box::into_raw(second); + println!("second_raw: {:p}", &second_raw); + let second_ptr = FuzzMutPointer::::new(second_raw); + unsafe { + (*first_ptr.get_inner()).f1 = FuzzMutPointer::::new(second_raw); + (*second_ptr.get_inner()).f2 = 
FuzzMutPointer::::new(first_raw); + } + first_ptr +} + +#[fuzz] +pub fn reference_circle_4() -> FuzzMutPointer { + let first = Box::new(ListNode { + val: 1, + next: FuzzMutPointer::null(&mut ObjectState::root("prev", "FuzzMutPointer")), + }); + let first_raw = Box::into_raw(first); + let first_ptr = FuzzMutPointer::::new(first_raw); + unsafe { + (*first_ptr.get_inner()).next = first_ptr; + } + first_ptr +} + +#[fuzz] +pub fn test_one(k: usize) -> usize { + k +} + +#[fuzz] +pub fn test_non_zero(k: usize) -> usize { + k +} \ No newline at end of file diff --git a/hopper-core/src/utils.rs b/hopper-core/src/utils.rs new file mode 100644 index 0000000..3bcf123 --- /dev/null +++ b/hopper-core/src/utils.rs @@ -0,0 +1,471 @@ +//! Common functions. + +use findshlibs::{SharedLibrary, TargetSharedLibrary}; +use once_cell::sync::OnceCell; +use std::fs::File; +use std::io::{prelude::*, BufReader}; +use std::{path::Path, time}; + +use crate::{global_gadgets, Deserialize, Deserializer, FuzzProgram, Serialize}; + +#[macro_export] +macro_rules! log_trace { ($($arg:expr),*) => { $crate::log!(trace, $($arg),*) }; } +#[macro_export] +macro_rules! log_debug { ($($arg:expr),*) => { $crate::log!(debug, $($arg),*) }; } +#[macro_export] +macro_rules! log_info { ($($arg:expr),*) => { $crate::log!(info, $($arg),*) }; } +#[macro_export] +macro_rules! log_warn { ($($arg:expr),*) => { $crate::log!(warn, $($arg),*) }; } +#[macro_export] +macro_rules! log_error { ($($arg:expr),*) => { $crate::log!(error, $($arg),*) }; } + +#[cfg(not(test))] +#[macro_export] +macro_rules! log { + (trace, $($arg:expr),*) => { log::trace!($($arg),*) }; + (debug, $($arg:expr),*) => { log::debug!($($arg),*) }; + (info, $($arg:expr),*) => { log::info!($($arg),*) }; + (warn, $($arg:expr),*) => { log::warn!($($arg),*) }; + (error, $($arg:expr),*) => { log::error!($($arg),*) }; +} + +#[cfg(not(test))] +pub static mut LOG_COND: bool = true; + +#[cfg(not(test))] +#[macro_export] +macro_rules! 
log_c { + (trace, $($arg:expr),*) => { if unsafe { $crate::utils::LOG_COND } { log::trace!($($arg),*) } else { println!($($arg),*) } }; + (debug, $($arg:expr),*) => { if unsafe { $crate::utils::LOG_COND } { log::debug!($($arg),*) } else { println!($($arg),*) } }; + (info, $($arg:expr),*) => { if unsafe { $crate::utils::LOG_COND } { log::info!($($arg),*) } else { println!($($arg),*) } }; + (warn, $($arg:expr),*) => { if unsafe { $crate::utils::LOG_COND } { log::warn!($($arg),*) } else { println!($($arg),*) } }; + (error, $($arg:expr),*) => { if unsafe { $crate::utils::LOG_COND } { log::error!($($arg),*) } else { println!($($arg),*) } }; +} + +#[cfg(test)] +#[macro_export] +macro_rules! log { + (trace, $($arg:expr),*) => { println!($($arg),*) }; + (debug, $($arg:expr),*) => { println!($($arg),*) }; + (info, $($arg:expr),*) => { println!($($arg),*) }; + (warn, $($arg:expr),*) => { println!($($arg),*) }; + (error, $($arg:expr),*) => { println!($($arg),*) }; +} + +#[cfg(test)] +#[macro_export] +macro_rules! 
log_c { + (trace, $($arg:expr),*) => { println!($($arg),*) }; + (debug, $($arg:expr),*) => { println!($($arg),*) }; + (info, $($arg:expr),*) => { println!($($arg),*) }; + (warn, $($arg:expr),*) => { println!($($arg),*) }; + (error, $($arg:expr),*) => { println!($($arg),*) }; +} + +#[inline] +pub fn format_time(secs: u64) -> String { + let mut s = secs; + let mut m = s / 60; + let h = m / 60; + s %= 60; + m %= 60; + format!("[{h:02}:{m:02}:{s:02}]") +} + +#[inline] +pub fn format_count(c: usize) -> String { + if c > 1000000000 { + let f = c / 10000000; + format!("{:.2}b", f as f32 / 100.0) + } else if c > 1000000 { + let f = c / 10000; + format!("{:.2}m", f as f32 / 100.0) + } else if c > 10000 { + let f = c / 10; + format!("{:.2}k", f as f32 / 100.0) + } else { + format!("{c}") + } +} + +#[inline] +pub fn calculate_speed(c: usize, d: time::Duration) -> f64 { + let ts = d.as_secs() as f64; + if ts > 0.0 { + c as f64 / ts + } else { + 0.0 + } +} + +#[inline] +// Index or length numbers include values whose types are: u16/i16/u32/i32/u64/i64/usize/isize +pub fn is_index_or_length_number(ty: &str) -> bool { + ty == "u16" + || ty == "i16" + || ty == "u32" + || ty == "i32" + || ty == "u64" + || ty == "i64" + || ty == "usize" + || ty == "isize" +} + +#[inline] +pub fn is_primitive_type(type_name: &str) -> bool { + matches!( + type_name, + "()" | "u8" + | "i8" + | "u16" + | "i16" + | "u32" + | "i32" + | "u64" + | "i64" + | "f32" + | "f64" + | "char" + | "bool" + | "RetVoid" + ) +} + +#[inline] +pub fn is_custom_type(type_name: &str) -> bool { + if cfg!(test) { + type_name.starts_with("hopper::test") + } else { + type_name.starts_with("hopper_harness") + } +} + +#[inline] +pub fn is_option_type(type_name: &str) -> bool { + type_name.starts_with("core::option") +} + +#[inline] +pub fn is_pointer_type(type_name: &str) -> bool { + is_mut_pointer_type(type_name) || is_const_pointer_type(type_name) +} + +#[inline] +pub fn is_mut_pointer_type(type_name: &str) -> bool { + 
type_name.starts_with("hopper::runtime::FuzzMutPointer") +} +#[inline] +pub fn is_const_pointer_type(type_name: &str) -> bool { + type_name.starts_with("hopper::runtime::FuzzConstPointer") +} + +#[inline] +pub fn is_variadic_function(type_names: &[&str]) -> bool { + if let Some(p) = type_names.last() { + return *p == "..."; + } + false +} + +#[inline] +pub fn is_vec_type(type_name: &str) -> bool { + type_name.starts_with("alloc::vec::Vec") +} + +#[inline] +pub fn is_byte(type_name: &str) -> bool { + type_name == "i8" || type_name == "u8" || type_name == "char" +} + +#[inline] +pub fn is_c_str_type(type_name: &str) -> (bool, bool) { + let mut is_buf = false; + let mut is_mut = false; + if type_name == "hopper::runtime::FuzzMutPointer" { + is_buf = true; + is_mut = true; + } else if type_name == "hopper::runtime::FuzzConstPointer" { + is_buf = true; + } + (is_buf, is_mut) +} + +#[inline] +pub fn is_buffer_pointer(ty: &str) -> bool { + if let Some(inner) = get_pointer_inner(ty) { + return is_byte(inner); + } + false +} + +#[inline] +pub fn is_void_type(type_name: &str) -> bool { + type_name == "hopper::runtime::FuzzVoid" +} + +#[inline] +pub fn is_void_pointer(type_name: &str) -> bool { + if let Some(inner) = get_pointer_inner(type_name) { + return is_void_type(inner); + } + false +} + +#[inline] +pub fn is_opaque_type(type_name: &str) -> bool { + crate::global_gadgets::get_instance() + .opaque_types + .contains(type_name) +} + +#[inline] +pub fn get_pointer_inner(type_name: &str) -> Option<&'_ str> { + if let Some(t) = type_name.strip_prefix("hopper::runtime::") { + if let Some(t) = t.strip_prefix("FuzzConstPointer<") { + return t.strip_suffix('>'); + } + if let Some(t) = t.strip_prefix("FuzzMutPointer<") { + return t.strip_suffix('>'); + } + } + None +} + +#[inline] +pub fn is_opaque_pointer(type_name: &str) -> bool { + if let Some(inner) = get_pointer_inner(type_name) { + return is_opaque_type(inner); + } + // we may put alias name for pointers in opaque_types in 
gadgets + is_opaque_type(type_name) +} + +#[inline] +pub fn get_vec_inner(type_name: &str) -> Option<&'_ str> { + if let Some(t) = type_name.strip_prefix("alloc::vec::Vec<") { + return t.strip_suffix('>'); + } + None +} + +#[inline] +pub fn is_opaque_vec(type_name: &str) -> bool { + if let Some(inner) = get_vec_inner(type_name) { + return is_opaque_type(inner); + } + false +} + +#[inline] +pub fn const_pointer_type(type_name: &str) -> String { + format!("hopper::runtime::FuzzConstPointer<{type_name}>") +} + +#[inline] +pub fn mut_pointer_type(type_name: &str) -> String { + format!("hopper::runtime::FuzzMutPointer<{type_name}>") +} + +#[inline] +pub fn pointer_type(type_name: &str, is_mut: bool) -> String { + if is_mut { + mut_pointer_type(type_name) + } else { + const_pointer_type(type_name) + } +} + +#[inline] +pub fn is_same_type(l_ty: &str, r_ty: &str) -> bool { + l_ty == r_ty + || get_pointer_inner(l_ty) + .zip(get_pointer_inner(r_ty)) + .map_or(false, |(l_inner, r_inner)| l_inner == r_inner) +} + +#[inline] +pub fn get_static_ty(type_name: &str) -> &'static str { + let gadgets = global_gadgets::get_mut_instance(); + if gadgets.ty_strings.is_empty() { + gadgets.init_ty_strings(); + } + + if !gadgets.ty_strings.contains(type_name) { + gadgets.ty_strings.insert(type_name.to_string()); + } + gadgets.ty_strings.get(type_name).unwrap() +} + +#[inline] +pub fn vec_type(type_name: &str) -> String { + format!("alloc::vec::Vec<{type_name}>") +} + +use std::hash::{Hash, Hasher}; + +pub fn hash_buf(buf: &[u8]) -> u64 { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + buf.hash(&mut hasher); + hasher.finish() +} + +#[derive(Default)] +pub struct TimeUsage { + total: u128, + num: u64, +} + +impl TimeUsage { + pub fn count(&mut self) -> TimeCounter { + TimeCounter { + t: time::Instant::now(), + usage: self, + } + } + + pub fn add_time(&mut self, t: &time::Instant) { + self.total += t.elapsed().as_micros(); + self.num += 1; + } + + pub fn format(&self) -> 
String { + format_time((self.total / 1_000_000) as u64) + } + + pub fn percent(&self, all_secs: u64) -> String { + let per = if all_secs == 0 { + 0 + } else { + self.total / 10_000 / all_secs as u128 + }; + format!("{per}%") + } + + pub fn avg_micro(&self) -> f64 { + if self.num == 0 { + 0.0 + } else { + (self.total / self.num as u128) as f64 + } + } + + pub fn avg_ms(&self) -> String { + let avg = self.avg_micro() / 1000.0; + format!("{avg:.2}ms") + } +} + +pub struct TimeCounter<'a> { + pub t: time::Instant, + pub usage: &'a mut TimeUsage, +} + +impl<'a> Drop for TimeCounter<'a> { + fn drop(&mut self) { + self.usage.add_time(&self.t); + } +} + +pub fn is_in_shlib(ptr: *const u8) -> bool { + static BOUNDARY: OnceCell<(usize, usize)> = OnceCell::new(); + let (start, end) = BOUNDARY.get_or_init(|| { + let mut path = std::env::current_exe().unwrap(); + path.pop(); + path.pop(); + let dir = path.to_str().unwrap(); + let mut start = 0; + let mut end = 0; + TargetSharedLibrary::each(|shlib| { + let name = shlib.name().to_string_lossy(); + if name.starts_with(dir) && !name.ends_with("hopper-harness") { + start = shlib.actual_load_addr().into(); + end = start + shlib.len(); + } + }); + (start, end) + }); + let ptr = ptr as usize; + ptr > *start && ptr < *end +} + +#[inline] +pub fn is_unwritable(ptr: *const u8) -> bool { + if let Ok(r) = region::query(ptr) { + !r.is_writable() + } else { + false + } +} + +pub struct FileAppender { + pub fd: File, +} + +impl FileAppender { + pub fn create>(path: T) -> eyre::Result { + let fd = File::create(path)?; + Ok(Self { fd }) + } + + pub fn open>(path: T) -> eyre::Result { + let fd = std::fs::OpenOptions::new().append(true).open(path)?; + Ok(Self { fd }) + } + + pub fn append(&mut self, item: &T) -> eyre::Result<()> { + let buf = item.serialize()?; + self.fd.write_all(buf.as_bytes())?; + writeln!(self.fd)?; + Ok(()) + } + + pub fn append_list(&mut self, list: &[T]) -> eyre::Result<()> { + for item in list { + self.append(item)?; + } 
+ Ok(()) + } +} + +pub fn read_list_from_file, T: Deserialize>(path: R) -> eyre::Result> { + let mut list = vec![]; + let p: &Path = path.as_ref(); + if !p.exists() { + crate::log!(warn, "file {:?} does not exist!", p); + return Ok(list); + } + let file = File::open(p)?; + let reader = BufReader::new(&file); + for line in reader.lines() { + let line = line?; + let mut de = Deserializer::new(&line, None); + let item = T::deserialize(&mut de)?; + list.push(item); + } + Ok(list) +} + +pub fn read_list_with_program_from_file, T: Deserialize>( + path: R, + program: &mut FuzzProgram, +) -> eyre::Result> { + let mut list = vec![]; + let p: &Path = path.as_ref(); + if !p.exists() { + crate::log!(warn, "file {:?} does not exist!", p); + return Ok(list); + } + let file = File::open(p)?; + let reader = BufReader::new(&file); + for line in reader.lines() { + if let Err(e) = line { + crate::log!(warn, "fail to parse review result ({p:?}): {e:?}"); + break; + } + let line = line?; + let mut de = Deserializer::new(&line, Some(program)); + let item = T::deserialize(&mut de)?; + list.push(item); + } + Ok(list) +} diff --git a/hopper-derive-impl/Cargo.toml b/hopper-derive-impl/Cargo.toml new file mode 100644 index 0000000..0195722 --- /dev/null +++ b/hopper-derive-impl/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "hopper-derive-impl" +version = "1.0.0" +edition = "2021" +authors = ["Peng Chen "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +syn = {version = "1.0", features = ["full", "visit", "fold"]} +quote = "1.0" +proc-macro2 = "1" + +[features] +default = ["ctor_hook"] +ctor_hook = [] +link_hook = [] +use_crate = [] \ No newline at end of file diff --git a/hopper-derive-impl/src/field.rs b/hopper-derive-impl/src/field.rs new file mode 100644 index 0000000..38e1961 --- /dev/null +++ b/hopper-derive-impl/src/field.rs @@ -0,0 +1,565 @@ +//! 
Fields + +use proc_macro2::{Span, TokenStream}; +use syn::punctuated::Punctuated; +use syn::{Token, Type}; + +use crate::my_quote; + +pub struct FieldExt<'a> { + pub ty: &'a syn::Type, + pub ident: syn::Ident, + pub named: bool, +} + +impl<'a> FieldExt<'a> { + pub fn new(field: &'a syn::Field, idx: usize, named: bool) -> FieldExt<'a> { + FieldExt { + ty: &field.ty, + ident: if named { + field.ident.clone().unwrap() + } else { + syn::Ident::new(&format!("f{idx}"), proc_macro2::Span::call_site()) + }, + named, + } + } + + pub fn is_phantom_data(&self) -> bool { + match *self.ty { + syn::Type::Path(syn::TypePath { + qself: None, + ref path, + }) => path + .segments + .last() + .map(|x| x.ident == "PhantomData") + .unwrap_or_else(|| false), + _ => false, + } + } + + pub fn init_phantom(&self, field_name: &str, ty: &Type) -> TokenStream { + let ph = if cfg!(feature = "std") { + my_quote!(::std::marker::PhantomData) + } else { + my_quote!(::core::marker::PhantomData) + }; + my_quote!({ + let state = state.add_child(#field_name, std::any::type_name::<#ty>()).last_child()?; + state.done_deterministic(); + #ph + }) + } + + pub fn get_field_value(&self, use_self: bool) -> TokenStream { + let ident = &self.ident; + if use_self { + if self.named { + my_quote!(self.#ident) + } else { + my_quote!(self.0) + } + } else { + my_quote!(#ident) + } + } + + pub fn is_opaque(&self) -> bool { + self.named && self.ident.to_string().starts_with('_') + } +} + +pub fn convert_fields(fields: &syn::Fields) -> (Vec, bool, bool) { + let (fields, named) = match *fields { + syn::Fields::Named(ref fields) => (Some(&fields.named), true), + syn::Fields::Unit => (None, false), + syn::Fields::Unnamed(ref fields) => (Some(&fields.unnamed), false), + }; + let unit = fields.is_none(); + let fields = convert_field_list(fields, named); + (fields, named, unit) +} + +pub fn convert_field_list( + fields: Option<&Punctuated>, + named: bool, +) -> Vec { + if let Some(fields) = fields { + fields + .iter() + 
.enumerate() + .map(|(i, f)| FieldExt::new(f, i, named)) + .collect() + } else { + vec![] + } +} + +pub fn list_field_keys(fields: &[FieldExt], named: bool) -> TokenStream { + let fields = fields.iter().map(|f| &f.ident); + let fields = my_quote![ #(#fields),* ]; + if named { + my_quote!({ #fields }) + } else { + my_quote!(( #fields )) + } +} + +pub fn struct_object_gen_body(fields: &[FieldExt], unit: bool, named: bool) -> TokenStream { + let inits = fields.iter().map(|f| { + let field = &f.ident; + let field_name = field.to_string(); + let ty = f.ty; + let init = if f.is_phantom_data() { + f.init_phantom(&field_name, ty) + } else { + my_quote!(<#ty>::generate_new(state.add_child(#field_name, std::any::type_name::<#ty>()).last_child_mut()?)?) + }; + if f.named { + my_quote!(#field: #init) + } else { + my_quote!(#init) + } + }); + if unit { + my_quote!() + } else if named { + my_quote![{ #(#inits),* }] + } else { + // unnamed + my_quote![( #(#inits),* )] + } +} + +pub fn union_object_gen_body(fields: &[FieldExt]) -> TokenStream { + let gens = fields + .iter() + .enumerate() + .map(|(i, f)| { + let field = &f.ident; + let field_name = field.to_string(); + let ty = f.ty; + let index = syn::LitInt::new(&i.to_string(), Span::call_site()); + my_quote!(#index => Self { #field: <#ty>::generate_new(state.add_child(#field_name, std::any::type_name::<#ty>()).last_child_mut()?)? 
}) + }); + let crate_path = super::get_crate_path(); + let items = my_quote![ #(#gens),* ]; + let field_size = fields.len(); + let size_ident = syn::LitInt::new(&field_size.to_string(), Span::call_site()); + my_quote!( + match #crate_path::gen_range(0..#size_ident) { + #items, + _ => { unreachable!() }, + } + ) +} + +pub fn struct_object_layout_body(fields: &[FieldExt], unit: bool) -> TokenStream { + let fields = fields.iter().map(|f| { + if f.is_phantom_data() { + return my_quote!(); + } + let field = &f.ident; + let field_name = field.to_string(); + let value = f.get_field_value(true); + my_quote!(layout.add_field(#field_name, #value.get_layout(fold_ptr));) + }); + if unit { + my_quote!() + } else { + my_quote![ #(#fields)* ] + } +} + +pub fn struct_object_mutate_body(fields: &[FieldExt], unit: bool, is_det: bool) -> TokenStream { + if unit { + return my_quote!(); + } + let mutates = fields.iter().enumerate().map(|(i, f)| { + let field = &f.ident; + let field_name = field.to_string(); + let index = syn::LitInt::new(&i.to_string(), Span::call_site()); + let value = f.get_field_value(true); + if f.is_phantom_data() { + my_quote!(#index => {}) + } else if is_det { + my_quote!(#index => { #value.det_mutate(state.get_child_mut(#field_name)?) }) + } else { + my_quote!(#index => { #value.mutate(state.get_child_mut(#field_name)?) 
}) + } + }); + let crate_path = super::get_crate_path(); + let items = my_quote![ #(#mutates),* ]; + let choose_index = if is_det { + my_quote!(state.get_deterministic_child_position()) + } else { + my_quote!(#crate_path::choose_weighted_by_state(state)) + }; + my_quote!( + if let Some(index) = #choose_index { + match index { + #items, + _ => { unreachable!() }, + } + } else { + Ok(#crate_path::MutateOperator::nop()) + } + ) +} + +pub fn union_object_mutate_body(fields: &[FieldExt]) -> TokenStream { + let mutates = fields.iter().map(|f| { + let field = &f.ident; + let field_name = field.to_string(); + let value = f.get_field_value(true); + // state is chosen outside + my_quote!(#field_name => { #value.mutate(state) }) + }); + let items = my_quote![ #(#mutates),* ]; + let crate_path = super::get_crate_path(); + my_quote!( + if #crate_path::unlikely() { + use #crate_path::ObjGenerate; + state.clear(); + let rng_state = #crate_path::save_rng_state(); + *self = Self::generate_new(state)?; + return Ok(state.as_mutate_operator(#crate_path::MutateOperation::UnionNew { rng_state } )); + } + unsafe { + let state = state.last_child_mut()?; + let key = state.key.as_str()?; + match key { + #items, + _ => { unreachable!() }, + } + } + ) +} + +pub fn struct_object_mutate_op_body(fields: &[FieldExt], unit: bool) -> TokenStream { + if unit { + return my_quote!(Ok(self as *mut Self as *mut u8)); + } + let raws = fields.iter().map(|f| { + let field = &f.ident; + let field_name = field.to_string(); + let value = f.get_field_value(true); + my_quote!(#field_name => {return #value.mutate_by_op( state.get_child_mut(#field_name)?, &keys[1..], op);}) + }); + let items = my_quote![ #(#raws),* ]; + my_quote!( + if keys.is_empty() { + eyre::bail!(format!("keys: {:?} and op {:?} does not works in struct", keys, op)); + } + let key = keys[0].as_str()?; + match key { + #items, + _ => { unreachable!("key {} not found", key) }, + } + ) +} + +pub fn struct_object_opaque_body(fields: &[FieldExt]) -> 
TokenStream { + let mut check_opaque = fields.iter().map(|f| { + let ty = f.ty; + my_quote!(<#ty>::is_opaque()) + }); + let mut ret = if let Some(first) = check_opaque.next() { + first + } else { + my_quote!() + }; + for item in check_opaque { + ret = my_quote!(#ret || #item); + } + ret +} + +pub fn struct_object_ptr_body(fields: &[FieldExt], unit: bool) -> TokenStream { + if unit { + return my_quote!(Ok(self as *mut Self as *mut u8)); + } + let raws = fields.iter().map(|f| { + let field = &f.ident; + let field_name = field.to_string(); + let value = f.get_field_value(true); + my_quote!(#field_name => #value.get_ptr_by_keys(&keys[1..])) + }); + let items = my_quote![ #(#raws),* ]; + my_quote!( + if keys.is_empty() { + return Ok(self as *const Self as *mut Self as *mut u8); + } + let key = keys[0].as_str()?; + match key { + #items, + _ => { unreachable!() }, + } + ) +} + +pub fn struct_object_serialize_body(fields: &[FieldExt], use_self: bool) -> TokenStream { + let buf_ident = syn::Ident::new("buf", Span::call_site()); + let sers = fields.iter().map(|f| { + if f.is_phantom_data() { + return my_quote!(); + } + let field = &f.ident; + let field_name = field.to_string(); + let value = f.get_field_value(use_self); + let header = format!("{field}: "); + my_quote!( + #buf_ident.push_str(#header); + #buf_ident.push_str(&#value.serialize_obj(state.get_child(#field_name)?)?); + #buf_ident.push_str(", "); + ) + }); + my_quote!( #(#sers)* ) +} + +pub fn struct_object_translate_body(fields: &[FieldExt], use_self: bool) -> TokenStream { + let buf_ident = syn::Ident::new("buf", Span::call_site()); + let sers = fields.iter().map(|f| { + if f.is_phantom_data() { + return my_quote!(); + } + let field = &f.ident; + let field_name = field.to_string(); + let value = f.get_field_value(use_self); + my_quote!( + #buf_ident.push_str(&#value.translate_obj_to_c(state.get_child(#field_name)?, program)?); + #buf_ident.push_str(", "); + ) + }); + my_quote!( #(#sers)* ) +} + +pub fn 
struct_serialize_body(fields: &[FieldExt], use_self: bool) -> TokenStream { + let buf_ident = syn::Ident::new("buf", Span::call_site()); + let sers = fields.iter().map(|f| { + if f.is_phantom_data() { + return my_quote!(); + } + let field = &f.ident; + let header = format!("{field}: "); + let value = f.get_field_value(use_self); + my_quote!( + #buf_ident.push_str(#header); + #buf_ident.push_str(&#value.serialize()?); + #buf_ident.push_str(", "); + ) + }); + my_quote!( #(#sers)* ) +} + +pub fn union_object_serialize_body(fields: &[FieldExt], use_self: bool) -> TokenStream { + let buf_ident = syn::Ident::new("buf", Span::call_site()); + let sers = fields.iter().map(|f| { + let field = &f.ident; + let field_name = field.to_string(); + let header = format!("{field}: "); + let value = f.get_field_value(use_self); + // state is chosen outside + my_quote!(#field_name => { + #buf_ident.push_str(#header); + #buf_ident.push_str(&#value.serialize_obj(state)?) + }) + }); + let items = my_quote![ #(#sers),* ]; + my_quote!( + unsafe { + let state = state.last_child()?; + let key = state.key.as_str()?; + match key { + #items, + _ => { unreachable!() }, + } + } + ) +} + +pub fn union_object_translate_body(fields: &[FieldExt], use_self: bool) -> TokenStream { + let buf_ident = syn::Ident::new("buf", Span::call_site()); + let sers = fields.iter().map(|f| { + let field = &f.ident; + let field_name = field.to_string(); + let value = f.get_field_value(use_self); + // state is chosen outside + my_quote!(#field_name => { + #buf_ident.push_str(&#value.translate_obj_to_c(state, program)?) 
+ }) + }); + let items = my_quote![ #(#sers),* ]; + my_quote!( + unsafe { + let state = state.last_child()?; + let key = state.key.as_str()?; + match key { + #items, + _ => { unreachable!() }, + } + } + ) +} + +pub fn union_serialize_body(fields: &[FieldExt], use_self: bool) -> TokenStream { + let buf_ident = syn::Ident::new("buf", Span::call_site()); + // FIXME: only serialize as first type + let item_first = if let Some(f) = fields.first() { + let field = &f.ident; + let header = format!("{field}: "); + let value = f.get_field_value(use_self); + my_quote!( + #buf_ident.push_str(#header); + #buf_ident.push_str(&#value.serialize()?) + ) + } else { + my_quote!() + }; + my_quote!( + unsafe { #item_first } + ) +} + +pub fn struct_deserialize_body(fields: &[FieldExt], unit: bool, named: bool) -> TokenStream { + if unit { + return my_quote!(); + } + let de_ident = syn::Ident::new("de", Span::call_site()); + let desers = fields.iter().map(|f| { + let field = &f.ident; + let ty = f.ty; + let header = format!("{field}:"); + let init = if f.is_phantom_data() { + if cfg!(feature = "std") { + my_quote!(::std::marker::PhantomData) + } else { + my_quote!(::core::marker::PhantomData) + } + } else { + my_quote!({ + #de_ident.eat_token(#header)?; + let val = <#ty>::deserialize(#de_ident)?; + #de_ident.eat_token(", ")?; + val + }) + }; + if f.named { + my_quote!(#field: #init) + } else { + my_quote!(#init) + } + }); + if named { + my_quote![{ #(#desers),* }] + } else { + my_quote![( #(#desers),* )] + } +} + +pub fn struct_object_deserialize_body(fields: &[FieldExt], unit: bool, named: bool) -> TokenStream { + if unit { + return my_quote!(); + } + let de_ident = syn::Ident::new("de", Span::call_site()); + let desers = fields.iter().map(|f| { + let field = &f.ident; + let field_name = field.to_string(); + let ty = f.ty; + let header = format!("{field}:"); + let init = if f.is_phantom_data() { + f.init_phantom(&field_name, ty) + } else { + my_quote!({ + #de_ident.eat_token(#header)?; 
+ let val = <#ty>::deserialize_obj(#de_ident, state.add_child(#field_name, std::any::type_name::<#ty>()).last_child_mut()?)?; + #de_ident.eat_token(", ")?; + val + }) + }; + if f.named { + my_quote!(#field: #init) + } else { + my_quote!(#init) + } + }); + if named { + my_quote![{ #(#desers),* }] + } else { + my_quote![( #(#desers),* )] + } +} + +pub fn union_object_deserialize_body(fields: &[FieldExt]) -> TokenStream { + let de_ident = syn::Ident::new("de", Span::call_site()); + let sers = fields.iter().map(|f| { + let field = &f.ident; + let field_name = field.to_string(); + let ty = &f.ty; + my_quote!(#field_name => { + Self { #field: <#ty>::deserialize_obj(#de_ident, state.add_child(#field_name, std::any::type_name::<#ty>()).last_child_mut()?)? } + }) + }); + let items = my_quote![ #(#sers),* ]; + my_quote!( + { + let field = #de_ident.next_token_until(":")?; + match field { + #items + _ => { unreachable!() }, + } + }) +} + +pub fn union_deserialize_body(fields: &[FieldExt]) -> TokenStream { + let de_ident = syn::Ident::new("de", Span::call_site()); + let sers = fields.iter().map(|f| { + let field = &f.ident; + let field_name = field.to_string(); + let ty = &f.ty; + my_quote!(#field_name => { + Self { #field: <#ty>::deserialize(#de_ident)? 
} + }) + }); + let items = my_quote![ #(#sers),* ]; + my_quote!( + { + let field = #de_ident.next_token_until(":")?; + match field { + #items + _ => { unreachable!() }, + } + }) +} + +pub fn union_object_use_member(fields: &[FieldExt]) -> TokenStream { + let use_member = fields.iter().map(|f| { + let field = &f.ident; + let field_name = field.to_string(); + let ty = &f.ty; + let value = f.get_field_value(true); + let crate_path = super::get_crate_path(); + // state should be revised + my_quote!(#field_name => { + let member_state = state.last_child_mut()?; + if let hopper::FieldKey::Field(f) = &member_state.key { + if member == f { + #crate_path::set_refine_suc(false); + return Ok(()); + } else { + state.clear(); + #value = <#ty>::generate_new(state.add_child(#field_name, std::any::type_name::<#ty>()).last_child_mut()?)?; + } + } + }) + }); + + let items = my_quote![ #(#use_member),* ]; + my_quote!( + match member.as_str() { + #items + _ => { unreachable!() }, + } + ) +} diff --git a/hopper-derive-impl/src/folder.rs b/hopper-derive-impl/src/folder.rs new file mode 100644 index 0000000..2d9d323 --- /dev/null +++ b/hopper-derive-impl/src/folder.rs @@ -0,0 +1,142 @@ +use syn::{ + fold::{self, Fold}, + Type, TypeBareFn, +}; + +use crate::my_quote; + +pub struct FuzzFolder { + replace_ptr: bool, +} + +impl Default for FuzzFolder { + fn default() -> Self { + Self { replace_ptr: true } + } +} + +impl Fold for FuzzFolder { + fn fold_type(&mut self, node: Type) -> Type { + if !self.replace_ptr { + return fold::fold_type(self, node); + } + match node { + Type::Ptr(ptr) => { + let crate_path = super::get_crate_path(); + let inner = self.fold_type(*ptr.elem); + if ptr.mutability.is_some() { + Type::Verbatim(my_quote!( + #crate_path::FuzzMutPointer::<#inner> + )) + } else { + Type::Verbatim(my_quote!( + #crate_path::FuzzConstPointer::<#inner> + )) + } + } + Type::Path(ref path) => { + if check_void_type(path) { + let crate_path = super::get_crate_path(); + 
Type::Verbatim(my_quote!( + #crate_path::FuzzVoid + )) + } else if check_fn_ptr_path(path) { + let crate_path = super::get_crate_path(); + Type::Verbatim(my_quote!( + #crate_path::FuzzFrozenPointer::<#path> + )) + } else if let Some(ty) = check_bitfield_type(path) { + ty + } else { + fold::fold_type(self, node) + } + } + _ => fold::fold_type(self, node), + } + } + + fn fold_item_impl(&mut self, i: syn::ItemImpl) -> syn::ItemImpl { + let ty = &i.self_ty; + let ty = my_quote!(#ty).to_string(); + // ignore + if ty.starts_with("__BindgenBitfieldUnit") { + return i; + } + if ty.starts_with("__IncompleteArrayField") { + println!("cargo:warning=item: {ty:?}, disable replace ptr"); + self.replace_ptr = false; + let ret = fold::fold_item_impl(self, i); + self.replace_ptr = true; + ret + } else { + fold::fold_item_impl(self, i) + } + } +} + +const VOID_PATH: [&str; 4] = ["std", "os", "raw", "c_void"]; + +fn check_void_type(path: &syn::TypePath) -> bool { + let it = path.path.segments.iter(); + it.zip(VOID_PATH.iter()) + .all(|(seg, ident)| seg.ident == ident) +} + +fn check_bitfield_type(path: &syn::TypePath) -> Option { + let mut it = path.path.segments.iter(); + if let Some(seg) = it.next() { + if seg.ident == "__BindgenBitfieldUnit" { + let crate_path = super::get_crate_path(); + let arg = &seg.arguments; + return Some(Type::Verbatim(my_quote!( + #crate_path::HopperBindgenBitfieldUnit::#arg + ))); + } + } + None +} + +fn check_fn_ptr_path(path: &syn::TypePath) -> bool { + if let Some(fn_ty) = get_fn_type_in_option(path) { + // We don't want the number of arguments in a function pointer to exceed MAX_SIG_ARG_LEN either. 
+ if fn_ty.inputs.len() > crate::func_hook::MAX_SIG_ARG_LEN { + return true; + } + } + false +} + +pub fn get_fn_type_in_option(path: &syn::TypePath) -> Option<&TypeBareFn> { + let mut it = path.path.segments.iter(); + let generic_args = it + .next() + .and_then(|seg| { + if seg.ident == "std" || seg.ident == "core" { + it.next() + } else { + None + } + }) + .and_then(|seg| { + if seg.ident == "option" { + it.next() + } else { + None + } + }) + .and_then(|seg| { + if seg.ident == "Option" { + Some(&seg.arguments) + } else { + None + } + }); + if let Some(syn::PathArguments::AngleBracketed(params)) = generic_args { + if let syn::GenericArgument::Type(syn::Type::BareFn(fn_ty)) = + params.args.iter().next().unwrap() + { + return Some(fn_ty); + } + } + None +} diff --git a/hopper-derive-impl/src/format.rs b/hopper-derive-impl/src/format.rs new file mode 100644 index 0000000..78bd43f --- /dev/null +++ b/hopper-derive-impl/src/format.rs @@ -0,0 +1,66 @@ +use std::{ + borrow::Cow, + env, + io::{self, Write}, + path::PathBuf, + process::{Command, Stdio}, +}; + +/// Gets the rustfmt path to rustfmt the generated bindings. +fn rustfmt_path() -> io::Result { + if let Ok(rustfmt) = env::var("RUSTFMT") { + return Ok(rustfmt.into()); + } + // No rustfmt binary was specified, so assume that the binary is called + // "rustfmt" and that it is in the user's PATH. + Ok("rustfmt".into()) +} + +pub fn rustfmt_generated_string(source: &str) -> io::Result> { + let rustfmt = rustfmt_path()?; + let mut cmd = Command::new(rustfmt); + + cmd.stdin(Stdio::piped()).stdout(Stdio::piped()); + + let mut child = cmd.spawn()?; + let mut child_stdin = child.stdin.take().unwrap(); + let mut child_stdout = child.stdout.take().unwrap(); + + let source = source.to_owned(); + + // Write to stdin in a new thread, so that we can read from stdout on this + // thread. This keeps the child from blocking on writing to its stdout which + // might block us from writing to its stdin. 
+ let stdin_handle = ::std::thread::spawn(move || { + let _ = child_stdin.write_all(source.as_bytes()); + source + }); + + let mut output = vec![]; + io::copy(&mut child_stdout, &mut output)?; + + let status = child.wait()?; + let source = stdin_handle.join().expect( + "The thread writing to rustfmt's stdin doesn't do \ + anything that could panic", + ); + + match String::from_utf8(output) { + Ok(bindings) => match status.code() { + Some(0) => Ok(Cow::Owned(bindings)), + Some(2) => Err(io::Error::new( + io::ErrorKind::Other, + "Rustfmt parsing errors.".to_string(), + )), + Some(3) => { + // log::warn!("Rustfmt could not format some lines."); + Ok(Cow::Owned(bindings)) + } + _ => Err(io::Error::new( + io::ErrorKind::Other, + "Internal rustfmt error".to_string(), + )), + }, + _ => Ok(Cow::Owned(source)), + } +} diff --git a/hopper-derive-impl/src/func_hook.rs b/hopper-derive-impl/src/func_hook.rs new file mode 100644 index 0000000..371c21e --- /dev/null +++ b/hopper-derive-impl/src/func_hook.rs @@ -0,0 +1,223 @@ +//! 
ctor: https://github.com/mmastrac/rust-ctor/blob/master/ctor/src/lib.rs + +use crate::my_quote; +use proc_macro2::{Ident, Span, TokenStream}; +use syn::{parse_quote, spanned::Spanned, ItemStruct, ItemType, Signature}; + +pub const MAX_SIG_ARG_LEN: usize = 12; + +#[cfg(feature = "link_hook")] +pub fn add_func_hook<'ast>( + hook_name: &str, + sigs: &[Signature], + structs: &[&'ast ItemStruct], + alias: &[&'ast ItemType], +) -> TokenStream { + let linkme_ident = Ident::new(&format!("{}___hopper_gadget", hook_name), Span::call_site()); + + let type_gadgets = convert_struct_to_type_gadgets(structs); + let func_gadgets = convert_sig_to_func_gadgets(sigs); + let strcut_extra = add_custom_type_extra_info(structs, alias); + let crate_path = super::get_crate_path(); + my_quote!( + #[::linkme::distributed_slice(#crate_path::link_hook::HOPPER_FN_GADGET_PROVIDERS)] + fn #linkme_ident (gadgets: &mut #crate_path::ProgramGadgets) { + #(#func_gadgets)* + #(#type_gadgets)* + #(#strcut_extra)* + gadgets.build_graph(); + } + ) +} + +#[cfg(feature = "ctor_hook")] +pub fn add_func_hook<'ast>( + hook_name: &str, + sigs: &[Signature], + structs: &[&'ast ItemStruct], + alias: &[&'ast ItemType], +) -> TokenStream { + let ctor_ident = Ident::new( + &format!("{hook_name}___hopper_ctor___ctor"), + Span::call_site(), + ); + + let type_gadgets = convert_struct_to_type_gadgets(structs); + let func_gadgets = convert_sig_to_func_gadgets(sigs); + let strcut_extra = add_custom_type_extra_info(structs, alias); + let crate_path = super::get_crate_path(); + my_quote!( + #[used] + #[allow(non_upper_case_globals)] + #[doc(hidden)] + #[cfg_attr(any(target_os = "linux", target_os = "android"), link_section = ".init_array")] + #[cfg_attr(target_os = "freebsd", link_section = ".init_array")] + #[cfg_attr(target_os = "netbsd", link_section = ".init_array")] + #[cfg_attr(target_os = "openbsd", link_section = ".init_array")] + #[cfg_attr(target_os = "dragonfly", link_section = ".init_array")] + 
#[cfg_attr(target_os = "illumos", link_section = ".init_array")] + #[cfg_attr(any(target_os = "macos", target_os = "ios"), link_section = "__DATA,__mod_init_func")] + #[cfg_attr(windows, link_section = ".CRT$XCU")] + static #ctor_ident + : + unsafe extern "C" fn() = + { + unsafe extern "C" fn #ctor_ident() { + let gadgets = #crate_path::global_gadgets::get_mut_instance(); + #(#func_gadgets)* + #(#type_gadgets)* + #(#strcut_extra)* + gadgets.build_graph(); + } + #ctor_ident + } + ; + ) +} + +fn convert_struct_to_type_gadgets(structs: &[&'_ ItemStruct]) -> Vec { + structs + .iter() + .map(|&sig| { + let ty = &sig.ident; + my_quote!( + gadgets.add_type_with_pointer::<#ty>(); + ) + }) + .collect() +} + + +fn convert_sig_to_func_gadgets(sigs: &[Signature]) -> Vec { + sigs.iter() + .map(|sig| { + let fn_name = &sig.ident; + // avoid too many args / name starts with '_' / return '!' + if sig.inputs.len() > MAX_SIG_ARG_LEN || fn_name.to_string().starts_with('_') { + return my_quote!(); + } + if let syn::ReturnType::Type(_, ref ty) = sig.output { + if matches!(ty.as_ref(), syn::Type::Never(_)) { + return my_quote!(); + } + } + + match convert_sig_to_fn_type(sig) { + Ok((fn_type, extra_info)) => my_quote!( + gadgets.add_function(stringify!(#fn_name), &(#fn_name as #fn_type), #extra_info); + ), + Err(errors) => errors.iter().map(syn::parse::Error::to_compile_error).collect(), + } + }) + .collect() +} + +fn add_custom_type_extra_info( + structs: &[&'_ ItemStruct], + alias: &[&'_ ItemType], +) -> Vec { + let alias_idents: Vec = alias.iter().map(|item| item.ident.to_string()).collect(); + let mut tokens = vec![]; + for stru in structs { + let stru_ident = stru.ident.to_string(); + let (fields, _named, _unit) = super::field::convert_fields(&stru.fields); + for f in &fields { + let ty = f.ty; + let ty_alias = format_type(ty); + // only add alias type for pointer + if !ty_alias.starts_with("std") + && (alias_idents.contains(&ty_alias) || is_alias_pointer(&ty_alias, &alias_idents)) 
+ { + let mut f_ident = f.ident.to_string(); + f_ident.push_str("@hopper_harness::"); + f_ident.push_str(&stru_ident); + tokens.push(my_quote!( + gadgets.add_field_alias_type::<#ty>(#f_ident, #ty_alias); + )) + } + } + } + tokens +} + +fn is_alias_pointer(type_name: &str, alias_idents: &[String]) -> bool { + if let Some(t) = type_name.strip_prefix("hopper::runtime::") { + if let Some(t) = t.strip_prefix("FuzzConstPointer<") { + let inner = t.strip_suffix('>').unwrap(); + return alias_idents.iter().any(|s| s == inner); + } + if let Some(t) = t.strip_prefix("FuzzMutPointer<") { + let inner = t.strip_suffix('>').unwrap(); + return alias_idents.iter().any(|s| s == inner); + } + } + false +} + +fn convert_sig_to_fn_type( + sig: &Signature, +) -> Result<(syn::TypeBareFn, TokenStream), Vec> { + let mut args: syn::punctuated::Punctuated = + syn::punctuated::Punctuated::new(); + let mut arg_idents = vec![]; + let mut alias_arg_types = vec![]; + let mut alias_ret_type = my_quote!(None); + let mut errors = vec![]; + sig.inputs.iter().for_each(|arg| match *arg { + syn::FnArg::Typed(syn::PatType { + ref pat, ref ty, .. 
+ }) => { + let ident: Ident = parse_quote!(#pat); + arg_idents.push(ident.to_string()); + args.push(parse_quote!(_: #ty)); + let ty_str = format_type(ty); + alias_arg_types.push(my_quote!(#ty_str)); + } + _ => { + errors.push(syn::parse::Error::new( + arg.span(), + "unsupported kind of function argument", + )); + } + }); + + if let syn::ReturnType::Type(_, ref ty) = sig.output { + let ty_str = format_type(ty); + alias_ret_type = my_quote!(Some(#ty_str)); + } + + if !errors.is_empty() { + return Err(errors); + } + + let fn_type = syn::TypeBareFn { + lifetimes: None, + unsafety: sig.unsafety, + abi: sig.abi.clone(), + fn_token: ::default(), + paren_token: syn::token::Paren::default(), + inputs: args, + variadic: sig.variadic.clone(), + output: sig.output.clone(), + }; + let extra_info = my_quote!(&[#(#arg_idents),*], &[#(#alias_arg_types),*], #alias_ret_type); + Ok((fn_type, extra_info)) +} + +pub fn format_type(ty: &syn::Type) -> String { + let mut ty_str = my_quote!(#ty).to_string(); + ty_str.retain(|c| !c.is_whitespace()); + ty_str + .replace("FuzzMutPointer<", "hopper::runtime::FuzzMutPointer<") + .replace("FuzzConstPointer<", "hopper::runtime::FuzzConstPointer<") + .replace( + "::hopper::FuzzMutPointer::", + "hopper::runtime::FuzzMutPointer", + ) + .replace( + "::hopper::FuzzConstPointer::", + "hopper::runtime::FuzzConstPointer", + ) + .replace("::hopper::FuzzVoid", "hopper::runtime::FuzzVoid") + .replace("::std", "std") +} diff --git a/hopper-derive-impl/src/func_sig.rs b/hopper-derive-impl/src/func_sig.rs new file mode 100644 index 0000000..4a2d645 --- /dev/null +++ b/hopper-derive-impl/src/func_sig.rs @@ -0,0 +1,120 @@ +///! Implementation general function for both foreign and internal functions. +///! Ref: https://docs.rs/syn/1.0.76/src/syn/item.rs.html#611-622 +///! WARN: Deprecated!! 
+ +use std::{iter, slice}; + +use proc_macro2::TokenStream; +use quote::{ToTokens, TokenStreamExt}; +use syn::{ + braced, bracketed, + parse::{Parse, ParseStream, Result}, + AttrStyle, Attribute, Block, Path, Signature, Token, Visibility, +}; +pub struct FnDecl { + pub attrs: Vec, + pub vis: Visibility, + pub sig: Signature, + pub block: Option>, + pub semi_token: Option, +} + +impl Parse for FnDecl { + fn parse(input: ParseStream) -> Result { + let mut attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let sig: Signature = input.parse()?; + let lookahead = input.lookahead1(); + let mut block = None; + let mut semi_token = None; + if lookahead.peek(Token![;]) { + // ForeignItemFn + let semi: Token![;] = input.parse()?; + semi_token = Some(semi); + } else if lookahead.peek(syn::token::Brace) { + // ItemFn + let content; + let brace_token = braced!(content in input); + parse_inner(&content, &mut attrs)?; + let stmts = content.call(Block::parse_within)?; + block = Some(Box::new(Block { brace_token, stmts })); + } else { + return Err(lookahead.error()); + }; + + Ok(FnDecl { + attrs, + vis, + sig, + block, + semi_token + }) + } +} + +impl ToTokens for FnDecl { + fn to_tokens(&self, tokens: &mut TokenStream) { + tokens.append_all(self.attrs.outer()); + self.vis.to_tokens(tokens); + self.sig.to_tokens(tokens); + if let Some(blk) = &self.block { + blk.brace_token.surround(tokens, |tokens| { + tokens.append_all(self.attrs.inner()); + tokens.append_all(&blk.stmts); + }); + } + if let Some(semi) = &self.semi_token { + semi.to_tokens(tokens); + } + } +} + +pub fn parse_inner(input: ParseStream, attrs: &mut Vec) -> Result<()> { + while input.peek(Token![#]) && input.peek2(Token![!]) { + attrs.push(input.call(single_parse_inner)?); + } + Ok(()) +} + +pub fn single_parse_inner(input: ParseStream) -> Result { + let content; + Ok(Attribute { + pound_token: input.parse()?, + style: AttrStyle::Inner(input.parse()?), + bracket_token: 
bracketed!(content in input), + path: content.call(Path::parse_mod_style)?, + tokens: content.parse()?, + }) +} + + +pub trait FilterAttrs<'a> { + type Ret: Iterator; + + fn outer(self) -> Self::Ret; + fn inner(self) -> Self::Ret; +} + +impl<'a> FilterAttrs<'a> for &'a [Attribute] { + type Ret = iter::Filter, fn(&&Attribute) -> bool>; + + fn outer(self) -> Self::Ret { + fn is_outer(attr: &&Attribute) -> bool { + match attr.style { + AttrStyle::Outer => true, + AttrStyle::Inner(_) => false, + } + } + self.iter().filter(is_outer) + } + + fn inner(self) -> Self::Ret { + fn is_inner(attr: &&Attribute) -> bool { + match attr.style { + AttrStyle::Inner(_) => true, + AttrStyle::Outer => false, + } + } + self.iter().filter(is_inner) + } +} diff --git a/hopper-derive-impl/src/lib.rs b/hopper-derive-impl/src/lib.rs new file mode 100644 index 0000000..cce7fa9 --- /dev/null +++ b/hopper-derive-impl/src/lib.rs @@ -0,0 +1,70 @@ +pub mod field; +pub mod folder; +pub mod format; +pub mod func_hook; +pub mod object; +pub mod visitor; +pub mod serde; + +#[macro_use] +extern crate quote; +use proc_macro2::TokenStream; +use syn::{fold::Fold, visit::Visit}; + +pub const ENABLE_SET_FN_POINTER: bool = enable_fn_pointer(); +pub const DEFAULT_FN_POINTER_PREFIX: &str = fn_pointer_name_prefix(); + +#[macro_export] +macro_rules! 
my_quote { + ($($t:tt)*) => (quote_spanned!(proc_macro2::Span::call_site() => $($t)*)) +} + +pub fn derive_bindings(content: &str) -> TokenStream { + let syntax = syn::parse_file(content).expect("Unable to parse file"); + + let mut folder = folder::FuzzFolder::default(); + let replaced_syntax = folder.fold_file(syntax); + + let mut fuzz_visitor = visitor::FuzzVisitor::default(); + fuzz_visitor.visit_file(&replaced_syntax); + + let callbacks = fuzz_visitor.generate_callbacks(); + + let result = my_quote!( + #callbacks + + #replaced_syntax + + #fuzz_visitor + ); + + result +} + +static mut USE_IN_COMPILER: bool = false; + +pub fn set_compiler_env() { + unsafe { + USE_IN_COMPILER = true; + } +} + +pub fn get_crate_path() -> syn::Path { + if cfg!(feature = "use_crate") && ! unsafe { USE_IN_COMPILER } { + syn::parse_quote!(crate) + } else { + syn::parse_quote!(::hopper) + } +} + +const fn enable_fn_pointer() -> bool { + option_env!("HOPPER_DISABLE_FN_POINTER").is_none() +} + +const fn fn_pointer_name_prefix() -> &'static str { + if let Some(v) = option_env!("HOPPER_FUNCTION_POINTER_PREFIX") { + v + } else { + "GENERATED_hopper_callback_" + } +} \ No newline at end of file diff --git a/hopper-derive-impl/src/object.rs b/hopper-derive-impl/src/object.rs new file mode 100644 index 0000000..5e02505 --- /dev/null +++ b/hopper-derive-impl/src/object.rs @@ -0,0 +1,201 @@ +use crate::field; +use crate::my_quote; +use crate::serde; + +use proc_macro2::TokenStream; +use syn::{punctuated::Punctuated, Token}; + +/// Add implmentation of object's fuzzing trait for struct types +pub fn object_trait_for_struct( + name: &syn::Ident, + generics: &syn::Generics, + attrs: &[syn::Attribute], + fields: &syn::Fields, +) -> TokenStream { + let crate_path = super::get_crate_path(); + let serde = serde::serde_trait_for_struct(name, generics, attrs, fields); + let object_serde = serde::object_serde_trait_for_struct(name, generics, attrs, fields); + let (fields, named, unit) = 
field::convert_fields(fields); + let gen_body = field::struct_object_gen_body(&fields, unit, named); + let mutate_body = field::struct_object_mutate_body(&fields, unit, false); + let det_mutate_body = field::struct_object_mutate_body(&fields, unit, true); + let mutate_op_body = field::struct_object_mutate_op_body(&fields, unit); + let ptr_body = field::struct_object_ptr_body(&fields, unit); + let layout_body = field::struct_object_layout_body(&fields, unit); + // let is_opaque = fields.iter().any(|f| f.is_opaque()); + let is_opaque = field::struct_object_opaque_body(&fields); + + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + let add_types = fields.iter().map(|f| { + let f_ty = f.ty; + my_quote!(gadgets.add_type::<#f_ty>();) + }); + let field_ty = fields.iter().map(|f| { + let f_ty = f.ty; + let f_name = f.ident.to_string(); + my_quote!( + ret.insert(#f_name.to_string(), std::any::type_name::<#f_ty>().to_owned()); + ) + }); + + my_quote! { + impl #impl_generics #crate_path::ObjFuzzable for #name #ty_generics #where_clause { + } + impl #impl_generics #crate_path::ObjGenerate for #name #ty_generics #where_clause { + fn generate_new(state: &mut #crate_path::ObjectState) -> eyre::Result { + let val = #name #gen_body; + Ok(val) + } + } + impl #impl_generics #crate_path::ObjMutate for #name #ty_generics #where_clause { + fn det_mutate(&mut self, state: &mut #crate_path::ObjectState) -> eyre::Result<#crate_path::MutateOperator> { + #det_mutate_body + } + fn mutate(&mut self, state: &mut #crate_path::ObjectState) -> eyre::Result<#crate_path::MutateOperator> { + #mutate_body + } + fn mutate_by_op(&mut self, state: &mut #crate_path::ObjectState, + keys: &[#crate_path::FieldKey], op: &#crate_path::MutateOperation) -> eyre::Result<()> { + #mutate_op_body + } + } + impl #impl_generics #crate_path::ObjValue for #name #ty_generics #where_clause { + fn get_layout(&self, fold_ptr: bool) -> #crate_path::ObjectLayout { + let mut layout = 
#crate_path::ObjectLayout::root(self.type_name(), self as *const Self as *mut u8); + #layout_body + layout + } + fn get_ptr_by_keys(&self, keys: &[#crate_path::FieldKey]) -> eyre::Result<*mut u8> { + #ptr_body + } + } + impl #impl_generics #crate_path::ObjType for #name #ty_generics #where_clause { + fn is_opaque() -> bool { + #is_opaque + } + + fn add_fields_to_gadgets(gadgets: &mut #crate_path::ProgramGadgets) { + #(#add_types)* + } + + fn get_fields_ty() -> std::collections::HashMap { + let mut ret = std::collections::HashMap::default(); + #(#field_ty)* + ret + } + } + #serde + #object_serde + } +} + +pub fn object_trait_for_union( + name: &syn::Ident, + generics: &syn::Generics, + attrs: &[syn::Attribute], + fields: &syn::FieldsNamed, +) -> TokenStream { + let crate_path = super::get_crate_path(); + let serde = serde::serde_trait_for_union(name, generics, attrs, fields); + let object_serde = serde::object_serde_trait_for_union(name, generics, attrs, fields); + let fields = field::convert_field_list(Some(&fields.named), true); + let gen_body = field::union_object_gen_body(&fields); + let mutate_body = field::union_object_mutate_body(&fields); + let mutate_op_body = field::struct_object_mutate_op_body(&fields, false); + let mutate_union_use = field::union_object_use_member(&fields); + let ptr_body = field::struct_object_ptr_body(&fields, false); + let add_types = fields.iter().map(|f| { + let f_ty = f.ty; + my_quote!(gadgets.add_type::<#f_ty>();) + }); + let field_ty = fields.iter().map(|f| { + let f_ty = f.ty; + let f_name = f.ident.to_string(); + my_quote!( + ret.insert(#f_name.to_string(), std::any::type_name::<#f_ty>().to_owned()); + ) + }); + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + my_quote! 
{ + impl #impl_generics #crate_path::ObjFuzzable for #name #ty_generics #where_clause { + } + impl #impl_generics #crate_path::ObjGenerate for #name #ty_generics #where_clause { + fn generate_new(state: &mut #crate_path::ObjectState) -> eyre::Result { + state.is_union = true; + let val = #gen_body; + Ok(val) + } + } + impl #impl_generics #crate_path::ObjMutate for #name #ty_generics #where_clause { + fn mutate(&mut self, state: &mut #crate_path::ObjectState) -> eyre::Result<#crate_path::MutateOperator> { + #mutate_body + } + fn mutate_by_op(&mut self, state: &mut #crate_path::ObjectState, + keys: &[#crate_path::FieldKey], op: &#crate_path::MutateOperation) -> eyre::Result<()> { + use hopper::ObjGenerate; + unsafe { + match op { + #crate_path::MutateOperation::UnionUse{ member, .. } => { + #mutate_union_use + } + _ => { + #mutate_op_body + } + } + Ok(()) + } + } + } + impl #impl_generics #crate_path::ObjValue for #name #ty_generics #where_clause { + fn get_layout(&self, _fold_ptr: bool) -> #crate_path::ObjectLayout { + let mut layout = #crate_path::ObjectLayout::root( + std::any::type_name::(), + self as *const Self as *mut u8, + ); + layout.is_union = true; + layout + } + + fn get_ptr_by_keys(&self, keys: &[#crate_path::FieldKey]) -> eyre::Result<*mut u8> { + unsafe { + #ptr_body + } + } + } + impl #impl_generics #crate_path::ObjType for #name #ty_generics #where_clause { + fn add_fields_to_gadgets(gadgets: &mut #crate_path::ProgramGadgets) { + #(#add_types)* + } + + fn get_fields_ty() -> std::collections::HashMap { + let mut ret = std::collections::HashMap::default(); + #(#field_ty)* + ret + } + } + #serde + #object_serde + } +} + +/// Add implmentation of object's fuzzing trait for enum types +/// TODO: implement variants +pub fn object_trait_for_enum( + _name: &syn::Ident, + _generics: &syn::Generics, + _attrs: &[syn::Attribute], + variants: &Punctuated, +) -> TokenStream { + if variants.is_empty() { + panic!("#[derive(Hopper)] cannot be implemented for enums 
with zero variants"); + } + let impls = variants.iter().map(|v| { + if v.discriminant.is_some() { + panic!("#[derive(Hopper)] cannot be implemented for enums with discriminants"); + } + // qual = my_quote!(::#variant), + //object_trait_for_struct(name, generics, attrs, &v.fields, Some(&v.ident)) + my_quote!() + }); + my_quote!(#(#impls)*) +} diff --git a/hopper-derive-impl/src/serde.rs b/hopper-derive-impl/src/serde.rs new file mode 100644 index 0000000..14bc6c2 --- /dev/null +++ b/hopper-derive-impl/src/serde.rs @@ -0,0 +1,335 @@ +use crate::field; +use crate::my_quote; + +use proc_macro2::TokenStream; +use syn::{punctuated::Punctuated, Token}; + +/// Add implmentation of object's serde trait for struct types +pub fn serde_trait_for_struct( + name: &syn::Ident, + generics: &syn::Generics, + _attrs: &[syn::Attribute], + fields: &syn::Fields, +) -> TokenStream { + let crate_path = super::get_crate_path(); + let (fields, named, unit) = field::convert_fields(fields); + let ser_body = field::struct_serialize_body(&fields, true); + let de_body = field::struct_deserialize_body(&fields, unit, named); + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + my_quote! 
{ + impl #impl_generics #crate_path::Serialize for #name #ty_generics #where_clause { + fn serialize(&self) -> eyre::Result { + let mut buf = String::new(); + buf.push_str("{ "); + #ser_body; + buf.push_str(" }"); + Ok(buf) + } + } + impl #impl_generics #crate_path::Deserialize for #name #ty_generics #where_clause { + fn deserialize(de: &mut #crate_path::Deserializer) -> eyre::Result { + de.eat_token("{")?; + let val = #name #de_body; + de.eat_token("}")?; + Ok(val) + } + } + } +} + +pub fn object_serde_trait_for_struct( + name: &syn::Ident, + generics: &syn::Generics, + _attrs: &[syn::Attribute], + fields: &syn::Fields, +) -> TokenStream { + let crate_path = super::get_crate_path(); + let (fields, named, unit) = field::convert_fields(fields); + let ser_obj_body = field::struct_object_serialize_body(&fields, true); + let trans_obj_body = field::struct_object_translate_body(&fields, true); + let de_obj_body = field::struct_object_deserialize_body(&fields, unit, named); + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + my_quote! 
{ + impl #impl_generics #crate_path::ObjectSerialize for #name #ty_generics #where_clause { + fn serialize_obj(&self, state: &#crate_path::ObjectState) -> eyre::Result { + let mut buf = String::new(); + buf.push_str("{ "); + #ser_obj_body; + buf.push_str(" }"); + Ok(buf) + } + } + impl #impl_generics #crate_path::ObjectDeserialize for #name #ty_generics #where_clause { + fn deserialize_obj(de: &mut #crate_path::Deserializer, state: &mut #crate_path::ObjectState) -> eyre::Result { + de.eat_token("{")?; + let val = #name #de_obj_body; + de.eat_token("}")?; + Ok(val) + } + } + impl #impl_generics #crate_path::ObjectTranslate for #name #ty_generics #where_clause { + fn translate_obj_to_c(&self, state: &#crate_path::ObjectState, program: &#crate_path::FuzzProgram) -> eyre::Result { + let mut buf = String::new(); + buf.push_str("{ "); + #trans_obj_body; + buf.push_str(" }"); + Ok(buf) + } + } + } +} + +pub fn serde_trait_for_union( + name: &syn::Ident, + generics: &syn::Generics, + _attrs: &[syn::Attribute], + fields: &syn::FieldsNamed, +) -> TokenStream { + let crate_path = super::get_crate_path(); + let fields = field::convert_field_list(Some(&fields.named), true); + let ser_body = field::union_serialize_body(&fields, true); + let de_body = field::union_deserialize_body(&fields); + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + my_quote! 
{ + impl #impl_generics #crate_path::Serialize for #name #ty_generics #where_clause { + fn serialize(&self) -> eyre::Result { + let mut buf = String::new(); + buf.push_str("{ "); + #ser_body; + buf.push_str(" }"); + Ok(buf) + } + } + impl #impl_generics #crate_path::Deserialize for #name #ty_generics #where_clause { + fn deserialize(de: &mut #crate_path::Deserializer) -> eyre::Result { + de.eat_token("{")?; + let val = #de_body; + de.trim_start(); + de.eat_token("}")?; + Ok(val) + } + } + } +} + +pub fn object_serde_trait_for_union( + name: &syn::Ident, + generics: &syn::Generics, + _attrs: &[syn::Attribute], + fields: &syn::FieldsNamed, +) -> TokenStream { + let crate_path = super::get_crate_path(); + let fields = field::convert_field_list(Some(&fields.named), true); + let ser_obj_body = field::union_object_serialize_body(&fields, true); + let trans_obj_body = field::union_object_translate_body(&fields, true); + let de_obj_body = field::union_object_deserialize_body(&fields); + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + my_quote! 
{ + impl #impl_generics #crate_path::ObjectSerialize for #name #ty_generics #where_clause { + fn serialize_obj(&self, state: &#crate_path::ObjectState) -> eyre::Result { + let mut buf = String::new(); + buf.push_str("{ "); + #ser_obj_body; + buf.push_str(" }"); + Ok(buf) + } + } + impl #impl_generics #crate_path::ObjectDeserialize for #name #ty_generics #where_clause { + fn deserialize_obj(de: &mut #crate_path::Deserializer, state: &mut #crate_path::ObjectState) -> eyre::Result { + state.is_union = true; + de.eat_token("{")?; + let val = #de_obj_body; + de.trim_start(); + de.eat_token("}")?; + Ok(val) + } + } + impl #impl_generics #crate_path::ObjectTranslate for #name #ty_generics #where_clause { + fn translate_obj_to_c(&self, state: &#crate_path::ObjectState, program: &#crate_path::FuzzProgram) -> eyre::Result { + let mut buf = String::new(); + buf.push_str("{ "); + #trans_obj_body; + buf.push_str(" }"); + Ok(buf) + } + } + } +} + +pub fn serde_trait_for_enum( + name: &syn::Ident, + generics: &syn::Generics, + _attrs: &[syn::Attribute], + variants: &Punctuated, +) -> TokenStream { + let crate_path = super::get_crate_path(); + let ser_impls = variants.iter().map(|v| { + let key = &v.ident; + let key_name = key.to_string(); + let fields = &v.fields; + if fields.is_empty() { + my_quote!(Self::#key => { + Ok(format!("{}$", #key_name)) + }) + } else { + let (fields, named, _) = field::convert_fields(fields); + let fields_ser = field::struct_serialize_body(&fields, false); + let field_keys = field::list_field_keys(&fields, named); + my_quote!(Self::#key #field_keys => { + let mut buf = String::new(); + buf.push_str(#key_name); + buf.push_str("${ "); + #fields_ser + buf.push_str(" }"); + Ok(buf) + }) + } + }); + let ser_impls = my_quote![ #(#ser_impls),* ]; + let de_impls = variants.iter().map(|v| { + let key = &v.ident; + let key_name = key.to_string(); + let fields = &v.fields; + if fields.is_empty() { + my_quote!(#key_name => Ok(Self::#key)) + } else { + let (fields, 
named, unit) = field::convert_fields(fields); + let de_obj_body = field::struct_deserialize_body(&fields, unit, named); + my_quote!(#key_name => { + de.eat_token("{")?; + let val = Self::#key #de_obj_body; + de.trim_start(); + de.eat_token("}")?; + Ok(val) + }) + } + }); + let de_impls = my_quote![ #(#de_impls),* ]; + + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + my_quote! { + impl #impl_generics #crate_path::Serialize for #name #ty_generics #where_clause { + fn serialize(&self) -> eyre::Result { + match self { + #ser_impls, + _ => { unreachable!() }, + } + } + } + impl #impl_generics #crate_path::Deserialize for #name #ty_generics #where_clause { + fn deserialize(de: &mut #crate_path::Deserializer) -> eyre::Result { + let key = de.next_token_until("$")?; + match key { + #de_impls, + _ => { eyre::bail!("fail to deserialize: {}", de.buf) }, + } + } + } + } +} + +pub fn object_serde_trait_for_enum( + name: &syn::Ident, + generics: &syn::Generics, + _attrs: &[syn::Attribute], + variants: &Punctuated, +) -> TokenStream { + let crate_path = super::get_crate_path(); + let ser_obj_impls = variants.iter().map(|v| { + let key = &v.ident; + let key_name = key.to_string(); + let fields = &v.fields; + if fields.is_empty() { + my_quote!(Self::#key => { + Ok(format!("{}$", #key_name)) + }) + } else { + let (fields, named, _) = field::convert_fields(fields); + let fields_ser = field::struct_object_serialize_body(&fields, false); + let field_keys = field::list_field_keys(&fields, named); + my_quote!(Self::#key #field_keys => { + let mut buf = String::new(); + buf.push_str(#key_name); + buf.push_str("${ "); + #fields_ser + buf.push_str(" }"); + Ok(buf) + }) + } + }); + let ser_obj_impls = my_quote![ #(#ser_obj_impls),* ]; + let de_obj_impls = variants.iter().map(|v| { + let key = &v.ident; + let key_name = key.to_string(); + let fields = &v.fields; + if fields.is_empty() { + my_quote!(#key_name => Ok(Self::#key)) + } else { + let (fields, named, 
unit) = field::convert_fields(fields); + let de_obj_body = field::struct_object_deserialize_body(&fields, unit, named); + my_quote!(#key_name => { + de.eat_token("{")?; + let val = Self::#key #de_obj_body; + de.trim_start(); + de.eat_token("}")?; + Ok(val) + }) + } + }); + let de_obj_impls = my_quote![ #(#de_obj_impls),* ]; + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + my_quote! { + impl #impl_generics #crate_path::ObjectSerialize for #name #ty_generics #where_clause { + fn serialize_obj(&self, state: &#crate_path::ObjectState) -> eyre::Result { + match self { + #ser_obj_impls, + _ => { unreachable!() }, + } + } + } + impl #impl_generics #crate_path::ObjectDeserialize for #name #ty_generics #where_clause { + fn deserialize_obj(de: &mut #crate_path::Deserializer, state: &mut #crate_path::ObjectState) -> eyre::Result { + let key = de.next_token_until("$")?; + match key { + #de_obj_impls, + _ => { unreachable!() }, + } + } + } + } +} + +pub fn kind_trait_for_enum( + name: &syn::Ident, + generics: &syn::Generics, + _attrs: &[syn::Attribute], + variants: &Punctuated, +) -> TokenStream { + let kind_impls = variants.iter().map(|v| { + let key = &v.ident; + let key_name = key.to_string(); + let fields = &v.fields; + if fields.is_empty() { + my_quote!(Self::#key => #key_name) + } else { + let (_, named, _) = field::convert_fields(fields); + if named { + my_quote!(Self::#key {..} => #key_name) + } else { + my_quote!(Self::#key(_) => #key_name) + } + } + }); + let kind_impls = my_quote![ #(#kind_impls),* ]; + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + my_quote! 
{ + impl #impl_generics EnumKind for #name #ty_generics #where_clause { + fn kind(&self) -> &'static str { + match self { + #kind_impls, + _ => { unreachable!() }, + } + } + } + } +} diff --git a/hopper-derive-impl/src/visitor.rs b/hopper-derive-impl/src/visitor.rs new file mode 100644 index 0000000..22bb882 --- /dev/null +++ b/hopper-derive-impl/src/visitor.rs @@ -0,0 +1,269 @@ +use std::collections::BTreeMap; + +use proc_macro2::{Ident, Span, TokenStream}; +use quote::{ToTokens, TokenStreamExt}; +use syn::{ + visit::{self, Visit}, + ForeignItemFn, ItemEnum, ItemFn, ItemForeignMod, ItemStruct, ItemType, ItemUnion, ReturnType, + Signature, Token, TypeBareFn, TypePath, +}; + +use crate::my_quote; +use crate::object; +use crate::{func_hook::*, ENABLE_SET_FN_POINTER}; + +#[derive(Default)] +pub struct FuzzVisitor<'ast> { + mod_ident: Option, + functions: Vec, + structs: Vec<&'ast ItemStruct>, + enums: Vec<&'ast ItemEnum>, + unions: Vec<&'ast ItemUnion>, + type_alias: Vec<&'ast ItemType>, + extern_mark: Option<&'ast ItemForeignMod>, + excluded_type: Vec, + // TypeBareFn does not implement `Default`, so we cannot use hashmap. 
+ callbacks: BTreeMap, +} + +impl<'ast> FuzzVisitor<'ast> { + pub fn set_mod_ident(mut self, mod_ident: Ident) -> Self { + self.mod_ident = Some(mod_ident); + self + } + + fn hook_name(&self) -> String { + if let Some(mod_name) = &self.mod_ident { + format!("mod_{mod_name}") + } else { + "file_bindings".to_string() + } + } + + fn hook_func_pointer_type(&mut self, path: &TypePath) { + if !ENABLE_SET_FN_POINTER { + return; + } + if let Some(fn_ty) = super::folder::get_fn_type_in_option(path) { + if fn_ty.variadic.is_none() { + let mut identifier = + fn_ty + .inputs + .iter() + .fold(String::new(), |mut identifier, arg_ty| { + identifier.push_str(&arg_ty.ty.to_token_stream().to_string()); + identifier.push(';'); + identifier + }); + identifier.push_str("->"); + identifier.push_str(&fn_ty.output.to_token_stream().to_string()); + self.callbacks.insert(identifier, fn_ty.clone()); + } + } + } + + fn add_func_hook_to_tokens(&self, tokens: &mut TokenStream) { + let funcs: Vec = self + .functions + .iter() + .filter(|f| { + // check if the signature contain excluded types + let name = f.ident.to_string(); + let syntax = my_quote!(#f); + let syntax = syntax.to_string(); + if name.starts_with("__") { + println!("cargo:warning=`{}` includes `__`", &syntax); + return false; + } + for ty in &self.excluded_type { + if syntax.contains(ty) { + println!("cargo:warning=`{}` use excluded type: {}", &syntax, ty); + return false; + } + } + true + }) + .cloned() + .collect(); + let ts = add_func_hook(&self.hook_name(), &funcs, &self.structs, &self.type_alias); + tokens.extend(ts); + } + + fn add_structs_to_tokens(&self, tokens: &mut TokenStream) { + for &stru in &self.structs { + let ts = object::object_trait_for_struct( + &stru.ident, + &stru.generics, + &stru.attrs, + &stru.fields, + ); + tokens.extend(ts); + } + } + + fn add_enums_to_tokens(&self, tokens: &mut TokenStream) { + for &enu in &self.enums { + let ts = + object::object_trait_for_enum(&enu.ident, &enu.generics, &enu.attrs, 
&enu.variants); + tokens.extend(ts); + } + } + + fn add_unions_to_tokens(&self, tokens: &mut TokenStream) { + for &un in &self.unions { + let ts = object::object_trait_for_union(&un.ident, &un.generics, &un.attrs, &un.fields); + tokens.extend(ts); + } + } + + pub fn generate_callbacks(&mut self) -> TokenStream { + let callbacks = self.callbacks.values().enumerate().map(|(i, f_raw)| { + let unsafety = &f_raw.unsafety; + let abi = &f_raw.abi; + let fn_token = &f_raw.fn_token; + let mut fn_name_str = String::from("GENERATED_hopper_callback_"); + fn_name_str.push_str(&i.to_string()); + println!("cargo:warning=generate callback {fn_name_str}: {}", f_raw.to_token_stream()); + let fn_name_tokens = fn_name_str.parse::().unwrap(); + let lifetimes = &f_raw.lifetimes; + let inputs = &f_raw.inputs; + let output = &f_raw.output; + let fn_body_tokens = if let ReturnType::Type(_, ty) = output { + // Callback returns non-void + let crate_path = super::get_crate_path(); + let ret_ty = ty.to_token_stream(); + // Here we make a meaningless state to please `generate_new`. + let mut body = my_quote!( + use #crate_path::ObjGenerate; + #crate_path::set_pilot_det(true); + let ret = #ret_ty::generate_new(&mut #crate_path::ObjectState::root("", "")).expect("failed to generate objects in callback"); + #crate_path::set_pilot_det(false); + ret + ); + if let syn::Type::Path(path) = ty.as_ref() { + if super::folder::get_fn_type_in_option(path).is_some() { + body = my_quote!(None) + } + } + body + } else { + // Callback returns void. 
+ my_quote!( () ) + }; + let sig_tokens = my_quote!( + #unsafety #abi #fn_token #fn_name_tokens #lifetimes (#inputs) #output + ); + let sig: Signature = syn::parse2(sig_tokens.clone()).expect("failed to parse function signature"); + self.functions.push(sig); + let callback = my_quote!( + #[no_mangle] + #[allow(unused_variables)] + pub #sig_tokens { + #fn_body_tokens + } + ); + + callback + }); + + my_quote!( #(#callbacks)* ) + } +} + +impl<'ast> Visit<'ast> for FuzzVisitor<'ast> { + fn visit_item_struct(&mut self, node: &'ast ItemStruct) { + let attrs = &node.attrs; + let attrs_tokens = my_quote!(#(#attrs)*, ); + let attrs = attrs_tokens.to_string(); + let ident = node.ident.to_string(); + if ident == "__BindgenBitfieldUnit" { + // ignore implement for __BindgenBitfieldUnit + } + // avoid adding struct without clone + else if !attrs.contains("Clone") { + println!( + "cargo:warning={} has not clone attribute! {}", + &ident, attrs + ); + self.excluded_type.push(ident); + } else { + self.structs.push(node); + } + visit::visit_item_struct(self, node); + } + + fn visit_item_enum(&mut self, node: &'ast ItemEnum) { + self.enums.push(node); + visit::visit_item_enum(self, node); + } + + fn visit_item_union(&mut self, node: &'ast ItemUnion) { + self.unions.push(node); + visit::visit_item_union(self, node); + } + + fn visit_item_type(&mut self, node: &'ast ItemType) { + self.type_alias.push(node); + visit::visit_item_type(self, node); + } + + fn visit_item_foreign_mod(&mut self, node: &'ast ItemForeignMod) { + self.extern_mark = Some(node); + visit::visit_item_foreign_mod(self, node); + self.extern_mark = None; + } + + fn visit_foreign_item_fn(&mut self, node: &'ast ForeignItemFn) { + if check_pub_vis(&node.vis) { + if let Some(mark) = self.extern_mark { + let mut sig = node.sig.clone(); + sig.abi = Some(mark.abi.clone()); + sig.unsafety = Some(Default::default()); + self.functions.push(sig); + } + } + visit::visit_foreign_item_fn(self, node); + } + + fn visit_item_fn(&mut 
self, node: &'ast ItemFn) { + if check_pub_vis(&node.vis) { + self.functions.push(node.sig.clone()); + } + visit::visit_item_fn(self, node); + } + + fn visit_type_path(&mut self, node: &'ast TypePath) { + self.hook_func_pointer_type(node); + visit::visit_type_path(self, node); + } +} + +fn check_pub_vis(vis: &syn::Visibility) -> bool { + matches!(vis, syn::Visibility::Public(_)) +} + +impl<'ast> ToTokens for FuzzVisitor<'ast> { + fn to_tokens(&self, tokens: &mut TokenStream) { + if let Some(mod_name) = &self.mod_ident { + let mod_kw = ::default(); + mod_kw.to_tokens(tokens); + let fuzz_mod = Ident::new(&format!("{mod_name}_hopper_generated"), Span::call_site()); + tokens.append(fuzz_mod); + + let brace = syn::token::Brace::default(); + brace.surround(tokens, |tokens| { + tokens.extend(my_quote!(use super::#mod_name::*;)); + + self.add_structs_to_tokens(tokens); + self.add_enums_to_tokens(tokens); + self.add_unions_to_tokens(tokens); + self.add_func_hook_to_tokens(tokens); + }); + } else { + self.add_structs_to_tokens(tokens); + self.add_enums_to_tokens(tokens); + self.add_unions_to_tokens(tokens); + self.add_func_hook_to_tokens(tokens); + } + } +} diff --git a/hopper-derive/Cargo.toml b/hopper-derive/Cargo.toml new file mode 100644 index 0000000..ee839e2 --- /dev/null +++ b/hopper-derive/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "hopper-derive" +version = "1.0.0" +edition = "2021" +authors = ["Peng Chen "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +syn = {version = "1.0", features = ["full", "visit"]} +quote = "1.0" +proc-macro2 = "1" +hopper-derive-impl = { path = "../hopper-derive-impl" } + +[dev-dependencies] +hopper = { path = "../hopper-core" } + +[lib] +proc-macro = true + +[features] +use_crate = ["hopper-derive-impl/use_crate"] \ No newline at end of file diff --git a/hopper-derive/src/lib.rs b/hopper-derive/src/lib.rs new file mode 100644 index 0000000..c2053f8 --- /dev/null 
+++ b/hopper-derive/src/lib.rs @@ -0,0 +1,90 @@ +//!# A custom dervie implementation for `#[derive(Hopper)]`, which is used in Hopper Fuzzer. +//! +//!blabla... + +#![crate_type = "proc-macro"] +#![recursion_limit = "192"] +extern crate proc_macro; +extern crate proc_macro2; +extern crate syn; +#[macro_use] +extern crate quote; +use hopper_derive_impl::*; + +use proc_macro::TokenStream; +use syn::visit::Visit; +use syn::ItemFn; + +#[proc_macro_attribute] +pub fn fuzz_all(_metadata: TokenStream, input: TokenStream) -> TokenStream { + let input: syn::ItemMod = syn::parse(input).expect("Couldn't parse item mod"); + let mut fuzz_vistor = visitor::FuzzVisitor::default().set_mod_ident(input.ident.clone()); + fuzz_vistor.visit_item_mod(&input); + + let result = my_quote!( + #input + #fuzz_vistor + ); + result.into() +} + +#[proc_macro_derive(Fuzz, attributes(Fuzz))] +pub fn derive_fuzz(input: TokenStream) -> TokenStream { + let ast: syn::DeriveInput = syn::parse(input).expect("Couldn't parse item"); + let result = match ast.data { + syn::Data::Enum(ref e) => object::object_trait_for_enum(&ast.ident, &ast.generics, &ast.attrs, &e.variants), + syn::Data::Struct(ref s) => object::object_trait_for_struct(&ast.ident, &ast.generics, &ast.attrs, &s.fields), + syn::Data::Union(ref u) => object::object_trait_for_union(&ast.ident, &ast.generics, &ast.attrs, &u.fields), + }; + result.into() +} + +#[proc_macro_attribute] +pub fn fuzz(_metadata: TokenStream, input: TokenStream) -> TokenStream { + let input_fn: ItemFn = syn::parse(input).expect("Couldn't parse function signature"); + let ctor_hook = func_hook::add_func_hook( + &format!("fn_{}", &input_fn.sig.ident), + &vec![input_fn.sig.clone()], + &[], + &[], + ); + + let result = my_quote!( + #input_fn + #ctor_hook + ); + + result.into() +} + +#[proc_macro_derive(Serde, attributes(Serde))] +pub fn derive_serde(input: TokenStream) -> TokenStream { + let ast: syn::DeriveInput = syn::parse(input).expect("Couldn't parse item"); + let 
result = match ast.data { + syn::Data::Enum(ref e) => serde::serde_trait_for_enum(&ast.ident, &ast.generics, &ast.attrs, &e.variants), + syn::Data::Struct(ref s) => serde::serde_trait_for_struct(&ast.ident, &ast.generics, &ast.attrs, &s.fields), + syn::Data::Union(ref u) => serde::serde_trait_for_union(&ast.ident, &ast.generics, &ast.attrs, &u.fields), + }; + result.into() +} + +#[proc_macro_derive(ObjectSerde, attributes(ObjectSerde))] +pub fn derive_obj_serde(input: TokenStream) -> TokenStream { + let ast: syn::DeriveInput = syn::parse(input).expect("Couldn't parse item"); + let result = match ast.data { + syn::Data::Enum(ref e) => serde::serde_trait_for_enum(&ast.ident, &ast.generics, &ast.attrs, &e.variants), + syn::Data::Struct(ref s) => serde::serde_trait_for_struct(&ast.ident, &ast.generics, &ast.attrs, &s.fields), + syn::Data::Union(ref u) => serde::serde_trait_for_union(&ast.ident, &ast.generics, &ast.attrs, &u.fields), + }; + result.into() +} + +#[proc_macro_derive(EnumKind, attributes(EnumKind))] +pub fn derive_enum_kind(input: TokenStream) -> TokenStream { + let ast: syn::DeriveInput = syn::parse(input).expect("Couldn't parse item"); + let result = match ast.data { + syn::Data::Enum(ref e) => serde::kind_trait_for_enum(&ast.ident, &ast.generics, &ast.attrs, &e.variants), + _ => unreachable!() + }; + result.into() +} \ No newline at end of file diff --git a/hopper-harness/Cargo.toml b/hopper-harness/Cargo.toml new file mode 100644 index 0000000..0d7f5c9 --- /dev/null +++ b/hopper-harness/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "hopper-harness" +version = "1.0.0" +edition = "2021" +authors = ["Peng Chen "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +hopper = { path = "../hopper-core" } +log = "0.4" +time = "0.3" +flexi_logger = "0.22" +clap = { version = "4.2", features = ["derive"] } +linkme = { version = "0.3", optional = true } +eyre = "0.6" +color-eyre = { version = 
"0.6", default-features = false } +rand = "0.8" +regex = "1" + +[build-dependencies] +bindgen = "0.65" +hopper-derive-impl = { path = "../hopper-derive-impl" } + +[features] +default = ["ctor_hook"] +link_hook = ["linkme", "hopper/link_hook", "hopper-derive-impl/link_hook"] +ctor_hook = ["hopper/ctor_hook", "hopper-derive-impl/ctor_hook"] +llvm_mode = ["hopper/llvm_mode"] +e9_mode = ["hopper/e9_mode"] +cov_mode = [] +verbose = ["hopper/verbose"] +testsuite = ["hopper/testsuite"] \ No newline at end of file diff --git a/hopper-harness/build.rs b/hopper-harness/build.rs new file mode 100644 index 0000000..3ae7cc8 --- /dev/null +++ b/hopper-harness/build.rs @@ -0,0 +1,258 @@ +extern crate bindgen; + +use std::path::PathBuf; +use std::{env, fs}; + +use bindgen::callbacks; + +#[derive(Debug)] +pub struct HopperCallbacks {} + +const IGNORE_MACROS: &[&str] = &[ + "FE_DIVBYZERO", + "FE_DOWNWARD", + "FE_INEXACT", + "FE_INVALID", + "FE_OVERFLOW", + "FE_TONEAREST", + "FE_TOWARDZERO", + "FE_UNDERFLOW", + "FE_UPWARD", + "FP_INFINITE", + "FP_INT_DOWNWARD", + "FP_INT_TONEAREST", + "FP_INT_TONEARESTFROMZERO", + "FP_INT_TOWARDZERO", + "FP_INT_UPWARD", + "FP_NAN", + "FP_NORMAL", + "FP_SUBNORMAL", + "FP_ZERO", + "IPPORT_RESERVED", +]; + +impl HopperCallbacks { + fn new() -> Self { + Self {} + } +} + +impl callbacks::ParseCallbacks for HopperCallbacks { + /// This will be called on every file inclusion, with the full path of the included file. + /// Tell cargo to invalidate the built crate whenever any of the included header files changed. + fn include_file(&self, filename: &str) { + println!("cargo:rerun-if-changed={filename}"); + } + + /// This function will be run on every macro that is identified. 
+ fn will_parse_macro(&self, name: &str) -> callbacks::MacroParsingBehavior { + if IGNORE_MACROS.contains(&name) { + callbacks::MacroParsingBehavior::Ignore + } else { + callbacks::MacroParsingBehavior::Default + } + } + + /// This function will run for every extern variable and function. The returned value determines + /// the name visible in the bindings. + fn generated_name_override(&self, _item_info: callbacks::ItemInfo<'_>) -> Option { + None + } + + /// This function will run for every extern variable and function. The returned value determines + /// the link name in the bindings. + fn generated_link_name_override(&self, _item_info: callbacks::ItemInfo<'_>) -> Option { + None + } + + /// The integer kind an integer macro should have, given a name and the + /// value of that macro, or `None` if you want the default to be chosen. + fn int_macro(&self, _name: &str, _value: i64) -> Option { + None + } + + /// This will be run on every string macro. The callback cannot influence the further + /// treatment of the macro, but may use the value to generate additional code or configuration. + fn str_macro(&self, _name: &str, _value: &[u8]) {} + + /// This will be run on every function-like macro. The callback cannot + /// influence the further treatment of the macro, but may use the value to + /// generate additional code or configuration. + /// + /// The first parameter represents the name and argument list (including the + /// parentheses) of the function-like macro. The second parameter represents + /// the expansion of the macro as a sequence of tokens. + fn func_macro(&self, _name: &str, _value: &[&[u8]]) {} + + /// Allows to rename an enum variant, replacing `_original_variant_name`. + fn enum_variant_name( + &self, + _enum_name: Option<&str>, + _original_variant_name: &str, + _variant_value: callbacks::EnumVariantValue, + ) -> Option { + None + } + + /// Allows to rename an item, replacing `_original_item_name`. 
+    fn item_name(&self, _original_item_name: &str) -> Option {
+        None
+    }
+
+    /// This will be called every time `bindgen` reads an environment variable whether it has any
+    /// content or not.
+    fn read_env_var(&self, _key: &str) {}
+
+    /*
+    fn add_derives(&self, _name: &str) -> Vec {
+        vec![
+            "Serialize".to_string(),
+            "Deserialize".to_string(),
+            // "Fuzz".to_string(),
+        ]
+    }
+    */
+}
+
+/// File-name suffix of a dynamic library on the target platform.
+#[cfg(target_os = "linux")]
+static DYNAMIC_LIB_SUFFIX: &str = ".so";
+#[cfg(target_os = "macos")]
+static DYNAMIC_LIB_SUFFIX: &str = ".dylib";
+#[cfg(target_os = "windows")]
+static DYNAMIC_LIB_SUFFIX: &str = ".dll";
+/// File-name suffix of a static library.
+static STATIC_LIB_SUFFIX: &str = ".a";
+
+/// Emit the cargo link directives for every library listed in `HOPPER_LIBRARY`.
+///
+/// `HOPPER_LIBRARY` is a comma-separated list of library paths. A path ending in
+/// `.a` is linked statically; anything else must contain the platform's dynamic
+/// library suffix and is linked dynamically (with an rpath on Linux).
+///
+/// # Panics
+/// Panics if `HOPPER_LIBRARY` is unset, if an entry has no parent directory or
+/// file name, or if a non-static entry lacks the dynamic library suffix.
+fn link_libraries() {
+    // Tell cargo to tell rustc to link shared library.
+    let library_env = env::var("HOPPER_LIBRARY").unwrap();
+    for library in library_env.split(',') {
+        // Tolerate a trailing or doubled comma instead of panicking on an empty path.
+        if library.is_empty() {
+            continue;
+        }
+        let library_path = PathBuf::from(library);
+        let dir = library_path.parent().unwrap().to_string_lossy();
+        let lib = library_path.file_name().unwrap().to_string_lossy();
+        // Remove exactly one `lib` prefix: `trim_start_matches` strips it repeatedly,
+        // which would turn `liblibfoo.so` into `foo` instead of `libfoo`.
+        #[cfg(target_os = "linux")]
+        let lib = lib.strip_prefix("lib").unwrap_or(&lib);
+        let (lib, is_static) = if let Some(s) = lib.strip_suffix(STATIC_LIB_SUFFIX) {
+            (s, true)
+        } else {
+            // `split_once` also handles versioned names such as `libfoo.so.1`.
+            let (lib, _) = lib
+                .split_once(DYNAMIC_LIB_SUFFIX)
+                .unwrap_or_else(|| panic!("library should end with `{DYNAMIC_LIB_SUFFIX}`"));
+            (lib, false)
+        };
+        println!("cargo:warning=dir={dir}, lib={lib}");
+        if is_static {
+            println!("cargo:rustc-link-lib=static={lib}");
+        } else {
+            println!("cargo:rustc-link-lib=dylib={lib}");
+            #[cfg(target_os = "linux")]
+            println!("cargo:rustc-link-arg=-Wl,-rpath,{dir}");
+        }
+        println!("cargo:rustc-link-search=native={dir}");
+    }
+}
+
+/// Build-script entry point.
+///
+/// Without `HOPPER_LIBRARY` an empty `fuzz_extend.rs` is written to `OUT_DIR`.
+/// Otherwise the libraries are linked, bindings for `HOPPER_HEADER` are generated
+/// with `bindgen`, extended by `hopper_derive_impl`, formatted and written to
+/// `$OUT_DIR/fuzz_extend.rs`.
+fn main() {
+    // Both variables are read at run time with `env::var`, so cargo has to be told
+    // to re-run the script when they change.
+    println!("cargo:rerun-if-env-changed=HOPPER_LIBRARY");
+    println!("cargo:rerun-if-env-changed=HOPPER_HEADER");
+    let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());
+    if env::var("HOPPER_LIBRARY").is_err() {
+        fs::write(out_path.join("fuzz_extend.rs"), "".as_bytes()).expect("Unable to write file");
+        return;
+    }
+    link_libraries();
+    let header_path = env::var("HOPPER_HEADER").unwrap();
+    println!("cargo:rerun-if-changed={header_path}");
+    // The bindgen::Builder is the main entry point
+    // to bindgen, and lets you build up options for
+    // the resulting bindings.
+    let mut builder = bindgen::Builder::default()
+        // The input header we would like to generate
+        // bindings for.
+        .header(header_path)
+        // Callback during parsing
+        .parse_callbacks(Box::new(HopperCallbacks::new()))
+        // Disable derive default
+        .derive_default(false)
+        // Should have debug trait
+        .derive_debug(true)
+        // If debug can't derived, we impl it
+        .impl_debug(true)
+        // use NewType for enum
+        .default_enum_style(bindgen::EnumVariation::NewType {
+            is_bitfield: false,
+            is_global: false,
+        })
+        // default blocklist
+        .blocklist_function("__.*")
+        .blocklist_function("strtold")
+        // Disable layout tests
+        .layout_tests(false);
+
+    if let Some(allowlist) = option_env!("HOPPER_FUNC_ALLOW_LIST") {
+        builder = builder.allowlist_function("fopen");
+        for item in allowlist.split(',') {
+            builder = builder.allowlist_function(item);
+        }
+    }
+    if let Some(blacklist) = option_env!("HOPPER_FUNC_BLACKLIST") {
+        for item in blacklist.split(',') {
+            builder = builder.blocklist_function(item);
+        }
+    }
+    if let Some(blacklist) = option_env!("HOPPER_TYPE_BLACKLIST") {
+        for item in blacklist.split(',') {
+            builder = builder.blocklist_type(item);
+        }
+    }
+    if let Some(blacklist) = option_env!("HOPPER_ITEM_BLACKLIST") {
+        for item in blacklist.split(',') {
+            builder = builder.blocklist_item(item);
+        }
+    }
+    if let Some(include_search_paths) = option_env!("HOPPER_INCLUDE_SEARCH_PATH") {
+        // Search paths are colon-separated, unlike the comma-separated lists above.
+        for item in include_search_paths.split(':') {
+            let arg = format!("-I{item}");
+            println!("cargo:warning=add_search_path={item}");
+            builder = builder.clang_arg(arg.as_str());
+        }
+    }
+
+    if let Some(opaque_list) = option_env!("HOPPER_CUSTOM_OPAQUE_LIST") {
+        for item in opaque_list.split(',') {
+            println!("cargo:warning=`{item}` is custom opaque");
+            builder = builder.opaque_type(item);
+        }
+    }
+    // default opaque type
+    builder = builder.opaque_type("_IO_FILE");
+
+    // enable the verbose flag for clang
+    // builder = builder.clang_arg("-v");
+
+    // Finish the builder and generate the bindings.
+    let bindings = builder
+        .generate()
+        // https://github.com/rust-lang/rust-bindgen/pull/1846
+        // dynamic_library_name()
+        // Unwrap the Result and panic on failure.
+        .expect("Unable to generate bindings");
+
+    // Write the bindings to the $OUT_DIR/bindings.rs file.
+    // bindings.write_to_file(out_path.join("bindings.rs")).expect("Couldn't write bindings!");
+
+    // Enhance FFI binding code for hopper
+    hopper_derive_impl::set_compiler_env();
+    let fuzz_gen = hopper_derive_impl::derive_bindings(&bindings.to_string());
+    let raw_content = fuzz_gen.to_string();
+    // Format code by rustfmt
+    let fmt_content = hopper_derive_impl::format::rustfmt_generated_string(&raw_content)
+        .expect("fail to format content");
+    // Write to the fuzz_extend.rs file.
+    fs::write(out_path.join("fuzz_extend.rs"), fmt_content.as_bytes())
+        .expect("Unable to write file");
+}
diff --git a/hopper-harness/src/bin/hopper-bench.rs b/hopper-harness/src/bin/hopper-bench.rs
new file mode 100644
index 0000000..28840c8
--- /dev/null
+++ b/hopper-harness/src/bin/hopper-bench.rs
@@ -0,0 +1,43 @@
+//! Used for benchmark hopper's runtime efficient
+//! Since we do not set core_limit, you should set:
+//! 
ulimit -c 0 + +use std::{path::PathBuf, io::Read}; + +pub fn main() { + hopper_harness::hopper_extend(); + if let Some(dir) = std::env::args().nth(1) { + let mut executor = hopper::Executor::default(); + hopper::init_depot_dirs().unwrap(); + // let mut feedback = hopper::Feedback::new().unwrap(); + // feedback.clear(); + // executor.set_timeout(1); + let start_at = std::time::Instant::now(); + let path = PathBuf::from(dir); + let mut inputs = vec![]; + for entry in path.read_dir().unwrap() { + let file = entry.unwrap().path(); + if !file.is_file() { + continue; + } + let mut buffer = String::new(); + let mut f = std::fs::File::open(file).unwrap(); + f.read_to_string(&mut buffer).unwrap(); + inputs.push(buffer); + } + let start_run_at = std::time::Instant::now(); + + for (i, input) in inputs.iter().enumerate() { + let ret = executor.execute(|| { + let mut program = hopper::read_program(input, false).unwrap(); + program.eval() + }); + println!("{i}, {ret:?}"); + } + + let t_run = start_run_at.elapsed(); + let t_all = start_at.elapsed(); + println!("num: {}, run: {}s, all: {}s", inputs.len(), t_run.as_secs_f32(), t_all.as_secs_f32()); + + } +} diff --git a/hopper-harness/src/bin/hopper-fuzzer.rs b/hopper-harness/src/bin/hopper-fuzzer.rs new file mode 100644 index 0000000..ea993d0 --- /dev/null +++ b/hopper-harness/src/bin/hopper-fuzzer.rs @@ -0,0 +1,103 @@ +extern crate clap; +use hopper::HopperError; +use std::io::prelude::*; + +fn init_logger() { + use flexi_logger::*; + + let output_file = FileSpec::default() + .directory(hopper::OUTPUT_DIR) + .basename("fuzzer"); + + #[cfg(not(feature = "verbose"))] + Logger::try_with_env_or_str("info") + .unwrap() // Write all error, warn, and info messages + .log_to_file(output_file) + .duplicate_to_stdout(Duplicate::Debug) + .format_for_files(opt_format) + .adaptive_format_for_stdout(AdaptiveFormat::Opt) + .rotate( + // If the program runs long enough, + Criterion::Size(1 << 30), + Naming::Timestamps, + 
Cleanup::KeepLogFiles(3), + ) + .start() + .unwrap(); + + #[cfg(feature = "verbose")] + { + use flexi_logger::writers::FileLogWriter; + let status_writer = Box::new( + FileLogWriter::builder( + FileSpec::default() + .directory(hopper::OUTPUT_DIR) + .suppress_timestamp() + .basename("status"), + ).rotate( + Criterion::Size(1 << 30), + Naming::Timestamps, + Cleanup::KeepLogFiles(3)) + .try_build() + .unwrap(), + ); + + let status_oneshot_writer = Box::new( + FileLogWriter::builder( + FileSpec::default() + .directory(hopper::OUTPUT_DIR) + .suppress_timestamp() + .basename("status_oneshot"), + ) + .rotate( + Criterion::Size(1), + Naming::Numbers, + Cleanup::KeepLogFiles(0), + ) + .try_build() + .unwrap(), + ); + + Logger::try_with_env_or_str("info") + .unwrap() // Write all error, warn, and info messages + .log_to_file(output_file) + .add_writer("Status", status_writer) + .add_writer("StatusOneShot", status_oneshot_writer) + .duplicate_to_stdout(Duplicate::Debug) + .format_for_files(opt_format) + .adaptive_format_for_stdout(AdaptiveFormat::Opt) + .rotate( + // If the program runs long enough, + Criterion::Size(1 << 30), + Naming::Timestamps, + Cleanup::KeepLogFiles(3), + ) + .start() + .unwrap(); + } + + +} + +fn main() -> eyre::Result<()> { + color_eyre::install()?; + hopper::parse_config()?; + init_logger(); + log::info!("Hopper starting ..."); + log::info!("config: {:?}", hopper::get_config()); + hopper_harness::hopper_extend(); + let res = hopper::run_fuzzer(); + if let Err(err) = &res { + if let Some(HopperError::TestSuccess) = err.downcast_ref::() { + std::process::exit(hopper::TEST_SUCCESS_EXIT_CODE); + } + log::error!("fuzzer error is wrote into misc/fuzzer_error.log"); + hopper::create_dir_in_output_if_not_exist(hopper::MISC_DIR)?; + let path = hopper::output_file_path("misc/fuzzer_error.log"); + let mut f = std::fs::File::create(path)?; + writeln!(f, "{err:#?}")?; + } + log::info!("Hopper ending ..."); + res?; + Ok(()) +} diff --git 
a/hopper-harness/src/bin/hopper-generator.rs b/hopper-harness/src/bin/hopper-generator.rs new file mode 100644 index 0000000..c8f807e --- /dev/null +++ b/hopper-harness/src/bin/hopper-generator.rs @@ -0,0 +1,156 @@ +/// Generator for replay mutating and generating. +/// ./bin/hopper-generator ./crashes/id:000000 +/// if function target should be specific, set `HOPPER_PATTERN` environment. +/// +use hopper::{ + Deserialize, Deserializer, FuzzProgram, FuzzStmt, MutateOperator, RngState, + Serialize +}; + +fn main() -> eyre::Result<()> { + color_eyre::install()?; + hopper_harness::hopper_extend(); + flexi_logger::Logger::try_with_env_or_str("trace")?.start()?; + if let Some(file) = std::env::args().nth(1) { + let mut replay_mode = true; + let mut op_mode = false; + let mut input_only = false; + let mut refine = false; + std::env::args().for_each(|flag| { + if flag == "--op-mode" { + op_mode = true; + } + if flag == "--no-replay-mode" { + replay_mode = false; + } + if flag == "--input-only" { + input_only = true; + } + if flag == "--refine" { + refine = true; + } + }); + process_file(&file, op_mode, replay_mode, input_only, refine)?; + } + Ok(()) +} + +fn process_file( + file: &str, + op_mode: bool, + replay_mode: bool, + input_only: bool, + refine: bool, +) -> eyre::Result<()> { + hopper::init_constraints()?; + hopper::read_existing_opaue()?; + + hopper::effective::load_effective_args()?; + let buf = std::fs::read_to_string(file)?; + if refine { + let mut program = hopper::read_program(&buf, false)?; + program.refine_program()?; + log::info!("refined program:\n {}", program.serialize_all()?); + return Ok(()); + } + + let mut lines = buf.lines(); + let mut parent = None; + if let Some(l) = lines.next() { + let mut de = Deserializer::new(l, None); + let _ = de.next_token_until("Parent:")?; + let parent_buf = de.next_token_until(",")?; + if parent_buf != "None" { + parent = Some(parent_buf.parse::()?); + } + log::info!("parent: {:?}", parent); + } + + let mut rng_state = 
None; + for l in lines { + let mut de = Deserializer::new(l, None); + if de.strip_token("") && replay_mode { + de.trim_start(); + log::info!("load rng : {}", l); + rng_state = Some(RngState::deserialize(&mut de)?); + } + if de.strip_token("") { + de.trim_start(); + let flag: u8 = de.parse_number()?; + hopper::set_mutate_flag(flag); + } + if let Some(pos) = l.find(hopper::CallStmt::TARGET) { + let f = l[pos + 9..].split_once(' ').unwrap().0; + hopper::get_config_mut().set_func_target(f)?; + } + if de.strip_token("") && op_mode { + if let Some(seed_id) = parent { + let mut program = hopper::read_input_in_queue(seed_id)?; + program.parent = Some(seed_id); + de.trim_start(); + de.program = Some(&mut program); + let operators = Vec::::deserialize(&mut de)?; + log::info!("ops: {}", operators.serialize()?); + program.mutate_program_by_ops(&operators)?; + program.refine_program()?; + log::info!("mutate program:\n {}", program.serialize_all()?); + return Ok(()); + } else { + return Err(eyre::eyre!( + "Program parent not found. 
operator mode require a parent to be mutated" + )); + } + } + } + + if op_mode { + return Err(eyre::eyre!("Operators not found.")); + } + + if let Ok(pattern) = std::env::var("FUNC_PATTERN") { + let config = hopper::get_config_mut(); + config.func_pattern = Some(pattern); + config.set_func_pattern()?; + } + if let Ok(v) = std::env::var("PILOT_DET") { + hopper::set_pilot_det(v != "0" && v != "false"); + } + if let Ok(v) = std::env::var("SINGLE_CALL") { + hopper::set_single_call(v != "0" && v != "false"); + } + if let Ok(v) = std::env::var("REUSE_STMT") { + hopper::set_reuse_stmt(v != "0" && v != "false"); + } + + if let Some(seed_id) = parent { + let mut program = hopper::read_input_in_queue(seed_id)?; + program.parent = Some(seed_id); + program.update_weight(); + // do not support det mutate + for is in &mut program.stmts { + if let FuzzStmt::Load(load) = &mut is.stmt { + load.state.done_deterministic(); + } + } + if let Some(rng) = rng_state { + hopper::restore_rng_state(rng); + } + log::info!("parent: {}", program.serialize_all()?); + if input_only { + program.mutate_program_inputs()?; + } else { + log::info!("current rng2: {:?}", hopper::save_rng_state()); + program.mutate_program()?; + } + log::info!("mutate program:\n {}", program.serialize_all()?); + } else { + if let Some(rng) = rng_state { + hopper::restore_rng_state(rng); + } + let target = hopper::get_config().func_target.unwrap(); + let program = FuzzProgram::generate_program_for_func(target)?; + log::info!("generate program:\n{}", program.serialize_all()?); + } + + Ok(()) +} diff --git a/hopper-harness/src/bin/hopper-harness.rs b/hopper-harness/src/bin/hopper-harness.rs new file mode 100644 index 0000000..549cf90 --- /dev/null +++ b/hopper-harness/src/bin/hopper-harness.rs @@ -0,0 +1,84 @@ +//! Hopper Harness +//! Create a fork server to receive control message and execute program +//! +//! Fork Server: harness --server +//! Replay: harness [file_name] [options] +//! options: +//! 
--sanitize : use `sanitize` in program, the default one is `review` +//! --execute : use `eval` in program, the default one is `review` +//! --nofork : do not fork a process and then execute, it will run program with fork by default + +use std::path::Path; + +fn init_logger(name: &str) { + use flexi_logger::*; + let mut output_file = FileSpec::default().basename(name); + output_file = output_file.directory(hopper::OUTPUT_DIR); + Logger::try_with_env_or_str("info") + .unwrap() // Write all error, warn, and info messages + .log_to_file(output_file) + .format_for_files(opt_format) + .rotate( + // If the program runs long enough, + Criterion::Size(1 << 30), + Naming::Timestamps, + Cleanup::KeepLogFiles(3), + ) + .start() + .unwrap(); +} + +pub fn main() -> eyre::Result<()> { + hopper_harness::hopper_extend(); + let is_server = std::env::args().any(|f| f == "--server"); + if is_server { + let is_fast = std::env::args().any(|f| f == "--fast"); + if is_fast { + init_logger("harness_fast"); + } else { + init_logger("harness"); + } + let res = hopper::run_fork_server(); + if let Err(err) = res { + log::error!("error: {}", err); + log::error!("root cause: {:?}", err.root_cause()); + std::process::exit(hopper::FORK_ERROR_EXIT_CODE); + } + return Ok(()); + } + let query_gadgets = std::env::args().any(|f| f == "--gadgets"); + if query_gadgets { + hopper::create_dir_in_output_if_not_exist(hopper::MISC_DIR)?; + hopper::global_gadgets::get_instance().save_gadgets_to_file()?; + return Ok(()); + } + + if let Some(file_name) = std::env::args().nth(1) { + color_eyre::install()?; + flexi_logger::Logger::try_with_env_or_str("trace") + .unwrap() + .start() + .unwrap(); + let infer_crash = std::env::args().any(|f| f == "--infer"); + if infer_crash { + hopper::global_gadgets::get_mut_instance().build_arg_and_ret_graph(); + hopper::infer_crash(&file_name)?; + let path = Path::new("found_constraints"); + hopper::CONSTRAINTS.with(|c| c.borrow().save_to_file(path))?; + return Ok(()); + } + let 
minimize_input = std::env::args().any(|f| f == "--minimize"); + if minimize_input { + hopper::minimize_input(&file_name)?; + return Ok(()); + } + let mut cmd = hopper::ForkCmd::Review; + if std::env::args().any(|f| f == "--sanitize") { + cmd = hopper::ForkCmd::Sanitize; + } else if std::env::args().any(|f| f == "--execute") { + cmd = hopper::ForkCmd::Execute; + } + hopper::run_program(&file_name, cmd)?; + } + Ok(()) +} diff --git a/hopper-harness/src/bin/hopper-sanitizer.rs b/hopper-harness/src/bin/hopper-sanitizer.rs new file mode 100644 index 0000000..710d4da --- /dev/null +++ b/hopper-harness/src/bin/hopper-sanitizer.rs @@ -0,0 +1,580 @@ +use std::collections::{HashMap, HashSet}; +use std::path::{Path, PathBuf}; +use std::process::Command; + +use eyre::{Context, ContextCompat}; +use hopper::{ + self, filter_function_constraint_with, ConstraintSig, Deserialize, FuzzStmt, + Fuzzer, RcIndex, SanitizeResult, Serialize, +}; + +pub fn main() -> eyre::Result<()> { + flexi_logger::Logger::try_with_env_or_str("info")?.start()?; + hopper_harness::hopper_extend(); + hopper::log!(info, "read constraints..."); + let mut fuzzer = hopper::create_fuzzer()?; + + if let Some(file_name) = std::env::args().nth(1) { + let path = PathBuf::from(file_name); + sanitize_crash(&path, &mut fuzzer, None, 0, 1)?; + return Ok(()); + } + + let minimized_crashes_dir = hopper::output_file_path(hopper::MINIMIZED_CRASHES_DIR); + if minimized_crashes_dir.exists() { + std::fs::remove_dir_all(&minimized_crashes_dir)?; + } + std::fs::create_dir(minimized_crashes_dir)?; + + let crashes = fuzzer + .depot + .crashes + .read_dir()? 
+ .iter() + .filter(|x| x.extension().is_none()) + .cloned() + .collect(); + let crashes = sanitize_crash_by_rip(crashes)?; + let crashes = sanitize_grouped_crashes(crashes, &mut fuzzer)?; + let crashes = sanitize_crash_by_clang_sanitizer_pc(crashes)?; + let crashes = sanitize_variable_args_crash(crashes)?; + sanitize_uninfered_crashes(&crashes, &mut fuzzer)?; + classify_crashes_by_infer(&crashes)?; + Ok(()) +} + +// Remove the duplicated crashes by rip, and save results in unique_crashes. +fn sanitize_crash_by_rip(crashes: Vec) -> eyre::Result> { + hopper::log!(info, "Remove the duplicated crashes by rip.."); + let mut unique_programs: Vec = Vec::new(); + let mut rip_set: HashSet = HashSet::new(); + for crash_path in &crashes { + let crash_raw = std::fs::read_to_string(crash_path) + .with_context(|| format!("fail to read this path : {crash_path:?} to string."))?; + let rip = extract_program_rip(&crash_raw); + if let Some(rip) = rip { + if rip_set.contains(&rip) { + hopper::log!(warn, "duplicated crash filtered out by rip: {}", rip); + continue; + } + // rip == "0x0" may be many different cases + if rip != "0x0" { + rip_set.insert(rip); + } + } + unique_programs.push(crash_path.clone()); + } + hopper::log!( + info, + "sanitize {} crashes to : {} by rip", + crashes.len(), + unique_programs.len() + ); + Ok(unique_programs) +} + +// remove the duplicated crashes by pc of clang sanitizer. 
+fn sanitize_crash_by_clang_sanitizer_pc(crashes: Vec) -> eyre::Result> { + hopper::log!(info, "Remove the duplicated crashes by pc of clang sanitizer.."); + let mut fail_cnt = 0; + let mut unique_programs: Vec = Vec::new(); + let mut pc_set: HashSet = HashSet::new(); + for crash_path in &crashes { + let mut executable_path = PathBuf::from(crash_path); + executable_path.set_extension("out"); + if !executable_path.exists() { + fail_cnt += 1; + continue; + } + // hopper::log!(info, "{:?}", executable_path); + let hopper_out_dir = hopper::OUTPUT_DIR; + let binary = String::from(executable_path.to_str().context("path should be valid")?); + //let binary = String::from(".") + &binary; + let output = Command::new("setarch") + .arg("-R") + .arg(binary) + .env("LD_LIBRARY_PATH", hopper_out_dir) + .output() + .context("fail to execute the program")?; + let err_output = String::from_utf8_lossy(&output.stderr); + if let Some(pc) = extract_output_pc(&err_output) { + if pc_set.contains(&pc) { + hopper::log!(warn, "duplicated crash filtered out by pc: {}", pc); + } else { + unique_programs.push(crash_path.clone()); + pc_set.insert(pc); + } + continue; + } + unique_programs.push(crash_path.clone()); + hopper::log!(info, "not found a valid pc from this: {:#?}", err_output); + } + if fail_cnt == crashes.len() { + hopper::log!(warn, "skip duplicated cashes by clang sanitizer!"); + hopper::log!(warn, "please make sure your program are translated and compiled with AddressSanitizer correctly. 
hint: ./fuzz.sh hopper translate_crash"); + return Ok(crashes); + } + hopper::log!( + info, + "sanitize {} crashes to : {} by sanitizing pc", + crashes.len(), + unique_programs.len() + ); + Ok(unique_programs) +} + +fn sanitize_crash( + crash_path: &Path, + fuzzer: &mut Fuzzer, + minimized_crashes_holder: Option<&hopper::DepotDir>, + crash_index: usize, + total: usize, +) -> eyre::Result { + let crash_raw = std::fs::read_to_string(crash_path)?; + let mut crash_p = hopper::read_program(&crash_raw, false)?; + hopper::parse_program_extra(&crash_raw, &mut crash_p)?; + hopper::log!(info, "[{crash_index}/{total}]:id_{:06}", crash_p.id); + + // check + // 1. its ops is likely to FP + // 2. whether it violates the constraint + // 3. whether it is from another seed or succeeded in pilot + // check ops + + if let Some(fail_at) = crash_p.get_fail_stmt_index() { + crash_p.stmts.truncate(fail_at.get() + 1); + crash_p.check_ref_use()?; + hopper::log!(info, "minimized program: {}", crash_p.serialize()?); + } + // Refine the program again to see if it violates the latest constraint + let mut refine_ops = crash_p.refine_program()?; + // Eliminate invalidated ops + /* + let constraint_violated = refine_ops.iter().any(|op| { + matches!(op.op, hopper::MutateOperation::PointerGen { .. }) + || matches!(op.op, hopper::MutateOperation::InitOpaque { .. }) + }); + */ + if !refine_ops.is_empty() { + let status = fuzzer.executor.execute_program(&crash_p)?; + if status.is_normal() { + refine_ops.retain(|op| !op.key.is_released()); + hopper::log!( + warn, + "violate constraints! ops: {}", + refine_ops.serialize()? 
+ ); + return Ok(false); + } + } + + let mut failed_holder = None; + for st in crash_p.stmts.iter() { + if let hopper::FuzzStmt::Call(call) = &st.stmt { + if call.failure { + failed_holder = Some((st.index.get(), call.fg.f_name.to_owned())); + break; + } + } + } + + if let Some((failed_at, failed_func_name)) = failed_holder { + if !filter_function_constraint_with(&failed_func_name, |c| c.can_succeed) { + hopper::log!(warn, "crash function is pilot-infer failed!"); + return Ok(false); + } + + // double check if it is crash + let mut num_fail = 0; + // hopper::log!(info, "re-check execution"); + for _ in 0..10 { + let status = fuzzer.executor.execute_program(&crash_p)?; + if status.is_crash() { + num_fail += 1; + } + } + if num_fail < 10 { + hopper::log!( + info, + "the program may success during re-running, {num_fail}/10" + ); + } + if num_fail == 0 { + hopper::log!(info, "double check crash fail: the program runs successful"); + return Ok(false); + } + + let skip_mutate = std::env::var("SKIP_MUTATE").is_ok(); + let mut succ_cnt = 0; + if !skip_mutate { + // try mutate the crash for 30 times and see how many times it succeeds + // hopper::log!(info, "mutate input"); + crash_p.update_weight(); + for is in &mut crash_p.stmts { + if let FuzzStmt::Load(load) = &mut is.stmt { + load.state.done_deterministic(); + } + } + let max = 10 * crash_p.stmts.len(); + for _ in 0..max { + let mut cloned_p = crash_p.clone(); + cloned_p.mutate_program_inputs()?; + let status = fuzzer.executor.execute_program(&cloned_p)?; + let last_index = fuzzer.observer.feedback.last_stmt_index(); + let is_effective_exec = last_index >= failed_at; + if status.is_normal() && is_effective_exec { + succ_cnt += 1; + hopper::log!(warn, "the input can be success after mutate input"); + break; + } + } + if succ_cnt == 0 { + hopper::log!( + warn, + "crash input always crash after mutate values in input" + ); + } + } + if skip_mutate || succ_cnt >= 0 { + let mut cov_p = crash_p.clone(); + 
cov_p.set_calls_track_cov(false); + cov_p.get_call_stmt_mut(failed_at).unwrap().track_cov = true; + let status = fuzzer.executor.execute_program(&cov_p)?; + if status.is_normal() { + hopper::log!(error, "status should not be normal"); + } + let path = fuzzer.observer.feedback.path.get_list(); + hopper::log!(debug, "path: {:?}", path); + let new_edges = fuzzer.observer.get_new_uniq_path(status); + if !new_edges.is_empty() { + hopper::log!(info, "found new edges, start sanitize"); + fuzzer.executor.sanitize_program(&crash_p)?; + let sanitize_result = SanitizeResult::conclusion(&crash_p)?; + hopper::log!(debug, "new edges: {:?}", new_edges); + fuzzer.observer.merge_coverage(&new_edges, status); + let file_name = crash_path.file_name().unwrap().to_str().unwrap(); + if let Some(minimized_crashes) = minimized_crashes_holder { + hopper::log!(info, "save {file_name} into minimized_dir."); + minimized_crashes.save_program_custom( + file_name, + &crash_p, + status, + Some(sanitize_result.to_string()), + )?; + } else { + hopper::log!(info, "Accepted."); + } + + // minimize + let mut min_p = crash_p.clone(); + // truncate stmts + min_p.set_calls_track_cov(false); + let mut has_min = false; + if let Some(crash_pos) = min_p.get_fail_stmt_index() { + if let Some(call) = min_p.get_call_stmt_mut(crash_pos.get()) { + eyre::ensure!(call.failure, "should be failure"); + call.ident = hopper::CallStmt::TARGET.to_string(); + call.track_cov = true; + } + let new_len = crash_pos.get() + 1; + if new_len < min_p.stmts.len() { + has_min = true; + } + min_p.stmts.truncate(new_len); + min_p.check_ref_use()?; + } + if fuzzer.minimize(&mut min_p, &status)? 
{ + has_min = true; + } + if has_min { + hopper::log!(info, "found minimized crash input"); + let min_file_name = format!("{file_name}_min"); + let min_sanitize_result = SanitizeResult::conclusion(&min_p)?; + if let Some(minimized_crashes) = minimized_crashes_holder { + minimized_crashes.save_program_custom( + &min_file_name, + &min_p, + status, + Some(min_sanitize_result.to_string()), + )?; + } else { + hopper::log!(info, "minimized program: {}", min_p.serialize_all()?); + hopper::log!(info, "sanitizer: {}", min_sanitize_result.to_string()); + } + } + return Ok(true); + } else { + hopper::log!(warn, "duplicated crash"); + } + } + } else { + hopper::log!( + warn, + "failed at non-call statement: {}", + crash_p.serialize()? + ); + } + Ok(false) +} + + +pub fn extract_program_rip(buf: &str) -> Option { + //memory 0x402000 and RIP 0x7fc72eff3bb6 + let re = regex::Regex::new(r"0x\d+ and RIP (0x[0-9a-z]+) ").unwrap(); + let res = re.captures(buf); + if let Some(cap) = res { + let rip = cap.get(1); + if let Some(rip) = rip { + return Some(rip.as_str().to_string()); + } + } + None +} + +pub fn extract_output_pc(buf: &str) -> Option { + let re = regex::Regex::new(r"pc (0x[0-9a-z]+) ").unwrap(); + let res = re.captures(buf); + if let Some(cap) = res { + let pc = cap.get(1); + if let Some(pc) = pc { + return Some(pc.as_str().to_string()); + } + } + None +} + +/// extract violate constraints from raw str program. 
+pub fn extract_violate_constraints(buf: &str) -> eyre::Result> { + //* Voilate constraint: png_convert_to_rfc1123_buffer[$0][[&]] = SetLength${ len: 4, }, png_convert_to_rfc1123_buffer[$0][[&]] = SetLength${ len: 64, }, + let mut constraint_vec: Vec = Vec::new(); + let mut buf_de = hopper::Deserializer::new(buf, None); + if buf_de.next_token_until("Violate constraint: ").is_err() { + return Ok(constraint_vec); + } + while buf_de.peek_char().is_some() { + let curr_buf = buf_de.buf; + let mut i = 0; + let mut bracket_cnt = 0; + while i < curr_buf.len() { + if let Some(c) = curr_buf.chars().nth(i) { + if c == '{' { + bracket_cnt += 1; + } else if c == '}' { + bracket_cnt -= 1; + } else if c == ',' && bracket_cnt == 0 { + break; + } + } + i += 1; + } + let constraint_str = &curr_buf[0..i]; + buf_de.buf = curr_buf[i + 1..].trim(); + let mut de = hopper::Deserializer::new(constraint_str, None); + let constraint_sig = hopper::ConstraintSig::deserialize(&mut de)?; + constraint_vec.push(constraint_sig); + } + Ok(constraint_vec) +} + +/// classify crashes into the "infered" and "uninfered" groups, according whether they have infered violate constraints. 
+///
+/// Crash files for which `extract_violate_constraints` finds at least one
+/// constraint are copied into `<minimized crashes dir>/infered`; all other crash
+/// files are copied into the minimized crashes directory itself.
+///
+/// # Errors
+/// Returns an error if a directory cannot be created, a crash file cannot be
+/// read or copied, or its constraint section cannot be parsed.
+fn classify_crashes_by_infer(crashes: &Vec<PathBuf>) -> eyre::Result<()> {
+    hopper::log!(info, "Save crashes to minimized crashes directory ...");
+    let uninfered_dir = hopper::output_file_path(hopper::MINIMIZED_CRASHES_DIR);
+    let infered_dir = uninfered_dir.join("infered");
+    // `create_dir_all` succeeds when the directory already exists, so no racy
+    // `exists()` check is needed.
+    std::fs::create_dir_all(&infered_dir)?;
+    let mut infered_crashes: Vec<PathBuf> = Vec::new();
+    let mut uninfered_crashes: Vec<PathBuf> = Vec::new();
+    for crash_path in crashes {
+        hopper::log!(trace, "current path: {:?}", crash_path);
+        // Report an unreadable file as an error, as `sanitize_crash_by_rip` does,
+        // instead of panicking inside a function that returns `eyre::Result`.
+        let crash_raw = std::fs::read_to_string(crash_path)
+            .with_context(|| format!("fail to read this path : {crash_path:?} to string."))?;
+        let constraint_sigs = extract_violate_constraints(&crash_raw)?;
+        if constraint_sigs.is_empty() {
+            uninfered_crashes.push(crash_path.clone());
+        } else {
+            infered_crashes.push(crash_path.clone());
+        }
+    }
+    save_crashes_to_dir(&infered_crashes, infered_dir)?;
+    save_crashes_to_dir(&uninfered_crashes, uninfered_dir)?;
+    hopper::log!(
+        info,
+        "classify {} crashes into: {} infered and {} uninfered",
+        crashes.len(),
+        infered_crashes.len(),
+        uninfered_crashes.len()
+    );
+    Ok(())
+}
+
+/// group crashed by failed functions.
+fn group_crashes_by_failed_call( + crashes: &Vec, +) -> eyre::Result>> { + let mut grouped_crash: HashMap> = HashMap::new(); + for crash_path in crashes { + hopper::log!(trace, "{:?}", crash_path); + let raw_program = std::fs::read_to_string(crash_path)?; + let program = hopper::read_program(&raw_program, false)?; + let fail_at = program.get_fail_stmt_index(); + if let Some(fail_at) = fail_at { + if let Some(call) = program.get_call_stmt(fail_at.get()) { + let call_name = &call.name; + if let Some(g) = grouped_crash.get_mut(call_name) { + g.push(crash_path.clone()); + } else { + grouped_crash.insert(call_name.clone(), vec![crash_path.clone()]); + } + } else { + hopper::log!(error, "cannot retrieve the failed call at {:?}", fail_at); + } + } else { + hopper::log!( + error, + "cannot find the failed call! crash_path: {:?}", + crash_path + ); + } + } + Ok(grouped_crash) +} + +/// remove the duplicated crashes from crash groups. +fn sanitize_grouped_crashes( + crashes: Vec, + fuzzer: &mut Fuzzer, +) -> eyre::Result> { + hopper::log!(info, "Remove the duplicated crashes from crash groups ..."); + let grouped_crashes = group_crashes_by_failed_call(&crashes)?; + let mut unique_programs: Vec = Vec::new(); + for (group, crashes) in &grouped_crashes { + let mut trace_vec = Vec::new(); + for crash_path in crashes { + let crash_raw = std::fs::read_to_string(crash_path)?; + let mut crash_program = hopper::read_program(&crash_raw, false)?; + crash_program.set_calls_track_cov(false); + // set the fail call's track_cov + for is in crash_program.stmts.iter_mut() { + if let hopper::FuzzStmt::Call(call) = &mut is.stmt { + if &call.name == group { + call.track_cov = true; + } + } + } + let _status = fuzzer.executor.execute_program(&crash_program)?; + let path_list = fuzzer.observer.feedback.path.get_list(); + hopper::log!(trace, "this execution paths: {:?}", path_list); + trace_vec.push((crash_path.clone(), path_list)); + } + // select the max_len trace, if there is other has new 
path as it, maybe is a fresh crash. + trace_vec.sort_by(|a, b| b.1.len().cmp(&a.1.len())); + let max_trace = &trace_vec[0]; + let mut trace_set: HashSet = max_trace.1.iter().map(|x| x.0).collect(); + for (crash_path, trace) in &trace_vec[1..] { + let has_new: Vec<&(usize, hopper::BucketType)> = trace + .iter() + .filter(|x| !trace_set.contains(&x.0)) + .collect(); + hopper::log!(trace, "new_trace: {:?}", has_new); + if !has_new.is_empty() { + has_new.iter().for_each(|x| { + trace_set.insert(x.0); + }); + unique_programs.push(crash_path.clone()); + continue; + } + hopper::log!(warn, "duplicated crashes `{crash_path:?}` found in groups {group}") + } + unique_programs.push(max_trace.0.clone()); + } + let len: usize = grouped_crashes.values().map(|x| x.len()).sum(); + hopper::log!( + info, + "sanitize {} crashes to : {} by grouped sanitize", + len, + unique_programs.len() + ); + Ok(unique_programs) +} + +/// sanitize the crashes caused by variable length arugments. +fn sanitize_variable_args_crash(crashes: Vec) -> eyre::Result> { + hopper::log!(info, "Sanitize the crashes caused by variable length arugments ..."); + let mut sanitized_programs: Vec = Vec::new(); + let len = crashes.len(); + for crash_path in crashes { + let raw_program = std::fs::read_to_string(&crash_path)?; + let program = hopper::read_program(&raw_program, false)?; + if let Some(call) = program.get_fail_call_stmt() { + let type_names = call.fg.arg_types; + let is_variadic = hopper::is_variadic_function(type_names); + if is_variadic { + hopper::log!( + warn, + "a crash caused by variable arguments is filtered out." + ); + continue; + } + } + sanitized_programs.push(crash_path.clone()); + } + hopper::log!( + info, + "sanitize {} crashes to : {} by filtering out the variable argument crashes.", + len, + sanitized_programs.len() + ); + Ok(sanitized_programs) +} + +/// infer the crashes again, mainly aim to sanitize the previous uninfered (opaque pointer) crashes. 
+fn sanitize_uninfered_crashes(crashes: &Vec, fuzzer: &mut Fuzzer) -> eyre::Result<()> { + hopper::log!(info, "Infer the crashes again ..."); + for crash_path in crashes { + let crash_raw = std::fs::read_to_string(crash_path)?; + if !extract_violate_constraints(&crash_raw)?.is_empty() { + continue; + } + let program = hopper::read_program(&crash_raw, false)?; + // exclude those has been refined. + /* + { + let mut p = program.clone(); + let ops = p.refine_program()?; + if !ops.is_empty() { + let mut buf = program.serialize_all()?; + let op_content = format!("Refine by: {}", ops.serialize()?); + buf.push_str(&op_content); + std::fs::write(crash_path, buf)?; + hopper::log!(warn, "{crash_path:?} is refined by :{op_content}."); + return Ok(()); + } + } + */ + let new_constraints = fuzzer.crash_infer(&program).with_context(|| { + format!("update constraint failed: {}", program.serialize().unwrap()) + })?; + if !new_constraints.is_empty() { + let mut sanitize_result = hopper::SanitizeResult::default(); + sanitize_result.add_violated_constraints(&new_constraints)?; + let mut buf = program.serialize_all()?; + buf.push_str(&sanitize_result.to_string()); + std::fs::write(crash_path, buf)?; + hopper::log!(warn, "{:?} uninfered crash is now infered.", crash_path); + } + } + Ok(()) +} + +fn save_crashes_to_dir(crashes: &Vec, save_dir: PathBuf) -> eyre::Result<()> { + for crash_path in crashes { + let basename = crash_path.file_name().unwrap(); + let mut save_path = PathBuf::from(&save_dir); + save_path.push(basename); + let buf = std::fs::read(crash_path)?; + std::fs::write(save_path, buf)?; + } + Ok(()) +} diff --git a/hopper-harness/src/bin/hopper-slice.rs b/hopper-harness/src/bin/hopper-slice.rs new file mode 100644 index 0000000..b1f585b --- /dev/null +++ b/hopper-harness/src/bin/hopper-slice.rs @@ -0,0 +1,44 @@ +use clap::Parser; +use hopper::{effective::*, FuzzStmt, Serialize}; + +/// Hopper - fuzz libraries fully automatically +/// hopper-slice is a tool for debugging 
slice issues +#[derive(Parser, Debug)] +#[clap(name = "hopper-slice")] +#[clap(version = "1.0.0", author = "Tencent")] +pub struct SliceConfig { + /// Input program + #[clap(long, value_parser)] + pub input: String, + + /// Call index + #[clap(long, value_parser)] + pub index: usize, + + /// Argument position + #[clap(long, value_parser)] + pub arg: Option, +} + +fn main() -> eyre::Result<()> { + color_eyre::install()?; + hopper_harness::hopper_extend(); + flexi_logger::Logger::try_with_env_or_str("trace")?.start()?; + let config = SliceConfig::parse(); + log::info!("config: {:?}", config); + // hopper::check_gadgets().unwrap(); + let buf = std::fs::read_to_string(&config.input)?; + let mut program = hopper::read_program(&buf, false)?; + hopper::parse_program_extra(&buf, &mut program)?; + let call_i = config.index; + if let Some(arg_pos) = config.arg { + if let FuzzStmt::Call(call) = &program.stmts[call_i].stmt { + let p = slice_arg(&program, call, call_i, arg_pos)?; + log::info!("sliced: {}", p.serialize().unwrap()); + } + } else { + hopper::Fuzzer::collect_effective_args_in_call(&program, call_i)?; + // log_effective_args(); + } + Ok(()) +} diff --git a/hopper-harness/src/bin/hopper-translate.rs b/hopper-harness/src/bin/hopper-translate.rs new file mode 100644 index 0000000..a4a1344 --- /dev/null +++ b/hopper-harness/src/bin/hopper-translate.rs @@ -0,0 +1,110 @@ +use clap::Parser; +use hopper::Translate; +use std::io::Write; + +/// Hopper - fuzz libraries fully automatically +#[derive(Parser, Debug)] +#[clap(name = "hopper-translate")] +#[clap(version = "1.0.0", author = "Tencent")] +pub struct TranslateConfig { + /// Path of header file of library + #[clap(long, value_parser)] + pub header: String, + + /// Path of the input program file to translate to C + #[clap(long, value_parser)] + pub input: String, + + /// Path of the generated C file; defaults to the input path with `.c` appended + #[clap(long, value_parser)] + pub output: Option<String>, +} + +fn main() -> eyre::Result<()> { + color_eyre::install()?; +
hopper_harness::hopper_extend(); + flexi_logger::Logger::try_with_env_or_str("trace")?.start()?; + let config = TranslateConfig::parse(); + log::info!("config: {:?}", config); + // hopper::check_gadgets().unwrap(); + let buf = std::fs::read_to_string(&config.input)?; + let program = hopper::read_program(&buf, false)?; + let out = program.translate_to_c()?; + let include_header = format!("#include \"{}\"\n", config.header); + let mut out = include_header + &out; + fix_error(&mut out, &config.header)?; + log::info!("{}", out); + let f_name = if let Some(out_f) = config.output { + out_f + } else { + config.input + ".c" + }; + let mut f = std::fs::File::create(&f_name)?; + f.write_all(out.as_bytes())?; + log::info!("please run: gcc -g -I. -L. -lyourlib {}", f_name); + Ok(()) +} + +fn fix_error(code: &mut String, header: &str) -> eyre::Result<()> { + use std::io::BufRead; + static TMP_CODE_FILE: &str = "/tmp/hopper_tmp.c"; + static TMP_OUT_FILE: &str = "/tmp/hopper_tmp.out"; + log::info!("try to fix struct type error.."); + let header_path = std::path::Path::new(header); + std::fs::write(TMP_CODE_FILE, &code)?; + let mut args = vec![ + TMP_CODE_FILE, + "-g", + "-c", + "-I.", + "-o", + TMP_OUT_FILE, + ]; + if let Some(header_dir) = header_path.parent() { + args.push("-I"); + args.push(header_dir.to_str().unwrap()); + if let Some(hh_dir) = header_dir.parent() { + args.push("-I"); + args.push(hh_dir.to_str().unwrap()); + } + } + if let Some(include_search_paths) = option_env!("HOPPER_INCLUDE_SEARCH_PATH") { + let list = include_search_paths.split(':'); + for item in list { + args.push("-I"); + args.push(item); + } + } + let ret = std::process::Command::new("clang") + .args(args) + .output()?; + let mut struct_list = vec![]; + for line in ret.stdout.lines() { + let line = line?; + if line.contains(r"use \xe2\x80\x98struct\xe2\x80\x99 keyword to refer to the") || line.contains(r"unknown type name") { + struct_list.push(get_struct_name(&line)?); + } + } + if 
struct_list.is_empty() { + return Ok(()); + } + for s in struct_list { + *code = code.replace(&format!("{s} "), &format!("struct {s} ")); + log::warn!("replace `{s}` to `struct {s}`"); + } + + Ok(()) +} + +/// Return the text between the first `LEFT_MARK` in `line` and the `RIGHT_MARK` that follows it; fails if either mark is missing. +fn get_struct_name(line: &str) -> eyre::Result<String> { + static LEFT_MARK: &str = r"\xe2\x80\x98"; + static RIGHT_MARK: &str = r"\xe2\x80\x99"; + if let Some(l) = line.find(LEFT_MARK) { + let rest = &line[l + LEFT_MARK.len() ..]; + if let Some(r) = rest.find(RIGHT_MARK) { // search `rest`, so `r` is a valid offset into `rest` + return Ok(rest[..r].to_string()); + } + } + eyre::bail!("Fail to find struct name"); +} \ No newline at end of file diff --git a/hopper-harness/src/lib.rs b/hopper-harness/src/lib.rs new file mode 100644 index 0000000..3a874c3 --- /dev/null +++ b/hopper-harness/src/lib.rs @@ -0,0 +1,9 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +include!(concat!(env!("OUT_DIR"), "/fuzz_extend.rs")); + +pub fn hopper_extend() { + log::debug!("hopper extend loaded"); +} diff --git a/hopper-instrument/e9-mode/README.md b/hopper-instrument/e9-mode/README.md new file mode 100644 index 0000000..726817b --- /dev/null +++ b/hopper-instrument/e9-mode/README.md @@ -0,0 +1,20 @@ + +## E9Patch +- *E9* mode is using [e9patch](https://github.com/GJDuck/e9patch) for instrumentation. You can read its [paper](https://comp.nus.edu.sg/~gregory/papers/e9patch.pdf) and [documentations](https://github.com/GJDuck/e9patch/tree/master/doc). + + +## Our plugins +- `hopper-e9-plugin.cpp`: from [E9AFL](https://github.com/GJDuck/e9afl) for branch coverage collection. +- `hopper-instr-plugin.cpp`: our plugin for tracing cmp instructions. + +## Test e9 plugin + +- Print intermediate content with JSON format.
+``` +/root/hopper/install/e9tool --format='json' -o /root/hopper/testsuite/ctest/libctest_instr.so -M 'plugin("/root/hopper/install/hopper-instr-plugin.so").match()' -P 'plugin("/root/hopper/install/hopper-instr-plugin.so").patch()' -- /root/hopper/testsuite/ctest/libctest.so +``` + +- Run the patch manually. +``` +E9AFL_PATH=/root/hopper/install /root/hopper/install/e9tool -o /root/hopper/testsuite/ctest/output/libctest_cov.so -M 'plugin("/root/hopper/install/hopper-e9-plugin.so").match()' -P 'plugin("/root/hopper/install/hopper-e9-plugin.so").patch()' -M 'plugin("/root/hopper/install/hopper-instr-plugin.so").match()' -P 'plugin("/root/hopper/install/hopper-instr-plugin.so").patch()' -- /root/hopper/testsuite/ctest/libctest.so +``` diff --git a/hopper-instrument/e9-mode/build.sh b/hopper-instrument/e9-mode/build.sh new file mode 100755 index 0000000..26e87c7 --- /dev/null +++ b/hopper-instrument/e9-mode/build.sh @@ -0,0 +1,118 @@ +#!/bin/bash +# +# Copyright (C) 2021 National University of Singapore +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>.
+ +if [ -t 1 ] +then + RED="\033[31m" + GREEN="\033[32m" + YELLOW="\033[33m" + BOLD="\033[1m" + OFF="\033[0m" +else + RED= + GREEN= + YELLOW= + BOLD= + OFF= +fi + +set -e + +VERSION=39238830c3ba90d80b6996f05c872a7f8f042003 + +SOURCE_DIR=$(pwd) +PREFIX=${PREFIX:-install} + +echo "PREFIX: $PREFIX" +echo "PWD: $SOURCE_DIR" +mkdir -p $PREFIX +mkdir -p $PREFIX/tmp + +# STEP (1): install e9patch if necessary: +if [ ! -x $PREFIX/tmp/e9patch-$VERSION/e9patch ] +then + if [ ! -f $PREFIX/tmp/e9patch-$VERSION.zip ] + then + echo -e "${GREEN}$0${OFF}: downloading e9patch-$VERSION.zip..." + wget -O $PREFIX/tmp/e9patch-$VERSION.zip https://github.com/GJDuck/e9patch/archive/$VERSION.zip + fi + echo -e "${GREEN}$0${OFF}: extracting e9patch-$VERSION.zip..." + cd $PREFIX/tmp + unzip -q e9patch-$VERSION.zip + echo -e "${GREEN}$0${OFF}: building e9patch..." + cd e9patch-$VERSION + echo -e "${GREEN}$0${OFF}: patch e9patch..." + patch -p1 <$SOURCE_DIR/e9patch.diff + ./build.sh + cp e9patch ../../ + cp e9tool ../../ + echo -e "${GREEN}$0${OFF}: e9patch has been built..." +else + echo -e "${GREEN}$0${OFF}: using existing e9patch..." +fi + +# STEP (2): build the E9Tool plugin: +# build the E9Tool plugin for ELF: +cd $SOURCE_DIR +echo -e "${GREEN}$0${OFF}: building the hopper plugin..." +echo "g++ -std=c++11 -fPIC -shared -o hopper-e9-plugin.so -O2 hopper-e9-plugin.cpp -I ." 
+g++ -std=c++11 -fPIC -shared -o $PREFIX/hopper-e9-plugin-elf.so -O2 hopper-e9-plugin.cpp \ + -I $PREFIX/tmp/e9patch-$VERSION/src/e9tool/ +strip $PREFIX/hopper-e9-plugin-elf.so +chmod a-x $PREFIX/hopper-e9-plugin-elf.so +# build the E9Tool plugin for PE: +g++ -std=c++11 -fPIC -shared -o $PREFIX/hopper-e9-plugin-pe.so -O2 hopper-e9-plugin.cpp \ + -I $PREFIX/tmp/e9patch-$VERSION/src/e9tool/ -DWINDOWS +strip $PREFIX/hopper-e9-plugin-pe.so +chmod a-x $PREFIX/hopper-e9-plugin-pe.so + +# build cmp plugin +# build cmp plugin for ELF: +g++ -std=c++11 -fPIC -shared -o $PREFIX/hopper-instr-plugin-elf.so -O2 hopper-instr-plugin.cpp \ + -I $PREFIX/tmp/e9patch-$VERSION/src/e9tool/ +strip $PREFIX/hopper-instr-plugin-elf.so +chmod a-x $PREFIX/hopper-instr-plugin-elf.so +# build cmp plugin for PE +g++ -std=c++11 -fPIC -shared -o $PREFIX/hopper-instr-plugin-pe.so -O2 hopper-instr-plugin.cpp \ + -I $PREFIX/tmp/e9patch-$VERSION/src/e9tool/ -DWINDOWS +strip $PREFIX/hopper-instr-plugin-pe.so +chmod a-x $PREFIX/hopper-instr-plugin-pe.so + +# STEP (3): build the runtime: +# build the runtime for ELF +echo -e "${GREEN}$0${OFF}: building the hopper runtime..." 
+echo -e "${PREFIX}/tmp/e9patch-${VERSION}/e9compile.sh hopper-e9-rt.c -I ${PREFIX}/tmp/e9patch-${VERSION}/examples/ \ + -I ${PREFIX}/tmp/e9patch-${VERSION}/src/e9patch/ -DNO_GLIBC=1" +$PREFIX/tmp/e9patch-$VERSION/e9compile.sh hopper-e9-rt.c -I $PREFIX/tmp/e9patch-$VERSION/examples/ \ + -I $PREFIX/tmp/e9patch-$VERSION/src/e9patch/ -DNO_GLIBC=1 +rm hopper-e9-rt.o +chmod a-x hopper-e9-rt +mv hopper-e9-rt $PREFIX/hopper-e9-rt-elf +# build the runtime for PE: +echo -e "${GREEN}$0${OFF}: remember to change HOPPER_PATH_SHMID and HOPPER_INSTR_SHMID in windows.c" +$PREFIX/tmp/e9patch-$VERSION/e9compile.sh hopper-e9-rt.c -I $PREFIX/tmp/e9patch-$VERSION/examples/ \ + -I $PREFIX/tmp/e9patch-$VERSION/src/e9patch/ -DWINDOWS -mabi=ms +rm hopper-e9-rt.o +chmod a-x hopper-e9-rt +mv hopper-e9-rt $PREFIX/hopper-e9-rt-pe + +# STEP (4): build the driver: +# g++ -std=c++11 -fPIC -pie -O2 -o e9hopper e9hopper.cpp +# strip e9hopper + +echo -e "${GREEN}$0${OFF}: done!" +echo diff --git a/hopper-instrument/e9-mode/config.h b/hopper-instrument/e9-mode/config.h new file mode 100644 index 0000000..1932f6a --- /dev/null +++ b/hopper-instrument/e9-mode/config.h @@ -0,0 +1,44 @@ +#ifndef _HAVE_COMMON_CONFIG_H +#define _HAVE_COMMON_CONFIG_H + +#ifndef MAP_SIZE_POW2 +#define MAP_SIZE_POW2 16 +#endif +#define MAP_SIZE ((size_t)1 << MAP_SIZE_POW2) + +// coverage +#ifndef WINDOWS +#define AREA_BASE 0x200000 +#else +#define AREA_BASE 0x47E00000 +#endif +#define AREA_SIZE MAP_SIZE +#define AREA_POINTER ((uint8_t *)AREA_BASE) + +// cmp and mem +#define INSTR_AREA (AREA_BASE + 0x100000) +#define CMP_AREA INSTR_AREA +#define CMP_AREA_SIZE 0x80000 +#define CMP_LIST_SIZE (CMP_AREA_SIZE / 32) +#define MEM_AREA (INSTR_AREA + CMP_AREA_SIZE) +#define MEM_AREA_SIZE 0x30000 +#define MEM_LIST_SIZE (MEM_AREA_SIZE / 24) +#define INSTR_AREA_SIZE (CMP_AREA_SIZE + MEM_AREA_SIZE) +#define INFO_AREA (INSTR_AREA + INSTR_AREA_SIZE) +#define INSTR_INFO_SIZE 64 +#define INSTR_ALL_SIZE (INSTR_AREA_SIZE + INSTR_INFO_SIZE) 
+#define INSTR_AREA_POINTER ((uint64_t *)INSTR_AREA) + +// for canary +#define CANARY_PTR (INSTR_AREA + 0x100000) +#define CANARY_AREA_SIZE 0x100000 + +#ifndef WINDOWS +#define arg1 rdi +#define arg2 rsi +#else +#define arg1 rcx +#define arg2 rdx +#endif + +#endif /* ! _HAVE_DEFS_H */ diff --git a/hopper-instrument/e9-mode/e9patch.diff b/hopper-instrument/e9-mode/e9patch.diff new file mode 100644 index 0000000..cd2d635 --- /dev/null +++ b/hopper-instrument/e9-mode/e9patch.diff @@ -0,0 +1,76 @@ +diff -ruNa ./src/e9patch/e9loader_pe.cpp ./src/e9patch_patch/e9loader_pe.cpp +--- ./src/e9patch/e9loader_pe.cpp 2022-07-06 17:04:31.091502513 +0800 ++++ ./src/e9patch2/e9loader_pe.cpp 2022-07-06 17:06:20.076026514 +0800 +@@ -486,7 +486,7 @@ + kernel32 = (const uint8_t *)entry->DllBase; + else if (e9wcscasecmp(name->Buffer, L"ntdll.dll") == 0) + ntdll = (const uint8_t *)entry->DllBase; +- else if (e9wcscasecmp(name->Buffer, L"user32.dll") == 0) ++ else if (e9wcscasecmp(name->Buffer, L"msvcrt.dll") == 0) + user32 = (const uint8_t *)entry->DllBase; + curr = curr->Flink; + } +@@ -516,8 +516,8 @@ + config->magic[4] != 'T' || config->magic[5] != 'C' || + config->magic[6] != 'H' || config->magic[7] != '\0') + e9error("missing \"E9PATCH\" magic number"); +- if (config->inits != 0x0) +- e9error("custom initialization functions are not-yet-implemented"); ++ // if (config->inits != 0x0) ++ // e9error("custom initialization functions are not-yet-implemented"); + if (config->finis != 0x0) + e9error("custom finalization functions are not-yet-implemented"); + if (config->mmap != 0x0) +@@ -641,7 +641,18 @@ + uint32_t old_prot; + (void)VirtualProtect(base, config->size, PAGE_EXECUTE_READ, &old_prot); + } +- ++ ++ if (config->inits != 0x0) ++ { ++ const intptr_t *inits = (const intptr_t *)(loader_base + config->inits); ++ typedef void (*init_t)(const struct e9_config_s *config); ++ for (uint32_t i = 0; i < config->num_inits; i++) ++ { ++ init_t init = (init_t)(inits[i]+image_base); ++ 
init(config); ++ } ++ } ++ + return entry; + } + +diff -ruNa ./src/e9patch/e9pe.cpp ./src/e9patch2/e9pe.cpp +--- ./src/e9patch/e9pe.cpp 2022-07-06 17:04:31.091502513 +0800 ++++ ./src/e9patch2/e9pe.cpp 2022-07-06 17:06:20.076026514 +0800 +@@ -384,6 +384,16 @@ + memcpy(data + size, e9loader_pe_bin, sizeof(e9loader_pe_bin)); + size += sizeof(e9loader_pe_bin); + ++ config->inits = (B->inits.size() > 0? (uint32_t)(size - config_offset): 0); ++ for (auto init: B->inits) ++ { ++ intptr_t addr = BASE_ADDRESS(init); ++ addr |= (IS_ABSOLUTE(init)? E9_ABS_ADDR: 0); ++ memcpy(data + size, &addr, sizeof(addr)); ++ size += sizeof(addr); ++ config->num_inits++; ++ } ++ + uint32_t loader_virtual_size = (uint32_t)(size - config_offset); + size = ALIGN(size, file_align); + uint32_t loader_disk_size = (uint32_t)(size - config_offset); +@@ -506,9 +516,9 @@ + warning("ignoring `--loader-phdr' option for Windows PE binary"); + if (option_loader_static_set) + warning("ignoring `--loader-static' option for Windows PE binary"); +- if (B->inits.size() > 0) +- error("initialization routines are non-yet-implemented for " +- "Windows PE binaries"); ++ // if (B->inits.size() > 0) ++ // error("initialization routines are non-yet-implemented for " ++ // "Windows PE binaries"); + if (B->finis.size() > 0) + error("finalization routines are non-yet-implemented for " + "Windows PE binaries"); diff --git a/hopper-instrument/e9-mode/hopper-e9-plugin.cpp b/hopper-instrument/e9-mode/hopper-e9-plugin.cpp new file mode 100644 index 0000000..2d162b1 --- /dev/null +++ b/hopper-instrument/e9-mode/hopper-e9-plugin.cpp @@ -0,0 +1,744 @@ +/* + * ___ _ _____ _ + * ___ / _ \ / \ | ___| | + * / _ \ (_) |/ _ \ | |_ | | + * | __/\__, / ___ \| _| | |___ + * \___| /_/_/ \_\_| |_____| + * + * Copyright (C) 2021 National University of Singapore + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software 
Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include "e9plugin.h" +#include "config.h" + +using namespace e9tool; +int num_bb = 0; +int num_bad_bb = 0; +int num_opt_bb = 0; + +/* + * Options. + */ +enum Option +{ + OPTION_NEVER, + OPTION_DEFAULT, + OPTION_ALWAYS +}; +static Option option_debug = OPTION_DEFAULT; +static Option option_instrument = OPTION_DEFAULT; +static Option option_Oselect = OPTION_DEFAULT; +static Option option_Oblock = OPTION_DEFAULT; +unsigned int inst_ratio = 100; +unsigned int map_size_pow2 = MAP_SIZE_POW2; +unsigned int map_size_mask = (1 << map_size_pow2) - 1; + +enum Counter +{ + COUNTER_CLASSIC, + COUNTER_NEVER_ZERO, + COUNTER_SATURATED +}; + +static Option parseOption(const char *str) +{ + if (strcmp(str, "never") == 0) + return OPTION_NEVER; + if (strcmp(str, "default") == 0) + return OPTION_DEFAULT; + if (strcmp(str, "always") == 0) + return OPTION_ALWAYS; + error("bad option value \"%s\"; expected one of {\"never\", \"default\", " + "\"always\"}", str); +} + +static Counter parseCounter(const char *str) +{ + if (strcmp(str, "classic") == 0) + return COUNTER_CLASSIC; + if (strcmp(str, "neverzero") == 0) + return COUNTER_NEVER_ZERO; + if (strcmp(str, "saturated") == 0) + return COUNTER_SATURATED; + error("bad counter value \"%s\"; expected one of {\"classic\", \"neverzero\", " + "\"saturated\"}", str); +} + +/* + * CFG + */ +struct BasicBlock +{ + std::vector preds; // Predecessor BBs + std::vector succs; // Successor BBs + intptr_t instrument = -1; 
// Instrumentation point + int id = -1; // ID + bool optimized = false; // Optimize block? + bool bad = false; // Bad block? +}; +typedef std::map CFG; +#define BB_INDIRECT (-1) + +/* + * Paths + */ +typedef std::map Paths; + +/* + * All instrumentation points. + */ +static std::set instrument; + +/* + * Initialization. + */ +extern void *e9_plugin_init(const Context *cxt) +{ + // Make seed depend on filename. + unsigned seed = 0; + const char *filename = getELFFilename(cxt->elf); + for (int i = 0; filename[i] != '\0'; i++) + seed = 101 * seed + (unsigned)filename[i]; + srand(seed); + + const int32_t stack_adjust = 0x4000; + const int32_t afl_rt_ptr = 0x50000000; + const int32_t afl_area_ptr = AREA_BASE; + +#ifndef WINDOWS + // Reserve memory used by the afl_area_ptr: + sendReserveMessage(cxt->out, afl_area_ptr, AREA_SIZE, /*absolute=*/true); +#endif + + const char *str = nullptr; + std::string option_path("."); + Counter option_counter = COUNTER_CLASSIC; + if ((str = getenv("E9AFL_COUNTER")) != nullptr) + option_counter = parseCounter(str); + if ((str = getenv("E9AFL_DEBUG")) != nullptr) + option_debug = parseOption(str); + if ((str = getenv("E9AFL_INSTRUMENT")) != nullptr) + option_instrument = parseOption(str); + if ((str = getenv("E9AFL_OBLOCK")) != nullptr) + option_Oblock = parseOption(str); + if ((str = getenv("E9AFL_OSELECT")) != nullptr) + option_Oselect = parseOption(str); + if ((str = getenv("E9AFL_PATH")) != nullptr) + option_path = str; + if (option_instrument == OPTION_NEVER) + return nullptr; + if (option_Oblock == OPTION_ALWAYS) + warning("always removing AFL instrumentation for bad blocks; coverage " + "may be incomplete"); + if ((str = getenv("HOPPER_INST_RATIO")) != nullptr) { + sscanf(str, "%u", &inst_ratio); + if (inst_ratio < 0 || inst_ratio > 100) + inst_ratio = 100; + } + if ((str = getenv("HOPPER_MAP_SIZE_POW2")) != nullptr) { + sscanf(str, "%u", &map_size_pow2); + if (map_size_pow2 < 16) map_size_pow2 = 16; + if (map_size_pow2 > 20) 
map_size_pow2 = 20; + map_size_mask = (1 << map_size_pow2) - 1; + } + warning("inst_ratio: %d, map_size_pow2: %d", inst_ratio, map_size_pow2); + + // Send the AFL runtime (if not shared object): + /* + std::string path(option_path); + path += "/hopper-e9-rt"; + const ELF *rt = parseELF(path.c_str(), afl_rt_ptr); + sendELFFileMessage(cxt->out, rt); + */ + + // Send the AFL instrumentation: + // + // Save state: + // + // lea -0x4000(%rsp),%rsp + // push %rax + // seto %al + // lahf + // push %rax + // + std::stringstream code; + code << 0x48 << ',' << 0x8d << ',' << 0xa4 << ',' << 0x24 << ',' + << "{\"int32\":" << -stack_adjust << "},"; + code << 0x50 << ','; + code << 0x0f << ',' << 0x90 << ',' << 0xc0 << ','; + code << 0x9f << ','; + code << 0x50 << ','; + + // AFL instrumentation: +#ifndef WINDOWS + // mov %fs:0x48,%eax // mov prev_loc,%eax + // const unsigned TLS = 0x4c; // tcbhead_t.__glibc_unused1 + // code << 0x64 << ',' << 0x8b << ',' << 0x04 << ',' << 0x25 << ',' + // << TLS << ',' << 0x00 << ',' << 0x00 << ',' << 0x00 << ','; + // code << 0x35 << ',' << "\"$curr_loc\"" << ','; + // movl %ds:0x3b0100, %eax + code << 0x8b << ',' << 0x04 << ',' << 0x25 << ',' << 0x00 << ',' + << 0x01 << ',' << 0x3b << ',' << 0x00 << ','; +#else + //// mov 0x47ff2000,%eax // mov prev_loc,%eax + code << 0x8b << ',' << 0x04 << ',' << 0x25 << ',' + << 0x00 << ',' << 0x20 << ',' << 0xff << ',' << 0x47 << ','; +#endif + // cmp $0xFFFFFFFF, %eax + // je .Lok + // xor $curr_loc,%eax + code << 0x83 << ',' << 0xf8 << ',' << 0xff << ','; + code << 0x74 << ",{\"rel8\":\".Lok\"},"; + code << 0x35 << ',' << "\"$curr_loc\"" << ','; + +#ifndef WINDOWS + // push %rbx + code << 0x53 << ','; + // mov %fs:0x58,%ebx // mov context,%ebx + // code << 0x64 << ',' << 0x8b << ',' << 0x1c << ',' << 0x25 << ',' + // << 0x58 << ',' << 0x00 << ',' << 0x00 << ',' << 0x00 << ','; + // mov %ds:0x3b0110,%ebx // mov context,%ebx + code << 0x8b << ',' << 0x1c << ',' << 0x25 << ',' << 0x10 << ',' + << 0x01 << 
',' << 0x3b << ',' << 0x00 << ','; + // xor %ebx, %eax + code << 0x31 << ',' << 0xd8 << ','; + // pop %rbx + code << 0x5b << ','; +#endif + + switch (option_counter) + { + default: + case COUNTER_CLASSIC: + // incb afl_area_ptr(%eax) + code << 0x67 << ',' << 0xfe << ',' << 0x80 << ',' + << "{\"int32\":" << afl_area_ptr << "},"; + break; + case COUNTER_NEVER_ZERO: + // addb $0x1,afl_area_ptr(%eax) + // adcb $0x0,afl_area_ptr(%eax) + code << 0x67 << ',' << 0x80 << ',' << 0x80 << ',' + << "{\"int32\":" << afl_area_ptr << "}," << 0x01 << ','; + code << 0x67 << ',' << 0x80 << ',' << 0x90 << ',' + << "{\"int32\":" << afl_area_ptr << "}," << 0x00 << ','; + break; + case COUNTER_SATURATED: + // addb $0x1,afl_area_ptr(%eax) + // sbbb $0x0,afl_area_ptr(%eax) + code << 0x67 << ',' << 0x80 << ',' << 0x80 << ',' + << "{\"int32\":" << afl_area_ptr << "}," << 0x01 << ','; + code << 0x67 << ',' << 0x80 << ',' << 0x98 << ',' + << "{\"int32\":" << afl_area_ptr << "}," << 0x00 << ','; + break; + } +#ifndef WINDOWS + // movl $(curr_loc>>1),%fs:0x48 // mov (curr_loc>>1),prev_loc + // code << 0x64 << ',' << 0xc7 << ',' << 0x04 << ',' << 0x25 << ',' + // << 0x48 << ',' << 0x00 << ',' << 0x00 << ',' << 0x00 << ',' + // << "\"$curr_loc_1\"" << ','; + + // movl $(curr_loc>>1),%ds:0x3b0100 // mov (curr_loc>>1),prev_loc + code << 0xc7 << ',' << 0x04 << ',' << 0x25 << ',' + << 0x00 << ',' << 0x01 << ',' << 0x3b << ',' << 0x00 << ',' + << "\"$curr_loc_1\"" << ','; +#else + // movl $(curr_loc>>1),0x47ff2000 // mov (curr_loc>>1),prev_loc + code << 0xc7 << ',' << 0x04 << ',' << 0x25 << ',' + << 0x00 << ',' << 0x20 << ',' << 0xff << ',' << 0x47 << ',' + << "\"$curr_loc_1\"" << ','; +#endif + + // Restore state: + // + // .CLok: + // pop %rax + // add $0x7f,%al + // sahf + // pop %rax + // lea 0x4000(%rsp),%rsp + // + code << "\".Lok\","; + code << 0x58 << ','; + code << 0x04 << ',' << 0x7f << ','; + code << 0x9e << ','; + code << 0x58 << ','; + code << 0x48 << ',' << 0x8d << ',' << 0xa4 << ',' << 
0x24 << ',' + << "{\"int32\":" << stack_adjust << "},"; + + sendTrampolineMessage(cxt->out, "$afl", code.str().c_str()); + + return nullptr; +} + +/* + * Normalize a block address. + */ +static intptr_t normalize(intptr_t addr, const Targets &targets) +{ + if (addr == BB_INDIRECT) + return BB_INDIRECT; + auto i = targets.lower_bound(addr); + if (i == targets.end()) + return BB_INDIRECT; + return i->first; +} + +/* + * Add a predecessor block. + */ +static void addPredecessor(intptr_t pred, intptr_t succ, + const Targets &targets, CFG &cfg) +{ + pred = normalize(pred, targets); + succ = normalize(succ, targets); + auto j = cfg.find(succ); + if (j == cfg.end()) + { + BasicBlock empty; + auto r = cfg.insert({succ, empty}); + j = r.first; + } + j->second.preds.push_back(pred); +} + +/* + * Add a successor block. + */ +static void addSuccessor(intptr_t pred, intptr_t succ, + const Targets &targets, CFG &cfg) +{ + pred = normalize(pred, targets); + succ = normalize(succ, targets); + auto j = cfg.find(pred); + if (j == cfg.end()) + { + BasicBlock empty; + auto r = cfg.insert({pred, empty}); + j = r.first; + } + j->second.succs.push_back(succ); +} + +/* + * Build the CFG from the set of jump targets. 
+ */ +static void buildCFG(const ELF *elf, const Instr *Is, size_t size, + const Targets &targets, CFG &cfg) +{ + for (const auto &entry: targets) + { + intptr_t target = entry.first, bb = target; + TargetKind kind = entry.second; + + size_t i = findInstr(Is, size, target); + if (i >= size) + continue; + + BasicBlock empty; + (void)cfg.insert({bb, empty}); + + if ((kind & TARGET_INDIRECT) != 0) + addPredecessor(BB_INDIRECT, bb, targets, cfg); + + const Instr *I = Is + i; + + for (++i; i < size; i++) + { + InstrInfo info0, *info = &info0; + getInstrInfo(elf, I, info); + bool end = false; + intptr_t target = -1, next = -1; + switch (info->mnemonic) + { + case MNEMONIC_RET: + end = true; + break; + case MNEMONIC_JMP: + end = true; + // Fallthrough: + case MNEMONIC_CALL: + if (info->op[0].type == OPTYPE_IMM) + target = (intptr_t)info->address + + (intptr_t)info->size + (intptr_t)info->op[0].imm; + break; + case MNEMONIC_JO: case MNEMONIC_JNO: case MNEMONIC_JB: + case MNEMONIC_JAE: case MNEMONIC_JE: case MNEMONIC_JNE: + case MNEMONIC_JBE: case MNEMONIC_JA: case MNEMONIC_JS: + case MNEMONIC_JNS: case MNEMONIC_JP: case MNEMONIC_JNP: + case MNEMONIC_JL: case MNEMONIC_JGE: case MNEMONIC_JLE: + case MNEMONIC_JG: + end = true; + next = (intptr_t)info->address + (intptr_t)info->size; + target = next + (intptr_t)info->op[0].imm; + break; + default: + break; + } + if (target > 0x0) + addPredecessor(bb, target, targets, cfg); + if (next > 0x0) + addPredecessor(bb, next, targets, cfg); + if (end) + { + if (target > 0) + addSuccessor(bb, target, targets, cfg); + if (next > 0) + addSuccessor(bb, next, targets, cfg); + if (!(target > 0 || next > 0)) + addSuccessor(bb, BB_INDIRECT, targets, cfg); + break; + } + const Instr *J = I+1; + if (I->address + I->size != J->address) + break; + if (targets.find(J->address) != targets.end()) + { + // Fallthrough: + addPredecessor(bb, J->address, targets, cfg); + addSuccessor(bb, J->address, targets, cfg); + break; + } + I = J; + } + } + + int id 
= 0; + for (auto &entry: cfg) + entry.second.id = id++; +} + +/* + * Attempt to optimize away a bad block. + */ +static void optimizeBlock(CFG &cfg, BasicBlock &bb); +static void optimizePaths(CFG &cfg, BasicBlock *pred_bb, BasicBlock *succ_bb, Paths &paths) +{ + auto i = paths.find(succ_bb); + if (i != paths.end()) + { + // Multiple paths to succ_bb; + BasicBlock *unopt_bb = nullptr; + if (pred_bb != nullptr) + unopt_bb = pred_bb; + else if (i->second != nullptr) + unopt_bb = i->second; + + // Note: (unopt_bb == nullptr) can happen in degenerate cases, e.g.: + // jne .Lnext; .Lnext: ... + if (unopt_bb != nullptr) + { + unopt_bb->optimized = false; + optimizeBlock(cfg, *unopt_bb); + } + return; + } + paths.insert({succ_bb, pred_bb}); + if (succ_bb == nullptr || !succ_bb->optimized) + return; + + pred_bb = succ_bb; + for (auto succ: succ_bb->succs) + { + auto i = cfg.find(succ); + succ_bb = (i == cfg.end()? nullptr: &i->second); + optimizePaths(cfg, pred_bb, succ_bb, paths); + } +} +static void optimizeBlock(CFG &cfg, BasicBlock &bb) +{ + if (bb.optimized) + return; + Paths paths; + for (auto succ: bb.succs) + { + auto i = cfg.find(succ); + BasicBlock *succ_bb = (i == cfg.end()? nullptr: &i->second); + optimizePaths(cfg, nullptr, succ_bb, paths); + } +} + +/* + * Verify the optimization is correct (for debugging). + */ +static void verify(CFG &cfg, intptr_t curr, BasicBlock *bb, + std::set &seen) +{ + for (auto succ: bb->succs) + { + auto i = cfg.find(succ); + BasicBlock *succ_bb = (i == cfg.end()? 
nullptr: &i->second); + if (succ_bb == nullptr) + fprintf(stderr, " BB_%d->indirect", bb->id); + else + fprintf(stderr, " BB_%d->BB_%d", bb->id, + cfg.find(succ)->second.id); + auto r = seen.insert(succ_bb); + if (!r.second) + { + putc('\n', stderr); + error("multiple non-instrumented paths detected"); + } + if (succ_bb != nullptr && succ_bb->optimized) + verify(cfg, succ, succ_bb, seen); + } +} +static void verify(CFG &cfg) +{ + if (option_Oblock == OPTION_ALWAYS) + return; + putc('\n', stderr); + for (auto &entry: cfg) + { + BasicBlock *bb = &entry.second; + if (bb->optimized) + continue; + fprintf(stderr, "\33[32mVERIFY\33[0m BB_%d:", + cfg.find(entry.first)->second.id); + std::set seen; + verify(cfg, entry.first, bb, seen); + putc('\n', stderr); + } + putc('\n', stderr); +} + +/* + * Calculate all instrumentation points. + */ +static void calcInstrumentPoints(const ELF *elf, const Instr *Is, size_t size, + Targets &targets, std::set &instrument) +{ + // Step #1: build the CFG: + CFG cfg; + buildCFG(elf, Is, size, targets, cfg); + + // Step #2: find all instrumentation-points/bad-blocks + for (const auto &entry: targets) + { + intptr_t target = entry.first, bb = target; + TargetKind kind = entry.second; + + size_t i = findInstr(Is, size, target); + if (i >= size) + continue; + const Instr *I = Is + i; + + uint8_t target_size = I->size; + for (++i; option_Oselect != OPTION_NEVER && i < size && + target_size < /*sizeof(jmpq)=*/5; i++) + { + InstrInfo info0, *info = &info0; + getInstrInfo(elf, I, info); + bool end = false; + switch (info->mnemonic) + { + case MNEMONIC_RET: + case MNEMONIC_CALL: + case MNEMONIC_JMP: + case MNEMONIC_JO: case MNEMONIC_JNO: case MNEMONIC_JB: + case MNEMONIC_JAE: case MNEMONIC_JE: case MNEMONIC_JNE: + case MNEMONIC_JBE: case MNEMONIC_JA: case MNEMONIC_JS: + case MNEMONIC_JNS: case MNEMONIC_JP: case MNEMONIC_JNP: + case MNEMONIC_JL: case MNEMONIC_JGE: case MNEMONIC_JLE: + case MNEMONIC_JG: + end = true; + break; + default: + break; + } + 
if (end) + break; + const Instr *J = I+1; + if (I->address + I->size != J->address) + break; + if (targets.find(J->address) != targets.end()) + break; + if (J->size > target_size) + { + target = J->address; + target_size = J->size; + } + I = J; + } + auto j = cfg.find(bb); + assert(j != cfg.end()); + j->second.instrument = target; + j->second.bad = (target_size < /*sizeof(jmpq)=*/5); + switch (option_Oblock) + { + case OPTION_NEVER: + j->second.optimized = false; + break; + case OPTION_DEFAULT: + // To be refined in Step #3 + // j->second.optimized = (j->second.bad && (kind & TARGET_INDIRECT) == 0); + // FIXME: hopper does not optimize the blocks that has multiple preds! + // this can make the branch counting more accurate. + // and optimize blocks with single suc and single pred + if ((kind & TARGET_INDIRECT) == 0 && j->second.preds.size() <= 1) { + if (j->second.succs.size() <= 1 || j->second.bad) { + j->second.optimized = true; + } + } else { + j-> second.optimized = false; + } + break; + case OPTION_ALWAYS: + j->second.optimized = j->second.bad; + break; + } + } + + // Step #3: Optimize away bad blocks: + if (option_Oblock == OPTION_DEFAULT) + for (auto &entry: cfg) + optimizeBlock(cfg, entry.second); + + // Step #4: Collect final instrumentation points. + for (auto &entry: cfg) + { + num_bb += 1; + if (entry.second.bad) { + num_bad_bb +=1; + } + if (entry.second.optimized) { + num_opt_bb += 1; + } else { + instrument.insert(entry.second.instrument); + } + } + + // Setp #5: Print debugging information (if necessary) + for (size_t i = 0; (option_debug == OPTION_ALWAYS) && i < size; i++) + { + InstrInfo I0, *I = &I0; + getInstrInfo(elf, Is + i, I); + + auto j = cfg.find(I->address); + if (j != cfg.end()) + { + fprintf(stderr, "\n# \33[32mBB_%d\33[0m%s%s\n", cfg[I->address].id, + (j->second.bad? " [\33[31mBAD\33[0m]": ""), + (j->second.bad && !j->second.optimized? 
+ " [\33[31mUNOPTIMIZED\33[0m]": "")); + fprintf(stderr, "# preds = "); + int count = 0; + for (auto pred: j->second.preds) + { + if (count++ != 0) + putc(',', stderr); + if (pred == BB_INDIRECT) + { + fprintf(stderr, "indirect"); + continue; + } + auto l = cfg.find(pred); + if (l != cfg.end()) + fprintf(stderr, "BB_%u", l->second.id); + else + fprintf(stderr, "%p", (void *)pred); + } + fprintf(stderr, "\n# succs = "); + count = 0; + for (auto succ: j->second.succs) + { + if (count++ != 0) + putc(',', stderr); + if (succ == BB_INDIRECT) + { + fprintf(stderr, "indirect"); + continue; + } + auto l = cfg.find(succ); + if (l != cfg.end()) + fprintf(stderr, "BB_%u", l->second.id); + else + fprintf(stderr, "%p", (void *)succ); + } + putc('\n', stderr); + } + if (instrument.find(I->address) != instrument.end()) + fprintf(stderr, "%lx: \33[33m%s\33[0m\n", I->address, + I->string.instr); + else + fprintf(stderr, "%lx: %s\n", I->address, I->string.instr); + } + if (option_debug == OPTION_ALWAYS) + verify(cfg); +} + +/* + * Events. + */ +extern void e9_plugin_event(const Context *cxt, Event event) +{ + switch (event) + { + case EVENT_DISASSEMBLY_COMPLETE: + { + Targets targets; + buildTargets(cxt->elf, cxt->Is->data(), cxt->Is->size(), targets); + calcInstrumentPoints(cxt->elf, cxt->Is->data(), cxt->Is->size(), + targets, instrument); + break; + } + case EVENT_PATCHING_COMPLETE: { + e9tool::warning("bb: %d, bad: %d, opt: %d", num_bb, num_bad_bb, num_opt_bb); + break; + } + default: + break; + } +} + +/* + * Matching. Return `true' iff we should instrument this instruction. + */ +extern intptr_t e9_plugin_match(const Context *cxt) +{ + // warning("off: %x, instr: %s", cxt->I->address, cxt->I->string.instr); + if ((rand() % 100) >= inst_ratio) return 0; + return (instrument.find(cxt->I->address) != instrument.end()); +} + +/* + * Patch template. + */ +extern void e9_plugin_code(const Context *cxt) +{ + fputs("\"$afl\",", cxt->out); +} + +/* + * Patching. 
+ */ +extern void e9_plugin_patch(const Context *cxt) +{ + if (instrument.find(cxt->I->address) == instrument.end()) + return; + int32_t curr_loc = rand() & map_size_mask; + // warning("off: %x, curr_loc: %d", cxt->I->address, curr_loc); + fprintf(cxt->out, "\"$curr_loc\":{\"int32\":%d},", curr_loc); + fprintf(cxt->out, "\"$curr_loc_1\":{\"int32\":%d},", curr_loc >> 1); +} diff --git a/hopper-instrument/e9-mode/hopper-e9-rt.c b/hopper-instrument/e9-mode/hopper-e9-rt.c new file mode 100644 index 0000000..24425de --- /dev/null +++ b/hopper-instrument/e9-mode/hopper-e9-rt.c @@ -0,0 +1,331 @@ +// e9loader is under the MIT license. +#include "e9loader.h" +#include "config.h" + +typedef struct __attribute__((__packed__)) MemOperation { + uint64_t addr; + uint32_t id; + uint16_t ty; + uint16_t stmt_index; + uint32_t size; + uint32_t slice; +} MemOperation; + +enum MEM_TYPE { + FREE = 1, + MALLOC, + CALLOC, + REALLOC, + REALLOC_MALLOC, + REALLOC_FREE, + REALLOC_RESIZE, + OPEN = 90, + FDOPEN, + LSEEK, + READ, + WRITE, + CLOSE, +}; + +MemOperation *mem_area_ptr = (MemOperation *)MEM_AREA; +int32_t *mem_offset_ptr = (int32_t *)(INFO_AREA + 4); +const int16_t *stmt_index_ptr = (int16_t *)(INFO_AREA + 8); +int64_t *free_ptr = (int64_t *)(INFO_AREA + 16); +int64_t *malloc_ptr = (int64_t *)(INFO_AREA + 24); +int64_t *calloc_ptr = (int64_t *)(INFO_AREA + 32); +int64_t *realloc_ptr = (int64_t *)(INFO_AREA + 40); + +int32_t get_mem_offset() { + int32_t offset = *mem_offset_ptr; + *mem_offset_ptr = offset == MEM_LIST_SIZE - 1 ? 0 : offset + 1; + return offset; +} + +#ifndef WINDOWS +#include "rt-linux.c" +#define log(msg, ...) \ + print_message(false, "e9afl log: " msg "\n", ##__VA_ARGS__) +#else +#include "rt-win.c" +#define log(msg, ...) 
+#endif + +void *libc_memset(void *dstpp, int c, size_t len) { +#define op_t unsigned int long +#define OPSIZ (sizeof(op_t)) + typedef unsigned char byte; + long int dstp = (long int)dstpp; + + if (len >= 8) { + size_t xlen; + op_t cccc; + + cccc = (byte)c; + cccc |= cccc << 8; + cccc |= cccc << 16; + if (OPSIZ > 4) + /* Do the shift in two steps to avoid warning if long has 32 bits. */ + cccc |= (cccc << 16) << 16; + + /* There are at least some bytes to set. + No need to test for LEN == 0 in this alignment loop. */ + while (dstp % OPSIZ != 0) { + ((byte *)dstp)[0] = c; + dstp += 1; + len -= 1; + } + + /* Write 8 `op_t' per iteration until less than 8 `op_t' remain. */ + xlen = len / (OPSIZ * 8); + while (xlen > 0) { + ((op_t *)dstp)[0] = cccc; + ((op_t *)dstp)[1] = cccc; + ((op_t *)dstp)[2] = cccc; + ((op_t *)dstp)[3] = cccc; + ((op_t *)dstp)[4] = cccc; + ((op_t *)dstp)[5] = cccc; + ((op_t *)dstp)[6] = cccc; + ((op_t *)dstp)[7] = cccc; + dstp += 8 * OPSIZ; + xlen -= 1; + } + len %= OPSIZ * 8; + + /* Write 1 `op_t' per iteration until less than OPSIZ bytes remain. */ + xlen = len / OPSIZ; + while (xlen > 0) { + ((op_t *)dstp)[0] = cccc; + dstp += OPSIZ; + xlen -= 1; + } + len %= OPSIZ; + } + + /* Write the last few bytes. 
*/ + while (len > 0) { + ((byte *)dstp)[0] = c; + dstp += 1; + len -= 1; + } + + return dstpp; +} + +// TODO: mmap / munmap + +void entry_free(uint32_t id, int64_t *arg1) { + int64_t addr = *arg1; + if (addr == 0) return; + int32_t offset = get_mem_offset(); + mem_area_ptr[offset].id = id; + mem_area_ptr[offset].size = 0; + mem_area_ptr[offset].addr = addr; + mem_area_ptr[offset].ty = FREE; + mem_area_ptr[offset].stmt_index = *stmt_index_ptr; + // log("id: %d, free : %p", id, mem_area_ptr[offset].addr); + // these pointers is alloc via mmap + if (addr >= CANARY_PTR && addr <= CANARY_PTR + CANARY_AREA_SIZE) { + *arg1 = 0; + } + // *arg1 = 0; +} + +void entry_malloc(uint32_t id, int64_t arg1) { + int32_t offset = get_mem_offset(); + // log("%d-malloc : %d, %d", offset, id, arg1); + mem_area_ptr[offset].id = id; + mem_area_ptr[offset].size = arg1; + mem_area_ptr[offset].ty = MALLOC; + mem_area_ptr[offset].stmt_index = *stmt_index_ptr; + mem_area_ptr[offset].addr = 0; +} + +void entry_calloc(uint32_t id, int64_t arg1, int64_t arg2) { + int32_t offset = get_mem_offset(); + // log("%d-calloc : %d, %d, %d", offset, id, arg1, arg2); + mem_area_ptr[offset].id = id; + mem_area_ptr[offset].size = arg1 * arg2; + mem_area_ptr[offset].ty = CALLOC; + mem_area_ptr[offset].stmt_index = *stmt_index_ptr; + mem_area_ptr[offset].addr = 0; +} + +void entry_realloc(uint32_t id, int64_t arg1, int64_t arg2) { + // we assmue canary's pointer won't be realloc + int32_t offset = get_mem_offset(); + // log("%d-realloc : %d, %d, %d", offset, id, arg1, arg2); + mem_area_ptr[offset].id = id; + mem_area_ptr[offset].addr = arg1; + mem_area_ptr[offset].size = arg2; + mem_area_ptr[offset].ty = REALLOC; + mem_area_ptr[offset].stmt_index = *stmt_index_ptr; +} + +void entry_indirect(uint32_t id, int64_t addr, int64_t *arg1, int64_t arg2) { + // log("id: %d, indirect, addr: %p, arg1 : %p, arg2: %p", id, addr, *arg1, + // arg2); + if (addr == *free_ptr) { + entry_free(id, arg1); + } else if (addr == 
*malloc_ptr) { + entry_malloc(id, *arg1); + } else if (addr == *calloc_ptr) { + entry_calloc(id, *arg1, arg2); + } else if (addr == *realloc_ptr) { + entry_realloc(id, *arg1, arg2); + } +} + +void exit_malloc(uint32_t id, int64_t rax) { + int32_t offset = *mem_offset_ptr - 1; + // log("%d-malloc-exit : %d, %d", offset, id, rax); + // not thread safe + if (id - mem_area_ptr[offset].id < 8) { + mem_area_ptr[offset].addr = rax; + libc_memset((void *)rax, 0xFA, mem_area_ptr[offset].size); + } +} + +void exit_calloc(uint32_t id, int64_t rax) { + int32_t offset = *mem_offset_ptr - 1; + // log("%d-malloc-exit : %d, %d", offset, id, rax); + // not thread safe + if (id - mem_area_ptr[offset].id < 8) { + mem_area_ptr[offset].addr = rax; + } +} + +void exit_realloc(uint32_t id, int64_t rax) { + int32_t offset = *mem_offset_ptr - 1; + // log("%d-realloc-exit : %d, %d", offset, id, rax); + if (mem_area_ptr[offset].ty != REALLOC) { +#ifndef WINDOWS + error("should be realloc but find %d, id: %d", mem_area_ptr[offset].ty, id); +#endif + } + if (id - mem_area_ptr[offset].id < 8) { + int64_t prev_addr = mem_area_ptr[offset].addr; + if (rax != prev_addr) { + if (prev_addr == 0) { + // like malloc + mem_area_ptr[offset].addr = rax; + mem_area_ptr[offset].ty = REALLOC_MALLOC; + } else if (rax == 0) { + // like free + mem_area_ptr[offset].ty = REALLOC_FREE; + } else { + uint64_t size = mem_area_ptr[offset].size; + // free + mem_area_ptr[offset].size = 0; + mem_area_ptr[offset].ty = REALLOC_FREE; + // malloc + uint64_t id = mem_area_ptr[offset].id; + int32_t offset = get_mem_offset(); + mem_area_ptr[offset].id = id; + mem_area_ptr[offset].addr = rax; + mem_area_ptr[offset].size = size; + mem_area_ptr[offset].ty = REALLOC_MALLOC; + mem_area_ptr[offset].stmt_index = *stmt_index_ptr; + } + } else { + // do nothing, but should update its size + mem_area_ptr[offset].ty = REALLOC_RESIZE; + } + } +} + +void exit_indirect(uint32_t id, int64_t rax) { + int32_t offset = *mem_offset_ptr - 1; + if 
(id - mem_area_ptr[offset].id < 8) { + switch (mem_area_ptr[offset].ty) { + case MALLOC: + exit_malloc(id, rax); + break; + case CALLOC: + exit_calloc(id, rax); + break; + case REALLOC: + exit_realloc(id, rax); + break; + } + } +} + +// Store the last (up to) four characters of the final path component of +// `filename` into the 4-byte buffer `suffix`, zero-padded. Only the first 256 +// bytes of `filename` are scanned for the terminating NUL; when none is found +// `len` stays 0 and the suffix is all zero bytes. +void set_file_name_suffix(char *filename, char *suffix) { + int len = 0; + int start = 0; + for (int i =0; i < 256; i++) { + // if it is '/' + if (filename[i] == 47) start = i + 1; + if (filename[i] == 0) { + len = i; + break; + } + } + if (len > 4 && start < len - 4) start = len - 4; + //printf("file: %s, start: %d, end: %d\n", filename, start, len); + for (int i = 0; i < 4; i++) { + int j = start + i; + // zero-pad short names: the destination record is reused, so bytes + // not written here would keep the previous record's content + suffix[i] = (j < len) ? filename[j] : 0; + } +} + +// FILE *fopen( const char *filename, const char *mode ); +// FILE *fopen( const char *restrict filename, const char *restrict mode ); +// errno_t fopen_s( FILE *restrict *restrict streamptr, const char *restrict +// filename, const char *restrict mode ); +void entry_fopen(uint32_t id, int64_t arg1, int64_t arg2) { + // if (arg1 == 0 || arg2 == 0) { + // return; + // } + int32_t offset = get_mem_offset(); + const char *mode = (const char *)arg2; + // write + int read_mode = 2; + // read + for (int i = 0; i < 4; i++) { + if (mode[i] == 0) break; + if (mode[i] == 'r' || mode[i] == '+') read_mode = 1; + } + mem_area_ptr[offset].id = id; + mem_area_ptr[offset].size = read_mode; + mem_area_ptr[offset].addr = arg1; + mem_area_ptr[offset].ty = OPEN; + mem_area_ptr[offset].stmt_index = *stmt_index_ptr; + // we save the suffix of the filename so that the record stays usable even + // if the filename buffer is copied or modified later in the execution. 
+ set_file_name_suffix((char *)arg1, (char *)&mem_area_ptr[offset].slice); +} + +/* +FILE *freopen( const char *filename, const char *mode, + FILE *stream ); +(until C99) +FILE *freopen( const char *restrict filename, const char *restrict mode, + FILE *restrict stream ); +(since C99) +errno_t freopen_s( FILE *restrict *restrict newstreamptr, + const char *restrict filename, const char *restrict mode, + FILE *restrict stream ); +*/ +// Record a malloc call, forward it to the function stored at *malloc_ptr, +// write the result to *ret and complete the record. +void hook_malloc(uint32_t id, int64_t arg1, int64_t *ret) { + entry_malloc(id, arg1); + void *(*f)(size_t) = (void *(*)(size_t)) * malloc_ptr; + *ret = (int64_t)f((size_t)arg1); + exit_malloc(id, *ret); +} + +// Record a calloc call, forward it to the function stored at *calloc_ptr, +// write the result to *ret and complete the record. +void hook_calloc(uint32_t id, int64_t arg1, int64_t arg2, int64_t *ret) { + entry_calloc(id, arg1, arg2); + void *(*f)(size_t, size_t) = (void *(*)(size_t, size_t)) * calloc_ptr; + *ret = (int64_t)f((size_t)arg1, (size_t)arg2); + exit_calloc(id, *ret); +} + +// Record a realloc call, forward it to the function stored at *realloc_ptr, +// write the result to *ret and complete the record. The call goes through a +// pointer of realloc's real type, void *(void *, size_t). +void hook_realloc(uint32_t id, int64_t arg1, int64_t arg2, int64_t *ret) { + entry_realloc(id, arg1, arg2); + void *(*f)(void *, size_t) = (void *(*)(void *, size_t)) * realloc_ptr; + *ret = (int64_t)f((void *)arg1, (size_t)arg2); + exit_realloc(id, *ret); +} \ No newline at end of file diff --git a/hopper-instrument/e9-mode/hopper-instr-plugin.cpp b/hopper-instrument/e9-mode/hopper-instr-plugin.cpp new file mode 100644 index 0000000..b8f4c6f --- /dev/null +++ b/hopper-instrument/e9-mode/hopper-instr-plugin.cpp @@ -0,0 +1,1008 @@ +/* + * Copyright (C) 2021 National University of Singapore + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and 
this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * NOTE: As a special exception, this file is under the MIT license. The + * rest of the E9Patch/E9Tool source code is under the GPLv3 license. + */ + +/* + * This is an example E9Tool plugin. It implements a limit on control-flow + * transfer instructions such as calls, jumps, and returns. When the limit + * is reached, it will execute the int3 instruction generating a SIGTRAP. + * + * To compile: + * $ g++ -std=c++11 -fPIC -shared -o example.so -O2 \ + * examples/plugins/example.cpp -I . 
-I capstone/include/ + * + * To use: + * $ ./e9tool -M 'plugin(example).match()' \ + * -P 'plugin(example).patch()' program + * $ ./a.out + * Trace/breakpoint trap + */ + +#include + +#include +#include +#include +#include + +#include "config.h" +#include "e9plugin.h" +#include "stdarg.h" + +using namespace e9tool; + +enum CMP_TYPE { + INSTCMP = 1, + STRCMP = 17, + STRNCMP = 18, + MEMCMP = 19, +}; + +#define NUM_REG REGISTER_RIP +#define STACK_FRAME_SIZE 4096 + +static std::map CmpPlt; + +static FILE *log = NULL; +static bool enable_cmp_one_byte = true; +// experimental feature for static data flow tracking +static bool enable_sdft = false; +static std::vector blacklist_ids; +int num_cmp = 0; +int num_bad_cmp = 0; +int num_fn_cmp = 0; +int num_opt1_cmp = 0; +int num_opt2_cmp = 0; +int num_opt3_cmp = 0; +int num_patch_cmp = 0; +int reg_count[NUM_REG] = { 0 }; +bool reg_taint[NUM_REG] = { 0 }; +bool stack_taint[STACK_FRAME_SIZE] = { 0 }; + +static void print_message(bool fatal, const char *msg, ...) { + va_list ap; + va_start(ap, msg); + if (log == NULL) { + log = fopen("/tmp/e9cmp.log", "a"); + if (log != NULL) setvbuf(log, NULL, _IONBF, 0); + } + if (log == NULL) { + if (fatal) abort(); + return; + } + vfprintf(log, msg, ap); + if (fatal) abort(); + va_end(ap); +} + +#define warn(msg, ...) \ + print_message(false, "e9cmp warn: " msg "\n", ##__VA_ARGS__) +#define error(msg, ...) \ + print_message(true, "e9cmp runtime error: " msg "\n", ##__VA_ARGS__) +#define log(msg, ...) 
\ + print_message(false, "e9cmp log: " msg "\n", ##__VA_ARGS__) + +std::string fetch_offset(const int32_t offset_ptr, int entry_size, int32_t max_offset) { + std::stringstream code; + // mov %ds:offset_ptr, %eax + code << 0x8b << ',' << 0x04 << ',' << 0x25 << ',' + << "{\"int32\":" << offset_ptr << "},"; + // and %eax, CMP_AREA_SIZE - 1 + code << 0x25 << ',' << "{\"int32\":" << max_offset << "},"; + // mov %eax %r10d + code << 0x41 << ',' << 0x89 << ',' << 0xc2 << ','; + // add %eax, $0x20 + code << 0x83 << ',' << 0xc0 << ',' << entry_size << ','; + // mov %eax, %ds:offset_ptr + code << 0x89 << ',' << 0x04 << ',' << 0x25 << ',' + << "{\"int32\":" << offset_ptr << "},"; + return code.str(); +} + +/* + * Initialize the counters and the trampoline. + */ +extern void *e9_plugin_init(const Context *cxt) { + // The e9_plugin_init() is called once per plugin by E9Tool. This can + // be used to emit additional E9Patch messages, such as address space + // reservations and trampoline templates. + const int32_t cmp_area_ptr = INSTR_AREA; + const int32_t instr_info_ptr = INFO_AREA; + const int32_t cmp_offset = instr_info_ptr; + + if (getenv("HOPPER_DISABLE_CMP_ONE_BYTE") != nullptr) { + warning("disable cmp one byte!"); + enable_cmp_one_byte = false; + } + if (getenv("HOPPER_ENABLE_SDFT") != nullptr) { + warning("enable static data flow tracking!"); + enable_sdft = true; + } + const char * blacklist = getenv("HOPPER_CMP_BLACKLIST"); + if (blacklist!= nullptr) { + warning("cmp blacklist %s!", blacklist); + char* str = (char*) blacklist; + char *end = str; + while(*end) { + int n = strtol(str, &end, 10); + // printf("%d\n", n); + blacklist_ids.push_back(n); + while (*end == ',') { + end++; + } + str = end; + } + } + +#ifndef WINDOWS + sendReserveMessage(cxt->out, cmp_area_ptr, INSTR_ALL_SIZE, /*absolute=*/true); + sendReserveMessage(cxt->out, CANARY_PTR, CANARY_AREA_SIZE, /*absolute=*/true); +#endif + + /* + * Mext we need to define the trampoline template using a "trampoline" + 
* E9Patch API message. + */ + + // The trampoline template is specified using a form of annotated + // machine code. For more information about the trampoline template + // language, please see e9patch-programming-guide.md + + // Save state: + // + // lea -0x4000(%rsp),%rsp + // push %r10 + // push %rax + // seto %al + // lahf + // push %rax + // + std::stringstream prefix_code; + prefix_code << 0x48 << ',' << 0x8d << ',' << 0xa4 << ',' << 0x24 << ',' << 0x00 + << ',' << 0xc0 << ',' << 0xff << ',' << 0xff << ','; + prefix_code << 0x41 << ',' << 0x52 << ','; + prefix_code << 0x50 << ','; + prefix_code << 0x0f << ',' << 0x90 << ',' << 0xc0 << ','; + prefix_code << 0x9f << ','; + prefix_code << 0x50 << ','; + + // Restore flags and eax first + // pop %rax + // add $0x7f,%al + // sahf + // pop %rax + std::stringstream restore_code; + restore_code << "\".Lok1\","; + restore_code << 0x58 << ','; + restore_code << 0x04 << ',' << 0x7f << ','; + restore_code << 0x9e << ','; + restore_code << 0x58 << ','; + + // Restore state & return from trampoline: + // + // pop %r10 + // lea 0x4000(%rsp),%rsp + // + std::stringstream restore_code2; + restore_code2 << "\".Lok2\","; + restore_code2 << 0x41 << ',' << 0x5a << ','; + restore_code2 << 0x48 << ',' << 0x8d << ',' << 0xa4 << ',' << 0x24 << ',' << 0x00 + << ',' << 0x40 << ',' << 0x00 << ',' << 0x00; + + std::stringstream code; + code << prefix_code.str(); + // mov %ds:offset_ptr, %eax + code << 0x8b << ',' << 0x04 << ',' << 0x25 << ',' + << "{\"int32\":" << cmp_offset << "},"; + // and %eax, CMP_AREA_SIZE - 1 + code << 0x25 << ',' << "{\"int32\":" << CMP_AREA_SIZE - 1 << "},"; + // mov %eax %r10d + code << 0x41 << ',' << 0x89 << ',' << 0xc2 << ','; + // add %eax, $0x20 + code << 0x83 << ',' << 0xc0 << ',' << 0x20 << ','; + // mov %eax, %ds:offset_ptr + code << 0x89 << ',' << 0x04 << ',' << 0x25 << ',' + << "{\"int32\":" << cmp_offset << "},"; + // code << "\"$fill_header\","; + // mov $cmp_id cmp_area + %r10d + 16 + code << 0x67 
<< ',' << 0x41 << ',' << 0xc7 << ',' << 0x82 << ',' + << "{\"int32\":" << cmp_area_ptr + 16 << "}," + << "\"$cmp_id\","; + // mov $cmp_size cmp_area + %r10d + 20 + code << 0x67 << ',' << 0x41 << ',' << 0xc7 << ',' << 0x82 << ',' + << "{\"int32\":" << cmp_area_ptr + 20 << "}," + << "\"$cmp_size\","; + // mov $cmp_ty cmp_area + %r10d + 24 + code << 0x66 << ',' << 0x41 << ',' << 0xc7 << ',' << 0x82 << ',' + << "{\"int32\":" << cmp_area_ptr + 24 << "}," + << "\"$cmp_ty\","; +#ifndef WINDOWS + // mov %fs:0x50,%eax // mov stmt_index,%eax + // code << 0x64 << ',' << 0x8b << ',' << 0x04 << ',' << 0x25 << ',' << 0x50 + // << ',' << 0x00 << ',' << 0x00 << ',' << 0x00 << ','; + + // movl %ds:0x3b0108,%eax // mov stmt_index,%eax + code << 0x8b << ',' << 0x04 << ',' << 0x25 << ',' << 0x08 << ',' << 0x01 + << ',' << 0x3b << ',' << 0x00 << ','; +#else + //mov 0x47ff1000,%eax // mov stmt_index,%eax + code << 0x8b << ',' << 0x04 << ',' << 0x25 << ',' << 0x00 + << ',' << 0x10 << ',' << 0xff << ',' << 0x47 << ','; +#endif + // mov $stmt_index(%eax) cmp_area + %r10d + 26 + code << 0x66 << ',' << 0x41 << ',' << 0x89 << ',' << 0x82 << ',' + << "{\"int32\":" << cmp_area_ptr + 26 << "},"; + code << "\"$mov_operand0\","; + code << restore_code.str(); + code << "\"$mov_operand1\","; + code << "\"$mov_operand2\","; + code << restore_code2.str(); + sendTrampolineMessage(cxt->out, "$trace", code.str().c_str()); + + std::stringstream reg1_inst; + reg1_inst << 0x67 << ',' << "\"$reg1_rex\"," << 0x89 << ',' + << "\"$reg1_mod\"," + << "{\"int32\":" << cmp_area_ptr << "},"; + sendTrampolineMessage(cxt->out, "$reg1_inst", reg1_inst.str().c_str()); + + std::stringstream reg2_inst; + reg2_inst << 0x67 << ',' << "\"$reg2_rex\"," << 0x89 << ',' + << "\"$reg2_mod\"," + << "{\"int32\":" << cmp_area_ptr + 8 << "},"; + sendTrampolineMessage(cxt->out, "$reg2_inst", reg2_inst.str().c_str()); + + std::stringstream imm_inst; + imm_inst << 0x67 << ',' << 0x41 << ',' // imm < 32 bits + << 0xc7 << ',' << 0x82 << 
',' << "\"$imm_disp\"," + << "\"$imm_val\","; + sendTrampolineMessage(cxt->out, "$imm_inst", imm_inst.str().c_str()); + + std::stringstream mem_inst; + // push %r11 + mem_inst << 0x41 << ',' << 0x53 << ','; + // mov %mem %r11 + mem_inst << "\"$mem_prefix\"," + << "\"$mem_rex\"," << "\"$mem_mov1\"," << "\"$mem_mod\"," + << "\"$mem_sib\"," + << "\"$mem_disp\","; + // mov %r11 %cmp_area(%r10d) + mem_inst << "\"$mem_prefix\"," + << "\"$mem_rex2\"," << "\"$mem_mov2\"," << 0x9a << "," + << "\"$mem_dst\","; + // pop %r11 + mem_inst << 0x41 << ',' << 0x5b << ','; + sendTrampolineMessage(cxt->out, "$mem_inst", mem_inst.str().c_str()); + + // Operand templates for the compare functions: the same stream is sent + // three times, each time extended by one more store. + std::stringstream args_operand; + // test %rdi %rdi + args_operand << 0x48 << ',' << 0x85 << ',' << 0xff << ','; + // je .Lok3 (skip the load when %rdi is zero) + args_operand << 0x74 << ",{\"rel8\":\".Lok3\"},"; + // mov [%rdi] %ax + args_operand << 0x66 << ',' << 0x8b << ',' << 0x07 << ','; + // mov %ax %cmp_area + %r10d + 28 (move its value to state(2bytes)) + args_operand << 0x66 << ',' << 0x41 << ',' << 0x89 << ',' << 0x82 << ',' + << "{\"int32\":" << cmp_area_ptr + 28 << "},"; + args_operand << "\".Lok3\","; + // test %rsi %rsi + args_operand << 0x48 << ',' << 0x85 << ',' << 0xf6 << ','; + // je .Lok4 (skip the load when %rsi is zero) + args_operand << 0x74 << ",{\"rel8\":\".Lok4\"},"; + // mov [%rsi] %ax + args_operand << 0x66 << ',' << 0x8b << ',' << 0x06 << ','; + // mov %ax %cmp_area + %r10d + 30 (move its value to state(2bytes)) + args_operand << 0x66 << ',' << 0x41 << ',' << 0x89 << ',' << 0x82 << ',' + << "{\"int32\":" << cmp_area_ptr + 30 << "},"; + args_operand << "\".Lok4\","; + + // mov %rdi as operand1 + args_operand << 0x67 << ',' << 0x49 << ',' << 0x89 << ',' << 0xba << ',' + << "{\"int32\":" << cmp_area_ptr << "},"; + sendTrampolineMessage(cxt->out, "$one_arg_operand", + args_operand.str().c_str()); + + // mov %rsi as operand2 + args_operand << 0x67 << ',' << 0x49 << ',' << 0x89 << ',' << 0xb2 << ',' + << "{\"int32\":" << cmp_area_ptr + 8 << "},"; + + sendTrampolineMessage(cxt->out, 
"$two_arg_operand", + args_operand.str().c_str()); + + // mov %edx %cmp_area(%r10d) + 20 (cmp_size) + args_operand << 0x67 << ',' << 0x41 << ',' << 0x89 << ',' << 0x92 << ',' + << "{\"int32\":" << cmp_area_ptr + 20 << "},"; + sendTrampolineMessage(cxt->out, "$three_arg_operand", args_operand.str().c_str()); + return nullptr; +} + +// find addr in plt table +void find_addr_in_plt(const ELF *elf, const char *name, CMP_TYPE type) { + intptr_t addr = getELFPLTEntry(elf, name); + if (addr != INTPTR_MIN) { + log("find function %s at address: %#010x", name, addr); + CmpPlt.insert({addr, type}); + } else { + warn("Can't find function %s at plt!", name); + } +} + +// find addr in sym info +void find_addr_in_sym(const ELF *elf, const char *name, CMP_TYPE type) { + SymbolInfo sym = getELFSymInfo(elf); + for (auto iter = sym.begin(); iter != sym.end(); ++iter){ + if(!(strcmp(name,iter->first))){ + log("find function %s at address: %#010x", name, iter->second->st_value); + CmpPlt.insert({iter->second->st_value, type}); + return; + } + } + warn("Can't find function %s at sym!", name); +} + +#ifndef WINDOWS +#define FIND_ADDR find_addr_in_plt +#else +#define FIND_ADDR find_addr_in_sym +#endif + +/* + * Events. 
+ */ +extern void e9_plugin_event(const Context *cxt, Event event) { + switch (event) { + case EVENT_DISASSEMBLY_COMPLETE: { + const ELF *elf = cxt->elf; + const PLTInfo info = getELFPLTInfo(elf); + for (auto iter = info.begin(); iter != info.end(); ++iter) { + log("plt func : %s, %p", iter->first, iter->second); + } + FIND_ADDR(elf, "strcmp", STRCMP); + FIND_ADDR(elf, "strncmp", STRNCMP); + FIND_ADDR(elf, "memcmp", MEMCMP); + break; + } + // case EVENT_MATCHING_COMPLETE: { + case EVENT_PATCHING_COMPLETE: { + e9tool::warning("match cmp: %d (all: %d, bad: %d, opt: (%d, %d, %d)), fn: %d", num_patch_cmp, num_cmp, num_bad_cmp, num_opt1_cmp, num_opt2_cmp, num_opt3_cmp, num_fn_cmp); + /* + for (int i = 0; i < NUM_REG; i++) { + if (reg_count[i]> 0) + e9tool::warning("reg: %d, cnt: %d", i, reg_count[i]); + } + */ + break; + } + default: + break; + } +} + +int32_t inst_operand_size(const Context *cxt) { + int32_t operand_size = -1; + Register reg = cxt->I->regs.read[0]; + if (reg == REGISTER_NONE || reg == REGISTER_INVALID) { + } else if (reg <= REGISTER_R15B) + operand_size = 1; + else if (reg <= REGISTER_R15W) + operand_size = 2; + else if (reg <= REGISTER_EIP) + operand_size = 4; + else + operand_size = 8; + + if (operand_size < 0) { + operand_size = cxt->I->encoding.size.imm; + } + return operand_size; +} + +intptr_t call_target(const InstrInfo *info) { + return (intptr_t)info->address + (intptr_t)info->size + + (intptr_t)info->op[0].imm; +} + +bool is_r10(Register reg) { + return reg == REGISTER_R10 || reg == REGISTER_R10B || reg == REGISTER_R10W || + reg == REGISTER_R10D; +} + +bool is_r11(Register reg) { + return reg == REGISTER_R11 || reg == REGISTER_R11B || reg == REGISTER_R11W || + reg == REGISTER_R11D; +} + +bool is_rip(Register reg) { + return reg == REGISTER_IP || reg == REGISTER_EIP || reg == REGISTER_RIP; +} + +bool is_ebp(Register reg) { + return reg == REGISTER_RBP || reg == REGISTER_EBP; +} + +bool is_valid_reg(Register reg) { + return reg != REGISTER_NONE 
&& reg != REGISTER_EFLAGS && reg < NUM_REG; +} + +int get_mem_off(MemOpInfo &mem_info) { + int off = mem_info.disp; + if (off < 0) off = 0 - off; + return off; +} + +void set_reg_taint(Register reg, bool flag) { + reg_taint[reg] = flag; + if (reg == REGISTER_RIP || reg == REGISTER_EIP || reg == REGISTER_IP || reg == REGISTER_EFLAGS) { + return; + } + if (reg >= REGISTER_EAX) { + reg_taint[reg - REGISTER_EAX + REGISTER_AX] = flag; + } + if (reg >= REGISTER_AX) { + reg_taint[reg - REGISTER_AX + REGISTER_AL] = flag; + if (reg <= REGISTER_BX) { + reg_taint[reg - REGISTER_AX + REGISTER_AH] = flag; + } + } +} +/* + * We match all control-flow transfer instructions. + */ +extern intptr_t e9_plugin_match(const Context *cxt) { + // The e9_plugin_match() function is invoked once by E9Tool for each + // disassembled instruction. The function should return a value that is + // used for matching. + const InstrInfo* I = cxt->I; + // e9tool::warning("%s", I->string.instr); + switch (cxt->I->mnemonic) { + /// simple intraprocedural data flow tracking + case MNEMONIC_MOV: + case MNEMONIC_MOVQ: + case MNEMONIC_MOVZX: + // case MNEMONIC_XCHG: + { + if (!enable_sdft) return 0; + OpInfo src_op = I->op[0]; + OpInfo dst_op = I->op[1]; + Register src_reg = src_op.reg; + Register dst_reg = dst_op.reg; + // e9tool::warning("%s, #reg: %d, reg0: %d, reg1: %d", I->string.instr, I->count.op, src_reg, dst_reg); + // e9tool::warning("info: %d %d", op0_info.type, op1_info.type); + if (dst_op.type == OPTYPE_REG && is_valid_reg(dst_reg)) { + // reg's taint is clear by imm + if (src_op.type == OPTYPE_IMM) { + set_reg_taint(dst_reg, false); + e9tool::debug("** clear reg %d by imm", dst_reg); + } else if (src_op.type == OPTYPE_REG) { + Register src_reg = src_op.reg; + if (is_valid_reg(src_reg)) { + set_reg_taint(dst_reg, reg_taint[src_reg]); + } else { + // assume the other regs is untained + set_reg_taint(dst_reg, false); + e9tool::debug("** clear reg %d by reg %d", dst_reg, src_reg); + } + } else { 
// MEM + MemOpInfo mem_info = src_op.mem; + int off = get_mem_off(mem_info); + // only consider local memory in stack + if (is_ebp(mem_info.base) && off < STACK_FRAME_SIZE) { + set_reg_taint(dst_reg, stack_taint[off]); + } else { + // assume the others is tainted. + set_reg_taint(dst_reg, true); + } + } + } + if (dst_op.type == OPTYPE_MEM) { + MemOpInfo mem_info = dst_op.mem; + int off = get_mem_off(mem_info); + if (is_ebp(mem_info.base) && off < STACK_FRAME_SIZE) { + if (src_op.type == OPTYPE_IMM) { + stack_taint[off] = false; + e9tool::debug("** clear stack offset: %d", off); + } else if (src_op.type == OPTYPE_REG) { + if (is_valid_reg(src_reg)) { + stack_taint[off] = reg_taint[src_reg]; + } else { + // assme the other regs is untained + stack_taint[off] = false; + e9tool::debug("** clear stack %d by reg %d", off, src_reg); + } + } else { + stack_taint[off] = true; + e9tool::warning("** unknown instr: %s", I->string.instr); + } + } + } + return 0; + } + // clear any taint once meet binary mathmatical operations. 
+ // clear taint since it will modify values + case MNEMONIC_ADC: + case MNEMONIC_ADD: + case MNEMONIC_XADD: + case MNEMONIC_SUB: + case MNEMONIC_SBB: + case MNEMONIC_OR: + case MNEMONIC_AND: + case MNEMONIC_LEA: + case MNEMONIC_SAR: + case MNEMONIC_SARX: + case MNEMONIC_SHR: + case MNEMONIC_SHRX: + case MNEMONIC_XOR: { + if (!enable_sdft) return 0; + if (I->count.op != 2) { + e9tool::warning("unknown binary op: %s, #reg: %d, reg0: %d, reg1: %d", I->string.instr, I->count.op, I->op[0].reg, I->op[1].reg); + } + OpInfo dst_op = I->op[1]; + Register dst_reg = dst_op.reg; + if (is_valid_reg(dst_reg)) { + set_reg_taint(dst_reg, false); + e9tool::debug("** clear reg %d by binary op", dst_reg); + } + return 0; + } + + case MNEMONIC_DEC: + case MNEMONIC_INC: { + if (!enable_sdft) return 0; + if (I->count.op != 1) { + e9tool::warning("unknown binary op: %s, #reg: %d, reg0: %d, reg1: %d", I->string.instr, I->count.op, I->op[0].reg, I->op[1].reg); + } + OpInfo dst_op = I->op[0]; + Register dst_reg = dst_op.reg; + if (is_valid_reg(dst_reg)) { + set_reg_taint(dst_reg, false); + e9tool::debug("** clear reg %d by binary op", dst_reg); + } + return 0; + } + // clear eax's taint once meet unitary mathmatical operations. 
+ case MNEMONIC_DIV: + case MNEMONIC_IDIV: + case MNEMONIC_MUL: + case MNEMONIC_IMUL: { + if (!enable_sdft) return 0; + int num_op = I->count.op; + // e9tool::warning("%s, #reg: %d, reg0: %d, reg1: %d", I->string.instr, num_op, I->op[0].reg, I->op[1].reg); + OpInfo dst_op = I->op[1]; + if (num_op == 1) { + // if (dst_op.reg != REGISTER_RAX && dst_op.reg != REGISTER_EAX && dst_op.reg != REGISTER_AX) { + // e9tool::warning("unknown binary op: %s, #reg: %d, reg0: %d, reg1: %d", I->string.instr, I->count.op, I->op[0].reg, I->op[1].reg); + // } + } else if (num_op == 2) { + dst_op = I->op[1]; + } else if (num_op == 3) { + dst_op = I->op[2]; + } else { + e9tool::warning("** unknown instr: %s", I->string.instr); + return 0; + } + Register dst_reg = dst_op.reg; + if (is_valid_reg(dst_reg)) { + set_reg_taint(dst_reg, false); + e9tool::debug("** clear reg %d by mul/div", dst_reg); + } + return 0; + } + case MNEMONIC_PUSH: { + if (!enable_sdft) return 0; + // begin of function (may be) + // Dyninst's trick, 0x55 (EBP/RBP) for elf x86 + // FIXME: stupid: since we do not consider any control flow + Register reg0 = I->op[0].reg; + if (is_ebp(reg0)) { + for (int i = 0; i < NUM_REG; i++) + reg_taint[i] = true; + for (int i = 0; i < STACK_FRAME_SIZE; i++) + stack_taint[i] = true; + } + return 0; + } + case MNEMONIC_POP: { + return 0; + } + case MNEMONIC_CMP: { + // e9tool::warning("cmp: %s", I->string.instr); + int32_t operand_size = I->op[0].size; + if (!enable_cmp_one_byte && operand_size == 1) { + log("ignore instruction for compare one byte: %s", I->string.instr); + return 0; + } + bool has_imm = I->hasIMM(); + int64_t imm = I->getIMM(); + if (operand_size == 0 || (has_imm && (imm == 0 || imm == 1 || imm < INT32_MIN || imm > INT32_MAX))) { + log("ignore instruction with useless imm: %s", I->string.instr); + return 0; + } + // Ignore cmp uses r10 or r11\rip + int taint_cnt = 0; + bool reg_valid = true; + for (int i = 0; i < 2; i++) { + OpInfo op_info = I->op[i]; + if 
(op_info.type == OPTYPE_REG) { + Register reg = op_info.reg; + if (!is_valid_reg(reg) || is_rip(reg) || is_r10(reg)) { + reg_valid = false; + continue; + } + reg_count[reg] += 1; + if (reg_taint[reg]) { + taint_cnt += 1; + } + } else if (op_info.type == OPTYPE_IMM) { + if (!I->hasIMM()) { + e9tool::warning("op is imm but does not has IMM: %s ", I->string.instr); + return 0; + } + } else if (op_info.type == OPTYPE_MEM) { + MemOpInfo mem_info = op_info.mem; + reg_count[mem_info.base] += 1; + reg_count[mem_info.index] += 1; + if (is_r10(mem_info.base) || is_r10(mem_info.index) || + is_r11(mem_info.base) || is_r11(mem_info.index) || is_rip(mem_info.base) || is_rip(mem_info.index)) { + reg_valid = false; + continue; + } + if (is_ebp(mem_info.base)) { + int off = get_mem_off(mem_info); + // skip if stack is not tainted + if (off < STACK_FRAME_SIZE && !stack_taint[off]) { + continue; + } + } + // assume memory has taint + taint_cnt += 1; + } else { + return 0; + } + } + num_cmp += 1; + if (I->size < 5) { + e9tool::debug("cmp size < 5"); + num_bad_cmp += 1; + } + if (!reg_valid) { + e9tool::debug("** ignore reg invalid cmp: %s", I->string.instr); + num_opt1_cmp += 1; + return 0; + } + if (enable_sdft && taint_cnt == 0) { + e9tool::debug("** ignore no taint cmp: %s", I->string.instr); + num_opt2_cmp += 1; + return 0; + } + int32_t id = I->offset; + if (std::find(blacklist_ids.begin(), blacklist_ids.end(), id) != blacklist_ids.end() ) { + num_opt3_cmp += 1; + return 0; + } + num_patch_cmp += 1; + return INSTCMP; + } + case MNEMONIC_CALL: { + if (I->op[0].type == OPTYPE_IMM) { + intptr_t target = call_target(I); + auto f = CmpPlt.find(target); + if (f != CmpPlt.end()) { + // fprintf(stderr, "the function is %d type", ty); + int32_t id = cxt->I->offset; + if (std::find(blacklist_ids.begin(), blacklist_ids.end(), id) != blacklist_ids.end() ) { + return 0; + } + num_fn_cmp += 1; + return f->second; + } + } + return 0; + } + // SIMD, Packed Double-Precision Floating-Point Values 
+ case MNEMONIC_CMPPD: + case MNEMONIC_CMPPS: + return 0; + // Compare String Operands + case MNEMONIC_CMPSB: + case MNEMONIC_CMPSD: + case MNEMONIC_CMPSQ: + case MNEMONIC_CMPSS: + case MNEMONIC_CMPSW: + return 0; + // MNEMONIC_CMPXCHG, + // MNEMONIC_CMPXCHG16B, + // MNEMONIC_CMPXCHG8B, + case MNEMONIC_COMISD: + case MNEMONIC_COMISS: + return 0; + default: + return 0; + } +} + +/* + * Emit the patch template code. + */ +extern void e9_plugin_code(const Context *cxt) { + // The e9_plugin_code() function is invoked once by E9tool. + // The function specifies the "code" part of the trampoline template that + // will be executed for each matching instruction.` + fputs("\"$trace\",", cxt->out); +} + +/* + * Emit the patch template data. + */ +extern void e9_plugin_data(const Context *cxt) +{ + // The e9_plugin_code() function is invoked once by E9tool. + // The function specifies the "data" part of the trampoline template that + // will be attached to each matching instruction. + + // In this example, there is no data so this function does nothing. + // The function could also be removed from the plugin. 
+} + +// get register reference for mod +uint8_t get_reg_mod(Register reg) { + switch (reg) { + case REGISTER_AH: + case REGISTER_AL: + case REGISTER_AX: + case REGISTER_EAX: + case REGISTER_RAX: + case REGISTER_R8B: + case REGISTER_R8W: + case REGISTER_R8D: + case REGISTER_R8: + return 0; + case REGISTER_CH: + case REGISTER_CL: + case REGISTER_CX: + case REGISTER_ECX: + case REGISTER_RCX: + case REGISTER_R9B: + case REGISTER_R9W: + case REGISTER_R9D: + case REGISTER_R9: + return 1; + case REGISTER_DH: + case REGISTER_DL: + case REGISTER_DX: + case REGISTER_EDX: + case REGISTER_RDX: + case REGISTER_R10B: + case REGISTER_R10W: + case REGISTER_R10D: + case REGISTER_R10: + return 2; + case REGISTER_BH: + case REGISTER_BL: + case REGISTER_BX: + case REGISTER_EBX: + case REGISTER_RBX: + case REGISTER_R11B: + case REGISTER_R11W: + case REGISTER_R11D: + case REGISTER_R11: + return 3; + case REGISTER_SPL: + case REGISTER_SP: + case REGISTER_ESP: + case REGISTER_RSP: + case REGISTER_R12B: + case REGISTER_R12W: + case REGISTER_R12D: + case REGISTER_R12: + return 4; + case REGISTER_BPL: + case REGISTER_BP: + case REGISTER_EBP: + case REGISTER_RBP: + case REGISTER_R13B: + case REGISTER_R13W: + case REGISTER_R13D: + case REGISTER_R13: + return 5; + case REGISTER_SIL: + case REGISTER_SI: + case REGISTER_ESI: + case REGISTER_RSI: + case REGISTER_R14B: + case REGISTER_R14W: + case REGISTER_R14D: + case REGISTER_R14: + return 6; + case REGISTER_DIL: + case REGISTER_DI: + case REGISTER_EDI: + case REGISTER_RDI: + case REGISTER_R15B: + case REGISTER_R15W: + case REGISTER_R15D: + case REGISTER_R15: + return 7; + } + return 0; +} + +/* + * Patch the selected instructions. + */ +extern void e9_plugin_patch(const Context *cxt) { + // The e9_plugin_patch() function is invoked by E9Tool once per + // matching instruciton. The function specifies the "metadata" which + // instantiates any macros in the trampoline template (both code or data). 
+ // The metadata is specified in as comma-seperated "$key":VALUE pairs, + // where $key is a macro name and VALUE is a value in the template + // template format. + // + // https://gist.github.com/mikesmullin/6259449 + // [PREFIX] [OP] [MOD-REG] [SIB] [DISP] [IMM] + // [REX Prefix] : 0b0100 WRXB + // W=1: 64-bit operand size, + // R/X/B=1: map registers R8-R15 in MODRM.(R)eg / SIB.inde(X) / MODRM.rm + // and SIB.(B)ase + // [Mod-Reg R/M] : + // 2-bits (0-4) : MODRM.mod + // 3-bits (0-7) : MODRM.reg (reg/opcode) + // 3-bits (0-7) : MODRM.rm (register/memory) + // The Memory Address Operand: Mod-Reg R/M + SIB + Displacement(Optional) + // Scale-Index-Base (SIB): Scale(2bit) - Index(3bit) - Base(3bit) + // Real Address = Segment + SIB.base + (SIB.index × SIB.scale) + + // Displacement + // r10: 010, r11: 011 + + // intptr_t kind = e9_plugin_match(cxt); + int32_t id = cxt->I->offset; //++hopper_cmp_counter; + if (cxt->I->mnemonic == MNEMONIC_CMP) { + int16_t operand_size = cxt->I->op[0].size; + uint8_t rex = cxt->I->getREX(); + uint8_t mod = cxt->I->getMODRM(); + log("cmp id: %d, addr: %x, op %s, size: %d, regs: %d(%d) %d(%d) rex: 0x%02X, mod: " + "0x%02X, sib: 0x%02X, disp: %#010x, imm: %#010x", + id, cxt->I->address, cxt->I->string.instr, operand_size, cxt->I->regs.read[0], + cxt->I->op[0].type, cxt->I->regs.read[1], cxt->I->op[1].type, rex, mod, + cxt->I->getSIB(), cxt->I->getDISP(), cxt->I->getIMM()); + + fprintf(cxt->out, "\"$cmp_id\":{\"int32\":%d},", id); + fprintf(cxt->out, "\"$cmp_size\":{\"int32\":%d},", operand_size); + fprintf(cxt->out, "\"$cmp_ty\":{\"int16\":%d},", INSTCMP); + fprintf(cxt->out, "\"$mov_operand0\":null,"); + + int num_reg = 0; + // regs + for (int i = 0; i < 2; i++) { + OpInfo op_info = cxt->I->op[i]; + int arg_i = i + 1; + int32_t operand_addr = INSTR_AREA + 8 * i; + if (op_info.type == OPTYPE_REG) { + Register reg = op_info.reg; + num_reg++; + if (reg == REGISTER_NONE || reg == REGISTER_INVALID || is_r10(reg)) { + log("ignore r10, fill 
empty for %d-th arg", arg_i); + fprintf(cxt->out, "\"$mov_operand%d\":null,", arg_i); + continue; + } + log("use reg %d as %d-th arg", reg, arg_i); + fprintf(cxt->out, "\"$mov_operand%d\":\"$reg%d_inst\",", arg_i, arg_i); + uint8_t new_rex = 0x41; // base is r10 + if (reg >= REGISTER_RAX) { + // W field is enable: 64bit + new_rex |= 0x8; + } + if ((reg >= REGISTER_R8B && reg <= REGISTER_R15B) || (reg >= REGISTER_R8W && reg <= REGISTER_R15W) + || (reg >= REGISTER_R8D && reg <= REGISTER_R15D) || (reg >= REGISTER_R8 && reg <= REGISTER_R15)) { + // R field is enable, to map registers R8-R15 + new_rex |= 0x4; + } + fprintf(cxt->out, "\"$reg%d_rex\":%d,", arg_i, new_rex); + log("rex:0x%02X, new_rex: 0x%02X", rex, new_rex); + uint8_t new_mod = (get_reg_mod(reg) << 3) | 0x82; + log("mode: 0x%02X, new_mod: 0x%02X", mod, new_mod); + fprintf(cxt->out, "\"$reg%d_mod\":%d,", arg_i, new_mod); + } else if (op_info.type == OPTYPE_IMM && cxt->I->hasIMM()) { + int64_t imm = cxt->I->getIMM(); + log("use imm %lld as %d-th arg", imm, arg_i); + fprintf(cxt->out, "\"$mov_operand%d\":\"$imm_inst\",", arg_i); + fprintf(cxt->out, "\"$imm_disp\":{\"int32\":%d},", operand_addr); + fprintf(cxt->out, "\"$imm_val\":{\"int32\":%ld},", imm); + } else if (op_info.type == OPTYPE_MEM) { + // check r10, r11 + MemOpInfo mem_info = op_info.mem; + if (is_r10(mem_info.base) || is_r10(mem_info.index) || + is_r11(mem_info.base) || is_r11(mem_info.index)) { + log("ignore r10 or r11, fill empty for %d-th arg", arg_i); + fprintf(cxt->out, "\"$mov_operand%d\":null,", arg_i); + continue; + } + if (is_rip(mem_info.base) || is_rip(mem_info.index)) { + log("ignore rip, fill empty for %d-th arg", arg_i); + fprintf(cxt->out, "\"$mov_operand%d\":null,", arg_i); + continue; + } + log("fill mem for %d-th arg", arg_i); + fprintf(cxt->out, "\"$mov_operand%d\":\"$mem_inst\",", arg_i); + int rex_offset = cxt->I->encoding.offset.rex; + // WORD PTR requires a legacy prefix of 0x66 + if (op_info.size == 2) { + 
fprintf(cxt->out, "\"$mem_prefix\":%d,", 0x66); + } else { + fprintf(cxt->out, "\"$mem_prefix\":null,"); + } + // rex should support r11 + uint8_t rex = cxt->I->getREX() | 0x44 ; + // QWORD PTR requires REX.W enabled + if (op_info.size == 8) { + rex |= 0x48; + } + fprintf(cxt->out, "\"$mem_rex\":%d,", rex); + fprintf(cxt->out, "\"$mem_rex2\":%d,", rex | 0x41); + if (op_info.size == 1) { + // 0x8a for mov BYTE PTR + fprintf(cxt->out, "\"$mem_mov1\":%d,", 0x8a); + fprintf(cxt->out, "\"$mem_mov2\":%d,", 0x88); + } else { + // 0x8b for others + fprintf(cxt->out, "\"$mem_mov1\":%d,", 0x8b); + fprintf(cxt->out, "\"$mem_mov2\":%d,", 0x89); + } + // change reg to r11 + // memory bits in mod must in r/m (last 3 bits) + uint8_t new_mod = (mod & 0xC7) | 0x18; + fprintf(cxt->out, "\"$mem_mod\":%d,", new_mod); + if (cxt->I->hasSIB()) + fprintf(cxt->out, "\"$mem_sib\":%d,", cxt->I->getSIB()); + else + fprintf(cxt->out, "\"$mem_sib\":null,"); + int disp_size = cxt->I->encoding.size.disp; + int32_t disp = cxt->I->getDISP(); + if (disp_size == 1) + fprintf(cxt->out, "\"$mem_disp\":{\"int8\":%d},", disp); + else if (disp_size == 4) + fprintf(cxt->out, "\"$mem_disp\":{\"int32\":%d},", disp); + else + fprintf(cxt->out, "\"$mem_disp\":null,"); + fprintf(cxt->out, "\"$mem_dst\":{\"int32\":%d},", operand_addr); + } else { // fill empty + log("fill empty for %d-th arg", arg_i); + fprintf(cxt->out, "\"$mov_operand%d\":null,", arg_i); + } + } + } else if (cxt->I->mnemonic == MNEMONIC_CALL) { + intptr_t target = call_target(cxt->I); + auto f = CmpPlt.find(target); + CMP_TYPE kind = f->second; + log("id: %d, fn %s, ty: %d", id, cxt->I->string.instr, kind); + fprintf(cxt->out, "\"$cmp_id\":{\"int32\":%d},", id); + fprintf(cxt->out, "\"$cmp_ty\":{\"int16\":%d},", kind); + fprintf(cxt->out, "\"$cmp_size\":{\"int32\":%d},", 0); + if (kind == STRCMP) { + fprintf(cxt->out, "\"$mov_operand0\":\"$two_arg_operand\","); + } else if (kind == STRNCMP || kind == MEMCMP) { + fprintf(cxt->out, 
"\"$mov_operand0\":\"$three_arg_operand\","); + } + fprintf(cxt->out, "\"$mov_operand1\":null,"); + fprintf(cxt->out, "\"$mov_operand2\":null,"); + } +} diff --git a/hopper-instrument/e9-mode/rt-linux.c b/hopper-instrument/e9-mode/rt-linux.c new file mode 100644 index 0000000..2c3d2de --- /dev/null +++ b/hopper-instrument/e9-mode/rt-linux.c @@ -0,0 +1,217 @@ +// stdlib.c is under the MIT license. +#include "stdlib.c" + +static FILE *log = NULL; + +static void print_message(bool fatal, const char *msg, ...) { + va_list ap; + va_start(ap, msg); + if (log == NULL) { + log = fopen("/tmp/e9afl.log", "a"); + if (log != NULL) setvbuf(log, NULL, _IONBF, 0); + } + if (log == NULL) { + if (fatal) abort(); + return; + } + vfprintf(log, msg, ap); + if (fatal) abort(); + va_end(ap); +} + +#define error(msg, ...) \ + print_message(true, "e9afl runtime error: " msg "\n", ##__VA_ARGS__) +#define log(msg, ...) \ + print_message(false, "e9afl log: " msg "\n", ##__VA_ARGS__) + +/* Init TLS if necessary. */ +#include +static void __afl_init_tls(void) { + uintptr_t val; + int r = (int)syscall(SYS_arch_prctl, ARCH_GET_FS, &val); + if (r < 0) error("failed to get TLS base address: %s", strerror(errno)); + if (val == 0x0) { + /* + * If glibc is not dynamically linked then %fs may be uninitialized. + * Since the instrumentation uses TLS, this will cause the binary to + * crash. We fix this using a "dummy" TLS. + */ + static uint8_t dummy_tls[128] = {0}; + r = (int)syscall(SYS_arch_prctl, ARCH_SET_FS, + dummy_tls + sizeof(dummy_tls)); + if (r < 0) error("failed to set TLS base address: %s", strerror(errno)); + } +} + +/* + * Init. + */ +void init(int argc, const char **argv, char **envp, void *dynamic, + const struct e9_config_s *config) { + log("fuzzing binary %s", argv[0]); + __afl_init_tls(); + if ((config->flags & E9_FLAG_EXE) == 0) { + /* + * This is a shared library. For this, we set up a dummy area so the + * instrumentation does not crash during program initialization. 
The + * main executable is responsible for setting up AFL proper. + */ + void *p1 = mmap(AREA_POINTER, AREA_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + + void *p2 = mmap(INSTR_AREA_POINTER, INSTR_ALL_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + + // try disable coverage + __asm__("movl $0xFFFFFFFF, %ds:0x3B0100;"); + + log("init lib done, mmap ptr: %p, %p!", p1, p2); + + /* + // since shared binary is not linked with libc, + // so we can't find dl* 's sym and can't init it. + if (dlinit(dynamic) != 0) + { + fprintf(stderr, "dlinit() failed: %s\n", strerror(errno)); + abort(); + } + void *handle = dlopen("libc.so.6", RTLD_LAZY); + void* free_ptr = dlsym(handle, "free"); + log("free ptr: %p", free_ptr); + */ + return; + } + log("init exe done!"); + environ = envp; +} + +/* + * Entry. This is a (slower) alternative to the plugin instrumentation. + * + * USAGE: + * E9AFL_NO_INSTRUMENT=1 ./e9tool -M 'plugin(e9afl).match()' \ + * -P 'entry(random)@"hopper-e9-rt"' \ + * path/to/binary + */ +void entry(uint32_t curr_loc) { + uint32_t prev_loc = 0; + asm("mov %%ds:0x3B0100,%0" : "=r"(prev_loc)); + uint16_t idx = prev_loc ^ curr_loc; + AREA_POINTER[idx]++; + asm("mov %0,%%ds:0x3B0100" : : "r"(curr_loc >> 1)); +} + +extern void set_file_name_suffix(char *filename, char *prefix); + +// int open(const char *pathname, int flags); +// int open(const char *pathname, int flags, mode_t mode); +// int open64 (const char *file, int oflag, ...) 
+void entry_open(uint32_t id, int64_t arg1, int32_t arg2) { + // log("open %lld, %d", arg1, arg2); + // if (arg1 == 0) { + // return; + // } + int32_t offset = get_mem_offset(); + int read_mode = 1; + // O_WRONLY 00000001 + if ((arg2 & 1) > 0) { + read_mode = 2; + } + mem_area_ptr[offset].id = id; + mem_area_ptr[offset].size = read_mode; + mem_area_ptr[offset].addr = arg1; + mem_area_ptr[offset].ty = OPEN; + mem_area_ptr[offset].stmt_index = *stmt_index_ptr; + set_file_name_suffix((char *)arg1, (char *)&mem_area_ptr[offset].slice); +} + +// int creat(const char *pathname, mode_t mode); +void entry_creat(uint32_t id, int64_t arg1) { + // if (arg1 == 0) { + // return; + // } + int32_t offset = get_mem_offset(); + // create is O_WRONLY + int read_mode = 2; + mem_area_ptr[offset].id = id; + mem_area_ptr[offset].size = read_mode; + mem_area_ptr[offset].addr = arg1; + mem_area_ptr[offset].ty = OPEN; + mem_area_ptr[offset].stmt_index = *stmt_index_ptr; + set_file_name_suffix((char *)arg1, (char *)&mem_area_ptr[offset].slice); +} + +#define IS_RESERVED_FD(fd) (fd >=3 && fd <= 32) + +// FILE *fdopen(int fd, const char *mode); +void entry_fdopen(uint32_t id, int32_t *arg1, int64_t arg2) { + int32_t offset = get_mem_offset(); + const char *mode = (const char *)arg2; + // write + int read_mode = 2; + // read + for (int i = 0; i < 4; i++) { + if (mode[i] == 0) break; + if (mode[i] == 'r' || mode[i] == '+') read_mode = 1; + } + int fd = *arg1; + // log("fdopen %d, %s", fd, arg2); + // avoid blocking in stdin/stdout/stderr + if (fd == 0 || + ((fd == 1 || fd == 2) && read_mode) || + IS_RESERVED_FD(fd)) { + *arg1 = -1; + } + mem_area_ptr[offset].id = id; + mem_area_ptr[offset].size = read_mode; + mem_area_ptr[offset].addr = fd; + mem_area_ptr[offset].ty = FDOPEN; + mem_area_ptr[offset].stmt_index = *stmt_index_ptr; +} + +// FILE *freopen(const char *path, const char *mode, FILE *stream); + +void entry_lseek(uint32_t id, int32_t *arg1) { + int32_t offset = get_mem_offset(); + int fd 
= *arg1; + if (fd == 0 || IS_RESERVED_FD(fd)) *arg1 = -1; + mem_area_ptr[offset].id = id; + mem_area_ptr[offset].size = 1; + mem_area_ptr[offset].addr = fd; + mem_area_ptr[offset].ty = LSEEK; + mem_area_ptr[offset].stmt_index = *stmt_index_ptr; +} + +void entry_read(uint32_t id, int32_t *arg1) { + int32_t offset = get_mem_offset(); + int fd = *arg1; + if (fd == 0 || fd == 1 || fd == 2 || IS_RESERVED_FD(fd)) *arg1 = -1; + mem_area_ptr[offset].id = id; + mem_area_ptr[offset].size = 2; + mem_area_ptr[offset].addr = fd; + mem_area_ptr[offset].ty = READ; + mem_area_ptr[offset].stmt_index = *stmt_index_ptr; +} + +void entry_write(uint32_t id, int32_t *arg1) { + int32_t offset = get_mem_offset(); + int fd = *arg1; + if (fd == 0 || IS_RESERVED_FD(fd)) *arg1 = -1; + mem_area_ptr[offset].id = id; + mem_area_ptr[offset].size = 1; + mem_area_ptr[offset].addr = fd; + mem_area_ptr[offset].ty = WRITE; + mem_area_ptr[offset].stmt_index = *stmt_index_ptr; +} + +void entry_close(uint32_t id, int32_t *arg1) { + int32_t offset = get_mem_offset(); + // do not close anything + int fd = *arg1; + if (IS_RESERVED_FD(fd)) *arg1 = -1; + mem_area_ptr[offset].id = id; + mem_area_ptr[offset].size = 1; + mem_area_ptr[offset].addr = fd; + mem_area_ptr[offset].ty = CLOSE; + mem_area_ptr[offset].stmt_index = *stmt_index_ptr; +} diff --git a/hopper-instrument/e9-mode/rt-win.c b/hopper-instrument/e9-mode/rt-win.c new file mode 100644 index 0000000..d6f9fa3 --- /dev/null +++ b/hopper-instrument/e9-mode/rt-win.c @@ -0,0 +1,675 @@ + +#include +#include +#include +#include + +#define INVALID_HANDLE_VALUE NULL +#define FILE_MAP_ALL_ACCESS (0x000F0000L|0x0001|0x0002|0x0004|0x0008|0x00100) +#define PAGE_EXECUTE_READWRITE 0x40 +typedef int WCHAR; +typedef const WCHAR *LPCWSTR, *PCWSTR; +typedef unsigned long DWORD; +typedef unsigned long ULONG; +typedef unsigned short USHORT; +typedef unsigned char BOOLEAN; +typedef int BOOL; +typedef char CHAR; +typedef short SHORT; +typedef long LONG; +typedef unsigned 
char UCHAR; +typedef unsigned short *PWSTR; +typedef DWORD ACCESS_MASK; +typedef void *HANDLE; +typedef void *LPVOID; +typedef unsigned long long ULONGLONG; +typedef unsigned long long ULONG_PTR; +typedef ULONG_PTR SIZE_T, *PSIZE_T; +typedef struct _SECURITY_ATTRIBUTES { + DWORD nLength; + LPVOID lpSecurityDescriptor; + BOOL bInheritHandle; +} SECURITY_ATTRIBUTES, *PSECURITY_ATTRIBUTES, *LPSECURITY_ATTRIBUTES; + + +typedef struct _UNICODE_STRING { + USHORT Length; + USHORT MaximumLength; + PWSTR Buffer; +} UNICODE_STRING; + +typedef UNICODE_STRING *PUNICODE_STRING; + +typedef struct _GENERIC_MAPPING { + ACCESS_MASK GenericRead; + ACCESS_MASK GenericWrite; + ACCESS_MASK GenericExecute; + ACCESS_MASK GenericAll; +} GENERIC_MAPPING; + +typedef struct _OBJECT_TYPE_INFORMATION +{ + UNICODE_STRING TypeName; + ULONG TotalNumberOfObjects; + ULONG TotalNumberOfHandles; + ULONG TotalPagedPoolUsage; + ULONG TotalNonPagedPoolUsage; + ULONG TotalNamePoolUsage; + ULONG TotalHandleTableUsage; + ULONG HighWaterNumberOfObjects; + ULONG HighWaterNumberOfHandles; + ULONG HighWaterPagedPoolUsage; + ULONG HighWaterNonPagedPoolUsage; + ULONG HighWaterNamePoolUsage; + ULONG HighWaterHandleTableUsage; + ULONG InvalidAttributes; + GENERIC_MAPPING GenericMapping; + ULONG ValidAccessMask; + BOOLEAN SecurityRequired; + BOOLEAN MaintainHandleCount; + UCHAR TypeIndex; // since WINBLUE + CHAR ReservedByte; + ULONG PoolType; + ULONG DefaultPagedPoolCharge; + ULONG DefaultNonPagedPoolCharge; +} OBJECT_TYPE_INFORMATION, *POBJECT_TYPE_INFORMATION; + + +typedef struct _PROCESS_INFORMATION { + HANDLE hProcess; + HANDLE hThread; + DWORD dwProcessId; + DWORD dwThreadId; +} PROCESS_INFORMATION, *PPROCESS_INFORMATION, *LPPROCESS_INFORMATION; + +typedef enum _PS_CREATE_STATE +{ + PsCreateInitialState, + PsCreateFailOnFileOpen, + PsCreateFailOnSectionCreate, + PsCreateFailExeFormat, + PsCreateFailMachineMismatch, + PsCreateFailExeName, // Debugger specified + PsCreateSuccess, + PsCreateMaximumStates +} 
PS_CREATE_STATE; + +typedef unsigned long long ULONGLONG; +typedef unsigned long long ULONG_PTR; +// typedef unsigned __int64 ULONG_PTR, *PULONG_PTR; +typedef ULONG_PTR SIZE_T, *PSIZE_T; +// typedef unsigned __int64 ULONGLONG; +typedef struct _PS_CREATE_INFO +{ + SIZE_T Size; + PS_CREATE_STATE State; + union + { + // PsCreateInitialState + struct + { + union + { + ULONG InitFlags; + struct + { + UCHAR WriteOutputOnExit : 1; + UCHAR DetectManifest : 1; + UCHAR IFEOSkipDebugger : 1; + UCHAR IFEODoNotPropagateKeyState : 1; + UCHAR SpareBits1 : 4; + UCHAR SpareBits2 : 8; + USHORT ProhibitedImageCharacteristics : 16; + }; + }; + ACCESS_MASK AdditionalFileAccess; + } InitState; + + // PsCreateFailOnSectionCreate + struct + { + HANDLE FileHandle; + } FailSection; + + // PsCreateFailExeFormat + struct + { + USHORT DllCharacteristics; + } ExeFormat; + + // PsCreateFailExeName + struct + { + HANDLE IFEOKey; + } ExeName; + + // PsCreateSuccess + struct + { + union + { + ULONG OutputFlags; + struct + { + UCHAR ProtectedProcess : 1; + UCHAR AddressSpaceOverride : 1; + UCHAR DevOverrideEnabled : 1; // from Image File Execution Options + UCHAR ManifestDetected : 1; + UCHAR ProtectedProcessLight : 1; + UCHAR SpareBits1 : 3; + UCHAR SpareBits2 : 8; + USHORT SpareBits3 : 16; + }; + }; + HANDLE FileHandle; + HANDLE SectionHandle; + ULONGLONG UserProcessParametersNative; + ULONG UserProcessParametersWow64; + ULONG CurrentParameterFlags; + ULONGLONG PebAddressNative; + ULONG PebAddressWow64; + ULONGLONG ManifestAddress; + ULONG ManifestSize; + } SuccessState; + }; +} PS_CREATE_INFO, *PPS_CREATE_INFO; + +static e9_safe_call_t safe_call = NULL; + +#ifdef DEBUG +typedef int (*set_console_text_attribute_t)(intptr_t, int16_t); +typedef int (*write_file_t)(intptr_t, void *, size_t, void *, void *); +static intptr_t stderr = 0; +/* + * Windows library functions. 
+ */ +#define FOREGROUND_BLUE 0x1 +#define FOREGROUND_GREEN 0x2 +#define FOREGROUND_RED 0x4 +#define FOREGROUND_YELLOW (FOREGROUND_RED | FOREGROUND_GREEN) +#define FOREGROUND_WHITE \ + (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE) +static set_console_text_attribute_t set_console_text_attribute_fn = NULL; +static write_file_t write_file_fn = NULL; + +static int SetConsoleTextAttribute(intptr_t handle, int16_t attr) +{ + return (int)safe_call(set_console_text_attribute_fn, handle, attr); +} +static int WriteFile(intptr_t handle, void *buf, size_t len, void *x, + void *y) +{ + return (int)safe_call(write_file_fn, handle, buf, len, x, y); +} + +/* + * Get the stderr handle (& do some init if required). + */ +static intptr_t get_stderr(const struct e9_config_s *config) +{ + const struct e9_config_pe_s *config_pe = + (const struct e9_config_pe_s *)(config + 1); + return config_pe->stderr_handle; +} + +/* + * fprintf(...) adatped from stdlib.c + */ +#define PRINTF_FLAG_NEG 0x0001 +#define PRINTF_FLAG_UPPER 0x0002 +#define PRINTF_FLAG_HEX 0x0004 +#define PRINTF_FLAG_PLUS 0x0008 +#define PRINTF_FLAG_HASH 0x0010 +#define PRINTF_FLAG_SPACE 0x0020 +#define PRINTF_FLAG_RIGHT 0x0040 +#define PRINTF_FLAG_ZERO 0x0080 +#define PRINTF_FLAG_PRECISION 0x0100 +#define PRINTF_FLAG_8 0x0200 +#define PRINTF_FLAG_16 0x0400 +#define PRINTF_FLAG_64 0x0800 +static int isdigit(int c) +{ + return (c >= '0' && c <= '9'); +} +static size_t strlen(const char *s) +{ + size_t len = 0; + while (*s++ != '\0') + len++; + return len; +} +static __attribute__((__noinline__)) size_t printf_put_char(char *str, + size_t size, size_t idx, char c) +{ + if (str == NULL || idx >= size) + return idx+1; + str[idx++] = c; + return idx; +} +static __attribute__((__noinline__)) size_t printf_put_num(char *str, + size_t size, size_t idx, unsigned flags, size_t width, size_t precision, + unsigned long long x) +{ + char prefix[2] = {'\0', '\0'}; + char buf[32]; + size_t i = 0; + if (flags & PRINTF_FLAG_HEX) + { 
+ if (flags & PRINTF_FLAG_HASH) + { + prefix[0] = '0'; + prefix[1] = (flags & PRINTF_FLAG_UPPER? 'X': 'x'); + } + const char digs[] = "0123456789abcdef"; + const char DIGS[] = "0123456789ABCDEF"; + const char *ds = (flags & PRINTF_FLAG_UPPER? DIGS: digs); + int shift = (15 * 4); + bool seen = false; + while (shift >= 0) + { + char c = ds[(x >> shift) & 0xF]; + shift -= 4; + if (!seen && c == '0') + continue; + seen = true; + buf[i++] = c; + } + if (!seen) + buf[i++] = '0'; + } + else + { + if (flags & PRINTF_FLAG_NEG) + prefix[0] = '-'; + else if (flags & PRINTF_FLAG_PLUS) + prefix[0] = '+'; + else if (flags & PRINTF_FLAG_SPACE) + prefix[0] = ' '; + unsigned long long r = 10000000000000000000ull; + bool seen = false; + while (r != 0) + { + char c = '0' + x / r; + x %= r; + r /= 10; + if (!seen && c == '0') + continue; + seen = true; + buf[i++] = c; + } + if (!seen) + buf[i++] = '0'; + } + if ((flags & PRINTF_FLAG_ZERO) && !(flags & PRINTF_FLAG_PRECISION)) + { + precision = width; + width = 0; + } + size_t len_0 = i; + size_t len_1 = (len_0 < precision? precision: len_0); + size_t len = + len_1 + (prefix[0] != '\0'? 1 + (prefix[1] != '\0'? 
1: 0): 0); + if (!(flags & PRINTF_FLAG_RIGHT)) + { + for (size_t i = 0; width > len && i < width - len; i++) + idx = printf_put_char(str, size, idx, ' '); + } + if (prefix[0] != '\0') + { + idx = printf_put_char(str, size, idx, prefix[0]); + if (prefix[1] != '\0') + idx = printf_put_char(str, size, idx, prefix[1]); + } + for (size_t i = 0; precision > len_0 && i < precision - len_0; i++) + idx = printf_put_char(str, size, idx, '0'); + for (size_t i = 0; i < len_0; i++) + idx = printf_put_char(str, size, idx, buf[i]); + if (flags & PRINTF_FLAG_RIGHT) + { + for (size_t i = 0; width > len && i < width - len; i++) + idx = printf_put_char(str, size, idx, ' '); + } + return idx; +} +static int vsnprintf(char *str, size_t size, const char *format, va_list ap) +{ + size_t idx = 0; + for (; *format != '\0'; format++) + { + if (*format != '%') + { + idx = printf_put_char(str, size, idx, *format); + continue; + } + format++; + unsigned flags = 0x0; + for (; true; format++) + { + switch (*format) + { + case ' ': + flags |= PRINTF_FLAG_SPACE; + continue; + case '+': + flags |= PRINTF_FLAG_PLUS; + continue; + case '-': + if (!(flags & PRINTF_FLAG_ZERO)) + flags |= PRINTF_FLAG_RIGHT; + continue; + case '#': + flags |= PRINTF_FLAG_HASH; + continue; + case '0': + flags &= ~PRINTF_FLAG_RIGHT; + flags |= PRINTF_FLAG_ZERO; + continue; + default: + break; + } + break; + } + + size_t width = 0; + if (*format == '*') + { + format++; + int tmp = va_arg(ap, int); + if (tmp < 0) + { + flags |= (!(flags & PRINTF_FLAG_ZERO)? PRINTF_FLAG_RIGHT: 0); + width = (size_t)-tmp; + } + else + width = (size_t)tmp; + } + else + { + for (; isdigit(*format); format++) + { + width *= 10; + width += (unsigned)(*format - '0'); + width = (width > INT32_MAX? INT32_MAX: width); + } + } + width = (width > INT16_MAX? 
INT16_MAX: width); + + size_t precision = 0; + if (*format == '.') + { + flags |= PRINTF_FLAG_PRECISION; + format++; + if (*format == '*') + { + format++; + int tmp = va_arg(ap, int); + tmp = (tmp < 0? 0: tmp); + precision = (size_t)tmp; + } + else + { + for (; isdigit(*format); format++) + { + precision *= 10; + precision += (unsigned)(*format - '0'); + precision = (precision > INT32_MAX? INT32_MAX: precision); + } + } + } + switch (*format) + { + case 'l': + flags |= PRINTF_FLAG_64; + format++; + if (*format == 'l') + format++; + break; + case 'h': + format++; + if (*format == 'h') + { + format++; + flags |= PRINTF_FLAG_8; + } + else + flags |= PRINTF_FLAG_16; + break; + case 'z': case 'j': case 't': + format++; + flags |= PRINTF_FLAG_64; + break; + } + + int64_t x; + uint64_t y; + const char *s; + size_t len; + bool end = false; + switch (*format) + { + case '\0': + end = true; + break; + case 'c': + x = (int64_t)(char)va_arg(ap, int); + idx = printf_put_char(str, size, idx, (char)x); + break; + case 'd': case 'i': + if (flags & PRINTF_FLAG_8) + x = (int64_t)(int8_t)va_arg(ap, int); + else if (flags & PRINTF_FLAG_16) + x = (int64_t)(int16_t)va_arg(ap, int); + else if (flags & PRINTF_FLAG_64) + x = va_arg(ap, int64_t); + else + x = (int64_t)va_arg(ap, int); + if (x < 0) + { + flags |= PRINTF_FLAG_NEG; + x = -x; + } + idx = printf_put_num(str, size, idx, flags, width, + precision, (uint64_t)x); + break; + case 'X': + flags |= PRINTF_FLAG_UPPER; + // Fallthrough + case 'x': + flags |= PRINTF_FLAG_HEX; + // Fallthrough + case 'u': + if (flags & PRINTF_FLAG_8) + y = (uint64_t)(uint8_t)va_arg(ap, unsigned); + else if (flags & PRINTF_FLAG_16) + y = (uint64_t)(uint16_t)va_arg(ap, unsigned); + else if (flags & PRINTF_FLAG_64) + y = va_arg(ap, uint64_t); + else + y = (uint64_t)va_arg(ap, unsigned); + idx = printf_put_num(str, size, idx, flags, width, + precision, y); + break; + case 'p': + y = (uint64_t)va_arg(ap, const void *); + flags |= PRINTF_FLAG_HASH | 
PRINTF_FLAG_HEX; + idx = printf_put_num(str, size, idx, flags, width, + precision, y); + break; + case 's': + s = va_arg(ap, const char *); + s = (s == NULL? "(null)": s); + len = strlen(s); + len = ((flags & PRINTF_FLAG_PRECISION) && precision < len? + precision: len); + if (!(flags & PRINTF_FLAG_RIGHT)) + { + for (size_t i = 0; width > len && i < width - len; i++) + idx = printf_put_char(str, size, idx, ' '); + } + for (size_t i = 0; i < len; i++) + idx = printf_put_char(str, size, idx, s[i]); + if (flags & PRINTF_FLAG_RIGHT) + { + for (size_t i = 0; width > len && i < width - len; i++) + idx = printf_put_char(str, size, idx, ' '); + } + break; + default: + idx = printf_put_char(str, size, idx, *format); + break; + } + if (end) + break; + } + (void)printf_put_char(str, size, idx, '\0'); + if (idx > INT32_MAX) + return -1; + return (int)idx; +} +static int e9vfprintf(intptr_t handle, const char *format, va_list ap) +{ + va_list ap1; + va_copy(ap1, ap); + int result = vsnprintf(NULL, SIZE_MAX, format, ap); + if (result < 0) + return result; + char buf[result+1]; + result = vsnprintf(buf, result+1, format, ap1); + if (result < 0) + return result; + if (!WriteFile(handle, buf, strlen(buf), NULL, NULL)) + return -1; + return result; +} +static int e9fprintf(intptr_t handle, const char *format, ...) 
+{ + va_list ap; + va_start(ap, format); + int result = e9vfprintf(handle, format, ap); + va_end(ap); + return result; +} +#endif + +typedef HANDLE (*create_file_mapping_t)(HANDLE hFile,LPSECURITY_ATTRIBUTES lpFileMappingAttributes,DWORD flProtect,DWORD dwMaximumSizeHigh,DWORD dwMaximumSizeLow,LPCWSTR lpName); +static create_file_mapping_t create_file_mapping_fn = NULL; + +static HANDLE CreateFileMappingSC(HANDLE hFile,LPSECURITY_ATTRIBUTES lpFileMappingAttributes,DWORD flProtect,DWORD dwMaximumSizeHigh,DWORD dwMaximumSizeLow,LPCWSTR lpName){ + return (HANDLE)safe_call(create_file_mapping_fn, hFile, lpFileMappingAttributes,flProtect,dwMaximumSizeHigh,dwMaximumSizeLow,lpName); +} + +typedef void* (*map_view_of_file_ex_t)(HANDLE hFileMappingObject,DWORD dwDesiredAccess,DWORD dwFileOffsetHigh,DWORD dwFileOffsetLow,SIZE_T dwNumberOfBytesToMap,LPVOID lpBaseAddress); +static map_view_of_file_ex_t map_view_of_file_ex_fn = NULL; + +static HANDLE MapViewOfFileExSC(HANDLE hFileMappingObject,DWORD dwDesiredAccess,DWORD dwFileOffsetHigh,DWORD dwFileOffsetLow,SIZE_T dwNumberOfBytesToMap,LPVOID lpBaseAddress){ + return (HANDLE)safe_call(map_view_of_file_ex_fn, hFileMappingObject, dwDesiredAccess,dwFileOffsetHigh,dwFileOffsetLow,dwNumberOfBytesToMap,lpBaseAddress); +} + +typedef DWORD (*get_environment_variable_t)(char* lpName, char* lpBuffer, DWORD nSize); +static get_environment_variable_t get_environment_variable_fn = NULL; + +static DWORD GetEnvironmentVariable(char* lpName, char* lpBuffer, DWORD nSize){ + return (DWORD)safe_call(get_environment_variable_fn, lpName, lpBuffer, nSize); +} + +HANDLE DoCreateFileMapping(DWORD dwMaximumSizeHigh, DWORD dwMaximumSizeLow,LPCWSTR lpName) { + SECURITY_ATTRIBUTES sec; + sec.nLength = sizeof(sec); + sec.lpSecurityDescriptor = NULL; + sec.bInheritHandle = true; + LPSECURITY_ATTRIBUTES secptr = &sec; + HANDLE hFileMappingObject = CreateFileMappingSC(INVALID_HANDLE_VALUE, secptr, PAGE_EXECUTE_READWRITE, dwMaximumSizeHigh, dwMaximumSizeLow, 
lpName); +#ifdef DEBUG + if (hFileMappingObject == INVALID_HANDLE_VALUE) { + //asm volatile ("ud2"); + e9fprintf(stderr, "CreateMemoryMapping failed\n"); + return NULL; + } + else { + //asm volatile ("ud2"); + e9fprintf(stderr, "CreateMemoryMapping succeed, HANDLE ==> %p\n", hFileMappingObject); + return hFileMappingObject; + } +#endif + return hFileMappingObject; +} + +void* DoMapViewOfFileEx(HANDLE hFileMappingObject, DWORD dwFileOffsetHigh, DWORD dwFileOffsetLow, SIZE_T dwNumberOfBytesToMap,LPVOID lpBaseAddress) { + void* ret_lpBaseAddress = MapViewOfFileExSC(hFileMappingObject, FILE_MAP_ALL_ACCESS, dwFileOffsetHigh, dwFileOffsetLow, dwNumberOfBytesToMap,lpBaseAddress); +#ifdef DEBUG + if (!ret_lpBaseAddress) { + //asm volatile ("ud2"); + e9fprintf(stderr, "DoMapViewOfFileEx failed. lpBaseAddress ==> %p arg ==> %p\n", ret_lpBaseAddress,lpBaseAddress); + return NULL; + } + else { + //asm volatile ("ud2"); + e9fprintf(stderr, "DoMapViewOfFileEx succeed. lpBaseAddress ==> %p arg ==> %p\n", ret_lpBaseAddress,lpBaseAddress); + return ret_lpBaseAddress; + } +#endif + return ret_lpBaseAddress; +} + +void init(const struct e9_config_s *config) +{ + const struct e9_config_pe_s *config_pe = + (const struct e9_config_pe_s *)(config + 1); + if (safe_call == NULL) + safe_call = config_pe->safe_call; + if (create_file_mapping_fn == NULL) + create_file_mapping_fn = + (create_file_mapping_t)config_pe->get_proc_address( + config_pe->kernel32, "CreateFileMappingA"); + if (map_view_of_file_ex_fn == NULL) + map_view_of_file_ex_fn = + (map_view_of_file_ex_t)config_pe->get_proc_address( + config_pe->kernel32, "MapViewOfFileEx"); + if (get_environment_variable_fn == NULL) + get_environment_variable_fn = + (get_environment_variable_t)config_pe->get_proc_address( + config_pe->kernel32, "GetEnvironmentVariableA"); +#ifdef DEBUG + if (set_console_text_attribute_fn == NULL) + set_console_text_attribute_fn = + (set_console_text_attribute_t)config_pe->get_proc_address( + config_pe->kernel32, 
"SetConsoleTextAttribute"); + if (write_file_fn == NULL) + write_file_fn = + (write_file_t)config_pe->get_proc_address( + config_pe->kernel32, "WriteFile"); + if (set_console_text_attribute_fn == NULL || write_file_fn == NULL) + asm volatile ("ud2"); + stderr = get_stderr(config); + SetConsoleTextAttribute(stderr, FOREGROUND_WHITE); + e9fprintf(stderr, "set_console_text_attribute_fn: 0x%.16lx\n", set_console_text_attribute_fn); + e9fprintf(stderr, "write_file_fn: 0x%.16lx\n", write_file_fn); + e9fprintf(stderr, "create_file_mapping_fn: 0x%.16lx\n", create_file_mapping_fn); + e9fprintf(stderr, "map_view_of_file_ex_fn: 0x%.16lx\n", map_view_of_file_ex_fn); +#endif + // FIXME: crate a big enough mapping here, but the memoery we used is less than the size. + size_t size = 0x100000; + char path_val[0x40] = "HOPPER_PATH_SHMID_"; + GetEnvironmentVariable("HOPPER_TASK",path_val+18,0x40); + HANDLE handle_area_base = DoCreateFileMapping((u_long) (size >> 32),(u_long) (size & 0xffffffff), (LPCWSTR)path_val); + DoMapViewOfFileEx (handle_area_base, 0, 0, 0, (LPVOID)AREA_BASE); + char instr_val[0x40] = "HOPPER_INSTR_SHMID_"; + GetEnvironmentVariable("HOPPER_TASK",instr_val+19,0x40); + HANDLE handle_instr_area = DoCreateFileMapping((u_long) (size >> 32),(u_long) (size & 0xffffffff),(LPCWSTR)instr_val); + DoMapViewOfFileEx(handle_instr_area, 0, 0, 0, (LPVOID)INSTR_AREA); + *free_ptr = (int64_t)config_pe->get_proc_address(config_pe->user32, "free"); + *malloc_ptr = (int64_t)config_pe->get_proc_address(config_pe->user32, "malloc"); + *calloc_ptr = (int64_t)config_pe->get_proc_address(config_pe->user32, "calloc"); + *realloc_ptr = (int64_t)config_pe->get_proc_address(config_pe->user32, "realloc"); +#ifdef DEBUG + e9fprintf(stderr, "path_val: %s\n", path_val); + e9fprintf(stderr, "handle_area_base handle: 0x%.16lx\n", handle_area_base); + e9fprintf(stderr, "instr_val: %s\n", instr_val); + e9fprintf(stderr, "handle_instr_area handle: 0x%.16lx\n", handle_instr_area); + e9fprintf(stderr, 
"free_ptr: 0x%.16lx\n", *free_ptr); + e9fprintf(stderr, "malloc_ptr: 0x%.16lx\n", *malloc_ptr); + e9fprintf(stderr, "calloc_ptr: 0x%.16lx\n", *calloc_ptr); + e9fprintf(stderr, "realloc_ptr: 0x%.16lx\n", *realloc_ptr); + e9fprintf(stderr, "===========================================================\n"); +#endif +} \ No newline at end of file diff --git a/hopper-instrument/llvm-mode/Makefile b/hopper-instrument/llvm-mode/Makefile new file mode 100644 index 0000000..c939c03 --- /dev/null +++ b/hopper-instrument/llvm-mode/Makefile @@ -0,0 +1,100 @@ +# +# american fuzzy lop - LLVM instrumentation +# ----------------------------------------- +# +# Written by Laszlo Szekeres and +# Michal Zalewski +# +# LLVM integration design comes from Laszlo Szekeres. +# +# Copyright 2015, 2016 Google LLC All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +PREFIX ?= install + +LLVM_CONFIG ?= llvm-config + +CFLAGS ?= -O3 -funroll-loops -I./include +CFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign + +ifdef AFL_TRACE_PC + CFLAGS += -DUSE_TRACE_PC=1 +endif + +CXXFLAGS ?= -O3 -funroll-loops -I./include +CXXFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \ + -Wno-variadic-macros + +# Mark nodelete to work around unload bug in upstream LLVM 5.0+ +CLANG_CFL = `$(LLVM_CONFIG) --cxxflags` -fno-rtti -fpic $(CXXFLAGS) +CLANG_LFL = `$(LLVM_CONFIG) --ldflags` $(LDFLAGS) + +# User teor2345 reports that this is required to make things work on MacOS X. +ifeq "$(shell uname)" "Darwin" + CLANG_LFL += -Wl,-flat_namespace -Wl,-undefined,suppress +else + CLANG_LFL += -Wl,-znodelete +endif + +# We were using llvm-config --bindir to get the location of clang, but +# this seems to be busted on some distros, so using the one in $PATH is +# probably better. 
# Use clang unless the caller picked a compiler explicitly.
ifeq "$(origin CC)" "default"
  CC  = clang
  CXX = clang++
endif

# The LLVM pass plugin is only needed in the traditional (non trace-pc) mode.
ifndef AFL_TRACE_PC
  PROGS = $(PREFIX)/hopper-clang $(PREFIX)/hopper-llvm-pass.so $(PREFIX)/hopper-llvm-rt.o $(PREFIX)/hopper-llvm-rt-32.o $(PREFIX)/hopper-llvm-rt-64.o
else
  PROGS = $(PREFIX)/hopper-clang $(PREFIX)/hopper-llvm-rt.o $(PREFIX)/hopper-llvm-rt-32.o $(PREFIX)/hopper-llvm-rt-64.o
endif

# None of these name a real file.
.PHONY: all test_deps all_done clean

all: test_deps $(PROGS) all_done

# Create the output directory and verify that the toolchain is available.
test_deps:
	@echo "[*] PREFIX : ${PREFIX}"
	@mkdir -p $(PREFIX)
ifndef AFL_TRACE_PC
	@echo "[*] Checking for working 'llvm-config'..."
	@which $(LLVM_CONFIG) >/dev/null 2>&1 || ( echo "[-] Oops, can't find 'llvm-config'. Install clang or set \$$LLVM_CONFIG or \$$PATH beforehand."; echo " (Sometimes, the binary will be named llvm-config-3.5 or something like that.)"; exit 1 )
else
	@echo "[!] Note: using -fsanitize=trace-pc mode (this will fail with older LLVM)."
endif
	@echo "[*] Checking for working '$(CC)'..."
	@which $(CC) >/dev/null 2>&1 || ( echo "[-] Oops, can't find '$(CC)'. Make sure that it's in your \$$PATH (or set \$$CC and \$$CXX)."; exit 1 )
	@echo "[+] All set and ready to build."

# A symlink target is resolved relative to the directory that holds the link,
# so it must be the bare file name: prefixing it with a relative $(PREFIX)
# would make the link point at $(PREFIX)/$(PREFIX)/hopper-clang.
$(PREFIX)/hopper-clang: hopper-clang.c | test_deps
	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
	ln -sf hopper-clang $(PREFIX)/hopper-clang++

$(PREFIX)/hopper-llvm-pass.so: hopper-llvm-pass.cc | test_deps
	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL)

$(PREFIX)/hopper-llvm-rt.o: hopper-llvm-rt.c | test_deps
	$(CC) $(CFLAGS) -fPIC -c $< -o $@

# The -m32 / -m64 variants are best effort: a toolchain without multilib
# support is reported but does not fail the build.
$(PREFIX)/hopper-llvm-rt-32.o: hopper-llvm-rt.c | test_deps
	@printf "[*] Building 32-bit variant of the runtime (-m32)... "
	@$(CC) $(CFLAGS) -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi

$(PREFIX)/hopper-llvm-rt-64.o: hopper-llvm-rt.c | test_deps
	@printf "[*] Building 64-bit variant of the runtime (-m64)... "
	@$(CC) $(CFLAGS) -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi

all_done:
	@echo "[+] All done! You can now use 'hopper-clang' to compile programs."

.NOTPARALLEL: clean

# Remove everything this Makefile creates, including the hopper-clang++ link.
clean:
	rm -f *.o *.so *~ a.out core core.[1-9][0-9]* test-instr .test-instr0 .test-instr1
	rm -f $(PROGS) $(PREFIX)/hopper-clang++
+*/ + +#define HOPPER_MAIN + +#include "alloc_inl.h" + +#include +#include +#include +#include + +static u8* obj_path; /* Path to runtime libraries */ +static u8** cc_params; /* Parameters passed to the real CC */ +static u32 cc_par_cnt = 1; /* Param count, including argv0 */ + + +/* Try to find the runtime libraries. If that fails, abort. */ + +static void find_obj(u8* argv0) { + + u8 *hopper_path = getenv("HOPPER_PATH"); + u8 *slash, *tmp; + + if (hopper_path) { + + tmp = alloc_printf("%s/hopper-llvm-rt.o", hopper_path); + + if (!access(tmp, R_OK)) { + obj_path = hopper_path; + ck_free(tmp); + return; + } + + ck_free(tmp); + + } + + slash = strrchr(argv0, '/'); + + if (slash) { + + u8 *dir; + + *slash = 0; + dir = ck_strdup(argv0); + *slash = '/'; + + tmp = alloc_printf("%s/hopper-llvm-rt.o", dir); + + if (!access(tmp, R_OK)) { + obj_path = dir; + ck_free(tmp); + return; + } + + ck_free(tmp); + ck_free(dir); + + } + +/* + if (!access(HOPPER_LIB_DIR "/hopper-llvm-rt.o", R_OK)) { + obj_path = HOPPER_LIB_DIR; + return; + } + */ + + FATAL("Unable to find 'hopper-llvm-rt.o' or 'hopper-llvm-pass.so'. Please set HOPPER_PATH"); + +} + + +/* Copy argv to cc_params, making the necessary edits. */ + +static void edit_params(u32 argc, char** argv) { + + u8 fortify_set = 0, asan_set = 0, x_set = 0, bit_mode = 0; + u8 *name; + + cc_params = ck_alloc((argc + 128) * sizeof(u8*)); + + name = strrchr(argv[0], '/'); + if (!name) name = argv[0]; else name++; + + if (!strcmp(name, "hopper-clang++")) { + u8* alt_cxx = getenv("HOPPER_CXX"); + cc_params[0] = alt_cxx ? alt_cxx : (u8*)"clang++"; + } else { + u8* alt_cc = getenv("HOPPER_CC"); + cc_params[0] = alt_cc ? alt_cc : (u8*)"clang"; + } + + /* There are two ways to compile hopper-clang. In the traditional mode, we + use hopper-llvm-pass.so to inject instrumentation. In the experimental + 'trace-pc-guard' mode, we use native LLVM instrumentation callbacks + instead. 
The latter is a very recent addition - see: + + http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards */ + +#ifdef USE_TRACE_PC + cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard"; +#ifndef __ANDROID__ + cc_params[cc_par_cnt++] = "-mllvm"; + cc_params[cc_par_cnt++] = "-sanitizer-coverage-block-threshold=0"; +#endif +#else + cc_params[cc_par_cnt++] = "-Xclang"; + cc_params[cc_par_cnt++] = "-load"; + cc_params[cc_par_cnt++] = "-Xclang"; + cc_params[cc_par_cnt++] = alloc_printf("%s/hopper-llvm-pass.so", obj_path); +#endif /* ^USE_TRACE_PC */ + + cc_params[cc_par_cnt++] = "-Qunused-arguments"; + + while (--argc) { + u8* cur = *(++argv); + + if (!strcmp(cur, "-m32")) bit_mode = 32; + if (!strcmp(cur, "armv7a-linux-androideabi")) bit_mode = 32; + if (!strcmp(cur, "-m64")) bit_mode = 64; + + if (!strcmp(cur, "-x")) x_set = 1; + + if (!strcmp(cur, "-fsanitize=address") || + !strcmp(cur, "-fsanitize=memory")) asan_set = 1; + + if (strstr(cur, "FORTIFY_SOURCE")) fortify_set = 1; + + if (!strcmp(cur, "-Wl,-z,defs") || + !strcmp(cur, "-Wl,--no-undefined")) continue; + + cc_params[cc_par_cnt++] = cur; + + } + + if (getenv("HOPPER_HARDEN")) { + + cc_params[cc_par_cnt++] = "-fstack-protector-all"; + + if (!fortify_set) + cc_params[cc_par_cnt++] = "-D_FORTIFY_SOURCE=2"; + + } + + if (!asan_set) { + + if (getenv("HOPPER_USE_ASAN")) { + + if (getenv("HOPPER_USE_MSAN")) + FATAL("ASAN and MSAN are mutually exclusive"); + + if (getenv("HOPPER_HARDEN")) + FATAL("ASAN and HOPPER_HARDEN are mutually exclusive"); + + cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE"; + cc_params[cc_par_cnt++] = "-fsanitize=address"; + + } else if (getenv("HOPPER_USE_MSAN")) { + + if (getenv("HOPPER_USE_ASAN")) + FATAL("ASAN and MSAN are mutually exclusive"); + + if (getenv("HOPPER_HARDEN")) + FATAL("MSAN and HOPPER_HARDEN are mutually exclusive"); + + cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE"; + cc_params[cc_par_cnt++] = "-fsanitize=memory"; + + } + + } + +#ifdef 
USE_TRACE_PC + + if (getenv("HOPPER_INST_RATIO")) + FATAL("HOPPER_INST_RATIO not available at compile time with 'trace-pc'."); + +#endif /* USE_TRACE_PC */ + + if (!getenv("HOPPER_DONT_OPTIMIZE")) { + + cc_params[cc_par_cnt++] = "-g"; + cc_params[cc_par_cnt++] = "-O3"; + cc_params[cc_par_cnt++] = "-funroll-loops"; + + } + + if (getenv("HOPPER_NO_BUILTIN")) { + + cc_params[cc_par_cnt++] = "-fno-builtin-strcmp"; + cc_params[cc_par_cnt++] = "-fno-builtin-strncmp"; + cc_params[cc_par_cnt++] = "-fno-builtin-strcasecmp"; + cc_params[cc_par_cnt++] = "-fno-builtin-strncasecmp"; + cc_params[cc_par_cnt++] = "-fno-builtin-memcmp"; + + } + + cc_params[cc_par_cnt++] = "-D__HOPPER_HAVE_MANUAL_CONTROL=1"; + cc_params[cc_par_cnt++] = "-D__HOPPER_COMPILER=1"; + cc_params[cc_par_cnt++] = "-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"; + + /* When the user tries to use persistent or deferred forkserver modes by + appending a single line to the program, we want to reliably inject a + signature into the binary (to be picked up by hopper-fuzz) and we want + to call a function from the runtime .o file. This is unnecessarily + painful for three reasons: + + 1) We need to convince the compiler not to optimize out the signature. + This is done with __attribute__((used)). + + 2) We need to convince the linker, when called with -Wl,--gc-sections, + not to do the same. This is done by forcing an assignment to a + 'volatile' pointer. + + 3) We need to declare __hopper_persistent_loop() in the global namespace, + but doing this within a method in a class is hard - :: and extern "C" + are forbidden and __attribute__((alias(...))) doesn't work. Hence the + __asm__ aliasing trick. 
+ + */ + + if (x_set) { + cc_params[cc_par_cnt++] = "-x"; + cc_params[cc_par_cnt++] = "none"; + } + +#ifndef __ANDROID__ + switch (bit_mode) { + + case 0: + cc_params[cc_par_cnt++] = alloc_printf("%s/hopper-llvm-rt.o", obj_path); + break; + + case 32: + cc_params[cc_par_cnt++] = alloc_printf("%s/hopper-llvm-rt-32.o", obj_path); + + if (access(cc_params[cc_par_cnt - 1], R_OK)) + FATAL("-m32 is not supported by your compiler"); + + break; + + case 64: + cc_params[cc_par_cnt++] = alloc_printf("%s/hopper-llvm-rt-64.o", obj_path); + + if (access(cc_params[cc_par_cnt - 1], R_OK)) + FATAL("-m64 is not supported by your compiler"); + + break; + + } +#endif + + cc_params[cc_par_cnt] = NULL; + +} + + +/* Main entry point */ + +int main(int argc, char** argv) { + + if (argc < 2) { + + SAYF("\n" + "This is a helper application for hopper-fuzz. It serves as a drop-in replacement\n" + "for clang, letting you recompile third-party code with the required runtime\n" + "instrumentation. A common use pattern would be one of the following:\n\n" + + " CC=hopper-clang ./configure\n" + " CXX=hopper-clang++ ./configure\n\n" + + "In contrast to the traditional hopper-clang tool, this version is implemented as\n" + "an LLVM pass and tends to offer improved performance with slow programs.\n\n" + + "You can specify custom next-stage toolchain via HOPPER_CC and HOPPER_CXX. 
Setting\n" + "HOPPER_HARDEN enables hardening optimizations in the compiled code.\n\n"); + + exit(1); + + } + + +#ifndef __ANDROID__ + find_obj(argv[0]); +#endif + + edit_params(argc, argv); + + execvp(cc_params[0], (char**)cc_params); + + FATAL("Oops, failed to execute '%s' - check your PATH", cc_params[0]); + + return 0; + +} \ No newline at end of file diff --git a/hopper-instrument/llvm-mode/hopper-early-pass.cc b/hopper-instrument/llvm-mode/hopper-early-pass.cc new file mode 100644 index 0000000..2137ec4 --- /dev/null +++ b/hopper-instrument/llvm-mode/hopper-early-pass.cc @@ -0,0 +1,136 @@ +/* + Make optimization fail for branches + e.g + if (x == 1 & y == 1) {} + => + if (x==1) { + if (y == 1) {} + } + */ + +#include "debug.h" + +#include +#include +#include + +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" + +using namespace llvm; + +namespace { + +class UnfoldBranch : public FunctionPass { +private: + Type *VoidTy; + IntegerType *Int8Ty; + IntegerType *Int32Ty; + + Constant *UnfoldBranchFn; + +public: + static char ID; + + UnfoldBranch() : FunctionPass(ID) {} + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + bool runOnFunction(Function &F) override; +}; + +} // namespace + +char UnfoldBranch::ID = 0; + +bool UnfoldBranch::doInitialization(Module &M) { + + LLVMContext &C = M.getContext(); + + Int8Ty = IntegerType::getInt8Ty(C); + Int32Ty = IntegerType::getInt32Ty(C); + VoidTy = Type::getVoidTy(C); + + srandom(1851655); + + Type *FnArgs[1] = {Int32Ty}; + FunctionType *FnTy = FunctionType::get(VoidTy, FnArgs, /*isVarArg=*/false); + UnfoldBranchFn = 
M.getOrInsertFunction("__unfold_branch_fn", FnTy); + + if (Function *F = dyn_cast(UnfoldBranchFn)) { + F->addAttribute(LLVM_ATTRIBUTE_LIST::FunctionIndex, Attribute::NoUnwind); + } + return true; +} + +bool UnfoldBranch::doFinalization(Module &M) { return true; } + +bool UnfoldBranch::runOnFunction(Function &F) { + + // if the function is declaration, ignore + if (F.isDeclaration()) + return false; + +#ifndef ENABLE_UNFOLD_BRANCH + return false; +#endif + + SmallSet VisitedBB; + LLVMContext &C = F.getContext(); + for (auto &BB : F) { + + Instruction *Inst = BB.getTerminator(); + if (isa(Inst)) { + + BranchInst *BI = dyn_cast(Inst); + + if (BI->isUnconditional() || BI->getNumSuccessors() < 2) + continue; + + Value *Cond = BI->getCondition(); + if (!Cond) + continue; + + for (unsigned int i = 0; i < BI->getNumSuccessors(); i++) { + BasicBlock *B0 = BI->getSuccessor(i); + if (B0 && VisitedBB.count(B0) == 0) { + VisitedBB.insert(B0); + BasicBlock::iterator IP = B0->getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + unsigned int cur_loc = RRR(1048576); + CallInst *Call = IRB.CreateCall(UnfoldBranchFn, + {ConstantInt::get(Int32Ty, cur_loc)}); + Call->setMetadata(C.getMDKindID("unfold"), MDNode::get(C, None)); + } + } + } + } + + return true; +} + +static void registerUnfoldBranchPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + PM.add(new UnfoldBranch()); +} + +static RegisterPass X("unfold_branch_pass", "Unfold Branch Pass"); + +static RegisterStandardPasses + RegisterAFLPass(PassManagerBuilder::EP_EarlyAsPossible, + registerUnfoldBranchPass); + +/* +static RegisterStandardPasses RegisterAFLPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass); +*/ diff --git a/hopper-instrument/llvm-mode/hopper-llvm-pass.cc b/hopper-instrument/llvm-mode/hopper-llvm-pass.cc new file mode 100644 index 0000000..f26162e --- /dev/null +++ b/hopper-instrument/llvm-mode/hopper-llvm-pass.cc @@ -0,0 +1,166 @@ +/* + Copyright 2015 Google LLC All rights 
reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* + american fuzzy lop - LLVM-mode instrumentation pass + --------------------------------------------------- + + Written by Laszlo Szekeres and + Michal Zalewski + + LLVM integration design comes from Laszlo Szekeres. C bits copied-and-pasted + from afl-as.c are Michal's fault. + + This library is plugged into LLVM when invoking clang through afl-clang-fast. + It tells the compiler to add code roughly equivalent to the bits discussed + in ../afl-as.h. 
+*/ + +#define AFL_LLVM_PASS + +#include "debug.h" +#include "config.h" + +#include +#include +#include + +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" + +using namespace llvm; + +namespace { + + class AFLCoverage : public ModulePass { + public: + static char ID; + AFLCoverage() : ModulePass(ID) { } + bool runOnModule(Module &M) override; + }; + +} + +char AFLCoverage::ID = 0; + +bool AFLCoverage::runOnModule(Module &M) { + + LLVMContext &C = M.getContext(); + + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + Type *Int8PtrTy = PointerType::get(Int8Ty, 0); + /* Decide instrumentation ratio */ + + char* inst_ratio_str = getenv("AFL_INST_RATIO"); + unsigned int inst_ratio = 100; + + if (inst_ratio_str) { + if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || + inst_ratio > 100) + FATAL("Bad value of AFL_INST_RATIO (must be between 1 and 100)"); + } + + /* Get globals for the SHM region and the previous location. Note that + __afl_prev_loc is thread-local. */ + GlobalVariable *AFLMapPtr = + new GlobalVariable(M, Int8PtrTy, false, + GlobalValue::ExternalLinkage, 0, "__afl_area_ptr"); + + GlobalVariable *AFLPrevLoc = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", + 0, GlobalVariable::GeneralDynamicTLSModel, 0, false); + + /* Instrument all the things! 
*/ + + int inst_blocks = 0; + + for (auto &F : M) { + for (auto &BB : F) { + + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + + if (RR(100) >= inst_ratio) continue; + + /* Make up cur_loc */ + + unsigned int cur_loc = RR(MAP_SIZE); + + ConstantInt *CurLoc = ConstantInt::get(Int32Ty, cur_loc); + + /* Load prev_loc */ + + LoadInst *PrevLoc = IRB.CreateLoad(Int32Ty, AFLPrevLoc); + PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty()); + + /* Load SHM pointer */ + + LoadInst *MapPtr = IRB.CreateLoad(Int8PtrTy, AFLMapPtr); + MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + Value *MapPtrIdx = + IRB.CreateGEP(Int8Ty, MapPtr, IRB.CreateXor(PrevLocCasted, CurLoc)); + + /* Update bitmap */ + + LoadInst *Counter = IRB.CreateLoad(Int8Ty, MapPtrIdx); + Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + Value *Incr = IRB.CreateAdd(Counter, ConstantInt::get(Int8Ty, 1)); + IRB.CreateStore(Incr, MapPtrIdx) + ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + /* Set prev_loc to cur_loc >> 1 */ + + StoreInst *Store = + IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc); + Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + inst_blocks++; + + } + } + + /* Say something nice. */ + if (!inst_blocks) WARNF("No instrumentation targets found."); + else OKF("Instrumented %u locations (%s mode, ratio %u%%).", + inst_blocks, getenv("AFL_HARDEN") ? "hardened" : + ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) ? 
+ "ASAN/MSAN" : "non-hardened"), inst_ratio); + + + return true; + +} + + +static void registerAFLPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + PM.add(new AFLCoverage()); + +} + + +static RegisterStandardPasses RegisterAFLPass( + PassManagerBuilder::EP_ModuleOptimizerEarly, registerAFLPass); + +static RegisterStandardPasses RegisterAFLPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass); \ No newline at end of file diff --git a/hopper-instrument/llvm-mode/hopper-llvm-rt.c b/hopper-instrument/llvm-mode/hopper-llvm-rt.c new file mode 100644 index 0000000..58268d6 --- /dev/null +++ b/hopper-instrument/llvm-mode/hopper-llvm-rt.c @@ -0,0 +1,102 @@ +/* + Copyright 2015 Google LLC All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* + american fuzzy lop - LLVM instrumentation bootstrap + --------------------------------------------------- + + Written by Laszlo Szekeres and + Michal Zalewski + + LLVM integration design comes from Laszlo Szekeres. + + This code is the rewrite of afl-as.h's main_payload. +*/ + +#include "config.h" +#include + +/* This is a somewhat ugly hack for the experimental 'trace-pc-guard' mode. + Basically, we need to make sure that the forkserver is initialized after + the LLVM-generated runtime initialization pass, not before. 
*/ + +#ifdef USE_TRACE_PC +# define CONST_PRIO 5 +#else +# define CONST_PRIO 0 +#endif /* ^USE_TRACE_PC */ + +char* __hopper_llvm_mark = "HOOPER_LLVM_MARK"; + +/* Globals needed by the injected instrumentation. The __afl_area_initial region + is used for instrumentation output before __afl_map_shm() has a chance to run. + It will end up as .comm, so it shouldn't be too wasteful. */ + +u8 __afl_area_initial[MAP_SIZE]; +u8* __afl_area_ptr = __afl_area_initial; + +__thread u32 __afl_prev_loc; + +void __hopper_update_shm_addr(u8* addr) { + __afl_area_ptr = addr; +} + +/* The following stuff deals with supporting -fsanitize-coverage=trace-pc-guard. + It remains non-operational in the traditional, plugin-backed LLVM mode. + For more info about 'trace-pc-guard', see README.llvm. + + The first function (__sanitizer_cov_trace_pc_guard) is called back on every + edge (as opposed to every basic block). */ + +void __sanitizer_cov_trace_pc_guard(uint32_t* guard) { + __afl_area_ptr[*guard]++; +} + +/* Init callback. Populates instrumentation IDs. Note that we're using + ID of 0 as a special value to indicate non-instrumented bits. That may + still touch the bitmap, but in a fairly harmless way. */ + +void __sanitizer_cov_trace_pc_guard_init(uint32_t* start, uint32_t* stop) { + + u32 inst_ratio = 100; + u8* x; + + if (start == stop || *start) return; + + x = getenv("AFL_INST_RATIO"); + if (x) inst_ratio = atoi(x); + + if (!inst_ratio || inst_ratio > 100) { + fprintf(stderr, "[-] ERROR: Invalid AFL_INST_RATIO (must be 1-100).\n"); + abort(); + } + + /* Make sure that the first element in the range is always set - we use that + to avoid duplicate calls (which can happen as an artifact of the underlying + implementation in LLVM). 
*/ + + *(start++) = RR(MAP_SIZE - 1) + 1; + + while (start < stop) { + + if (RR(100) < inst_ratio) *start = RR(MAP_SIZE - 1) + 1; + else *start = 0; + + start++; + + } + +} \ No newline at end of file diff --git a/hopper-instrument/llvm-mode/include/alloc_inl.h b/hopper-instrument/llvm-mode/include/alloc_inl.h new file mode 100644 index 0000000..2e7bcbd --- /dev/null +++ b/hopper-instrument/llvm-mode/include/alloc_inl.h @@ -0,0 +1,570 @@ +/* + american fuzzy lop - error-checking, memory-zeroing alloc routines + ------------------------------------------------------------------ + + Written and maintained by Michal Zalewski + + Copyright 2013, 2014, 2015 Google Inc. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + This allocator is not designed to resist malicious attackers (the canaries + are small and predictable), but provides a robust and portable way to detect + use-after-free, off-by-one writes, stale pointers, and so on. + + */ + +#ifndef _HAVE_ALLOC_INL_H +#define _HAVE_ALLOC_INL_H + +#include +#include +#include + +#include "debug.h" + +#define MAX_ALLOC 0x40000000 + +/* User-facing macro to sprintf() to a dynamically allocated buffer. */ + +#define alloc_printf(_str...) ({ \ + u8* _tmp; \ + s32 _len = snprintf(NULL, 0, _str); \ + if (_len < 0) FATAL("Whoa, snprintf() fails?!"); \ + _tmp = ck_alloc(_len + 1); \ + snprintf((char*)_tmp, _len + 1, _str); \ + _tmp; \ + }) + +/* Macro to enforce allocation limits as a last-resort defense against + integer overflows. */ + +#define ALLOC_CHECK_SIZE(_s) do { \ + if ((_s) > MAX_ALLOC) \ + ABORT("Bad alloc request: %u bytes", (_s)); \ + } while (0) + +/* Macro to check malloc() failures and the like. 
*/ + +#define ALLOC_CHECK_RESULT(_r, _s) do { \ + if (!(_r)) \ + ABORT("Out of memory: can't allocate %u bytes", (_s)); \ + } while (0) + +/* Magic tokens used to mark used / freed chunks. */ + +#define ALLOC_MAGIC_C1 0xFF00FF00 /* Used head (dword) */ +#define ALLOC_MAGIC_F 0xFE00FE00 /* Freed head (dword) */ +#define ALLOC_MAGIC_C2 0xF0 /* Used tail (byte) */ + +/* Positions of guard tokens in relation to the user-visible pointer. */ + +#define ALLOC_C1(_ptr) (((u32*)(_ptr))[-2]) +#define ALLOC_S(_ptr) (((u32*)(_ptr))[-1]) +#define ALLOC_C2(_ptr) (((u8*)(_ptr))[ALLOC_S(_ptr)]) + +#define ALLOC_OFF_HEAD 8 +#define ALLOC_OFF_TOTAL (ALLOC_OFF_HEAD + 1) + +/* Allocator increments for ck_realloc_block(). */ + +#define ALLOC_BLK_INC 256 + +/* Sanity-checking macros for pointers. */ + +#define CHECK_PTR(_p) do { \ + if (_p) { \ + if (ALLOC_C1(_p) ^ ALLOC_MAGIC_C1) {\ + if (ALLOC_C1(_p) == ALLOC_MAGIC_F) \ + ABORT("Use after free."); \ + else ABORT("Corrupted head alloc canary."); \ + } \ + if (ALLOC_C2(_p) ^ ALLOC_MAGIC_C2) \ + ABORT("Corrupted tail alloc canary."); \ + } \ + } while (0) + +#define CHECK_PTR_EXPR(_p) ({ \ + typeof (_p) _tmp = (_p); \ + CHECK_PTR(_tmp); \ + _tmp; \ + }) + + +/* Allocate a buffer, explicitly not zeroing it. Returns NULL for zero-sized + requests. */ + +static inline void* DFL_ck_alloc_nozero(u32 size) { + + void* ret; + + if (!size) return NULL; + + ALLOC_CHECK_SIZE(size); + ret = malloc(size + ALLOC_OFF_TOTAL); + ALLOC_CHECK_RESULT(ret, size); + + ret += ALLOC_OFF_HEAD; + + ALLOC_C1(ret) = ALLOC_MAGIC_C1; + ALLOC_S(ret) = size; + ALLOC_C2(ret) = ALLOC_MAGIC_C2; + + return ret; + +} + + +/* Allocate a buffer, returning zeroed memory. */ + +static inline void* DFL_ck_alloc(u32 size) { + + void* mem; + + if (!size) return NULL; + mem = DFL_ck_alloc_nozero(size); + + return memset(mem, 0, size); + +} + + +/* Free memory, checking for double free and corrupted heap. When DEBUG_BUILD + is set, the old memory will be also clobbered with 0xFF. 
*/ + +static inline void DFL_ck_free(void* mem) { + + if (!mem) return; + + CHECK_PTR(mem); + +#ifdef DEBUG_BUILD + + /* Catch pointer issues sooner. */ + memset(mem, 0xFF, ALLOC_S(mem)); + +#endif /* DEBUG_BUILD */ + + ALLOC_C1(mem) = ALLOC_MAGIC_F; + + free(mem - ALLOC_OFF_HEAD); + +} + + +/* Re-allocate a buffer, checking for issues and zeroing any newly-added tail. + With DEBUG_BUILD, the buffer is always reallocated to a new addresses and the + old memory is clobbered with 0xFF. */ + +static inline void* DFL_ck_realloc(void* orig, u32 size) { + + void* ret; + u32 old_size = 0; + + if (!size) { + + DFL_ck_free(orig); + return NULL; + + } + + if (orig) { + + CHECK_PTR(orig); + +#ifndef DEBUG_BUILD + ALLOC_C1(orig) = ALLOC_MAGIC_F; +#endif /* !DEBUG_BUILD */ + + old_size = ALLOC_S(orig); + orig -= ALLOC_OFF_HEAD; + + ALLOC_CHECK_SIZE(old_size); + + } + + ALLOC_CHECK_SIZE(size); + +#ifndef DEBUG_BUILD + + ret = realloc(orig, size + ALLOC_OFF_TOTAL); + ALLOC_CHECK_RESULT(ret, size); + +#else + + /* Catch pointer issues sooner: force relocation and make sure that the + original buffer is wiped. */ + + ret = malloc(size + ALLOC_OFF_TOTAL); + ALLOC_CHECK_RESULT(ret, size); + + if (orig) { + + memcpy(ret + ALLOC_OFF_HEAD, orig + ALLOC_OFF_HEAD, MIN(size, old_size)); + memset(orig + ALLOC_OFF_HEAD, 0xFF, old_size); + + ALLOC_C1(orig + ALLOC_OFF_HEAD) = ALLOC_MAGIC_F; + + free(orig); + + } + +#endif /* ^!DEBUG_BUILD */ + + ret += ALLOC_OFF_HEAD; + + ALLOC_C1(ret) = ALLOC_MAGIC_C1; + ALLOC_S(ret) = size; + ALLOC_C2(ret) = ALLOC_MAGIC_C2; + + if (size > old_size) + memset(ret + old_size, 0, size - old_size); + + return ret; + +} + + +/* Re-allocate a buffer with ALLOC_BLK_INC increments (used to speed up + repeated small reallocs without complicating the user code). 
*/ + +static inline void* DFL_ck_realloc_block(void* orig, u32 size) { + +#ifndef DEBUG_BUILD + + if (orig) { + + CHECK_PTR(orig); + + if (ALLOC_S(orig) >= size) return orig; + + size += ALLOC_BLK_INC; + + } + +#endif /* !DEBUG_BUILD */ + + return DFL_ck_realloc(orig, size); + +} + + +/* Create a buffer with a copy of a string. Returns NULL for NULL inputs. */ + +static inline u8* DFL_ck_strdup(u8* str) { + + void* ret; + u32 size; + + if (!str) return NULL; + + size = strlen((char*)str) + 1; + + ALLOC_CHECK_SIZE(size); + ret = malloc(size + ALLOC_OFF_TOTAL); + ALLOC_CHECK_RESULT(ret, size); + + ret += ALLOC_OFF_HEAD; + + ALLOC_C1(ret) = ALLOC_MAGIC_C1; + ALLOC_S(ret) = size; + ALLOC_C2(ret) = ALLOC_MAGIC_C2; + + return memcpy(ret, str, size); + +} + + +/* Create a buffer with a copy of a memory block. Returns NULL for zero-sized + or NULL inputs. */ + +static inline void* DFL_ck_memdup(void* mem, u32 size) { + + void* ret; + + if (!mem || !size) return NULL; + + ALLOC_CHECK_SIZE(size); + ret = malloc(size + ALLOC_OFF_TOTAL); + ALLOC_CHECK_RESULT(ret, size); + + ret += ALLOC_OFF_HEAD; + + ALLOC_C1(ret) = ALLOC_MAGIC_C1; + ALLOC_S(ret) = size; + ALLOC_C2(ret) = ALLOC_MAGIC_C2; + + return memcpy(ret, mem, size); + +} + + +/* Create a buffer with a block of text, appending a NUL terminator at the end. + Returns NULL for zero-sized or NULL inputs. The recorded chunk size covers + the 'size' payload bytes plus the NUL terminator. */ + +static inline u8* DFL_ck_memdup_str(u8* mem, u32 size) { + + u8* ret; + + if (!mem || !size) return NULL; + + ALLOC_CHECK_SIZE(size); + ret = malloc(size + ALLOC_OFF_TOTAL + 1); + ALLOC_CHECK_RESULT(ret, size); + + ret += ALLOC_OFF_HEAD; + + /* Record size + 1 so that the tail canary lands at ret[size + 1], after the + NUL terminator. With a recorded size of 'size', the canary would sit at + ret[size] and be overwritten by the terminator below, making every later + CHECK_PTR() on this buffer abort. */ + + ALLOC_C1(ret) = ALLOC_MAGIC_C1; + ALLOC_S(ret) = size + 1; + ALLOC_C2(ret) = ALLOC_MAGIC_C2; + + memcpy(ret, mem, size); + ret[size] = 0; + + return ret; + +} + + +#ifndef DEBUG_BUILD + +/* In non-debug mode, we just do straightforward aliasing of the above functions + to user-visible names such as ck_alloc(). 
*/ + +#define ck_alloc DFL_ck_alloc +#define ck_alloc_nozero DFL_ck_alloc_nozero +#define ck_realloc DFL_ck_realloc +#define ck_realloc_block DFL_ck_realloc_block +#define ck_strdup DFL_ck_strdup +#define ck_memdup DFL_ck_memdup +#define ck_memdup_str DFL_ck_memdup_str +#define ck_free DFL_ck_free + +#define alloc_report() + +#else + +/* In debugging mode, we also track allocations to detect memory leaks, and the + flow goes through one more layer of indirection. */ + +/* Alloc tracking data structures: */ + +#define ALLOC_BUCKETS 4096 + +struct TRK_obj { + void *ptr; + char *file, *func; + u32 line; +}; + +#ifdef AFL_MAIN + +struct TRK_obj* TRK[ALLOC_BUCKETS]; +u32 TRK_cnt[ALLOC_BUCKETS]; + +# define alloc_report() TRK_report() + +#else + +extern struct TRK_obj* TRK[ALLOC_BUCKETS]; +extern u32 TRK_cnt[ALLOC_BUCKETS]; + +# define alloc_report() + +#endif /* ^AFL_MAIN */ + +/* Bucket-assigning function for a given pointer: */ + +#define TRKH(_ptr) (((((u32)(_ptr)) >> 16) ^ ((u32)(_ptr))) % ALLOC_BUCKETS) + + +/* Add a new entry to the list of allocated objects. */ + +static inline void TRK_alloc_buf(void* ptr, const char* file, const char* func, + u32 line) { + + u32 i, bucket; + + if (!ptr) return; + + bucket = TRKH(ptr); + + /* Find a free slot in the list of entries for that bucket. */ + + for (i = 0; i < TRK_cnt[bucket]; i++) + + if (!TRK[bucket][i].ptr) { + + TRK[bucket][i].ptr = ptr; + TRK[bucket][i].file = (char*)file; + TRK[bucket][i].func = (char*)func; + TRK[bucket][i].line = line; + return; + + } + + /* No space available - allocate more. */ + + TRK[bucket] = DFL_ck_realloc_block(TRK[bucket], + (TRK_cnt[bucket] + 1) * sizeof(struct TRK_obj)); + + TRK[bucket][i].ptr = ptr; + TRK[bucket][i].file = (char*)file; + TRK[bucket][i].func = (char*)func; + TRK[bucket][i].line = line; + + TRK_cnt[bucket]++; + +} + + +/* Remove entry from the list of allocated objects. 
*/ + +static inline void TRK_free_buf(void* ptr, const char* file, const char* func, + u32 line) { + + u32 i, bucket; + + if (!ptr) return; + + bucket = TRKH(ptr); + + /* Find the element on the list... */ + + for (i = 0; i < TRK_cnt[bucket]; i++) + + if (TRK[bucket][i].ptr == ptr) { + + TRK[bucket][i].ptr = 0; + return; + + } + + WARNF("ALLOC: Attempt to free non-allocated memory in %s (%s:%u)", + func, file, line); + +} + + +/* Do a final report on all non-deallocated objects. */ + +static inline void TRK_report(void) { + + u32 i, bucket; + + fflush(0); + + for (bucket = 0; bucket < ALLOC_BUCKETS; bucket++) + for (i = 0; i < TRK_cnt[bucket]; i++) + if (TRK[bucket][i].ptr) + WARNF("ALLOC: Memory never freed, created in %s (%s:%u)", + TRK[bucket][i].func, TRK[bucket][i].file, TRK[bucket][i].line); + +} + + +/* Simple wrappers for non-debugging functions: */ + +static inline void* TRK_ck_alloc(u32 size, const char* file, const char* func, + u32 line) { + + void* ret = DFL_ck_alloc(size); + TRK_alloc_buf(ret, file, func, line); + return ret; + +} + + +static inline void* TRK_ck_realloc(void* orig, u32 size, const char* file, + const char* func, u32 line) { + + void* ret = DFL_ck_realloc(orig, size); + TRK_free_buf(orig, file, func, line); + TRK_alloc_buf(ret, file, func, line); + return ret; + +} + + +static inline void* TRK_ck_realloc_block(void* orig, u32 size, const char* file, + const char* func, u32 line) { + + void* ret = DFL_ck_realloc_block(orig, size); + TRK_free_buf(orig, file, func, line); + TRK_alloc_buf(ret, file, func, line); + return ret; + +} + + +static inline void* TRK_ck_strdup(u8* str, const char* file, const char* func, + u32 line) { + + void* ret = DFL_ck_strdup(str); + TRK_alloc_buf(ret, file, func, line); + return ret; + +} + + +static inline void* TRK_ck_memdup(void* mem, u32 size, const char* file, + const char* func, u32 line) { + + void* ret = DFL_ck_memdup(mem, size); + TRK_alloc_buf(ret, file, func, line); + return ret; + +} + + +static 
inline void* TRK_ck_memdup_str(void* mem, u32 size, const char* file, + const char* func, u32 line) { + + void* ret = DFL_ck_memdup_str(mem, size); + TRK_alloc_buf(ret, file, func, line); + return ret; + +} + + +static inline void TRK_ck_free(void* ptr, const char* file, + const char* func, u32 line) { + + TRK_free_buf(ptr, file, func, line); + DFL_ck_free(ptr); + +} + +/* Aliasing user-facing names to tracking functions: */ + +#define ck_alloc(_p1) \ + TRK_ck_alloc(_p1, __FILE__, __FUNCTION__, __LINE__) + +#define ck_alloc_nozero(_p1) \ + TRK_ck_alloc(_p1, __FILE__, __FUNCTION__, __LINE__) + +#define ck_realloc(_p1, _p2) \ + TRK_ck_realloc(_p1, _p2, __FILE__, __FUNCTION__, __LINE__) + +#define ck_realloc_block(_p1, _p2) \ + TRK_ck_realloc_block(_p1, _p2, __FILE__, __FUNCTION__, __LINE__) + +#define ck_strdup(_p1) \ + TRK_ck_strdup(_p1, __FILE__, __FUNCTION__, __LINE__) + +#define ck_memdup(_p1, _p2) \ + TRK_ck_memdup(_p1, _p2, __FILE__, __FUNCTION__, __LINE__) + +#define ck_memdup_str(_p1, _p2) \ + TRK_ck_memdup_str(_p1, _p2, __FILE__, __FUNCTION__, __LINE__) + +#define ck_free(_p1) \ + TRK_ck_free(_p1, __FILE__, __FUNCTION__, __LINE__) + +#endif /* ^!DEBUG_BUILD */ + +#endif /* ! _HAVE_ALLOC_INL_H */ diff --git a/hopper-instrument/llvm-mode/include/config.h b/hopper-instrument/llvm-mode/include/config.h new file mode 100644 index 0000000..9063c31 --- /dev/null +++ b/hopper-instrument/llvm-mode/include/config.h @@ -0,0 +1,25 @@ +#ifndef _HAVE_LLVM_CONFIG_H +#define _HAVE_LLVM_CONFIG_H + +#ifndef MAP_SIZE_POW2 +#define MAP_SIZE_POW2 16 +#endif +#define MAP_SIZE ((size_t)1 << MAP_SIZE_POW2) + +#define ENABLE_UNFOLD_BRANCH 1 + +#ifndef RR +#define RR(x) (random() % (x)) +#endif + +#include +#include +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef int8_t s8; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; + +#endif /* ! 
_HAVE_DEFS_H */ \ No newline at end of file diff --git a/hopper-instrument/llvm-mode/include/debug.h b/hopper-instrument/llvm-mode/include/debug.h new file mode 100644 index 0000000..0ab108e --- /dev/null +++ b/hopper-instrument/llvm-mode/include/debug.h @@ -0,0 +1,249 @@ +/* + american fuzzy lop - debug / error handling macros + -------------------------------------------------- + + Written and maintained by Michal Zalewski + + Copyright 2013, 2014, 2015, 2016 Google Inc. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + */ + +#ifndef _HAVE_DEBUG_H +#define _HAVE_DEBUG_H + +#include +#include "config.h" + +/******************* + * Terminal colors * + *******************/ + +#ifdef USE_COLOR + +# define cBLK "\x1b[0;30m" +# define cRED "\x1b[0;31m" +# define cGRN "\x1b[0;32m" +# define cBRN "\x1b[0;33m" +# define cBLU "\x1b[0;34m" +# define cMGN "\x1b[0;35m" +# define cCYA "\x1b[0;36m" +# define cLGR "\x1b[0;37m" +# define cGRA "\x1b[1;90m" +# define cLRD "\x1b[1;91m" +# define cLGN "\x1b[1;92m" +# define cYEL "\x1b[1;93m" +# define cLBL "\x1b[1;94m" +# define cPIN "\x1b[1;95m" +# define cLCY "\x1b[1;96m" +# define cBRI "\x1b[1;97m" +# define cRST "\x1b[0m" + +# define bgBLK "\x1b[40m" +# define bgRED "\x1b[41m" +# define bgGRN "\x1b[42m" +# define bgBRN "\x1b[43m" +# define bgBLU "\x1b[44m" +# define bgMGN "\x1b[45m" +# define bgCYA "\x1b[46m" +# define bgLGR "\x1b[47m" +# define bgGRA "\x1b[100m" +# define bgLRD "\x1b[101m" +# define bgLGN "\x1b[102m" +# define bgYEL "\x1b[103m" +# define bgLBL "\x1b[104m" +# define bgPIN "\x1b[105m" +# define bgLCY "\x1b[106m" +# define bgBRI "\x1b[107m" + +#else + +# define cBLK "" +# define cRED "" +# define cGRN "" +# define cBRN "" +# define cBLU "" +# define cMGN "" +# define cCYA "" +# define cLGR "" +# define cGRA "" +# 
define cLRD "" +# define cLGN "" +# define cYEL "" +# define cLBL "" +# define cPIN "" +# define cLCY "" +# define cBRI "" +# define cRST "" + +# define bgBLK "" +# define bgRED "" +# define bgGRN "" +# define bgBRN "" +# define bgBLU "" +# define bgMGN "" +# define bgCYA "" +# define bgLGR "" +# define bgGRA "" +# define bgLRD "" +# define bgLGN "" +# define bgYEL "" +# define bgLBL "" +# define bgPIN "" +# define bgLCY "" +# define bgBRI "" + +#endif /* ^USE_COLOR */ + +/************************* + * Box drawing sequences * + *************************/ + +#ifdef FANCY_BOXES + +# define SET_G1 "\x1b)0" /* Set G1 for box drawing */ +# define RESET_G1 "\x1b)B" /* Reset G1 to ASCII */ +# define bSTART "\x0e" /* Enter G1 drawing mode */ +# define bSTOP "\x0f" /* Leave G1 drawing mode */ +# define bH "q" /* Horizontal line */ +# define bV "x" /* Vertical line */ +# define bLT "l" /* Left top corner */ +# define bRT "k" /* Right top corner */ +# define bLB "m" /* Left bottom corner */ +# define bRB "j" /* Right bottom corner */ +# define bX "n" /* Cross */ +# define bVR "t" /* Vertical, branch right */ +# define bVL "u" /* Vertical, branch left */ +# define bHT "v" /* Horizontal, branch top */ +# define bHB "w" /* Horizontal, branch bottom */ + +#else + +# define SET_G1 "" +# define RESET_G1 "" +# define bSTART "" +# define bSTOP "" +# define bH "-" +# define bV "|" +# define bLT "+" +# define bRT "+" +# define bLB "+" +# define bRB "+" +# define bX "+" +# define bVR "+" +# define bVL "+" +# define bHT "+" +# define bHB "+" + +#endif /* ^FANCY_BOXES */ + +/*********************** + * Misc terminal codes * + ***********************/ + +#define TERM_HOME "\x1b[H" +#define TERM_CLEAR TERM_HOME "\x1b[2J" +#define cEOL "\x1b[0K" +#define CURSOR_HIDE "\x1b[?25l" +#define CURSOR_SHOW "\x1b[?25h" + +/************************ + * Debug & error macros * + ************************/ + +/* Just print stuff to the appropriate stream. 
*/ + +#ifdef MESSAGES_TO_STDOUT +# define SAYF(x...) printf(x) +#else +# define SAYF(x...) fprintf(stderr, x) +#endif /* ^MESSAGES_TO_STDOUT */ + +/* Show a prefixed warning. */ + +#define WARNF(x...) do { \ + SAYF(cYEL "[!] " cBRI "WARNING: " cRST x); \ + SAYF(cRST "\n"); \ + } while (0) + +/* Show a prefixed "doing something" message. */ + +#define ACTF(x...) do { \ + SAYF(cLBL "[*] " cRST x); \ + SAYF(cRST "\n"); \ + } while (0) + +/* Show a prefixed "success" message. */ + +#define OKF(x...) do { \ + SAYF(cLGN "[+] " cRST x); \ + SAYF(cRST "\n"); \ + } while (0) + +/* Show a prefixed fatal error message (not used in afl). */ + +#define BADF(x...) do { \ + SAYF(cLRD "\n[-] " cRST x); \ + SAYF(cRST "\n"); \ + } while (0) + +/* Die with a verbose non-OS fatal error message. */ + +#define FATAL(x...) do { \ + SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] PROGRAM ABORT : " \ + cBRI x); \ + SAYF(cLRD "\n Location : " cRST "%s(), %s:%u\n\n", \ + __FUNCTION__, __FILE__, __LINE__); \ + exit(1); \ + } while (0) + +/* Die by calling abort() to provide a core dump. */ + +#define ABORT(x...) do { \ + SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] PROGRAM ABORT : " \ + cBRI x); \ + SAYF(cLRD "\n Stop location : " cRST "%s(), %s:%u\n\n", \ + __FUNCTION__, __FILE__, __LINE__); \ + abort(); \ + } while (0) + +/* Die while also including the output of perror(). */ + +#define PFATAL(x...) do { \ + fflush(stdout); \ + SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] SYSTEM ERROR : " \ + cBRI x); \ + SAYF(cLRD "\n Stop location : " cRST "%s(), %s:%u\n", \ + __FUNCTION__, __FILE__, __LINE__); \ + SAYF(cLRD " OS message : " cRST "%s\n", strerror(errno)); \ + exit(1); \ + } while (0) + +/* Die with FAULT() or PFAULT() depending on the value of res (used to + interpret different failure modes for read(), write(), etc). */ + +#define RPFATAL(res, x...) 
do { \ + if (res < 0) PFATAL(x); else FATAL(x); \ + } while (0) + +/* Error-checking versions of read() and write() that call RPFATAL() as + appropriate. */ + +#define ck_write(fd, buf, len, fn) do { \ + u32 _len = (len); \ + s32 _res = write(fd, buf, _len); \ + if (_res != _len) RPFATAL(_res, "Short write to %s", fn); \ + } while (0) + +#define ck_read(fd, buf, len, fn) do { \ + u32 _len = (len); \ + s32 _res = read(fd, buf, _len); \ + if (_res != _len) RPFATAL(_res, "Short read from %s", fn); \ + } while (0) + +#endif /* ! _HAVE_DEBUG_H */ diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..2ec76a5 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,11 @@ +# https://github.com/rust-lang/rustfmt/blob/master/Configurations.md +tab_spaces = 4 +edition = "2021" +# max_width = 100 +use_small_heuristics = "Default" +# indent_style = "Block" +# combine_control_expr = false +# brace_style = "SameLineWhere" +# control_brace_style = "AlwaysSameLine" +# merge_imports = true +# match_block_trailing_comma = true diff --git a/testsuite/README.md b/testsuite/README.md new file mode 100644 index 0000000..1de312e --- /dev/null +++ b/testsuite/README.md @@ -0,0 +1,59 @@ +# Testsuite for Hopper + +Testsuite includes some simple functions that wrote by developers artificially for testing. + +## Build and run testsuite + +- build and run all tests +``` +./test.sh build_all +./test.sh test_all +``` + +- build and run specific test case +```sh +# build library under `basic` directory +./test.sh build basic +# test specific function under `basic`` directory +./test.sh test basic test_cmp_var +``` + +If test success (find any crash in N rounds, N is defined in test.sh), the script will print `test success`. +otherwise, it will print `test fail`. + +## How to write testcase + +If you want to define a new library, you should create a directory (e.g. 
test), and it should have the following files: +- test.c +- test.h +- custom.rule + + +If you just want to add a test case to an existing library: +- Define the *Entry* function by adding a function whose name starts with `test_`. The *Entry* function can be made to *crash* by specific inputs. +- Define the *Tool* function if needed by adding a function whose name starts with `util_`. The *Tool* functions are used for providing or mutating arguments for *Entry* functions. +- Define the dependencies between functions. Just add a comment starting with `// depend: ` in the header file. + +```c +void util_set_gval(); +// depend: util_set_gval +void test_use_gval(int num); +``` + +- If you want to ignore a `test_*` function in the `test` command, just add a comment with *ignore* above its declaration. +```c +// ignore +void test_variadic_function_ptr(void (*)(int, ...), int); +``` + +- If you want to test whether the tool can infer some constraints successfully or not, e.g. the first argument should be non-null and the second argument is the length of the first one, you can define the constraints that should be inferred in the following way. +```c +// infer: @[$0] = $non_null; @[$1] = $len($0) +void test_buf(char* ptr, int len); +``` + +- If the API function is expected to crash with the *abort* signal, you can add an `abort` comment to enable that check. `abort` is checked by default. 
+```c +// abort +void test_sth(int maigic); +``` diff --git a/testsuite/assert/assert.c b/testsuite/assert/assert.c new file mode 100644 index 0000000..61efbc1 --- /dev/null +++ b/testsuite/assert/assert.c @@ -0,0 +1,16 @@ +#include +#include + +int test_assert_eq(int magic) { + if (magic == 23334) { + return 1; + } + return 0; +} + +int test_assert_neq(int magic) { + if (magic == 23334) { + return 1; + } + return 0; +} \ No newline at end of file diff --git a/testsuite/assert/assert.h b/testsuite/assert/assert.h new file mode 100644 index 0000000..ca24ae5 --- /dev/null +++ b/testsuite/assert/assert.h @@ -0,0 +1,7 @@ +/* + Testing for assertion +*/ + +int test_assert_eq(int magic); + +int test_assert_neq(int magic); \ No newline at end of file diff --git a/testsuite/assert/custom.rule b/testsuite/assert/custom.rule new file mode 100644 index 0000000..584f78a --- /dev/null +++ b/testsuite/assert/custom.rule @@ -0,0 +1,2 @@ +assert test_assert_eq == 0 +assert test_assert_neq != 1 diff --git a/testsuite/basic/basic.c b/testsuite/basic/basic.c new file mode 100644 index 0000000..e35ac49 --- /dev/null +++ b/testsuite/basic/basic.c @@ -0,0 +1,215 @@ +#include "basic.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void test_cmp_var(int a, long b, char c) { + printf("a: %d, b:%ld, c:%d\n", a, b, c); + if (a == 20000) { + if (b > 1000000 && b < 1000080) { + if (c == 0xa) { + abort(); + } + } + } +} + +void test_cmp_struct(struct CmpStruct p) { + if (p.x == 123345) { + if (p.y == 45677) { + abort(); + } + } +} + +void test_switch(int a, int b) { + switch (a) { + case 12312213: + printf("11\n"); + break; + case -1111: + printf("3\n"); + break; + case 3330000: + printf("4\n"); + if (b == 77881) { + abort(); + } + break; + case 5888: + printf("5\n"); + break; + case -897978: + printf("6\n"); + break; + default: + break; + } +} + +void test_switch2(int a) { + switch (a) { + case 1: + printf("11"); + break; + case 2: + 
printf("22"); + break; + case 3: + printf("3"); + break; + case 4: + printf("4"); + break; + case 5: + printf("5"); + break; + case 6: + printf("6"); + break; + case 7: + printf("6"); + break; + case 8: + printf("6"); + break; + case 9: + printf("6"); + break; + case 10: + printf("6"); + break; + case 9999: + printf("6"); + break; + case 10000: + printf("6"); + abort(); + break; + case 10001: + printf("6"); + break; + default: + printf("123"); + break; + } +} + +void test_cmp_float(float a, float b) { + // ucomiss + if (a == 1.2) { + printf("hey, you hit it2 \n"); + } + if (b == 2.1f) { + printf("hey, you hit it \n"); + abort(); + } +} + +int gval = 0; + +void util_set_gval() { gval = 1; } + +void test_use_gval(int num) { + if (gval > 0 && num == 12345) { + abort(); + } +} + +char *util_static_ret() { return "test"; } + +void test_enum(enum TestEnum v1, enum TestEnum v2) { + if (v1 == Tue) { + if (v2 == Sun) { + abort(); + } + } +} + +void test_union(TestUnion2 a) { + if (a.member1 != NULL) { + if (a.member1->cat_id == 444444) { + abort(); + } + } +} + +void test_complicated_struct(ComplicatedStruct *a) { + if (a != NULL) { + if (a->ty == 2) { + if (a->inner_union.member2.id == 11111) { + if (a->inner_union2 != NULL) { + abort(); + } + } + } + } +} + +void test_complicated_struct2(ComplicatedStruct *a) { + if (a != NULL) { + if (a->ty == 3) { + if (a->inner_union.member3 != NULL) { + if (a->inner_union.member3->val == 222222) { + abort(); + } + } + } + if (a->ty == 1) { + if (a->inner_union.member1 != NULL) { + if (a->inner_union.member1->cat_id == 3333333) { + abort(); + } + } + } + if (a->ty == 6) { + if (a->inner_union.member6 != NULL) { + if (a->inner_union.member6->len == 3333333) { + abort(); + } + } + } + } +} + +int util_variadic_function1(int a, ...) { return 0; } + +int util_variadic_function2(int a, ...) { return 0; } + +int util_variadic_function3(int a, ...) 
{ return 0; } + +void util_long_args_function(int a, int b, int c, int d, char e, char f, char g, + char h, long i, long j, long k, long l, long m) {} + +void test_variadic_function_ptr(int (*f)(int, ...), int b) { + if (f == util_variadic_function3 && b == 100000) { + abort(); + } +} + +void test_long_args_one_level(LONG_FN_PTR a, int b) { + if (a == NULL && b == 100000) { + abort(); + } +} + +void test_long_args_two_level(LONG_FN_PTR *a, int b) { + if (a) { + if (*a == NULL && b == 100000) { + abort(); + } + } +} + +void test_private_field(ValWithPrivateField obj) { + if (obj.val == 0x12345) { + abort(); + } +} diff --git a/testsuite/basic/basic.h b/testsuite/basic/basic.h new file mode 100644 index 0000000..edaffd0 --- /dev/null +++ b/testsuite/basic/basic.h @@ -0,0 +1,84 @@ +/* + Basic testing for C APIs +*/ + +#include "../common.h" + +/* Test value compare */ +void test_cmp_var(int a, long b, char c); +void test_cmp_struct(struct CmpStruct p); +void test_switch(int a, int b); +void test_switch2(int a); +// ignore +// float compare is not support now, it use ucomiss instruction +void test_cmp_float(float a, float b); + +/* Implicitly Related calls */ +void util_set_gval(); +// depend: util_set_gval +void test_use_gval(int num); + +/* utils */ +char *util_static_ret(); + +/* Test for complicated strctures */ +enum TestEnum { + Mon, + Tue, + Wed, + Thu, + Fri, + Sut, + Sun, +}; + +typedef union TestUnion { + int i; + float f; + char str[20]; +} TestUnion; + + +typedef union TestUnion2 { + int num; + TestCustom *member1; + int num2; +} TestUnion2; + +typedef struct ComplicatedStruct { + int ty; + union { + int num; + TestCustom *member1; + TestCustom2 member2; + ListNode *member3; + ListNodeWrapper *member4; + ListNode *member5; + ArrayWrap *member6; + } inner_union; + TestUnion2 *inner_union2; +} ComplicatedStruct; +void test_enum(enum TestEnum v1, enum TestEnum v2); +void test_union(TestUnion2); +void test_complicated_struct(ComplicatedStruct *); +void 
test_complicated_struct2(ComplicatedStruct *); + +/* Test private fields for structure */ +typedef struct ValWithPrivateField { + int val; + int __unused[16]; +} ValWithPrivateField; + +void test_private_field(ValWithPrivateField obj); + +/* Test long and variadic arguments and function pointer */ +void util_long_args_function(int, int, int, int, char, char, char, char, long, + long, long, long, long); +int util_variadic_function1(int, ...); +int util_variadic_function2(int, ...); +int util_variadic_function3(int, ...); +void test_long_args_one_level(LONG_FN_PTR, int); +void test_long_args_two_level(LONG_FN_PTR *, int); +// ignore +void test_variadic_function_ptr(int (*)(int, ...), int); + diff --git a/testsuite/basic/custom.rule b/testsuite/basic/custom.rule new file mode 100644 index 0000000..b15a0ae --- /dev/null +++ b/testsuite/basic/custom.rule @@ -0,0 +1,12 @@ +func_exclude util_static_ret + +func test_union[$0] = $use(member1) +// type ComplicatedStruct["inner_union"] = $use("member2") <- test_complicated_struct[$0] +// type ComplicatedStruct["ty"] = 2 <- test_complicated_struct[$0] +func test_complicated_struct[$0][&.$0.inner_union] = $use(member2) +func test_complicated_struct[$0][&.$0.ty] = 2 +// type ComplicatedStruct["inner_union2"] = $use("num") <- test_complicated_struct[$0] +// type ComplicatedStruct["inner_union"] = $use("member3") <- test_complicated_struct2[$0] +// type ComplicatedStruct["ty"] = 3 <- test_complicated_struct2[$0] +func test_complicated_struct2[$0][&.$0.inner_union] = $use(member3) +func test_complicated_struct2[$0][&.$0.ty] = 3 diff --git a/testsuite/buf/buf.c b/testsuite/buf/buf.c new file mode 100644 index 0000000..c3e2058 --- /dev/null +++ b/testsuite/buf/buf.c @@ -0,0 +1,194 @@ +#include "buf.h" +#include +#include +#include +#include +#include +#include +#include + +void test_load_buf(char *buf, int len) { + if (len < 20) { + return; + } + + uint16_t x = 0; + int32_t y = 0; + int32_t z = 0; + uint32_t a = 0; + + memcpy(&x, buf 
+ 1, 2); // x 1 - 2 + memcpy(&y, buf + 4, 4); // y 4 - 7 + memcpy(&z, buf + 10, 4); // 10 - 13 + memcpy(&a, buf + 14, 4); // 14 - 17 + printf("x: %d, y:%d, z: %d, a: %d\n", x, y, z, a); + if (x > 12300 && x < 12350 && z < -100000000 && z > -100000005 && + z != -100000003 && y >= 987654321 && y <= 987654325 && a == 123456789) { + printf("hey, you hit it \n"); + abort(); + } +} + +void test_load_fp(FILE *fp) { + char buf[255]; + if (!fp) { + printf("st err\n"); + return; + } + int len = 20; + size_t ret = fread(buf, sizeof *buf, len, fp); + fclose(fp); + printf("len: %ld\n", ret); + if (ret < len) { + printf("input fail \n"); + return; + } + test_load_buf(buf, len); +} + + +void test_load_file(char *file_name) { + FILE *fp = fopen(file_name, "rb"); + test_load_fp(fp); +} + +void test_load_file2(char *arg1) { + FILE *fp = fopen(arg1, "rb"); + test_load_fp(fp); +} + +void test_load_file3(char *arg1) { + printf("filename: %s\n", arg1); + int fd = open(arg1, O_RDONLY); + printf("fd: %d\n", fd); + if (fd > 0) { + char buf[50]; + int n = read(fd, buf, 20); + printf("read %d byte\n", n); + if (n >= 20) { + test_load_buf(buf, n); + } + } +} + +void test_load_file4(ArrayWrap wrap) { + FILE *fp = fopen(wrap.name, "rb"); + test_load_fp(fp); +} + +void test_load_fd(int fd) { + FILE* fp = fdopen(fd, "rb"); + test_load_fp(fp); +} + +void test_load_fd2(FdWrap wrap) { + FILE* fp = fdopen(wrap.fd, "rb"); + test_load_fp(fp); +} + +void test_long_buffer(char *buffer, int a) { + if (buffer[550]) { + if (a == 123456) { + abort(); + } + } +} + +void test_long_buffer2(ArrayWrap a, int b) { + if (a.name != NULL && a.name[550]) { + if (b == 123456) { + abort(); + } + } +} + +void test_long_buffer3(ArrayWrap *a, int b) { + if (a != NULL && a->name[550]) { + if (b == 123456) { + abort(); + } + } +} + +char *util_get_buf() { + char *buf = malloc(50); + memset(buf, 0, 50); + strcpy(buf, "{ password 123456 }"); + return buf; +} + +char *util_get_buf2() { + char *buf = malloc(50); + memset(buf, 0, 
50); + strcpy(buf, "{ PASSWORD 666123 }"); + return buf; +} + +void test_buf_splice(int magic, char *buf) { + if (buf == NULL) { + return; + } + if (magic != 66666) return; + + char key[100] = "empty"; + int value = 0; + int ret = sscanf(buf, "{ %s %d }", key, &value); + printf("key: %s, value: %d, ret: %d\n", key, value, ret); + + if (strncmp(key, "password", 10) == 0) { + if (value == 66612) { + abort(); + } + } + printf("key: %s, value: %d, ret: %d\n", key, value, ret); + if (strncmp(key, "PASSWORD", 10) == 0) { + if (value == 123456) { + abort(); + } + } +} + +void test_buf_seed(char *buf, int len) { + if (buf != NULL && len >= 9) { + int val = atoi(buf); + printf("buf: %s, val: %d\n", buf, val); + if (val == 12345678) { + abort(); + } + } +} + +void test_buffer_len_and_non_null(int sw, ArrayWrap *array_list, int n) { + if (n < 10) return; + for (int i = 0; i < 10; i++) { + ArrayWrap a = array_list[i]; + for (int j = 0; j < a.len; j++) { + printf("%c\n", a.name[j]); + } + } + if (sw == 123456) { + abort(); + } +} + +void test_dict(char *buf, int len) { + if (len < 12) { + return; + } + printf("last: %d\n", buf[len - 1]); + if (buf[0] != 'h') { + return; + } + for (int i = 0; i < 6; i++) { + buf[i] = toupper(buf[0]); + } + if (strcmp(buf, "HOPPER") != 0) { + printf("hopper\n"); + if ((buf[6] - buf[7] == 0) && buf[6] == 0x66) { + printf("66 \n"); + if (buf[8] - buf[9] + buf[10] - buf[11] == 2) { + abort(); + } + } + } +} \ No newline at end of file diff --git a/testsuite/buf/buf.h b/testsuite/buf/buf.h new file mode 100644 index 0000000..23b8f97 --- /dev/null +++ b/testsuite/buf/buf.h @@ -0,0 +1,48 @@ +#include "../common.h" + +/* Input buffer */ +void test_load_buf(char *buf, int len); +void test_load_fp(FILE *fp); + +/* File input */ +// abort +// infer: @[$0] = $read_file +void test_load_file(char *file_name); +// abort +// infer: @[$0] = $read_file +void test_load_file2(char *arg1); +// abort +// infer: @[$0] = $read_file +void test_load_file3(char *arg1); 
+// abort +// infer: @[$0][name] = $read_file +void test_load_file4(ArrayWrap warp); +// abort +// infer: @[$0] = $read_fd +void test_load_fd(int fd); + +typedef struct FdWrap { + char *name; + int fd; +} FdWrap; + +// abort +// infer: @[$0][fd] = $read_fd +void test_load_fd2(FdWrap wrap); + +void test_long_buffer(char *, int); +void test_long_buffer2(ArrayWrap, int); +void test_long_buffer3(ArrayWrap *, int); + +void test_dict(char *buf, int len); + +char *util_get_buf(); +char *util_get_buf2(); + +// depend: util_get_buf,util_get_buf2 +// ignore +void test_buf_splice(int magic, char *buf); + +void test_buf_seed(char *buf, int len); + +void test_buffer_len_and_non_null(int sw, ArrayWrap *array_list, int n); diff --git a/testsuite/buf/custom.rule b/testsuite/buf/custom.rule new file mode 100644 index 0000000..e69de29 diff --git a/testsuite/buf/dict b/testsuite/buf/dict new file mode 100644 index 0000000..480af3a --- /dev/null +++ b/testsuite/buf/dict @@ -0,0 +1,4 @@ +# test dict +kw1="hopper\x66\x66" +# key word 2 +"2022" \ No newline at end of file diff --git a/testsuite/buf/seeds/@buf/buf.txt b/testsuite/buf/seeds/@buf/buf.txt new file mode 100644 index 0000000..255c783 --- /dev/null +++ b/testsuite/buf/seeds/@buf/buf.txt @@ -0,0 +1 @@ +1234545555 \ No newline at end of file diff --git a/testsuite/buf/seeds/seed1.txt b/testsuite/buf/seeds/seed1.txt new file mode 100644 index 0000000..e9a9ea1 --- /dev/null +++ b/testsuite/buf/seeds/seed1.txt @@ -0,0 +1 @@ +12345678 \ No newline at end of file diff --git a/testsuite/common.h b/testsuite/common.h new file mode 100644 index 0000000..19a56c8 --- /dev/null +++ b/testsuite/common.h @@ -0,0 +1,40 @@ +#include + +/* Types and Structs */ +typedef unsigned int uint32_t; +typedef void (*LONG_FN_PTR)(int, int, int, int, char, char, char, char, long, + long, long, long, long); +typedef void *HANDLE; + +typedef struct TestCustom { + char title[10]; + int book_id; + int cat_id; + float price; +} TestCustom; + +typedef struct 
CmpStruct { + int x; + int y; +} CmpStruct; + +typedef struct TestCustom2 { + int id; + char content[10]; +} TestCustom2; + +typedef struct ArrayWrap { + char *name; + int len; +} ArrayWrap; + +typedef struct ListNode { + int val; + struct ListNode *next; + struct ListNode *next2; + struct ListNode *next3; +} ListNode; + +typedef struct ListNodeWrapper { + ListNode *inner; +} ListNodeWrapper; diff --git a/testsuite/constraint/constraint.c b/testsuite/constraint/constraint.c new file mode 100644 index 0000000..1e69752 --- /dev/null +++ b/testsuite/constraint/constraint.c @@ -0,0 +1,205 @@ +#include "constraint.h" +#include +#include +#include +#include + +void test_div_zero(int a, int b ) { + int c = 1023 / b; + if (c > 50 && a == 12345) { + abort(); + } +} + +void test_null_ptr(int a, ArrayWrap *b, int c) { + if (a == 123) { + if (b != NULL && *(b->name)) { + if (c == 123456) { + abort(); + } + } + } +} + +void test_null_field(ArrayWrap arr) { + if (strcmp(arr.name, "test") == 0) { + abort(); + } +} + +void test_buffer_len(int sw, int sw2, unsigned char *buffer, int *len) { + // sw1 to skip the pilot infer + if (buffer != NULL && len != NULL && sw == 654321) { + int a = len[0]; + if (a < 16) { + return; + } + printf("len: %d, val: %d\n", a, buffer[(a - 1)]); + // check sw2 to pass the crash infer + if (sw2 == 123456) { + abort(); + } + } +} + +void test_buffer_len_in_struct(ArrayWrap arr) { + if (arr.name == NULL) return; + for (int i = 0; i < arr.len; i++) { + printf("%c\n", arr.name[i]); + } +} + +void test_buffer_combined_len(int sw, int sw2, unsigned char *buffer, + unsigned int a, unsigned int b) { + // sw1 to skip the pilot infer + if (buffer != NULL && sw == 654321) { + int n = a * b; + if (n < 4) return; + for (int i = 0; i < n; i++) { + printf("Test %d", buffer[i]); + } + if (a == 1 || b == 1) return; + // check sw2 to pass the crash infer + if (sw2 == 123456) { + abort(); + } + } +} + +void test_buffer_len_with_constant(int sw, int sw2, unsigned char 
*buffer, + unsigned int len) { + if (buffer != NULL && sw == 654321) { + int n = len * 2; + if (n < 32) return; + for (int i = 0; i < n; i++) { + printf("Test %d", buffer[i]); + } + // check sw2 to pass the crash infer + if (sw2 == 123456) { + abort(); + } + } +} + +void test_buffer_len_with_pos(int sw, int sw2, unsigned char *buffer, + unsigned int n, unsigned int spos, + unsigned int epos) { + // sw1 to skip the pilot infer + if (buffer != NULL && sw == 654321) { + if (n < 20 || spos >= n || epos >= n || epos <= spos) return; + for (int i = spos; i <= epos; i++) { + printf("Test %d", buffer[i]); + } + // check sw2 to pass the crash infer + if (sw2 == 123456) { + abort(); + } + } +} + +void test_buffer_len2(int sw, int sw2, ArrayWrap *array) { + // check sw1 to skip the pilot infer + if (array != NULL && array->name != NULL && sw == 654321) { + if (array->len < 20) return; + if ((array->name[array->len - 1] >= 0)) { + // check sw2 to pass the crash infer + if (sw2 == 123456) { + abort(); + } + } + } +} + +void test_buffer_len3(char *arg1, unsigned int arg2) { + if (arg2 < 20) return; + for (int i = 0; i < arg2; i++) { + printf("Test %d", arg1[i]); + } + if (arg2 > 3) { + if (arg1[0] == 'a') { + if (arg1[1] == 'b') { + if (arg1[2] == 'c') { + abort(); + } + } + } + } +} + + +void test_two_buffer_len(char* buf1, char* buf2, int len, int sw) { + if (buf1 != NULL && buf2 != NULL) { + if (len < 16) { + return; + } + for (int i = 0; i < len; i++) { + printf("Test %d %d", buf1[i], buf2[i]); + } + if (sw == 12345) { + abort(); + } + } +} + + +void test_buffer_index(char *buf, unsigned int index, int magic) { + if (index < 20) return; + int val = buf[index]; + if (val == 0x48 && magic == 12345) { + abort(); + } +} + +void test_buffer_index2(char *buf, unsigned int index, int magic) { + if (index < 15) return; + int k = index * 3; + int val = buf[k]; + if (val == 0x48 && magic == 12345) { + abort(); + } +} + +void test_buffer_index3(char *name, int index) { + if (name == 
NULL) return; + printf("%d\n", name[index]); +} + +void test_underflow(int val, int val2, int val3) { + unsigned int loop = val - 1; + for (unsigned int i = 0; i < loop; i++) { + printf("test"); + } + unsigned int loop2 = val2 - 1; + for (unsigned int i = 0; i < loop2; i++) { + printf("test"); + } + if (val == 1234 && val3 == 7712) { + abort(); + } +} + +void test_oom(unsigned int num) { + for (int i = 0; i < num; i++) { + int size = 500000; + int *ptr = malloc(size); + printf("ptr: %p\n", ptr); + if (ptr != NULL) { + memset(ptr, 0, size); + } + } +} + +void test_timeout(unsigned int num) { + if (num > 2000) return; + sleep(num); +} + +uint32_t test_get_uint_32(char* buf) { + uint32_t uval = + ((uint32_t)(*(buf )) << 24) + + ((uint32_t)(*(buf + 1)) << 16) + + ((uint32_t)(*(buf + 2)) << 8) + + ((uint32_t)(*(buf + 3)) ) ; + + return uval; +} diff --git a/testsuite/constraint/constraint.h b/testsuite/constraint/constraint.h new file mode 100644 index 0000000..214fedc --- /dev/null +++ b/testsuite/constraint/constraint.h @@ -0,0 +1,72 @@ +/* + Test for constraint inference +*/ +#include "../common.h" + +// abort +// infer: @[$1] = $non_zero +void test_div_zero(int a, int b ); + +// abort +// infer: @[$1][&.$0.name] = $non_null +void test_null_ptr(int, ArrayWrap *, int); + +// abort +// infer: @[$0][name] = $non_null +void test_null_field(ArrayWrap arr); + +// abort +// infer: @[$3][&] = $len($2) +void test_buffer_len(int, int, unsigned char*, int*); + +// infer: @[$0][len] = $len([$0][name]) +void test_buffer_len_in_struct(ArrayWrap arr); + +// abort +// infer: @[$2] = $len_factors($3, $4) +void test_buffer_combined_len(int sw, int sw2, unsigned char *buffer, unsigned int a, unsigned int b); + +// abort +// infer: @[$2] = $len_factors(2, $3) +void test_buffer_len_with_constant(int sw, int sw2, unsigned char *buffer, unsigned int len); + +// abort +void test_buffer_len_with_pos(int sw, int sw2, unsigned char *buffer, + unsigned int n, unsigned int spos, unsigned int 
epos) ; + +// abort +// infer: @[$2][&.$0.len] = $len([$2][&.$0.name]) +void test_buffer_len2(int, int, ArrayWrap*); + +// abort +// infer: @[$1] = $len($0) +void test_buffer_len3(char *arg1, unsigned int arg2); + +// abort +// infer: @[$2] = $len($0), @[$1] = $arr_len($0) +void test_two_buffer_len(char* buf1, char* buf2, int len, int sw); + +// abort +// infer: @[$1] = $range(0, $len($0)) +void test_buffer_index(char *buf, unsigned int index, int magic); + +// abort +// infer: @[$0] = $len_factors(3, 0..$len($1)) +void test_buffer_index2(char *buf, unsigned int index, int magic); + +// infer: @[$1] = $range(0, $len(0)) +void test_buffer_index3(char *name, int index); + +// abort +// infer: @[$0] = $range(1, 4096); @[$1] = $range(1, 4096) +void test_underflow(int val, int val2, int val3); + +// infer: @[$0] = $range(0, 4096); +void test_oom(unsigned int num); + +// ignore +// infer: @[$0] = $range(0, 4096); +void test_timeout(unsigned int num); + +// infer: @[$0][&] = $arr_len(4) +uint32_t test_get_uint_32(char* buf); \ No newline at end of file diff --git a/testsuite/constraint/custom.rule b/testsuite/constraint/custom.rule new file mode 100644 index 0000000..e69de29 diff --git a/testsuite/pointer/custom.rule b/testsuite/pointer/custom.rule new file mode 100644 index 0000000..38002e6 --- /dev/null +++ b/testsuite/pointer/custom.rule @@ -0,0 +1,3 @@ +func test_custom_cast[$0] = $cast_from(*mut u32) +func test_custom_cast2[$1] = $cast_from(*mut ListNode) +// type Partial = $opaque diff --git a/testsuite/pointer/pointer.c b/testsuite/pointer/pointer.c new file mode 100644 index 0000000..d16830d --- /dev/null +++ b/testsuite/pointer/pointer.c @@ -0,0 +1,245 @@ +#include "pointer.h" + +#include +#include + +struct OpaqueType *util_create_opaque() { + void *ptr = malloc(sizeof(ArrayWrap)); + ArrayWrap *arr = (ArrayWrap *)ptr; + arr->name = NULL; + arr->len = 0; + return ptr; +} + +void util_opaque_init(struct OpaqueType *ptr) { + if (ptr != NULL) { + ArrayWrap *arr = 
(ArrayWrap *)ptr; + arr->name = malloc(10); + strcpy(arr->name, "test"); + arr->name[4] = 0; + arr->len = 4; + } +} + +void test_opaque_arg(struct OpaqueType *ptr, int magic) { + if (ptr == NULL) exit(0); + ArrayWrap *arr = (ArrayWrap *)ptr; + if (magic == 1234 && arr->name != NULL) { + char name[20]; + strcpy(name, arr->name); + name[arr->len] = 0; + printf("name: %s\n", name); + if (strcmp(name, "test") == 0) { + abort(); + } + } +} + +void test_opaque_arg2(struct OpaqueType *ptr) { + if (ptr == NULL) abort(); +} + +HANDLE util_handle() { + int *ptr = malloc(sizeof(int)); + *ptr = 12345; + return ptr; +} + +void test_handle(HANDLE handle, int magic) { + if (handle != NULL) { + int *val = (int *)handle; + if (*val == 12345 && magic == 789111) { + abort(); + } + } +} + +void test_handle_wrap(HandleWrap handle, int magic) { + test_handle(handle.handle, magic); +} + +struct Full { + struct Partial x; + int *b; + char *c; +}; + +SemiOpaque *util_get_partial_pointer() { + struct Full *ret = (struct Full *)malloc(sizeof(struct Full)); + int *b = (int *)malloc(sizeof(int)); + *b = 123456; + ret->x.a = 0; + ret->b = b; + ret->c = "test"; + return (SemiOpaque *)ret; +} + +void test_partial_pointer(SemiOpaque *ptr, int magic) { + struct Full *full = (struct Full *)ptr; + if (strcmp(full->c, "test") == 0 && *(full->b) == 123456 && magic == 6666) { + abort(); + } +} + +void util_init_opaque_type(struct OpaqueType **a) { + ArrayWrap *arr = (ArrayWrap *)malloc(sizeof(ArrayWrap)); + arr->name = "test"; + arr->len = 4; + *a = (struct OpaqueType *)arr; +} + +void test_init_opaque(OpaqueWrapper *ptr, int b) { + if (ptr == NULL) exit(0); + ArrayWrap *arr = (ArrayWrap *)ptr->opaque; + if (arr->len == 4 && strcmp(arr->name, "test") == 0) { + if (b == 123456) { + abort(); + } + } +} + +void util_fn_pointer(void (*f)(TestCustom *p), TestCustom *p) { + if (f != NULL) (*f)(p); +} + +void test_function_pointer_ret(int a, TestCustom (*f)(int, int)) { + TestCustom ret = f(0, 1); + if 
(ret.price == 0) { + if (a == 123456) { + abort(); + } + } +} + +void test_multi_func_pointer(TestCustom (*f)(int, int), + TestCustom (*f2)(int, int), + TestCustom (*f3)(int)) { + TestCustom r1 = f(1, 1); + TestCustom r2 = f2(1, 1); + TestCustom r3 = f3(1); + if (r1.title[0] == '\0' && r2.cat_id == 0 && r3.cat_id == 0) { + abort(); + } +} + +ListNode *util_reference_circle() { + ListNode *first = (ListNode *)malloc(sizeof(ListNode)); + first->val = 1; + first->next = NULL; + // first->next2 = NULL; + // first->next3 = NULL; + ListNode *second = (ListNode *)malloc(sizeof(ListNode)); + second->val = 2; + second->next = first; + // second->next2 = NULL; + // second->next3 = NULL; + first->next = second; + printf("%p", first); + return first; +} + +void test_visit_list_node(ListNode *curr) { + if (curr != NULL) { + printf("next %p\n", curr->next); + test_visit_list_node(curr->next); + } +} + +void test_visit_list_node2(ListNode **curr, int size) { + if (curr != NULL) { + int i = 0; + while (i < size) { + test_visit_list_node(curr[i]); + i++; + } + } +} + +void test_visit_list_node3(ListNodeWrapper *a) { + if (a != NULL) { + test_visit_list_node(a->inner); + } +} + +TestCustom2 *util_create_TestCustom2() { + return (TestCustom2 *)malloc(sizeof(TestCustom2)); +} + +char *util_get_content(TestCustom2 *a) { + if (a != NULL) { + return a->content; + } + return NULL; +} + +void test_illegal_free(char *a) { free(a); } + +void util_indirect_free_ptr(PtrFnWarp f_wrap) { + if (f_wrap.f != NULL) (*f_wrap.f)(NULL); +} + +PtrFnWarp util_get_free_fn() { + PtrFnWarp wrap = {free}; + return wrap; +} + +TestCustom *util_create_pointer(char *title, int n) { + TestCustom *book = malloc(sizeof(TestCustom)); + book->book_id = 20000; + book->cat_id = n; + book->price = 0.5; + if (title != NULL) { + strncpy(book->title, title, 10); + book->title[9] = 0; + } + return book; +} + +void util_free_pointer(TestCustom *b) { + if (b != NULL) free(b); +} + +void test_with_update(TestCustom *b) { + if 
(b != NULL) { + int mul = b->book_id * b->price; + printf("mul %d\n", mul); + if (mul == 10000) { + printf("aaa\n"); + } + if (mul == 10001) { + if (strcmp(b->title, "test123") == 0) { + abort(); + } + } + } +} + +void test_custom_cast(void *p) { + printf("ptr %p\n", p); + if (p != NULL) { + int *pi = p; + if (*pi == 12345) { + abort(); + } + } +} + +void test_custom_cast2(int magic, void *arg) { + if (magic != 12345 || arg == NULL) { + return; + } + ListNode *node = (ListNode *)arg; + if (node->val != 55566) { + return; + } + ListNode *next = node->next; + printf("ptr: %p\n", next); + + if (next != NULL) { + int *val_ptr = &next->val; + printf("ptr2: %p\n", val_ptr); + if (*val_ptr == 77788) { + abort(); + } + } +} \ No newline at end of file diff --git a/testsuite/pointer/pointer.h b/testsuite/pointer/pointer.h new file mode 100644 index 0000000..ffdf1bb --- /dev/null +++ b/testsuite/pointer/pointer.h @@ -0,0 +1,95 @@ +/* + Testing for pointers +*/ + +#include "../common.h" + +/* Test opaque pointer */ +struct OpaqueType; +struct OpaqueType *util_create_opaque(); +void util_opaque_init(struct OpaqueType *ptr); +// depend: util_create_opaque,util_opaque_init +void test_opaque_arg(struct OpaqueType *ptr, int magic); + +// depend: util_create_opaque +// infer: @[$0] = $need_init +void test_opaque_arg2(struct OpaqueType *ptr); + +/* Test for type alias for opaque pointer */ +typedef struct HandleWrap { + HANDLE handle; +} HandleWrap; +HANDLE util_handle(); +// depend: util_handle +void test_handle(HANDLE handle, int magic); +// depend: util_handle +void test_handle_wrap(HandleWrap handle, int magic); + +/* Test opeauqe that partial exported */ +struct Partial { + int a; +}; + +typedef struct Partial SemiOpaque; + +SemiOpaque *util_get_partial_pointer(); +// depend: util_get_partial_pointer +// abort +// infer: Partial = $opaque +void test_partial_pointer(SemiOpaque *ptr, int magic); + +/* Test opaque type with warpper */ +typedef struct OpaqueWrapper { + struct 
OpaqueType *opaque; +} OpaqueWrapper; + +void util_init_opaque_type(struct OpaqueType **); +// depend: util_init_opaque_type +void test_init_opaque(OpaqueWrapper *, int); + +/* Test function pointers */ +void util_fn_pointer(void (*f)(TestCustom *p), TestCustom *p); + +// depend: GENERATED_hopper_callback_* +void test_function_pointer_ret(int a, TestCustom (*)(int, int)); + +// depend: GENERATED_hopper_callback_* +void test_multi_func_pointer(TestCustom (*)(int, int), TestCustom (*)(int, int), TestCustom (*)(int)); + +/* Test pointers that makes a reference circle */ + +ListNode *util_reference_circle(); +// depend: util_reference_circle +// ignore +void test_visit_list_node(ListNode *); +// depend: util_reference_circle +// ignore +void test_visit_list_node2(ListNode **, int); +// depend: util_reference_circle +// ignore +void test_visit_list_node3(ListNodeWrapper *); + +/* Test checking for pointer frees */ +TestCustom2 *util_create_TestCustom2(); +char *util_get_content(TestCustom2 *); +// depend: util_get_content,util_create_TestCustom2 +// ignore +void test_illegal_free(char *); + +typedef struct PtrFnWarp { + void (*f)(void *f); +} PtrFnWarp; +void util_indirect_free_ptr(PtrFnWarp f_wrap); +PtrFnWarp util_get_free_fn(); + +/* Explicitly Related calls */ +TestCustom *util_create_pointer(char *title, int n); +void util_free_pointer(TestCustom *b); +// depend: util_create_pointer +void test_with_update(TestCustom *b); + +/* Type Casting */ +void test_custom_cast(void *p); +// void is cast to a type that contains pointer +void test_custom_cast2(int magic, void* arg); + diff --git a/testsuite/strcmp/custom.rule b/testsuite/strcmp/custom.rule new file mode 100644 index 0000000..e69de29 diff --git a/testsuite/strcmp/dict b/testsuite/strcmp/dict new file mode 100644 index 0000000..e69de29 diff --git a/testsuite/strcmp/strcmp.c b/testsuite/strcmp/strcmp.c new file mode 100644 index 0000000..b7a850c --- /dev/null +++ b/testsuite/strcmp/strcmp.c @@ -0,0 +1,91 @@ 
+#include "strcmp.h" + +#include +#include +#include + +void test_strcmp(char *s) { + printf("addr: %p\n", s); + if (s != NULL && strcmp(s, "test") == 0) { + abort(); + } +} + +void test_strncmp(char *s) { + printf("addr: %p\n", s); + if (s != NULL && strncmp(s, "test445566", 10) == 0) { + abort(); + } +} + +static char *TEST_STR = "test112233"; + +void test_strcmp2(char *s) { + printf("addr1: %p, addr2: %p\n", s, TEST_STR); + if (s != NULL && strcmp(s, TEST_STR) == 0) { + abort(); + } +} + +void test_strcmp_indirect(char *s) { + printf("addr: %p\n", s); + if (s != NULL && strlen(s) >= 8) { + printf("s: %d %d %d %d\n", s[4], s[5], s[6], s[7]); + char buf[10]; + strncpy(buf, &s[4], 4); + buf[4] = 0; + printf("buf: %p: %d %d %d %d\n", buf, buf[0], buf[1], buf[2], buf[3]); + if (s != NULL && strcmp(buf, "test") == 0) { + abort(); + } + } +} + +void test_strcmp_in_struct(TestCustom *b) { + if (b != NULL && b->book_id == 20000) { + if (b->cat_id > 12345 && b->cat_id < 22222) { + if (strcmp(b->title, "test") == 0) { + printf("boom at targetp! 
id: %d, cat: %d\n", b->book_id, b->cat_id);
+                abort();
+            }
+        }
+    }
+}
+
+uint32_t TRST_ARR[] = {1, 2, 3, 4, 5, 6, 7, 8};
+void test_memcmp(uint32_t *s, int n) {
+    if (n > 8) n = 8;
+    if (s != NULL && n > 0 && memcmp(s, TRST_ARR, n * 4) == 0) {
+        abort();
+    }
+}
+
+#define VERSION "1.6.37"
+void test_match_version(char* ver) {
+    int match = 1;
+    int i = -1;
+    if (ver != 0)
+    {
+        int found_dots = 0;
+
+        do
+        {
+            i++;
+            printf("%d vs %d \n", ver[i], VERSION[i]);
+            if (ver[i] != VERSION[i]) {
+                // printf("bingo\n");
+                match = 0;
+            }
+            if (ver[i] == '.') {
+                found_dots++;
+            }
+        } while (found_dots < 2 && ver[i] != 0 &&
+                 VERSION[i] != 0);
+    } else {
+        match = 0;
+    }
+
+    if (match != 0) {
+        abort();
+    }
+}
diff --git a/testsuite/strcmp/strcmp.h b/testsuite/strcmp/strcmp.h
new file mode 100644
index 0000000..1d8b7a4
--- /dev/null
+++ b/testsuite/strcmp/strcmp.h
@@ -0,0 +1,24 @@
+/*
+ Testing for string comparison
+*/
+#include /* NOTE(review): the header name is missing from this line; restore it */
+typedef unsigned int uint32_t;
+typedef struct TestCustom {
+    char title[10];
+    int book_id;
+    int cat_id;
+    float price;
+} TestCustom;
+
+
+void test_strcmp(char *s);
+void test_strcmp2(char *s);
+void test_strncmp(char *s);
+void test_strcmp_indirect(char *s);
+void test_strcmp_in_struct(TestCustom *b);
+
+/* compare in a loop */
+void test_match_version(char* ver);
+
+/* Mem Related */
+void test_memcmp(uint32_t *s, int n);
diff --git a/testsuite/test.sh b/testsuite/test.sh
new file mode 100755
index 0000000..dd94f11
--- /dev/null
+++ b/testsuite/test.sh
@@ -0,0 +1,219 @@
+#!/bin/bash
+
+# Minimal stand-in for realpath: absolute paths are echoed unchanged, anything
+# else is prefixed with $PWD (after dropping a leading "./").
+realpath() {
+    [[ $1 = /* ]] && echo "$1" || echo "$PWD/${1#./}"
+}
+BIN_PATH=$(realpath "$0")
+TEST_DIR=$(dirname "$BIN_PATH")
+source "${TEST_DIR}/../tools/style.sh"
+
+HOPPER_CC=$TEST_DIR/../install/hopper-clang
+HOPPER=$TEST_DIR/../hopper
+
+USAGE="Usage: $(basename $0) [make|compile|build|test|build_all|test_all|help] ...
+ make [dir] : Make library in test directory. Env USE_LLVM=1 will use hopper's llvm mode.
+ compile [dir] : Compile library as hopper harness.
+ build [dir] : Make and compile the library into hopper harness.
+ test [dir] [fn] : Run hopper harness for fuzzing test, [fn] is the target function and optional.
+ build_all : Build all libraries.
+ test_all : Test all functions."
+
+CC=${CC:-gcc}
+CFLAGS=${CFLAGS:-}
+LDFLAGS="-g -fPIC -Wall"
+
+if [[ -v USE_LLVM ]]; then
+    CC=${HOPPER_CC}
+fi
+
+DY_LIB_NAME=
+DY_LDFLAGS=-shared
+# Set LIB_NAME to "lib<name>" and DY_LIB_NAME to the shared-library file name
+# for the current OS (.dylib on Darwin, .so on Linux); exit 1 on any other OS.
+init_lib_name() {
+    LIB_NAME=lib$1
+    case "$(uname -s)" in
+    Darwin)
+        DY_LIB_NAME="${LIB_NAME}.dylib"
+        DY_LDFLAGS=-shared
+        ;;
+    Linux)
+        DY_LIB_NAME="${LIB_NAME}.so"
+        DY_LDFLAGS=-shared
+        ;;
+    *)
+        error "Unknown os"
+        exit 1
+        ;;
+    esac
+}
+
+# Source <dir>/config.sh when the test directory provides one.
+load_config() {
+    DIR=$1
+    if [[ -f ${DIR}/config.sh ]]; then
+        # Source the file that was just checked for, not ./config.sh of the
+        # current working directory.
+        source "${DIR}/config.sh"
+    fi
+}
+
+# Build <dir>/<dir>.c into the shared library <dir>/<DY_LIB_NAME> with ${CC}.
+make_clib() {
+    DIR=$1
+    NAME=$1
+    load_config $DIR
+    init_lib_name $NAME
+    SRC=${NAME}.c
+    cmd="${CC} ${CFLAGS} ${LDFLAGS} ${DY_LDFLAGS} -o ${DIR}/${DY_LIB_NAME} ${DIR}/${SRC}"
+    info "${cmd}"
+    eval ${cmd}
+}
+
+# Generate the hopper harness for <dir>: runs "hopper compile" on <dir>/<dir>.h
+# and the shared library <dir>/<DY_LIB_NAME>, writing to <dir>/output.
+# The function's status is the status of "hopper compile".
+compile_hopper() {
+    DIR=$1
+    NAME=$1
+    load_config $DIR
+    init_lib_name $NAME
+    HEADER=${NAME}.h
+    COMPILE_OPTIONS=${COMPILE_OPTIONS:-}
+    export HOPPER_TESTSUITE=1
+    ${HOPPER} compile ${COMPILE_OPTIONS} \
+        --header ${DIR}/${HEADER} \
+        --library ${DIR}/${DY_LIB_NAME} \
+        --output ${DIR}/output
+}
+
+# Fuzz one function of a test directory and check the verdict.
+#   $1 - test directory, $2 - target function.
+# The "//" comment lines directly above the function's declaration in
+# ./<dir>/<dir>.h act as annotations: "ignore" skips the test, "abort" exports
+# TESTSUITE_ABORT=1, "depend: a,b" adds functions to --func-pattern and
+# "infer: ..." is exported as TESTSUITE_INFER.
+# Exits the script with status 1 unless hopper returns 0x69.
+hopper_test() {
+    DIR=$1
+    TEST_FN=$2
+    load_config $DIR
+    rm -rf ${DIR}/output/queue
+    rm -rf ${DIR}/output/crashes
+    rm -rf ${DIR}/output/hangs
+    rm -rf ${DIR}/output/misc
+    unset HOPPER_SEED_DIR
+    [ -d "${DIR}/seeds" ] && export HOPPER_SEED_DIR=${DIR}/seeds
+    unset HOPPER_DICT
+    [ -f "${DIR}/dict" ] && export HOPPER_DICT=${DIR}/dict
+    COMMENTS=$(grep -Pzo "(\/\/.*\n)*\s*\w+\s*${TEST_FN}\s*\(" ./$DIR/$DIR.h)
+    # info "$COMMENTS"
+    if [[ $COMMENTS == *"ignore"* ]]; then
+        warn "ignore test ${TEST_FN}"
+        return 0
+    fi
+    unset TESTSUITE_ABORT
+    if [[ $COMMENTS == *"abort"* ]]; then
+        info "${TEST_FN} is expected to be crash (abort)"
+        export TESTSUITE_ABORT=1
+    fi
+    DEP=$(echo "$COMMENTS" | grep -Po 'depend\s*:\s*\K.+')
+    info "dependencies: $DEP"
+    INFER=$(echo "$COMMENTS" | grep -Po 'infer\s*:\s*\K.+')
+    info "infer: $INFER"
+    export TESTSUITE_INFER="${INFER}"
+    ${HOPPER} fuzz ${DIR}/output \
+        --mem-limit=10000 \
+        --custom-rules ${DIR}/custom.rule \
+        --func-pattern @${TEST_FN},$DEP
+    ret_code=$?
+    echo "ret_code : ${ret_code}"
+    if ((ret_code != 0x69)); then
+        warn "test fail"
+        exit 1
+    fi
+    info "test success"
+}
+
+# Print the usage text and exit with status 1.
+usage() {
+    warn "$USAGE"
+    exit 1
+}
+
+CMD=${1:-help}
+case ${CMD} in
+make)
+    if [ $# -ge 2 ]; then
+        make_clib $2
+    else
+        usage
+    fi
+    ;;
+compile)
+    if [ $# -ge 2 ]; then
+        compile_hopper $2
+    else
+        usage
+    fi
+    ;;
+test)
+    if [ $# -ge 3 ]; then
+        hopper_test $2 $3
+    elif [ $# -ge 2 ]; then
+        dir=$2
+        info "test dir ${dir} ..."
+        fns=$(grep -wo -E 'test_[a-zA-Z_0-9]*' ./$dir/$dir.h)
+        for fn in $fns; do
+            info "test fn ${fn}"
+            hopper_test $dir $fn
+        done
+    else
+        usage
+    fi
+    ;;
+build)
+    if [ $# -ge 2 ]; then
+        make_clib $2
+        compile_hopper $2
+    else
+        usage
+    fi
+    ;;
+build_all)
+    for dir in $TEST_DIR/*; do
+        # Real directories only; the symlink test must look at $dir itself.
+        if [[ -d "$dir" && ! -L "$dir" ]]; then
+            dir=${dir%*/} # remove the trailing "/"
+            dir=${dir##*/} # print everything after the final "/"
+            info "build ${dir} ..."
+            make_clib $dir
+            COMPILE_OPTIONS="--quiet" compile_hopper $dir
+        fi
+    done
+    ;;
+test_all)
+    for dir in $TEST_DIR/*; do
+        # Real directories only; the symlink test must look at $dir itself.
+        if [[ -d "$dir" && ! -L "$dir" ]]; then
+            full_path=${dir%*/} # remove the trailing "/"
+            dir=${full_path##*/} # print everything after the final "/"
+            info "test dir ${dir} ..."
+            fns=$(grep -wo -E 'test_[a-zA-Z_0-9]*' $full_path/$dir.h)
+            for fn in $fns; do
+                info "test fn ${fn}"
+                #LOG_TYPE=warn
+                hopper_test $dir $fn
+            done
+        fi
+    done
+    ;;
+help)
+    usage
+    ;;
+*)
+    usage
+    ;;
+esac
diff --git a/tools/core_affinity.sh b/tools/core_affinity.sh
new file mode 100755
index 0000000..26b4fbe
--- /dev/null
+++ b/tools/core_affinity.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Helpers for pinning a task to an idle CPU core with taskset.
+# NOTE(review): this file only defines functions and never calls them, so it
+# is presumably meant to be sourced by a launcher script - confirm.
+
+CORE_NUM=$(grep -c ^processor /proc/cpuinfo)
+USED_CORES=()
+TASK_SET_CMD=""
+
+# Start with every core 0..CORE_NUM-1 marked as unused (0).
+for ((i = 0; i < CORE_NUM; i++)); do
+    USED_CORES[i]=0
+done
+
+# Flag in USED_CORES every core that some process is pinned to exclusively,
+# judged by a single-number Cpus_allowed_list in /proc/<pid>/status.
+find_free_cores() {
+    local status_file allow_list
+    for status_file in /proc/[0-9]*/status; do
+        # Only entries that report VmSize are considered. The process may
+        # exit between the glob and the read, hence 2>/dev/null.
+        grep -q 'VmSize:' "$status_file" 2>/dev/null || continue
+        allow_list=$(awk '/^Cpus_allowed_list:/ {print $NF}' "$status_file" 2>/dev/null)
+        # Lists ("0,2") and ranges ("0-3") mean the process is not pinned to
+        # one core; an empty value means the entry could not be read.
+        [[ $allow_list =~ ^[0-9]+$ ]] || continue
+        USED_CORES[$allow_list]=1
+    done
+}
+
+# Store "taskset -c <core>" for the first unused core in TASK_SET_CMD.
+# Returns early without touching anything when DOCKER_RUNNING is non-empty;
+# TASK_SET_CMD keeps its previous value when every core is taken.
+find_core_for_task_set() {
+    if [[ -n "${DOCKER_RUNNING:-}" ]]; then
+        return
+    fi
+    find_free_cores
+    # Despite the label, this prints the per-core used flags (0 = free).
+    echo "free cores: ${USED_CORES[@]}"
+
+    local i
+    for ((i = 0; i < CORE_NUM; i++)); do
+        if [ "${USED_CORES[$i]}" -eq "0" ]; then
+            echo "core $i is free, set task to it"
+            TASK_SET_CMD="taskset -c $i"
+            break
+        fi
+    done
+}
diff --git a/tools/style.sh b/tools/style.sh
new file mode 100644
index 0000000..05482af
--- /dev/null
+++ b/tools/style.sh
@@ -0,0 +1,42 @@
+RED='\033[1;31m'
+YELLOW='\033[1;33m'
+GREEN="\033[1;32m"
+NC='\033[0m' # No Color
+
+# determine if the output is on a terminal
+# if so, output colored text
+# otherwise output plain text
+output_is_terminal() {
+    # Exit statuses are limited to 0-255, so "not a terminal" is reported as
+    # the status of the test itself (1) rather than -1.
+    [ -t 1 ]
+}
+
+# Shared printer behind info/warn/error.
+#   $1 - color escape, $2 - tag such as "[+]", remaining args - the message.
+# The message is passed as a %s argument, never as the printf format, so "%"
+# and backslashes in it are printed literally and a newline always follows.
+_style_print() {
+    local color=$1 tag=$2
+    shift 2
+    if output_is_terminal; then
+        printf "${color}%s %s${NC}\n" "$tag" "$*"
+    else
+        printf '%s %s\n' "$tag" "$*"
+    fi
+}
+
+# Print an informational message (green "[+]" on a terminal).
+info() {
+    _style_print "$GREEN" "[+]" "$@"
+}
+
+# Print a warning (yellow "[-]" on a terminal).
+warn() {
+    _style_print "$YELLOW" "[-]" "$@"
+}
+
+# Print an error (red "[x]" on a terminal).
+error() {
+    _style_print "$RED" "[x]" "$@"
+}